3

I am creating a very simple Kubeflow pipeline that has the following steps:

  1. Data Loading
  2. Data Preprocessing
  3. Training
  4. Evaluation

The first three steps run successfully, but as soon as I try to run the evaluator, that step fails with the error mentioned above. Here is the Python code for the Kubeflow pipeline:

import kfp
from kfp import dsl
from kfp.compiler import Compiler


def dataloader_op():
    """Create the "Data Loader" pipeline step.

    The step runs ``python dataloader.py`` inside the
    ``racahu23/ml-blueprint_dataloader:8`` image, passing the platform,
    bucket name and remote directory name on the command line, and declares
    ``/home/user/datasets`` as the file output named ``datasets``.

    Returns:
        The configured ``dsl.ContainerOp``.
    """
    cli_args = [
        "dataloader.py",
        "--platform", "aws",
        "--bucketname", "new-classification",
        "--remoteDirectoryName", "datasets",
    ]
    declared_outputs = {"datasets": "/home/user/datasets"}

    return dsl.ContainerOp(
        name="Data Loader",
        image="racahu23/ml-blueprint_dataloader:8",
        command="python",
        arguments=cli_args,
        file_outputs=declared_outputs,
    )
def datapreprocessor(datasets):
    """Create the "Data Preprocessor" pipeline step.

    The step runs ``python datapreprocessor.py`` inside the
    ``racahu23/ml-blueprint_preprocessor:7`` image and declares
    ``/home/user/Data`` as the file output named ``Data``.

    Args:
        datasets: Value forwarded to the script's ``--input_dir`` option.

    Returns:
        The configured ``dsl.ContainerOp``.
    """
    cli_args = [
        "datapreprocessor.py",
        "--input_dir", datasets,
        "--output_dir", "Data",
    ]
    declared_outputs = {"Data": "/home/user/Data"}

    return dsl.ContainerOp(
        name="Data Preprocessor",
        image="racahu23/ml-blueprint_preprocessor:7",
        command="python",
        arguments=cli_args,
        file_outputs=declared_outputs,
    )


def trainer(Data):
    """Create the "Model Trainer" pipeline step.

    The step runs ``python primary_trainer.py`` inside the
    ``racahu23/ml-blueprint_trainer:12`` image with a fixed set of training
    options, and declares ``/home/user/model`` as the file output named
    ``model``.

    Args:
        Data: Value forwarded to the script's ``--input_dir`` option.

    Returns:
        The configured ``dsl.ContainerOp``.
    """
    # Options are laid out as flag/value pairs; numeric values stay integers
    # exactly as the step was originally declared.
    cli_args = [
        "primary_trainer.py",
        "--input_dir", Data,
        "--output_dir", "model/",
        "--num_labels", 9,
        "--logging_dir", "logs/",
        "--num_train_epochs", 1,
        "--evaluation_strategy", "epoch",
        "--per_device_train_batch_size", 32,
        "--per_device_eval_batch_size", 64,
        "--save_strategy", "epoch",
        "--logging_strategy", "epoch",
        "--eval_steps", 100,
    ]
    declared_outputs = {"model": "/home/user/model"}

    return dsl.ContainerOp(
        name="Model Trainer",
        image="racahu23/ml-blueprint_trainer:12",
        command="python",
        arguments=cli_args,
        file_outputs=declared_outputs,
    )
def evaluator(model, Data, script="evaluator.py"):
    """Create the "Model Evaluator" pipeline step.

    The step runs ``python <script>`` inside the
    ``racahu23/ml-blueprint_evaluator:3`` image and declares
    ``/home/user/output`` as the file output named ``output``.

    Args:
        model: Value forwarded to the script's ``--model_dir`` option.
        Data: Value forwarded to the script's ``--data_dir`` option.
        script: Python file the container executes.
            NOTE(review): the default ``"evaluator.py"`` is an assumption;
            confirm the entry-point file name shipped in the evaluator
            image and adjust if it differs.

    Returns:
        The configured ``dsl.ContainerOp``.
    """
    return dsl.ContainerOp(
        name="Model Evaluator",
        command="python",
        image="racahu23/ml-blueprint_evaluator:3",
        arguments=[
            # The script path has to come first, as in the other steps.
            # Without it the container runs `python --data_dir ...`, and the
            # interpreter rejects `--data_dir` as an unknown option of its own
            # before any evaluation code is reached.
            script,
            "--data_dir", Data,
            "--model_dir", model,
            "--output_dir", "output/",
            "--save_strategy", "accuracy",
        ],
        file_outputs={
            "output": "/home/user/output",
        },
    )

@dsl.pipeline(
    name="ML_BLUEPRINT",
    description="A generic kubeflow pipeline that trains hugging face transformers for text classification"
)
def blueprint_pipeline():
    # Wires the four steps together: load -> preprocess -> train -> evaluate.
    # Each downstream step receives the upstream file output wrapped in
    # dsl.InputArgumentPath and is additionally ordered with .after().

    # Step 1: data loading.
    load_step = dataloader_op()

    # Step 2: preprocessing, fed with the loader's "datasets" output.
    loaded_datasets = dsl.InputArgumentPath(load_step.outputs["datasets"])
    preprocess_step = datapreprocessor(loaded_datasets).after(load_step)

    # Step 3: training, fed with the preprocessor's "Data" output.
    training_data = dsl.InputArgumentPath(preprocess_step.outputs["Data"])
    train_step = trainer(training_data).after(preprocess_step)

    # Step 4: evaluation, fed with the trained "model" and the same
    # preprocessed "Data" output.
    trained_model = dsl.InputArgumentPath(train_step.outputs["model"])
    evaluation_data = dsl.InputArgumentPath(preprocess_step.outputs["Data"])
    evaluator(trained_model, evaluation_data).after(train_step)

# Create a client for the Kubeflow Pipelines endpoint and submit the pipeline
# as a run; the pipeline function takes no parameters, so no arguments are set.
client = kfp.Client(
    host="http://localhost:8080",
    namespace="kubeflow",
)
client.create_run_from_pipeline_func(
    blueprint_pipeline,
    arguments={},
)

The error that is displayed is: [image: enter image description here]

R.Ahuja
  • 51
  • 4

0 Answers0