I am creating a custom myflow.pyfunc object that I would like to save to MLFlow and retrieve later. I don't understand the relationship between the object that is saved with mlflow.pyfunc.save_model()
, and the one that is retrieved with mlflow.pyfunc.load_model()
.
The loaded model is a 'PythonModelContext' object rather than my original python class. When I try to use the predict method in the loaded version I get an error.
Here I initialise MLflow and create a dummy example of my class
# load
import os
import tempfile
from pathlib import Path
import pandas as pd
import mlflow
from mlflow.tracking import MlflowClient
import mlflow.pyfunc
from mlflow.pyfunc import PythonModelContext
# initialise MLFlow
mlflow_var = os.getenv('HYMIND_REPO_TRACKING_URI')
mlflow.set_tracking_uri(mlflow_var)
client = MlflowClient()
# Define the class that will be used for fit and predict (dummy example)
class PredictSpeciality(mlflow.pyfunc.PythonModel):
def fit(self):
print('fit')
d = {'col1': [1, 2], 'col2': [3, 4]}
df = pd.DataFrame(data=d)
return df
def predict(self, X, y=None):
print('predict')
print(X.shape)
return
If I now run the class as it is the predict method works:
# Use of this predictor before saving works fine
m = PredictSpeciality()
df = m.fit()
m.predict(df)
But if I save the model to the registry, and then re-load it, the predict method no longer works:
counter +=1
exp_name = 'MLflow-test-' + str(counter)
os.environ["MLFLOW_EXPERIMENT_NAME"] = exp_name
experiment_id = mlflow.create_experiment(exp_name)
mlflow.set_experiment(exp_name)
experiment = dict(mlflow.get_experiment_by_name(exp_name))
experiment_id = experiment['experiment_id']
with mlflow.start_run():
# dummy code here for fitting a model
m = PredictSpeciality()
df = m.fit()
# mark best run
runs = mlflow.search_runs()
best_run_id = runs['run_id'][0]
# tag the best run and save model
with mlflow.start_run(run_id=best_run_id):
mlflow.set_tag('best_run_', 1)
mlflow_model_path = f'/data/hymind/repo/{experiment_id}/{best_run_id}/artifacts/model/'
mlflow.pyfunc.save_model(path=mlflow_model_path, python_model=m)
# end experiment and register best model
model_name = 'MLflow-test' + str(counter)
registered_model = mlflow.register_model(f'runs:/{best_run_id}/model', model_name)
# now attempt to make a prediction using the loaded model
model_version = 1
m = mlflow.pyfunc.load_model(f"models:/{model_name}/{model_version}")
m.predict(df)
In this case, I get the attribute error
AttributeError: 'PythonModelContext' object has no attribute 'shape'
How do I get the original model back from the 'PythonModelContext' object?