How to use MLflow autolog together with custom parameters

Question

I'm trying to log my ML trials with mlflow.keras.autolog and mlflow.log_param simultaneously (mlflow v1.22.0). However, only the values produced by autolog are recorded; the parameters I pass to log_param are not.

# Look up the experiment by name so the run below is created under its ID.
experiment = mlf_client.get_experiment_by_name(experiment_name)
with mlflow.start_run(experiment_id=experiment.experiment_id):
    # Keras autologging is switched on here, i.e. after the run is already open.
    mlflow.keras.autolog(log_input_examples=True)
    # Custom parameters logged by hand in the same run.
    mlflow.log_param('batch_size', self.batch_size)
    mlflow.log_param('training_set_size', len(kwargs['training_ID_list']))
    mlflow.log_param('testing_set_size', len(kwargs['testing_ID_list']))
    
    # NOTE(review): presumably train_NN_model() calls Keras fit(), which is
    # what autolog hooks into -- its body is not shown here, confirm.
    history = self.train_NN_model(**kwargs)

I know I can use log_param with log_model to save the model itself, but then I lose some useful stuff that autolog can record for me automatically (e.g., model summary).

Is it possible to use autolog with custom parameters for logging?

score 2 · Answer 1 · answered May 20 '23 at 06:26

Yes, you can. Let me demonstrate this by adapting examples from the MLflow docs:

from pprint import pprint
import numpy as np
from sklearn.linear_model import LinearRegression
import mlflow
from mlflow import MlflowClient


def fetch_logged_data(run_id):
    """Collect what was logged for the run identified by *run_id*.

    Returns a 4-tuple ``(params, metrics, tags, artifacts)`` where:

    * ``params`` and ``metrics`` are taken as-is from the run's data,
    * ``tags`` holds only the tags whose key does not start with
      ``"mlflow."``,
    * ``artifacts`` is the list of paths of the artifacts listed under the
      ``"model"`` artifact path.
    """
    mlflow_client = MlflowClient()
    run_data = mlflow_client.get_run(run_id).data

    # Keep every tag except the ones in the "mlflow." namespace.
    user_tags = {}
    for tag_key, tag_value in run_data.tags.items():
        if tag_key.startswith("mlflow."):
            continue
        user_tags[tag_key] = tag_value

    model_entries = mlflow_client.list_artifacts(run_id, "model")
    artifact_paths = [entry.path for entry in model_entries]

    return run_data.params, run_data.metrics, user_tags, artifact_paths


# enable autologging (done once, before the run is started)
mlflow.sklearn.autolog()

# prepare training data
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
# targets follow y = 1 * x0 + 2 * x1 + 3
y = np.dot(X, np.array([1, 2])) + 3

# train a model
model = LinearRegression()
with mlflow.start_run() as run:
    model.fit(X, y)
    # custom values are logged explicitly, inside the same run as the fit() above
    mlflow.log_param('my-param',1)
    mlflow.log_metric('my-metric',2)

# fetch logged data
params, metrics, tags, artifacts = fetch_logged_data(run.info.run_id)

# expected output: the custom 'my-param' is listed next to the estimator parameters
pprint(params)
# {'copy_X': 'True',
#  'fit_intercept': 'True',
#  'my-param': '1',
#  'n_jobs': 'None',
#  'normalize': 'False'}

# expected output: the custom 'my-metric' is listed next to the training metrics
pprint(metrics)
# {'my-metric': 2.0,
#  'training_score': 1.0,
#  'training_mean_absolute_error': 2.220446049250313e-16,
#  'training_mean_squared_error': 1.9721522630525295e-31,
#  'training_r2_score': 1.0,
#  'training_root_mean_squared_error': 4.440892098500626e-16}

pprint(tags)
# {'estimator_class': 'sklearn.linear_model._base.LinearRegression',
#  'estimator_name': 'LinearRegression'}

# paths of the artifacts stored under "model" for this run
pprint(artifacts)
# ['model/MLmodel', 'model/conda.yaml', 'model/model.pkl']

See this fully working example that I shared as a gist.

How to use MLflow autolog together with custom parameters

1 Answer