The model was first logged to MLflow using PyCaret:
from pycaret.regression import *
# Configure the PyCaret regression experiment on df_cleaned, predicting 'SalePrice'.
s = setup(df_cleaned,
target = 'SalePrice',
use_gpu = True,
categorical_features = categorical_features_cleaned,
handle_unknown_categorical = True,
unknown_categorical_method = 'most_frequent',
normalize = True,
normalize_method = 'robust',
transformation = True,
feature_selection = True,
fold = 5,
# NOTE(review): log_experiment presumably enables the MLflow logging seen in the
# output below, under the experiment name given next -- confirm against PyCaret docs.
log_experiment = True,
experiment_name = 'project-2',
silent = True
)
# NOTE(review): presumably `best` is the top-ranked model of the comparison -- confirm.
best = compare_models()
While running the setup code block, the following warnings flashed briefly before being replaced by PyCaret's processing message. I managed to capture them by interrupting the kernel.
2022/09/24 18:01:17 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '34dfd8d701a64a18b5043bb0529620bb', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow
2022/09/24 18:01:17 WARNING mlflow.utils: Truncated the value of the key `steps`. Truncated value: `[('dtypes', DataTypes_Auto_infer(categorical_features=['MS SubClass', 'MS Zoning', 'Street',
'Alley', 'Lot Shape', 'Land Contour',
'Utilities', 'Lot Config',
'Land Slope', 'Neighborhood',
'Condition 1', 'Condition 2',
'Bldg Type', 'House Style',
...`
2022/09/24 18:01:17 WARNING mlflow.utils: Truncated the value of the key `dtypes`. Truncated value: `DataTypes_Auto_infer(categorical_features=['MS SubClass', 'MS Zoning', 'Street',
'Alley', 'Lot Shape', 'Land Contour',
'Utilities', 'Lot Config',
'Land Slope', 'Neighborhood',
'Condition 1', 'Condition 2',
'Bldg Type', 'House Style',
'Roof...`
2022/09/24 18:01:17 WARNING mlflow.utils: Truncated the value of the key `dtypes__categorical_features`. Truncated value: `['MS SubClass', 'MS Zoning', 'Street', 'Alley', 'Lot Shape', 'Land Contour', 'Utilities', 'Lot Config', 'Land Slope', 'Neighborhood', 'Condition 1', 'Condition 2', 'Bldg Type', 'House Style', 'Roof Style', 'Roof Matl', 'Exterior 1st', 'Mas Vnr Type', 'Exter Qual', 'Exter Cond', 'Foundation', 'Bsmt Qual', 'Bsmt Cond', 'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2', 'Heating', 'Heating QC', 'Central Air', 'Electrical', 'Kitchen Qual', 'Functional', 'Garage Type', 'Garage Finish', 'Garage C...`
Ignoring the warnings, I downloaded the artifact from MLflow using the following code and tried running it:
from mlflow.artifacts import download_artifacts
import mlflow.pyfunc
# Location of the logged model artifact inside the local ./mlruns directory.
# NOTE(review): the run ID in this path (6ff4f982...) is not the autologging run ID
# printed in the log output above (34dfd8d7...) -- confirm this is the intended run.
full_path = './mlruns/1/6ff4f982fb7446b6a61b1ddf51feedc6/artifacts/model'
# Download the artifact into the current directory; the next line expects it at ./model.
download_artifacts(full_path, dst_path='.')
# Load the downloaded artifact through MLflow's pyfunc loader.
model = mlflow.pyfunc.load_model(model_uri="./model")
# Predict on the cleaned submission frame; the traceback below ends in a failing predict call.
model.predict(df_submit_cleaned)
The error shown is as follows:
---------------------------------------------------------------------------
NotFittedError Traceback (most recent call last)
Cell In [45], line 5, in predict_submit(model, test_df, test_df_original, filename)
3 try:
4 # we run this for gridsearchcv or normal sklearn regression
----> 5 predictions = model.predict(test_df)
6 #zip the predictions with respective id and save to designated filename
File ~\anaconda3\envs\dsi-sg\lib\site-packages\mlflow\pyfunc\__init__.py:373, in PyFuncModel.predict(self, data)
372 data = _enforce_schema(data, input_schema)
--> 373 return self._predict_fn(data)
File ~\anaconda3\envs\dsi-sg\lib\site-packages\mlflow\sklearn\__init__.py:1686, in _autolog.<locals>._apply_sklearn_descriptor_unbound_method_call_fix.<locals>.patched_IffHasAttrDescriptor__get__.<locals>.out(*args, **kwargs)
1685 def out(*args, **kwargs):
-> 1686 return self.fn(obj, *args, **kwargs)
File ~\anaconda3\envs\dsi-sg\lib\site-packages\mlflow\utils\autologging_utils\safety.py:435, in safe_patch.<locals>.safe_patch_function(*args, **kwargs)
431 with set_non_mlflow_warnings_behavior_for_current_thread(
432 disable_warnings=False,
433 reroute_warnings=False,
434 ):
--> 435 return original(*args, **kwargs)
437 # Whether or not the original / underlying function has been called during the
438 # execution of patched code
File ~\AppData\Roaming\Python\Python38\site-packages\sklearn\pipeline.py:408, in Pipeline.predict(self, X, **predict_params)
407 Xt = transform.transform(Xt)
--> 408 return self.steps[-1][-1].predict(Xt, **predict_params)
File ~\anaconda3\envs\dsi-sg\lib\site-packages\mlflow\utils\autologging_utils\safety.py:435, in safe_patch.<locals>.safe_patch_function(*args, **kwargs)
431 with set_non_mlflow_warnings_behavior_for_current_thread(
432 disable_warnings=False,
433 reroute_warnings=False,
434 ):
--> 435 return original(*args, **kwargs)
437 # Whether or not the original / underlying function has been called during the
438 # execution of patched code
File ~\AppData\Roaming\Python\Python38\site-packages\sklearn\linear_model\_bayes.py:316, in BayesianRidge.predict(self, X, return_std)
295 """Predict using the linear model.
296
297 In addition to the mean of the predictive distribution, also its
(...)
314 Standard deviation of predictive distribution of query points.
315 """
--> 316 y_mean = self._decision_function(X)
317 if return_std is False:
File ~\AppData\Roaming\Python\Python38\site-packages\sklearn\linear_model\_base.py:216, in LinearModel._decision_function(self, X)
215 def _decision_function(self, X):
--> 216 check_is_fitted(self)
218 X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
File ~\AppData\Roaming\Python\Python38\site-packages\sklearn\utils\validation.py:72, in _deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
File ~\AppData\Roaming\Python\Python38\site-packages\sklearn\utils\validation.py:1019, in check_is_fitted(estimator, attributes, msg, all_or_any)
1018 if not attrs:
-> 1019 raise NotFittedError(msg % {'name': type(estimator).__name__})
NotFittedError: This BayesianRidge instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
Cell In [58], line 2
1 # Run predictions as per normal with 'model' object
----> 2 predict_submit(model, df_submit_cleaned, df_submit_org, 'downloaded_model')
Cell In [45], line 10, in predict_submit(model, test_df, test_df_original, filename)
7 df_to_submit = zip(test_df_original['Id'],predictions)
8 except:
9 #we run this for pycaret model
---> 10 predictions = predict_model(model, data=test_df)
11 #zip the predictions with respective id and save to designated filename
12 df_to_submit = zip(test_df_original['Id'],predictions['Label'])
File ~\AppData\Roaming\Python\Python38\site-packages\pycaret\regression.py:1854, in predict_model(estimator, data, drift_report, round, verbose, drift_kwargs)
1793 def predict_model(
1794 estimator,
1795 data: Optional[pd.DataFrame] = None,
(...)
1799 drift_kwargs: Optional[dict] = None,
1800 ) -> pd.DataFrame:
1802 """
1803 This function predicts ``Label`` using a trained model. When ``data`` is
1804 None, it predicts label on the holdout set.
(...)
1851
1852 """
-> 1854 return pycaret.internal.tabular.predict_model(
1855 estimator=estimator,
1856 data=data,
1857 drift_report=drift_report,
1858 probability_threshold=None,
1859 encoded_labels=True,
1860 round=round,
1861 verbose=verbose,
1862 ml_usecase=MLUsecase.REGRESSION,
1863 drift_kwargs=drift_kwargs,
1864 )
File ~\AppData\Roaming\Python\Python38\site-packages\pycaret\internal\tabular.py:9116, in predict_model(estimator, data, probability_threshold, encoded_labels, drift_report, raw_score, round, verbose, ml_usecase, display, drift_kwargs)
9113 probability_threshold = estimator.probability_threshold
9114 estimator = get_estimator_from_meta_estimator(estimator)
-> 9116 pred = np.nan_to_num(estimator.predict(X_test_))
9118 try:
9119 score = estimator.predict_proba(X_test_)
File ~\anaconda3\envs\dsi-sg\lib\site-packages\mlflow\sklearn\__init__.py:1686, in _autolog.<locals>._apply_sklearn_descriptor_unbound_method_call_fix.<locals>.patched_IffHasAttrDescriptor__get__.<locals>.out(*args, **kwargs)
1685 def out(*args, **kwargs):
-> 1686 return self.fn(obj, *args, **kwargs)
File ~\anaconda3\envs\dsi-sg\lib\site-packages\mlflow\utils\autologging_utils\safety.py:435, in safe_patch.<locals>.safe_patch_function(*args, **kwargs)
420 if (
421 active_session_failed
422 or autologging_is_disabled(autologging_integration)
(...)
429 # warning behavior during original function execution, since autologging is being
430 # skipped
431 with set_non_mlflow_warnings_behavior_for_current_thread(
432 disable_warnings=False,
433 reroute_warnings=False,
434 ):
--> 435 return original(*args, **kwargs)
437 # Whether or not the original / underlying function has been called during the
438 # execution of patched code
439 original_has_been_called = False
File ~\AppData\Roaming\Python\Python38\site-packages\sklearn\pipeline.py:408, in Pipeline.predict(self, X, **predict_params)
406 for _, name, transform in self._iter(with_final=False):
407 Xt = transform.transform(Xt)
--> 408 return self.steps[-1][-1].predict(Xt, **predict_params)
File ~\anaconda3\envs\dsi-sg\lib\site-packages\mlflow\pyfunc\__init__.py:373, in PyFuncModel.predict(self, data)
371 if input_schema is not None:
372 data = _enforce_schema(data, input_schema)
--> 373 return self._predict_fn(data)
File ~\anaconda3\envs\dsi-sg\lib\site-packages\mlflow\sklearn\__init__.py:1686, in _autolog.<locals>._apply_sklearn_descriptor_unbound_method_call_fix.<locals>.patched_IffHasAttrDescriptor__get__.<locals>.out(*args, **kwargs)
1685 def out(*args, **kwargs):
-> 1686 return self.fn(obj, *args, **kwargs)
File ~\anaconda3\envs\dsi-sg\lib\site-packages\mlflow\utils\autologging_utils\safety.py:435, in safe_patch.<locals>.safe_patch_function(*args, **kwargs)
420 if (
421 active_session_failed
422 or autologging_is_disabled(autologging_integration)
(...)
429 # warning behavior during original function execution, since autologging is being
430 # skipped
431 with set_non_mlflow_warnings_behavior_for_current_thread(
432 disable_warnings=False,
433 reroute_warnings=False,
434 ):
--> 435 return original(*args, **kwargs)
437 # Whether or not the original / underlying function has been called during the
438 # execution of patched code
439 original_has_been_called = False
File ~\AppData\Roaming\Python\Python38\site-packages\sklearn\pipeline.py:407, in Pipeline.predict(self, X, **predict_params)
405 Xt = X
406 for _, name, transform in self._iter(with_final=False):
--> 407 Xt = transform.transform(Xt)
408 return self.steps[-1][-1].predict(Xt, **predict_params)
File ~\AppData\Roaming\Python\Python38\site-packages\pycaret\internal\preprocess.py:364, in DataTypes_Auto_infer.transform(self, dataset, y)
362 # drop any columns that were asked to drop
363 data.drop(columns=self.features_todrop, errors="ignore", inplace=True)
--> 364 data = data[self.final_training_columns]
366 # also make sure that all the column names are string
367 data.columns = [str(i) for i in data.columns]
File ~\anaconda3\envs\dsi-sg\lib\site-packages\pandas\core\frame.py:3511, in DataFrame.__getitem__(self, key)
3509 if is_iterator(key):
3510 key = list(key)
-> 3511 indexer = self.columns._get_indexer_strict(key, "columns")[1]
3513 # take() does not accept boolean indexers
3514 if getattr(indexer, "dtype", None) == bool:
File ~\anaconda3\envs\dsi-sg\lib\site-packages\pandas\core\indexes\base.py:5796, in Index._get_indexer_strict(self, key, axis_name)
5793 else:
5794 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> 5796 self._raise_if_missing(keyarr, indexer, axis_name)
5798 keyarr = self.take(indexer)
5799 if isinstance(key, Index):
5800 # GH 42790 - Preserve name from an Index
File ~\anaconda3\envs\dsi-sg\lib\site-packages\pandas\core\indexes\base.py:5859, in Index._raise_if_missing(self, key, indexer, axis_name)
5856 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
5858 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
-> 5859 raise KeyError(f"{not_found} not in index")
KeyError: "['MS SubClass', 'MS Zoning', 'Street', 'Alley', 'Lot Shape', 'Land Contour', 'Utilities', 'Lot Config', 'Land Slope', 'Neighborhood', 'Condition 1', 'Condition 2', 'Bldg Type', 'House Style', 'Roof Style', 'Roof Matl', 'Exterior 1st', 'Mas Vnr Type', 'Exter Qual', 'Exter Cond', 'Foundation', 'Bsmt Qual', 'Bsmt Cond', 'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2', 'Heating', 'Heating QC', 'Central Air', 'Electrical', 'Kitchen Qual', 'Functional', 'Garage Type', 'Garage Finish', 'Garage Cond', 'Paved Drive', 'Fence', 'Misc Feature', 'Sale Type'] not in index"
I am using MLflow 1.27.0.
I would really appreciate any advice on how to resolve this. Thanks!