I am trying to use grid search cv and get best params on multiple models which used make_pipeline and make_column_transformer.
numerical_features = ['Pclass','Parch','Fare', 'Age']
categorical_features= ['Sex','Embarked']
numerical_pipeline= make_pipeline(SimpleImputer(), StandardScaler())
categorical_pipeline= make_pipeline(SimpleImputer(strategy='most_frequent'),
OneHotEncoder())
preprocessor = make_column_transformer((numerical_pipeline, numerical_features),
(categorical_pipeline,categorical_features ))
X = train_data[['Parch','Fare', 'Age','Pclass','Sex','Embarked']]
y = train_data['Survived']
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV
# Créer les pipelines pour chaque type de modèle
knn_pipeline = make_pipeline(preprocessor, KNeighborsClassifier())
rf_pipeline = make_pipeline(preprocessor, RandomForestClassifier())
sgd_pipeline =make_pipeline(preprocessor, SGDClassifier())
# Créer un dictionnaire de modèles pour l'utilisation avec GridSearchCV
models = {'knn': knn_pipeline, 'randomforest': rf_pipeline, 'sgd':sgd_pipeline}
# Paramètres pour la recherche par grille
param_grid = {'knn__kneighborsclassifier__n_neighbors': [5, 10, 15],
'randomforest__randomforestclassifier__n_estimators': [100, 200, 300],
'randomforest__randomforestclassifier__max_depth': [5, 10, 15],
'sgd__sgdclassifier__penalty': ['l1', 'l2', 'elasticnet']}
# Créer un objet GridSearchCV pour chaque modèle
grid_searches = {name: GridSearchCV(model, param_grid, cv=5)
for name, model in models.items()}
# Ajuster chaque modèle à l'ensemble de formation et imprimer les meilleurs paramètres
for name, grid_search in grid_searches.items():
grid_search.fit(X, y)
print(f"Best parameters for {name}: {grid_search.best_params_}")
and I get this error:
ValueError: Invalid parameter knn for estimator Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('pipeline-1',
Pipeline(steps=[('simpleimputer',
SimpleImputer()),
('standardscaler',
StandardScaler())]),
['Pclass', 'Parch', 'Fare',
'Age']),
('pipeline-2',
Pipeline(steps=[('simpleimputer',
SimpleImputer(strategy='most_frequent')),
('onehotencoder',
OneHotEncoder())]),
['Sex', 'Embarked'])])),
('kneighborsclassifier', KNeighborsClassifier())]). Check the list of available parameters with `estimator.get_params().keys()`.