I am trying to run `RandomizedSearchCV` on several classification models in a for loop for hyperparameter tuning. Every model runs without a problem except CatBoost, and the CatBoost failure only shows up once I wrap the model in a `Pipeline` inside the function defined below. Why does the search fail only for CatBoost?
My Code:
```python
import time
import pickle
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (BaggingClassifier, AdaBoostClassifier,
                              GradientBoostingClassifier, RandomForestClassifier)
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

# Building the models:
lr = LogisticRegression()
knn = KNeighborsClassifier()
svm = SVC()
dt = DecisionTreeClassifier(random_state=1)
bag = BaggingClassifier(random_state=1)
adb = AdaBoostClassifier(random_state=1)
gb = GradientBoostingClassifier(random_state=1)
rf = RandomForestClassifier(random_state=1)
xgb = XGBClassifier()
cgb = CatBoostClassifier()
lgb = LGBMClassifier()
```
```python
# Defining a function:
def fun_exp(model, name, x_tr, x_te, y_tr, y_te):
    start = time.time()
    pipe = Pipeline([('scale', StandardScaler()),
                     ('pca', PCA(n_components=62)),
                     (name, model)])
    # `params` is the global search grid set in the loop further down
    rscv = RandomizedSearchCV(pipe, params, cv=10, random_state=1)
    rscv.fit(x_tr, y_tr)
    rscv_best_params = rscv.best_params_
    rscv_best_score = rscv.best_score_
    rscv_score_train = rscv.score(x_tr, y_tr)
    rscv_score_test = rscv.score(x_te, y_te)
    rscv_pred = rscv.predict(x_te)
    end = time.time()
    with open(name, 'wb') as f:  # save the fitted search object
        pickle.dump(rscv, f)
    rscv_duration = end - start
    return (rscv_best_params, rscv_best_score, rscv_score_train,
            rscv_score_test, rscv_duration, rscv_pred)
```
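(Side note: `fun_exp` reads `params` from the enclosing scope rather than taking it as an argument. A variant that passes the grid in explicitly should be equivalent; `fun_exp2` and `param_grid` below are just illustrative names, not part of my actual code.)

```python
# Sketch only: same pipeline/search, with the grid passed explicitly
# instead of read from the global `params`.
def fun_exp2(model, name, param_grid, x_tr, y_tr):
    pipe = Pipeline([('scale', StandardScaler()),
                     ('pca', PCA(n_components=62)),
                     (name, model)])
    rscv = RandomizedSearchCV(pipe, param_grid, cv=10, random_state=1)
    rscv.fit(x_tr, y_tr)
    return rscv
```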
Running the function in a for loop and saving each model's results in a dictionary:
```python
exp_result = {}
# Fitting & testing the models
for model, name in zip([lr, knn, svm, dt, bag, adb, gb, rf, lgb, cgb, xgb],
                       ['Logistic Regression', 'KNeighbors', 'SVM', 'DecisionTree', 'Bagging', 'AdaBoost',
                        'GradientBoost', 'Random Forest', 'LightGBM', 'CatBoost', 'XGBoost']):
    if model == lr:
        params = {'Logistic Regression__solver': ['liblinear', 'lbfgs', 'sag', 'saga'],
                  'Logistic Regression__penalty': ['elasticnet', 'l1', 'l2', 'none'],
                  'Logistic Regression__multi_class': ['auto', 'ovr', 'multinomial'],
                  'Logistic Regression__C': [0.1, 1, 10],
                  'Logistic Regression__tol': [0.00001, 0.0001, 0.001],
                  'Logistic Regression__class_weight': ['balanced', None]}
    if model == knn:
        params = {'KNeighbors__n_neighbors': np.arange(5, 50, 5),
                  'KNeighbors__weights': ['uniform', 'distance'],
                  'KNeighbors__algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                  'KNeighbors__leaf_size': np.arange(10, 51, 10),
                  'KNeighbors__metric': ['minkowski', 'euclidean', 'manhattan']}
    if model == svm:
        params = {'SVM__gamma': [10, 1, 0.1, 0.01, 0.001, 0.0001],
                  'SVM__C': [1000, 100, 10, 1, 0.1, 0.01, 0.001],
                  'SVM__kernel': ['poly', 'rbf', 'sigmoid'], 'SVM__class_weight': ['balanced', None],
                  'SVM__decision_function_shape': ['ovo', 'ovr']}
    if model == dt:
        params = {'DecisionTree__criterion': ['gini', 'entropy', 'log_loss'],
                  'DecisionTree__splitter': ['best', 'random'],
                  'DecisionTree__max_depth': [None] + list(np.arange(1, 11)),
                  'DecisionTree__max_features': np.arange(8, 21, 2),
                  'DecisionTree__random_state': [1], 'DecisionTree__class_weight': ['balanced', None]}
    if model == bag:
        params = {'Bagging__n_estimators': [10, 30, 50, 100, 500],
                  'Bagging__max_features': np.arange(8, 21, 2), 'Bagging__random_state': [1]}
    if model == adb:
        params = {'AdaBoost__n_estimators': [10, 30, 50, 100, 500],
                  'AdaBoost__learning_rate': [0.001, 0.01, 0.1, 1, 10],
                  'AdaBoost__algorithm': ['SAMME.R', 'SAMME'], 'AdaBoost__random_state': [1]}
    if model == gb:
        params = {'GradientBoost__loss': ['log_loss', 'exponential'],
                  'GradientBoost__learning_rate': [0.001, 0.01, 0.1, 1, 10],
                  'GradientBoost__n_estimators': [10, 30, 50, 100, 500],
                  'GradientBoost__max_depth': np.arange(1, 11), 'GradientBoost__random_state': [1],
                  'GradientBoost__max_features': np.arange(8, 21, 2)}
    if model == rf:
        params = {'Random Forest__n_estimators': [10, 30, 50, 100, 500],
                  'Random Forest__criterion': ['gini', 'entropy', 'log_loss'],
                  'Random Forest__max_depth': np.arange(1, 11),
                  'Random Forest__max_features': np.arange(8, 21, 2), 'Random Forest__random_state': [1]}
    if model == lgb:
        params = {'LightGBM__boosting_type': ['gbdt', 'rf'], 'LightGBM__num_leaves': np.arange(20, 40),
                  'LightGBM__max_depth': np.arange(1, 11), 'LightGBM__learning_rate': [0.001, 0.01, 0.1, 1, 10],
                  'LightGBM__n_estimators': [10, 30, 50, 100, 500],
                  'LightGBM__class_weight': ['balanced', None], 'LightGBM__random_state': [1]}
    if model == cgb:
        params = {'CatBoost__learning_rate': [0.001, 0.01, 0.1, 1], 'CatBoost__n_estimators': [100, 500],
                  'CatBoost__max_depth': np.arange(1, 11), 'CatBoost__random_state': [1],
                  'CatBoost__feature_border_type': ['Median', 'Uniform', 'UniformAndQuantiles',
                                                    'GreedyLogSum', 'MaxLogSum', 'MinEntropy']}
    if model == xgb:
        le = LabelEncoder()
        y_tr = le.fit_transform(y_tr)
        y_te = le.transform(y_te)  # transform (not fit_transform) so test labels use the same encoding
        params = {'XGBoost__n_estimators': [10, 30, 50, 100, 500], 'XGBoost__max_depth': np.arange(1, 11),
                  'XGBoost__max_leaves': np.arange(0, 150), 'XGBoost__learning_rate': [0.001, 0.01, 0.1, 1, 10],
                  'XGBoost__random_state': [1]}
    exp_result[name] = fun_exp(model, name, x_tr, x_te, y_tr, y_te)
```
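To narrow things down: as far as I can tell, the CatBoost iteration of the loop boils down to the standalone snippet below (same pipeline, same grid, no loop), so an answer explaining why this minimal version fails while the other estimators work would solve my problem. `x_tr` and `y_tr` are the same training data used above.

```python
# Minimal version of what the loop runs for CatBoost
pipe = Pipeline([('scale', StandardScaler()),
                 ('pca', PCA(n_components=62)),
                 ('CatBoost', CatBoostClassifier())])
params = {'CatBoost__learning_rate': [0.001, 0.01, 0.1, 1],
          'CatBoost__n_estimators': [100, 500],
          'CatBoost__max_depth': np.arange(1, 11),
          'CatBoost__random_state': [1],
          'CatBoost__feature_border_type': ['Median', 'Uniform', 'UniformAndQuantiles',
                                            'GreedyLogSum', 'MaxLogSum', 'MinEntropy']}
rscv = RandomizedSearchCV(pipe, params, cv=10, random_state=1)
rscv.fit(x_tr, y_tr)  # x_tr, y_tr: same training data as above
```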