0

I want to perform feature selection and nested cross validation on a data set. I wrote this script:

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold,KFold
from sklearn.feature_selection import SelectKBest
#from xgboost import XGBClassifier
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import SelectKBest, RFECV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, recall_score, accuracy_score, precision_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import make_scorer
from sklearn.metrics import precision_score,recall_score,f1_score,roc_auc_score
from sklearn import metrics
from sklearn.datasets import make_classification
from numpy import mean
from sklearn.model_selection import train_test_split
from numpy import std
from sklearn.utils import shuffle
import numpy as np
from sklearn.metrics import roc_curve
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import pickle
#import neptune.new as neptune
import pandas as pd
import shap


#full_X_train = df
full_X_train,full_y_train = make_classification(n_samples =500,n_features = 20, random_state=1, n_informative=10,n_redundant=10)


def run_model_with_grid_search(param_grid={},output_plt_file = 'plt.png',model_name=RandomForestClassifier(),X_train=full_X_train,y_train=full_y_train,model_id='random_forest')

      cv_outer = KFold(n_splits=5,shuffle=True,random_state=1)


      for train_ix,test_ix in cv_outer.split(X_train):
              split_x_train, split_x_test = X_train[train_ix,:],X_train[test_ix,:] #add in .iloc
              split_y_train, split_y_test = y_train[train_ix],y_train[test_ix]  #add in .iloc

              cv_inner = KFold(n_splits=3,shuffle=True,random_state=1)
              model = model_name
              #model.set_params(**best_params)
              rfecv = {'RFECV Features': {'cv': 5,
                            'estimator': model,
                            'step': 1,
                            'scoring': 'accuracy',
                            'verbose': 50}}


              rfecv.fit(split_x_train,split_y_train)
              print(rfecv.n_features_)
              X_selected_train = rfecv.transform(split_x_train)
              X_selected_test = rfecv.transform(split_x_test)

              search = GridSearchCV(model,param_grid=param_grid,scoring='roc_auc',cv=cv_inner,refit=True)
              result = search.fit(X_selected_train,split_y_train)
              best_model = result.best_estimator_
              y_pred_train = best_model.predict(X_selected_train)
              y_pred_test = best_model.predict(X_selected_test)
              accuracy_train = metrics.accuracy_score(split_y_train, y_pred_train)
              accuracy_test = metrics.accuracy_score(split_y_test, y_pred_test)
      return





param_grid = [{
#               'random_forest_with_hpo_no_fs_geno_class__bootstrap':[True,False],
#               'random_forest_with_hpo_no_fs_geno_class__max_depth':[10,20,30,40,50,60,70,80],
#               'random_forest_with_hpo_no_fs_geno_class__max_features':['auto','sqrt'],
               'min_samples_leaf':[1,3,5],
#               'random_forest_with_hpo_no_fs_geno_class__n_estimators':[200,500,700,1000,1500,2000]
              }]

run_model_with_grid_search(param_grid=param_grid)

And I receive the error:

  File "test3.py", line 83, in <module>
    run_model_with_grid_search(param_grid=param_grid)
  File "test3.py", line 57, in run_model_with_grid_search
    rfecv.fit(split_x_train,split_y_train)
AttributeError: 'dict' object has no attribute 'fit'

Could someone please tell me how to fix this? Thank you.

Slowat_Kela
  • 1,377
  • 2
  • 22
  • 60
  • This may answer your question: https://stackoverflow.com/questions/51689145/google-cloud-ml-with-scikit-learn-raises-dict-object-has-no-attribute-lower – DonCarleone Jun 22 '22 at 19:54
  • Thanks for the help, but after looking through that, i'm not sure how to extract a solution from it (it's not doing rfecv, the attribute that i'm stuck on is different, i'm not using json etc). Maybe the solution is there but I'm not a strong enough programmer to see it though. – Slowat_Kela Jun 22 '22 at 19:59

0 Answers0