I am working on tuning a random forest with GridSearchCV. I was able to get results for SVMs using GridSearchCV, but I am facing problems getting the same kind of results for the random forest. When I run the model I get the following error.
> # Tuning hyper-parameters for precision
>
> ---------------------------------------------------------------------------
> AttributeError                            Traceback (most recent call last)
> <ipython-input-26-2d3979d9cbc5> in <module>()
>      24
>      25 clf = GridSearchCV(clf, tuned_parameters, cv=10,
> ---> 26                    scoring='%s_macro' % score)
>      27 clf.fit(X_train, Y_train)
>      28
>
> /usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in __init__(self, estimator, param_grid, scoring, fit_params, n_jobs, iid, refit, cv, verbose, pre_dispatch, error_score, return_train_score)
>    1075                          return_train_score=return_train_score)
>    1076         self.param_grid = param_grid
> -> 1077         _check_param_grid(param_grid)
>    1078
>    1079     def _get_param_iterator(self):
>
> /usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in _check_param_grid(param_grid)
>     346
>     347     for p in param_grid:
> --> 348         for name, v in p.items():
>     349             if isinstance(v, np.ndarray) and v.ndim > 1:
>     350                 raise ValueError("Parameter array should be one-dimensional.")
>
> AttributeError: 'set' object has no attribute 'items'
I used the following code to set the parameters. Please help me figure out what is wrong; I am running this on a sentiment analysis dataset, which is in CSV format.
# Create a validation dataset: split out a hold-out set
X = df.ix[:, 1:18]  # feature columns
Y = df.ix[:, 0]     # class label column
validation_size = 0.20
#seed = 7
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=validation_size, random_state=0)
# Test options and evaluation metric
num_folds = 10
num_instances = len(X_train)
scoring = 'accuracy'
# Set the parameters by cross-validation
tuned_parameters = [{RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                                            max_depth=2, max_features='auto', max_leaf_nodes=None,
                                            min_impurity_decrease=0.0, min_impurity_split=None,
                                            min_samples_leaf=1, min_samples_split=2,
                                            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
                                            oob_score=False, random_state=0, verbose=0, warm_start=False)}]
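# (I suspect this is the part the error is complaining about -- the {...} above
#  is a Python set literal, not a dict mapping parameter names to candidate values.)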
X, Y = make_classification(n_samples=1000, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(X, Y)
scores = ['precision', 'recall']
for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(clf, tuned_parameters, cv=10,
                       scoring='%s_macro' % score)
    clf.fit(X_train, Y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    # print("The model is trained on the full development set.")
    # print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = Y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()
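From the GridSearchCV documentation, I understand that the second argument (param_grid) should be a dict, or a list of dicts, that maps parameter names to lists of candidate values, rather than a set containing an estimator. Below is a minimal sketch of what I think the grid should look like for a random forest; the candidate values are just guesses on my part, and it reuses the X_train/Y_train split from above. Is this the right way to set it up?

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Sketch of a param_grid: parameter names mapped to lists of candidate values
# (the specific values here are arbitrary guesses, not tuned choices).
tuned_parameters = [{'n_estimators': [10, 50, 100],
                     'max_depth': [2, 5, None],
                     'max_features': ['auto', 'sqrt'],
                     'min_samples_split': [2, 5]}]

# The base estimator is passed separately from the grid; only random_state is fixed here.
clf = GridSearchCV(RandomForestClassifier(random_state=0),
                   tuned_parameters, cv=10,
                   scoring='precision_macro')
# clf.fit(X_train, Y_train)  # using the split defined earlier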