I get the ValueError in the title when I try to perform a GridSearchCV on an MLP classifier. Of course, I checked whether any np.inf or np.nan values exist in my dataset, but they don't:
print(np.any(np.isnan(X)))
returns False
print(np.all(np.isfinite(X)))
Returns True
I also cast all my values to np.float64:
X = X.values.astype(np.float64)
Y = Y.values
My scikit-learn version is 0.22.2.post1 (latest)
The code I'm trying to execute:
from scipy.stats import randint as sp_randint
# Hyper-parameter search for an MLP classifier: build three per-solver
# parameter grids, score every candidate with several metrics under a
# stratified 5-fold split, then report and keep the best estimator.

# Two candidate architectures: one with two hidden layers and one with a
# single hidden layer. The sizes are drawn once, here, so every sub-grid
# below shares the same two tuples.
# NOTE(review): for scipy.stats distributions the positional argument after
# the shape parameters (low, high) is `loc`, not `size` - so the trailing 1
# presumably shifts the draw by one rather than requesting one sample.
# Confirm the intent.
hiddenlayers = [(sp_randint.rvs(100,600,1),sp_randint.rvs(100,600,1),), (sp_randint.rvs(100,600,1),)]
# Regularisation strengths 10**-2, 10**-1, 10**0 (assuming `np` is numpy).
alpha_range = 10.0 ** np.arange(-2, 1)
# One sub-grid per solver; only the 'sgd' sub-grid also varies
# 'learning_rate'.
param_grid_MLP = [{'solver': ['lbfgs'],
'hidden_layer_sizes': hiddenlayers,
'activation': ['identity','tanh', 'relu', 'logistic'],
'alpha': alpha_range
},
{'solver': ['sgd'],
'hidden_layer_sizes': hiddenlayers,
'activation': ['identity','tanh', 'relu', 'logistic'],
'alpha': alpha_range,
'learning_rate':['constant','invscaling','adaptive']
},
{'solver': ['adam'],
'hidden_layer_sizes': hiddenlayers,
'activation': ['identity','tanh', 'relu', 'logistic'],
'alpha': alpha_range
}]
# Base estimator; random_state is fixed at 0.
mlp = MLPClassifier(random_state=0)
cross_validation = StratifiedKFold(5)
# Earlier variant of the scoring dict that also tracked precision; kept
# commented out by the author.
# scoring = {'AUC': 'roc_auc',
# 'Accuracy': make_scorer(accuracy_score),
# 'Recall':make_scorer(recall_score,pos_label='crafted'),
# 'Precision': make_scorer(precision_score,pos_label='crafted')}
# Multi-metric scoring: every candidate is scored on all three metrics.
# NOTE(review): the traceback quoted below fails inside roc_auc_score at
# check_array(y_score), i.e. the non-finite values are in the scores the
# fitted model produced for the 'AUC' scorer, not in the input features.
# Presumably some parameter combinations diverge during training (e.g. on
# unscaled features); verify by standardising the inputs or by fitting the
# suspect combinations individually.
scoring = {'AUC': 'roc_auc',
'Accuracy': make_scorer(accuracy_score),
'Recall':make_scorer(recall_score,pos_label='crafted')}
# refit='Recall' names the metric from `scoring` passed as the refit
# criterion.
# NOTE(review): `cv` receives the result of `.split(...)` rather than the
# splitter object itself; passing `cross_validation` directly is the more
# common form - confirm which is intended.
grid_search_MLP = GridSearchCV(estimator=mlp,
param_grid=param_grid_MLP,
scoring=scoring,cv=cross_validation.split(X_train,y_train),
refit='Recall',
n_jobs=-1,
verbose=True)
grid_search_MLP.fit(X_train,y_train)
# Report the winning candidate.
print('Best score: {}'.format(grid_search_MLP.best_score_))
print('Best index: {}'.format(grid_search_MLP.best_index_))
print('Best parameters: {}'.format(grid_search_MLP.best_params_))
# Rebind `mlp` to the search's best_estimator_, replacing the unfitted base
# estimator created above.
mlp = grid_search_MLP.best_estimator_
# Bare expression: only has a visible effect in an interactive/notebook
# session, where the value is echoed.
mlp
The full error traceback:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/joblib/externals/loky/process_executor.py", line 418, in _process_worker
r = call_item()
File "/usr/local/lib/python3.7/dist-packages/joblib/externals/loky/process_executor.py", line 272, in __call__
return self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.7/dist-packages/joblib/_parallel_backends.py", line 608, in __call__
return self.func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/joblib/parallel.py", line 256, in __call__
for func, args, kwargs in self.items]
File "/usr/local/lib/python3.7/dist-packages/joblib/parallel.py", line 256, in <listcomp>
for func, args, kwargs in self.items]
File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 544, in _fit_and_score
test_scores = _score(estimator, X_test, y_test, scorer)
File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 591, in _score
scores = scorer(estimator, X_test, y_test)
File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 87, in __call__
*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 332, in _score
return self._sign * self._score_func(y, y_pred, **self._kwargs)
File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_ranking.py", line 369, in roc_auc_score
y_score = check_array(y_score, ensure_2d=False)
File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py", line 578, in check_array
allow_nan=force_all_finite == 'allow-nan')
File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py", line 60, in _assert_all_finite
msg_dtype if msg_dtype is not None else X.dtype)
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').