I get this error when using a classifier and SFS as part of a scikit-learn pipeline:
Traceback (most recent call last):
File "main.py", line 45, in <module>
rs.fit(X_train, y_train)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/base.py", line 1151, in wrapper
return fit_method(estimator, *args, **kwargs)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/model_selection/_search.py", line 898, in fit
self._run_search(evaluate_candidates)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/model_selection/_search.py", line 1419, in _run_search
evaluate_candidates(ParameterGrid(self.param_grid))
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/model_selection/_search.py", line 845, in evaluate_candidates
out = parallel(
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/utils/parallel.py", line 65, in __call__
return super().__call__(iterable_with_config)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/joblib/parallel.py", line 1855, in __call__
return output if self.return_generator else list(output)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/joblib/parallel.py", line 1784, in _get_sequential_output
res = func(*args, **kwargs)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/utils/parallel.py", line 127, in __call__
return self.function(*args, **kwargs)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 754, in _fit_and_score
test_scores = _score(estimator, X_test, y_test, scorer, error_score)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
scores = scorer(estimator, X_test, y_test)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 459, in _score
y_pred = method_caller(clf, "decision_function", X, pos_label=pos_label)
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 86, in _cached_call
result, _ = _get_response_values(
File "/home/runner/SFSpredictproba/venv/lib/python3.10/site-packages/sklearn/utils/_response.py", line 103, in _get_response_values
raise ValueError(
ValueError: Pipeline should either be a classifier to be used with response_method=decision_function or the response_method should be 'predict'. Got a regressor with response_method=decision_function instead.
Code to reproduce (replit):
# Minimal reproduction: grid-search a pipeline made of a median imputer and a
# sequential feature selector (SFS) that wraps a logistic regression, scoring
# the search with ROC AUC.
# NOTE: the imports are not part of this snippet; `np`, `SFS` and the other
# names used below must already be in scope. `SFS` is presumably an alias for
# scikit-learn's SequentialFeatureSelector -- confirm.
clf = LogisticRegression()
cv = StratifiedKFold(n_splits=2)
# The same `cv` splitter is used inside the selector and by the outer search.
sfs = SFS(clf, n_features_to_select=1, scoring='accuracy', cv=cv, n_jobs=-1)
imputer = SimpleImputer(missing_values=np.nan, strategy='median')
# Search over the `class_weight` of the estimator nested inside the
# `sequentialfeatureselector` pipeline step.
lr_param_grid = {
'sequentialfeatureselector__estimator__class_weight': ['balanced', None]
}
# The pipeline ends with the selector: `clf` is only handed to SFS and is not
# added as a final pipeline step.
# NOTE(review): this is presumably why the ValueError in the traceback reports
# the Pipeline as "a regressor" when the "roc_auc" scorer asks for
# `decision_function`; appending `clf` as the last step may be what was
# intended -- confirm.
pipe = make_pipeline(imputer, sfs)
# error_score="raise" makes the search raise on a failing fit/score.
rs = GridSearchCV(estimator=pipe,
param_grid=lr_param_grid,
cv=cv,
scoring="roc_auc",
error_score="raise")
# Generate random data for binary classification
X, y = make_classification(
n_samples=10, # Number of samples
n_features=3, # Number of features
n_informative=2, # Number of informative features
n_redundant=1, # Number of redundant features
n_clusters_per_class=1, # Number of clusters per class
random_state=42)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# This is the call shown at the top of the traceback (main.py, line 45).
rs.fit(X_train, y_train)
I get the same error with other classifiers, with other performance metrics, and with the mlxtend
version of SFS.
Versions of packages:
- python = 3.10.8
- scikit-learn = 1.3.0