I need to use a custom metric with early stopping rounds for LGBMClassifier, and I get this error:
ValueError: Classification metrics can't handle a mix of binary and continuous targets
Description of the dataset:
My y_train is 0 or 1, I have 215,000 rows, and the DataFrame is unbalanced with about 12 times more 0s than 1s. My X_train has 795 features, so it's hard to show here.
My custom score:
from sklearn.metrics import confusion_matrix, make_scorer

def custom_metric(y, y_predict):
    is_higher_better = False
    cm = confusion_matrix(y, y_predict)
    mesure = cm[1][0] * 10 + cm[0][1]   # 10 * false negatives + false positives
    return round(mesure / (11 * len(y)), 3)

custom_score = make_scorer(custom_metric, greater_is_better=False)
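For illustration, on made-up hard 0/1 labels (not my real data) the metric behaves as I expect:

import numpy as np

y_true = np.array([0, 0, 0, 0, 1, 1])
y_hat = np.array([0, 1, 0, 0, 0, 1])    # one false positive, one false negative
print(custom_metric(y_true, y_hat))     # (1*10 + 1) / (11 * 6) -> 0.167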
Tests I tried:
I tested my custom metric with other methods, like:
- cross_val_score:
cross_val_score(LGBMClassifier(verbose=-1, objective="binary"), X_train, y_train, scoring=custom_score, cv=3)
==> The results look OK to me: array([-0.071, -0.071, -0.071])
- GridSearchCV:
params = {'learning_rate': [0.1, 0.01]}
grid = GridSearchCV(LGBMClassifier(random_state=0, verbose = -1, class_weight={0:1, 1:12}, objective="binary", n_estimators = 100), params, scoring = custom_score, cv = 3, n_jobs = -1)
grid.fit(X_train, y_train)
==> grid.best_score_ == -0.0453
- early_stopping_rounds:
modelLGB1 = LGBMClassifier(
    metric="custom", random_state=42, learning_rate=0.1, n_estimators=1000,
    max_depth=5, subsample=0.8, colsample_bytree=0.8, n_jobs=-1,
    class_weight={0: 1, 1: 12}, verbose=-1, objective="binary",
    early_stopping_rounds=20,
).fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_test, y_test)],
    eval_metric=custom_metric,
)
BestEstimator = modelLGB1.best_iteration_
print(modelLGB1.best_score_)
print(BestEstimator)
==> I get this error and I can't find the solution:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[19], line 2
1 # Step 1: Fix the learning rate and number of estimators for tuning tree-based parameters.
----> 2 modelLGB1 = LGBMClassifier(metric="custom", random_state = 42, learning_rate = 0.1, n_estimators = 1000, max_depth=5, subsample=0.8, colsample_bytree=0.8, n_jobs = -1, class_weight={0:1, 1:12}, verbose=-1, objective="binary", early_stopping_rounds=20).fit(X_train, y_train, eval_set = [(X_train, y_train), (X_test, y_test)], eval_metric = custom_metric)
3 BestEstimator = modelLGB1.best_iteration_
4 print(modelLGB1.best_score_)
File ~\anaconda3\Lib\site-packages\lightgbm\sklearn.py:1142, in LGBMClassifier.fit(self, X, y, sample_weight, init_score, eval_set, eval_names, eval_sample_weight, eval_class_weight, eval_init_score, eval_metric, feature_name, categorical_feature, callbacks, init_model)
1139 else:
1140 valid_sets.append((valid_x, self._le.transform(valid_y)))
-> 1142 super().fit(
1143 X,
1144 _y,
1145 sample_weight=sample_weight,
1146 init_score=init_score,
1147 eval_set=valid_sets,
1148 eval_names=eval_names,
1149 eval_sample_weight=eval_sample_weight,
1150 eval_class_weight=eval_class_weight,
1151 eval_init_score=eval_init_score,
1152 eval_metric=eval_metric,
1153 feature_name=feature_name,
1154 categorical_feature=categorical_feature,
1155 callbacks=callbacks,
1156 init_model=init_model
1157 )
1158 return self
File ~\anaconda3\Lib\site-packages\lightgbm\sklearn.py:842, in LGBMModel.fit(self, X, y, sample_weight, init_score, group, eval_set, eval_names, eval_sample_weight, eval_class_weight, eval_init_score, eval_group, eval_metric, feature_name, categorical_feature, callbacks, init_model)
839 evals_result: _EvalResultDict = {}
840 callbacks.append(record_evaluation(evals_result))
--> 842 self._Booster = train(
843 params=params,
844 train_set=train_set,
845 num_boost_round=self.n_estimators,
846 valid_sets=valid_sets,
847 valid_names=eval_names,
848 feval=eval_metrics_callable, # type: ignore[arg-type]
849 init_model=init_model,
850 feature_name=feature_name,
851 callbacks=callbacks
852 )
854 self._evals_result = evals_result
855 self._best_iteration = self._Booster.best_iteration
File ~\anaconda3\Lib\site-packages\lightgbm\engine.py:272, in train(params, train_set, num_boost_round, valid_sets, valid_names, feval, init_model, feature_name, categorical_feature, keep_training_booster, callbacks)
270 if valid_sets is not None:
271 if is_valid_contain_train:
--> 272 evaluation_result_list.extend(booster.eval_train(feval))
273 evaluation_result_list.extend(booster.eval_valid(feval))
274 try:
File ~\anaconda3\Lib\site-packages\lightgbm\basic.py:3791, in Booster.eval_train(self, feval)
3759 def eval_train(
3760 self,
3761 feval: Optional[Union[_LGBM_CustomEvalFunction, List[_LGBM_CustomEvalFunction]]] = None
3762 ) -> List[_LGBM_BoosterEvalMethodResultType]:
3763 """Evaluate for training data.
3764
3765 Parameters
(...)
3789 List with (train_dataset_name, eval_name, eval_result, is_higher_better) tuples.
3790 """
-> 3791 return self.__inner_eval(self._train_data_name, 0, feval)
File ~\anaconda3\Lib\site-packages\lightgbm\basic.py:4536, in Booster.__inner_eval(self, data_name, data_idx, feval)
4534 if eval_function is None:
4535 continue
-> 4536 feval_ret = eval_function(self.__inner_predict(data_idx), cur_data)
4537 if isinstance(feval_ret, list):
4538 for eval_name, val, is_higher_better in feval_ret:
File ~\anaconda3\Lib\site-packages\lightgbm\sklearn.py:235, in _EvalFunctionWrapper.__call__(self, preds, dataset)
233 argc = len(signature(self.func).parameters)
234 if argc == 2:
--> 235 return self.func(labels, preds) # type: ignore[call-arg]
236 elif argc == 3:
237 return self.func(labels, preds, dataset.get_weight()) # type: ignore[call-arg]
Cell In[4], line 16, in custom_metric(y, y_predict)
14 def custom_metric(y, y_predict):
15 is_higher_better = False
---> 16 cm = confusion_matrix(y, y_predict)
17 mesure = cm[1][0]*10 + cm[0][1]
18 return round((mesure/(11*(len(y)))),3)
File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:211, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
205 try:
206 with config_context(
207 skip_parameter_validation=(
208 prefer_skip_nested_validation or global_skip_validation
209 )
210 ):
--> 211 return func(*args, **kwargs)
212 except InvalidParameterError as e:
213 # When the function is just a wrapper around an estimator, we allow
214 # the function to delegate validation to the estimator, but we replace
215 # the name of the estimator by the name of the function in the error
216 # message to avoid confusion.
217 msg = re.sub(
218 r"parameter of \w+ must be",
219 f"parameter of {func.__qualname__} must be",
220 str(e),
221 )
File ~\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:326, in confusion_matrix(y_true, y_pred, labels, sample_weight, normalize)
231 @validate_params(
232 {
233 "y_true": ["array-like"],
(...)
242 y_true, y_pred, *, labels=None, sample_weight=None, normalize=None
243 ):
244 """Compute confusion matrix to evaluate the accuracy of a classification.
245
246 By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`
(...)
324 (0, 2, 1, 1)
325 """
--> 326 y_type, y_true, y_pred = _check_targets(y_true, y_pred)
327 if y_type not in ("binary", "multiclass"):
328 raise ValueError("%s is not supported" % y_type)
File ~\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:93, in _check_targets(y_true, y_pred)
90 y_type = {"multiclass"}
92 if len(y_type) > 1:
---> 93 raise ValueError(
94 "Classification metrics can't handle a mix of {0} and {1} targets".format(
95 type_true, type_pred
96 )
97 )
99 # We can't have more than one value on y_type => The set is no more needed
100 y_type = y_type.pop()
ValueError: Classification metrics can't handle a mix of binary and continuous targets
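Reading the traceback, the wrapper calls my function as self.func(labels, preds), where preds comes from self.__inner_predict(data_idx), so I suspect y_predict holds the raw predicted probabilities (hence "continuous" targets) rather than 0/1 classes. To see what actually arrives, I thought about a throwaway wrapper like this (debug_metric is just a name I invented for the test):

import numpy as np

def debug_metric(y, y_predict):
    # print what LightGBM actually passes to the custom eval function
    print(type(y_predict), np.asarray(y_predict).dtype, np.asarray(y_predict)[:5])
    return 'debug', 0.0, False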
I also tried this solution:
def custom_metric(y, y_predict):
    is_higher_better = False
    cm = confusion_matrix(y, y_predict)
    mesure = cm[1][0] * 10 + cm[0][1]
    return 'custom_metric', round(mesure / (11 * len(y)), 3), is_higher_better
But I get the same error as above.
I guess the problem comes from my y_pred, but I can't see them since they are computed directly by the model, and I don't have this problem with GridSearchCV or cross_val_score.
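If y_predict really contains probabilities (objective="binary"), I was thinking of thresholding them before building the confusion matrix, something like the sketch below; the 0.5 cutoff and the whole idea are only my assumption, and I don't know whether this is the intended way to use a custom eval_metric with early stopping:

import numpy as np
from sklearn.metrics import confusion_matrix

def custom_metric_eval(y, y_predict):
    # assumption: y_predict are probabilities of class 1, turn them into 0/1 labels
    y_label = (np.asarray(y_predict) >= 0.5).astype(int)
    cm = confusion_matrix(y, y_label)
    mesure = cm[1][0] * 10 + cm[0][1]
    return 'custom_metric', round(mesure / (11 * len(y)), 3), False

Is that the right approach? Thanks for your help and be kind.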