Context
I am trying to finetuning my hdbscan algorithm from the hdbscan python library using sklearn RandomizedSearchCV. However I am facing the following error :
scores = scorer(estimator, X_test)
^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'
Code
My code is a conglomerate of different forum answers that I found online :
def DBCV(model, X, y=None):
preds = model.fit_predict(X)
return hdbscan.validity.validity_index(X, preds) if len(set(preds)) > 1 else float('nan')
def HDBScanFinetune(vectors,
min_samples=[10,30,50,60,100],
min_cluster_size=[100,200,300,400,500,600],
cluster_selection_method=['eom','leaf'],
seed_num=0,
verbose=True):
#model setup
hdb = hdbscan.HDBSCAN(gen_min_span_tree=True)
# specify parameters and distributions to sample from
param_dist = {'min_samples': min_samples,
'min_cluster_size':min_cluster_size,
'cluster_selection_method':cluster_selection_method
}
#validity_scroer = "hdbscan__hdbscan___HDBSCAN__validity_index"
validity_scorer = make_scorer(DBCV, greater_is_better=True)
#parameters research
n_iter_search = 2
random_search = RandomizedSearchCV(hdb,
param_distributions=param_dist,
n_iter=n_iter_search,
scoring=validity_scorer,
random_state=seed(seed_num))
random_search.fit(vectors)
if verbose:
print(f"Best Parameters {random_search.best_params_}")
print(f"DBCV score :{random_search.best_estimator_.relative_validity_}")
return {"best_params": random_search.best_params_, "dbcv_score":random_search.best_estimator_.relative_validity_}
If you have any idea that could help me solve this error it would be greatly appreciated. Thanks in advance for you help !