HDBScan Random Search Finetuning

Question

Context

I am trying to finetuning my hdbscan algorithm from the hdbscan python library using sklearn RandomizedSearchCV. However I am facing the following error :

scores = scorer(estimator, X_test)
         ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Code

My code is a conglomerate of different forum answers that I found online :

def DBCV(model, X, y=None):
    preds = model.fit_predict(X)
    return hdbscan.validity.validity_index(X, preds) if len(set(preds)) > 1 else float('nan')

def HDBScanFinetune(vectors, 
                    min_samples=[10,30,50,60,100], 
                    min_cluster_size=[100,200,300,400,500,600],
                    cluster_selection_method=['eom','leaf'],
                    seed_num=0, 
                    verbose=True):
    
    #model setup
    hdb = hdbscan.HDBSCAN(gen_min_span_tree=True)

    # specify parameters and distributions to sample from
    param_dist = {'min_samples': min_samples,
                  'min_cluster_size':min_cluster_size,  
                  'cluster_selection_method':cluster_selection_method
                 }

    #validity_scroer = "hdbscan__hdbscan___HDBSCAN__validity_index"
    validity_scorer = make_scorer(DBCV, greater_is_better=True)

    #parameters research  
    n_iter_search = 2
    random_search = RandomizedSearchCV(hdb,
                                       param_distributions=param_dist,
                                       n_iter=n_iter_search,
                                       scoring=validity_scorer,
                                       random_state=seed(seed_num))
    
    random_search.fit(vectors)

    if verbose:
        print(f"Best Parameters {random_search.best_params_}")
        print(f"DBCV score :{random_search.best_estimator_.relative_validity_}")
    return {"best_params": random_search.best_params_, "dbcv_score":random_search.best_estimator_.relative_validity_}

If you have any idea that could help me solve this error it would be greatly appreciated. Thanks in advance for you help !

HDBScan Random Search Finetuning

0 Answers0