1

I'm using Optuna to tune LightGBM (LGBM). The random seeds are set, but each run of Optuna produces a different set of best params.

Here's my optuna code:

def get_hpo_params(opt_X_train, opt_X_val, opt_y_train, opt_y_val, n_trials=180, cat_features=""):
    """Tune LightGBM hyper-parameters with Optuna and return the best set.

    A study named ``lgbm_hpo`` is created with a seeded ``TPESampler`` and a
    ``HyperbandPruner``; each trial trains a binary-classification booster on
    the training split and is scored by ROC AUC on the validation split.

    Args:
        opt_X_train: Training features, passed to ``lgb.Dataset``.
        opt_X_val: Validation features, passed to ``lgb.Dataset`` and to
            ``clf.predict``.
        opt_y_train: Training labels.
        opt_y_val: Validation labels; also used as ground truth for
            ``roc_auc_score``.
        n_trials: Number of Optuna trials to run.
        cat_features: Forwarded as ``categorical_feature`` to both datasets.

    Returns:
        The ``params`` of the study's best trial.

    Note:
        The sampler seed and LightGBM ``random_state`` are fixed here, but
        that alone may not make runs repeatable across interpreter restarts:
        per the Optuna ``HyperbandPruner`` documentation, the Python hash seed
        (``PYTHONHASHSEED``) may also need to be fixed before the interpreter
        starts for the pruning behaviour to be reproducible.
    """

    def objective(trial):
        """Train one booster with the trial's suggested params; return val AUC."""
        # Fresh Dataset objects are built for every trial.
        dtrain = lgb.Dataset(opt_X_train, opt_y_train, categorical_feature=cat_features)
        dval = lgb.Dataset(opt_X_val, opt_y_val, categorical_feature=cat_features)

        params = {
            # Fixed settings shared by every trial.
            "objective": "binary",
            "metric": "auc",
            "random_state": 10,
            "verbosity": -1,
            "boosting": "gbdt",
            "num_threads": 4,

            # Search space explored by the sampler.
            "num_leaves": trial.suggest_int("num_leaves", 4, 30),
            "learning_rate": trial.suggest_loguniform("learning_rate", 0.005, 1.0),
            "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.1, 1.0),
            "feature_fraction": trial.suggest_uniform("feature_fraction", 0.1, 1.0),
            "bagging_freq": trial.suggest_int("bagging_freq", 10, 30),
            "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1000, 3000),
            "num_iterations": trial.suggest_int("num_iterations", 1000, 3000),
        }

        # Reports the validation "auc" to Optuna during training so the
        # study's pruner can stop unpromising trials early.
        pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "auc")
        clf = lgb.train(
            params, dtrain, valid_sets=[dval], verbose_eval=False, callbacks=[pruning_callback]
        )

        y_val_pred = clf.predict(opt_X_val)
        return roc_auc_score(opt_y_val, y_val_pred)

    start = timeit.default_timer()
    study = optuna.create_study(
        direction="maximize",
        pruner=optuna.pruners.HyperbandPruner(),
        sampler=optuna.samplers.TPESampler(seed=10),
        study_name='lgbm_hpo',
    )
    study.optimize(objective, n_trials=n_trials)

    print("Number of finished trials: {}".format(len(study.trials)))
    best_trial = study.best_trial
    print(f"Best trial performance: {best_trial.value}")
    stop = timeit.default_timer()
    print('Time (min): ', (stop - start)/60)

    return best_trial.params

If I restart my IPython notebook kernel, the results are different. Is there any way to make Optuna's output reproducible?

Cherry Wu
  • 3,844
  • 9
  • 43
  • 63

1 Answer

1

I finally solved the problem. I needed to set the Python hash seed (the PYTHONHASHSEED environment variable) on my laptop using this method: https://gerrychain.readthedocs.io/en/latest/topics/reproducibility.html#set-pythonhashseed-0

This solution is mentioned in the Optuna documentation as the way to make the pruning behaviour reproducible: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.pruners.HyperbandPruner.html

Cherry Wu
  • 3,844
  • 9
  • 43
  • 63