
I am using the benchmark() function in mlr3 to compare several ML algorithms. One of them is XGB with hyperparameter tuning, so I have an outer resampling to evaluate the overall performance (hold-out sample) and an inner resampling for the hyperparameter tuning (5-fold cross-validation). Besides an estimate of the accuracy for all ML algorithms, I would like to see the feature importance of the tuned XGB. For that I would have to access the tuned model within the benchmark object, but I do not know how to do that: the object returned by benchmark() is a deeply nested list and I do not understand its structure.

This answer on Stack Overflow did not help me, because it uses a different setup (a learner in a pipeline rather than a benchmark object).

This answer on GitHub did not help me either, because it shows how to extract all the information about the benchmark at once, but not how to extract the (tuned) model of a single learner in the benchmark.

Below is the code I am using to carry out the nested resampling. Following the benchmarking, I would like to estimate the feature importance as described here, which requires accessing the tuned XGB model.

require(mlr3verse)

### Parameters

## Tuning

n_folds = 5

grid_search_resolution = 2

measure = msr("classif.acc")

task = tsk("iris")

# Messages mlr3
# https://stackoverflow.com/a/69336802/7219311
options("mlr3.debug" = TRUE)

### Set up hyperparameter tuning
# AutoTuner for the inner resampling

## inner-resampling design
inner_resampling = rsmp("cv", folds = n_folds)
terminator = trm("none")
 
## XGB: no Hyperparameter Tuning
xgb_no_tuning = lrn("classif.xgboost", eval_metric = "mlogloss")
set_threads(xgb_no_tuning, n = 6)

## XGB: AutoTuner
# Setting up Hyperparameter Tuning

xgb_learner_tuning = lrn("classif.xgboost", eval_metric = "mlogloss")
xgb_search_space = ps(
  nrounds = p_int(lower = 100, upper = 500),
  max_depth = p_int(lower = 3, upper = 10),
  colsample_bytree = p_dbl(lower = 0.6, upper = 1)
)
xgb_tuner = tnr("grid_search", resolution = grid_search_resolution)

# implicit parallelisation
set_threads(xgb_learner_tuning, n = 6)

xgb_tuned = AutoTuner$new(
  learner = xgb_learner_tuning,
  resampling = inner_resampling,
  measure = measure,
  terminator = terminator,
  tuner = xgb_tuner,
  search_space = xgb_search_space,
  store_tuning_instance = TRUE
)

## Outer re-sampling: hold-out
outer_resampling = rsmp("holdout")
outer_resampling$instantiate(task)

bm_design = benchmark_grid(
  tasks = task,
  learners = c(lrn("classif.featureless"), 
               xgb_no_tuning,
               xgb_tuned 
  ),
  resamplings = outer_resampling
)

begin_time = Sys.time()
bmr = benchmark(bm_design, store_models = TRUE)
duration = Sys.time() - begin_time

print(duration)

## Results of benchmarking
benchmark_results = bmr$aggregate(measure)
print(benchmark_results)


## Overview

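# the $model slot of each fitted learner, one list entry per outer-resampling iteration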
mlr3misc::map(as.data.table(bmr)$learner, "model")

## Detailed results

# Specification of learners
print(bmr$learners$learner)

Solution

Based on the comments by be-marc

require(mlr3verse)
require(mlr3tuning)
require(mlr3misc)

### Parameters

## Tuning

n_folds = 5

grid_search_resolution = 2

measure = msr("classif.acc")

task = tsk("iris")

# Messages mlr3
# https://stackoverflow.com/a/69336802/7219311
options("mlr3.debug" = TRUE)

### Set up hyperparameter tuning
# AutoTuner for the inner resampling

## inner-resampling design
inner_resampling = rsmp("cv", folds = n_folds)
terminator = trm("none")
 
## XGB: no Hyperparameter Tuning
xgb_no_tuning = lrn("classif.xgboost", eval_metric = "mlogloss")
set_threads(xgb_no_tuning, n = 6)

## XGB: AutoTuner
# Setting up Hyperparameter Tuning

xgb_learner_tuning = lrn("classif.xgboost", eval_metric = "mlogloss")
xgb_search_space = ps(
  nrounds = p_int(lower = 100, upper = 500),
  max_depth = p_int(lower = 3, upper = 10),
  colsample_bytree = p_dbl(lower = 0.6, upper = 1)
)
xgb_tuner = tnr("grid_search", resolution = grid_search_resolution)

# implicit parallelisation
set_threads(xgb_learner_tuning, n = 6)

xgb_tuned = AutoTuner$new(
  learner = xgb_learner_tuning,
  resampling = inner_resampling,
  measure = measure,
  terminator = terminator,
  tuner = xgb_tuner,
  search_space = xgb_search_space,
  store_tuning_instance = TRUE
)

## Outer re-sampling: hold-out
outer_resampling = rsmp("holdout")
outer_resampling$instantiate(task)

bm_design = benchmark_grid(
  tasks = task,
  learners = c(lrn("classif.featureless"), 
               xgb_no_tuning,
               xgb_tuned 
  ),
  resamplings = outer_resampling
)

begin_time = Sys.time()
bmr = benchmark(bm_design, store_models = TRUE)
duration = Sys.time() - begin_time

print(duration)

## Results of benchmarking
benchmark_results = bmr$aggregate(measure)
print(benchmark_results)


## Overview

mlr3misc::map(as.data.table(bmr)$learner, "model")

## Detailed results

# Specification of learners
print(bmr$learners$learner)

## Feature Importance

# extract models from outer sampling
# https://stackoverflow.com/a/69828801

data = as.data.table(bmr)
outer_learners = map(data$learner, "learner")

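# the AutoTuner is the third learner in bm_design; the first two entries are
# NULL because classif.featureless and the untuned xgboost have no $learner slot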
xgb_tuned_model = outer_learners[[3]]

print(xgb_tuned_model)

# print feature importance 
# (presumably gain - mlr3 documentation not clear)
print(xgb_tuned_model$importance())

AW2

1 Answer

library(mlr3tuning)
library(mlr3learners)
library(mlr3misc)

learner = lrn("classif.xgboost", nrounds = to_tune(100, 500), eval_metric = "logloss")

at = AutoTuner$new(
  learner = learner,
  resampling = rsmp("cv", folds = 3),
  measure = msr("classif.ce"),
  terminator = trm("evals", n_evals = 5),
  tuner = tnr("random_search"),
  store_models = TRUE
)

design = benchmark_grid(task = tsk("pima"), learner = at, resampling = rsmp("cv", folds = 5))
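# store_models = TRUE (here and in the AutoTuner above) keeps the fitted
# learners so they can be extracted afterwards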
bmr = benchmark(design, store_models = TRUE)

To extract the learners fitted in the outer loop:

data = as.data.table(bmr)
outer_learners = map(data$learner, "learner")

To extract the learners fitted in the inner loop:

archives = extract_inner_tuning_archives(bmr)
inner_learners = map(archives$resample_result, "learners")
be-marc
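A possible follow-up, added here as an editorial sketch rather than as part of the original answer: each element of outer_learners is the xgboost model refitted with the tuned hyperparameters on one outer training set (see the comments below), so its importance() method can be called on it directly, and the per-fold scores can be averaged if a single ranking is wanted. The helper names (fold_importances, features, imp_matrix) are illustrative only, and the snippet assumes the bmr and outer_learners objects created above:

# feature importance of the tuned xgboost model fitted in each outer fold
fold_importances = lapply(outer_learners, function(l) l$importance())
print(fold_importances)

# optional: average the scores over the outer folds; features that received
# no importance in a fold are missing from importance(), so fill them with 0
features = tsk("pima")$feature_names
imp_matrix = sapply(fold_importances, function(imp) {
  full = setNames(numeric(length(features)), features)
  full[names(imp)] = imp
  full
})
print(sort(rowMeans(imp_matrix), decreasing = TRUE))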
  • Thank you for the reply! Using your code in my example to extract the outer_learners returns three list elements, two of which are NULL and one is the xgb model. Is that because the outer resampling contains only the best model (according to the outer resampling)? – AW2 Nov 04 '21 at 08:13
  • No. The outer resampling is a 5-fold cross-validation, which results in 5 `AutoTuner`s. Each `AutoTuner` gets 4/5 of the complete data set (the train set of the outer resampling) to tune an xgboost model. The final step of each `AutoTuner` is to fit an xgboost model with the optimized hyperparameters on that 4/5 of the data. This fitted xgboost learner is stored in `AutoTuner$learner`. You can extract all 5 tuned xgboost models with `map(data$learner, "learner")` in one go. – be-marc Nov 04 '21 at 11:06
  • 1
    Since you are just using hold-out validation in your example, you get just one xgboost model. The other two entries in `as.data.table(bmr)$learner` are already learners (`classif.featureless` and untuned `classif.xgboost`), so they don't have a learner slot. `map(data$learner, "learner")` only works on `AutoTuners`. You can use the other learners directly to get the importance values. – be-marc Nov 04 '21 at 11:06