I'm using the mlr3pipelines package to define a pipeline object named "stack" for stacking ensemble learning.
However, the po() function does not accept a "stack" key, and I can't find an alternative that is suitable for stacking models. This is the error I get:
Error: Element with key 'stack' not found in DictionaryPipeOp!
What is a good way to set up stacking with mlr3pipelines?
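From the mlr3 gallery it looks like a stack is normally assembled from individual PipeOps rather than from a single "stack" key: po("learner_cv") produces cross-validated out-of-fold predictions of each base learner, po("featureunion") combines them, and the meta learner sits on top. Below is a rough, untested sketch of that pattern. The single LightGBM base learner, the po("nop") passthrough of the original features, and the GAM meta learner are just placeholders taken from my code further down; I'm not sure whether this is the intended replacement for po("stack").

library(mlr3)
library(mlr3pipelines)
library(mlr3extralearners)  # provides classif.lightgbm and classif.gam

# level 0: out-of-fold predictions of the base learner become new features;
# po("nop") passes the original features through to the meta learner as well
level0 <- gunion(list(
  po("learner_cv", lrn("classif.lightgbm", predict_type = "prob")),
  po("nop")
)) %>>% po("featureunion")

# level 1: the meta learner is trained on the stacked features
stack_graph   <- level0 %>>% lrn("classif.gam", predict_type = "prob")
stack_learner <- GraphLearner$new(stack_graph)  # usable like any other Learner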
Here is my current (broken) code in full, in case it helps:
super_train <- function(data, n_models = 10, cpus = 32) {
  # data is a list: the training set first, the test set second
  train <- as.data.table(data[[1]])
  test <- as.data.table(data[[2]])
  task <- TaskClassif$new(id = "my_task", backend = train, target = "CR")
  # base learners: n_models LightGBM classifiers
  base_learners <- lapply(1:n_models, function(i) {
    # predict_type = "prob" so that classif.auc can be computed during tuning
    lrn_lgbm <- lrn("classif.lightgbm", predict_type = "prob", objective = "binary")
    lrn_lgbm$param_set$values$num_iterations <- 1000  # set a large number of trees
    # early stopping (LightGBM only applies this when a validation set is provided)
    lrn_lgbm$param_set$values$early_stopping_rounds <- 50
    lrn_lgbm$param_set$values$verbose <- -1  # suppress LightGBM's output
    lrn_lgbm
  })
  # meta learners: random forests
  meta_learners <- lapply(1:n_models, function(i) {
    lrn_rf <- lrn("classif.randomForest", predict_type = "response", mtry = 3, ntree = 500, importance = "gini")
    return(lrn_rf)
  })
  base_resampling <- rsmp("cv", folds = 10)

  # set up stacking: this is the line that throws the DictionaryPipeOp error above
  stack <- mlr3pipelines::po(.obj = "stack", base_learners = base_learners, metalearner = lrn("classif.gam"))

  # resampling used when training the stack
  stack_resampling <- rsmp("cv", folds = 10)
  # search space for the LightGBM base learners (paradox's ps() helper;
  # note these are xgboost-style names, classif.lightgbm may expect e.g.
  # learning_rate / bagging_fraction instead)
  ps_lgbm <- ps(
    booster          = p_fct(levels = c("gbtree", "gblinear")),
    eta              = p_dbl(lower = 0.01, upper = 0.06),
    max_depth        = p_int(lower = 3L, upper = 10L),
    min_child_weight = p_dbl(lower = 2, upper = 8),
    subsample        = p_dbl(lower = 0.5, upper = 0.5),
    colsample_bytree = p_dbl(lower = 0.5, upper = 0.8)
  )
  # hyperparameter tuning of the base learners
  base_tuning <- tune_grid(
    base_learners,
    task,
    resampling = base_resampling,
    measure = msr("classif.auc"),
    control = ctrl_tuning(
      method = "random",
      repeats = 10,
      show_info = FALSE,
      save_pred = "none"
    ),
    par.set = ps_lgbm
  )
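  # NOTE: tune_grid() and ctrl_tuning() above are not mlr3 functions, this part
  # still needs porting. I think the mlr3tuning equivalent for a single base
  # learner would look roughly like the sketch below (untested, assumes
  # library(mlr3tuning)):
  #
  #   instance <- TuningInstanceSingleCrit$new(
  #     task = task,
  #     learner = base_learners[[1]],
  #     resampling = base_resampling,
  #     measure = msr("classif.auc"),
  #     search_space = ps_lgbm,
  #     terminator = trm("evals", n_evals = 10)
  #   )
  #   tnr("random_search")$optimize(instance)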
  # train the tuned base learners (mlr3 learners are trained via their $train() method)
  base_models <- lapply(base_tuning$learners, function(l) {
    l$train(task)
  })
  # train the meta-learners on the stack
  # (this is the part I'm least sure about: l is never actually used here)
  meta_models <- lapply(meta_learners, function(l) {
    train(stack, task, stack_resampling)
  })
  # pick the best meta model and refit the stack on the full task
  best_meta_model <- meta_models[[which.max(sapply(meta_models, function(m) m$aggregated_result$score))]]
  best_base_models <- base_models  # the base learners were already trained above
  final_model <- stack$train(best_base_models, best_meta_model, task)
  return(final_model)
}
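If the PipeOp graph sketched above is the right way to build the stack, I assume the end of super_train() would collapse to something like this (again untested; stack_learner is the GraphLearner from that sketch):

stack_learner$train(task)
prediction <- stack_learner$predict_newdata(test)
prediction$score(msr("classif.auc"))

Is that graph-based approach the recommended way to do stacking here, or is there a dedicated stacking PipeOp that I'm missing?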