I've tried running this tidymodels workflow to see if I can combine two models, with different features and missing values. Being able to combine different data sources to model the same outcomes would be very handy for real world data, just not sure if its possible with tidymodels stacks as of yet. Is there something obviously wrong with this workflow for stacks to fail?
library(tidyverse)
library(tidymodels)
library(workflows)
library(probably)
library(tune)
library(stacks)
set.seed(1234)
mtcars_tb <- mtcars %>%
as_tibble() %>%
mutate(vs = c(sample(vs, 0), rep(NA_integer_, 10), sample(vs, 22)),
disp = c(sample(disp, 22), rep(NA_integer_, 10)))
train_test_split <- initial_split(mtcars_tb)
train <- training(train_test_split)
test <- testing(train_test_split)
cv_fold_mtc <- vfold_cv(mtcars_tb)
recipe_naomit <- recipe(mpg ~ disp + vs, data = train) %>%
step_naomit(all_predictors(), skip = TRUE) %>%
step_normalize(all_numeric())
ctrl_grid <- control_stack_grid()
ctrl_res <- control_stack_resamples()
wflow <- workflow() %>%
add_recipe(recipe_naomit)
lasso_mod <-
linear_reg() %>%
set_engine("glmnet") %>%
set_args(penalty = tune(),
mixture = 1)
wflow <-
wflow %>%
add_model(lasso_mod)
lasso_tune <-
tune_grid(
object = wflow,
resamples = cv_fold_mtc,
grid = 10,
control = ctrl_grid
)
recipe_rm_vs <- recipe(mpg ~ disp, data = train) %>%
step_naomit(all_predictors(), skip = TRUE) %>%
step_normalize(all_numeric())
wflow <- wflow %>%
update_recipe(recipe_rm_vs)
linear_mod <-
linear_reg() %>%
set_engine("lm")
wflow <-
wflow %>%
update_model(linear_mod)
linear_tune_disp <- fit_resamples(
wflow,
resamples = cv_fold_mtc,
control = ctrl_res
)
model_st <- stacks() %>%
stack_add(lasso_tune) %>%
stack_add(linear_tune_disp) %>%
stack_blend() %>%
stack_fit()
#> Warning: Values are not uniquely identified; output will contain list-cols.
#> * Use `values_fn = list` to suppress this warning.
#> * Use `values_fn = length` to identify where the duplicates arise
#> * Use `values_fn = {summary_fun}` to summarise duplicates
#> x Fold01: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold02: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold03: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold04: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold05: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold06: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold07: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold08: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold09: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold10: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> Warning: All models failed in tune_grid(). See the `.notes` column.
#> Error: All of the models failed. See the .notes column.
model_st
#> Error in eval(expr, envir, enclos): object 'model_st' not found