Is there a reason the recipe code snippet for the xgboost classifier has `one_hot = TRUE`? This creates "n" dummy variables instead of "n-1". I usually set it to `FALSE`, but I just want to make sure I'm not missing something.
Code -
# Reproducible example: mtcars as a tibble, with `cyl` converted to a factor.
data <- mtcars %>%
  as_tibble() %>%
  mutate(cyl = as.factor(cyl))

# Request the xgboost template code for this formula and data set.
usemodels::use_xgboost(mpg ~ cyl, data = data)
Output -
# Preprocessing: nominal columns (outcomes excluded) go through step_novel()
# and are then dummy-encoded with one_hot = TRUE; step_zv() is applied to all
# predictors afterwards.
xgboost_recipe <-
  recipe(formula = mpg ~ cyl, data = data) %>%
  step_novel(all_nominal(), -all_outcomes()) %>%
  step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%
  step_zv(all_predictors())

# Model: boosted trees on the "xgboost" engine in regression mode, with every
# listed hyperparameter marked for tuning via tune().
xgboost_spec <-
  boost_tree(
    trees = tune(),
    min_n = tune(),
    tree_depth = tune(),
    learn_rate = tune(),
    loss_reduction = tune(),
    sample_size = tune()
  ) %>%
  set_mode("regression") %>%
  set_engine("xgboost")

# Bundle the recipe and the model specification into one workflow.
xgboost_workflow <-
  workflow() %>%
  add_recipe(xgboost_recipe) %>%
  add_model(xgboost_spec)

set.seed(28278)
# The stop() calls are placeholders: replace them with a resampling object and
# a grid size before running, otherwise this call raises an error.
xgboost_tune <-
  tune_grid(
    xgboost_workflow,
    resamples = stop("add your rsample object"),
    grid = stop("add number of candidate points")
  )