I would like to use tidymodels to tune a ranger random forest in a cross-validated setup. My dataset is imbalanced, so I would like to use ranger's class.weights parameter.
However, the class weights should be computed from each fold's training data, so they can differ from fold to fold. How can I pass the fold-specific weights to the engine?
MWE:
library(tidyverse)
library(tidymodels)
# Fix the RNG state so the fold assignment below is reproducible
set.seed(111)

# Dummy unbalanced dataset: rows 30 to 110 of the built-in iris data
iris_cut <- iris[seq(30, 110), ]

# Split the data into 3 cross-validation folds
folds <- vfold_cv(data = iris_cut, v = 3)
# Calculate class weights for the `Species` column of a data frame.
#
# Every class observed in `df` gets the weight
# (size of the largest class) / (size of this class), so the most frequent
# class has weight 1 and rarer classes get proportionally larger weights.
#
# Args:
#   df: A data frame (or tibble) with a `Species` column, factor or character.
#
# Returns:
#   An unnamed numeric vector with one weight per observed class, ordered by
#   factor level (unused levels are dropped; character input is ordered
#   alphabetically). Rows with a missing class are not counted. Returns
#   numeric(0) when no class is observed.
calc_weights <- function(df) {
  # Count rows per class. De-duplicating on the count itself would merge
  # classes that happen to have the same size and return too few weights.
  # factor() drops unused levels and keeps the level order of factor input,
  # which is the order ranger documents for `class.weights`.
  counts <- table(factor(df[["Species"]]))

  # Nothing to weight; also avoids max() warning on empty input
  if (length(counts) == 0) {
    return(numeric(0))
  }

  as.numeric(max(counts) / counts)
}
# Class weights computed on the analysis (training) rows of each fold.
# To be used while training on fold 1:
weights_fold1 <- calc_weights(analysis(folds$splits[[1]]))
# To be used while training on fold 2:
weights_fold2 <- calc_weights(analysis(folds$splits[[2]]))
# To be used while training on fold 3:
weights_fold3 <- calc_weights(analysis(folds$splits[[3]]))
# Recipe: model Species as a function of all remaining columns
rec <- recipe(Species ~ ., data = iris_cut)

# Model specification: ranger random forest classifier, mtry left for tuning
rf_mod <- rand_forest(mtry = tune(), trees = 1000, min_n = 1) %>%
  set_mode("classification") %>%
  set_engine(
    "ranger",
    # Wrong! This fixes the fold-1 weights for every resample, whereas each
    # fold should be fitted with its own weight vector
    class.weights = !!weights_fold1
  )
# Candidate values of mtry to evaluate
rf_grid <- crossing(mtry = c(1, 2, 3))

# Bundle the recipe and the model specification into a single workflow
tune_wf <- workflow() %>%
  add_recipe(rec) %>%
  add_model(rf_mod)

# Tune the random forest over the grid, using the cross-validation folds
tune_res <- tune_grid(object = tune_wf, resamples = folds, grid = rf_grid)