1

Let's say I have a tibble like so:

> mod_tbl
# A tibble: 46 × 3
   .parsnip_engine .parsnip_mode .parsnip_fns
   <chr>           <chr>         <chr>       
 1 lm              regression    linear_reg  
 2 brulee          regression    linear_reg  
 3 gee             regression    linear_reg  
 4 glm             regression    linear_reg  
 5 glmer           regression    linear_reg  
 6 glmnet          regression    linear_reg  
 7 gls             regression    linear_reg  
 8 lme             regression    linear_reg  
 9 lmer            regression    linear_reg  
10 stan            regression    linear_reg  

dput output

structure(list(.parsnip_engine = c("lm", "brulee", "gee", "glm", 
"glmer", "glmnet", "gls", "lme", "lmer", "stan"), .parsnip_mode = c("regression", 
"regression", "regression", "regression", "regression", "regression", 
"regression", "regression", "regression", "regression"), .parsnip_fns = c("linear_reg", 
"linear_reg", "linear_reg", "linear_reg", "linear_reg", "linear_reg", 
"linear_reg", "linear_reg", "linear_reg", "linear_reg")), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"))

Now I want to add the parameters as a column for each line, so I do the following:

model_tbl_with_params <- mod_tbl %>% 
  dplyr::mutate(
    model_params = purrr::pmap(
      dplyr::cur_data(), 
      ~ list(formalArgs(..3))
    ) 
  )

This gives:

> model_tbl_with_params
# A tibble: 46 × 4
   .parsnip_engine .parsnip_mode .parsnip_fns model_params
   <chr>           <chr>         <chr>        <list>      
 1 lm              regression    linear_reg   <list [1]>  
 2 brulee          regression    linear_reg   <list [1]>  
 3 gee             regression    linear_reg   <list [1]>  
 4 glm             regression    linear_reg   <list [1]>  
 5 glmer           regression    linear_reg   <list [1]>  
 6 glmnet          regression    linear_reg   <list [1]>  
 7 gls             regression    linear_reg   <list [1]>  
 8 lme             regression    linear_reg   <list [1]>  
 9 lmer            regression    linear_reg   <list [1]>  
10 stan            regression    linear_reg   <list [1]>  

I then add a .model_id column as a factor and create a dplyr group_split object

mod_factor_tbl <- model_tbl_with_params %>%
  dplyr::mutate(.model_id = dplyr::row_number() %>% 
                  forcats::as_factor()) %>%
  dplyr::select(.model_id, dplyr::everything())

models_list <- mod_factor_tbl %>%
  dplyr::group_split(.model_id)

I then get all the parameters in the fashion I want, in this instance its to set all to tune::tune()

tuned_params_list <- models_list %>%
  purrr::imap(
    .f = function(obj, id){
      
      # Pull the model params
      mod_params <- obj %>% dplyr::pull(5) %>% purrr::pluck(1)
      mod_params_list <- unlist(mod_params) %>% as.list()
      #param_names <- unlist(mod_params)
      names(mod_params_list) <- unlist(mod_params)
      
      # Set mode and engine
      p_mode <- obj %>% dplyr::pull(2) %>% purrr::pluck(1)
      p_engine <- obj %>% dplyr::pull(3) %>% purrr::pluck(1)
      me_list <- list(
        mode = paste0("mode = ", p_mode),
        engine = paste0("engine = ", p_engine)
      )
      
      # Get all other params
      me_vec <- c("mode","engine")
      pv <- unlist(mod_params)
      params_to_modify <- pv[!pv %in% me_vec] %>% as.list()
      names(params_to_modify) <- unlist(params_to_modify)
      
      # Set each item equal to .x = tune::tune()
      tuned_params_list <- purrr::map(
        params_to_modify,
        ~ paste0(.x, " = tune::tune()")
      )
      
      # use modifyList()
      res <- utils::modifyList(mod_params_list, tuned_params_list)
      res <- utils::modifyList(res, me_list)
      
      # Return      
      return(res)
      
    }
  )

How do I take that list object and map the arguments to a model spec?

I make the model spec as a column with a function like this:

internal_make_spec_tbl <- function(.data){

  # Checks ----
  df <- dplyr::as_tibble(.data)

  nms <- unique(names(df))

  if (!".parsnip_engine" %in% nms | !".parsnip_mode" %in% nms | !".parsnip_fns" %in% nms){
    rlang::abort(
      message = "The model tibble must come from the class/reg to parsnip function.",
      use_cli_format = TRUE
    )
  }

  # Make tibble ----
  mod_spec_tbl <- df %>%
    dplyr::mutate(
      model_spec = purrr::pmap(
        dplyr::cur_data(),
        ~ match.fun(..3)(mode = ..2, engine = ..1)
      )
    ) %>%
    # add .model_id column
    dplyr::mutate(.model_id = dplyr::row_number()) %>%
    dplyr::select(.model_id, dplyr::everything())

  # Return ----
  return(mod_spec_tbl)

}

I want to map the values into the arguments of the model spec that is created by the above function.

Here is what the output looks like:

[[46]]
[[46]]$mode
[1] "mode = kernlab"

[[46]]$engine
[1] "engine = regression"

[[46]]$cost
[1] "cost = tune::tune()"

[[46]]$rbf_sigma
[1] "rbf_sigma = tune::tune()"

[[46]]$margin
[1] "margin = tune::tune()"
MCP_infiltrator
  • 3,961
  • 10
  • 45
  • 82
  • Do you want to get something like `tuned_params_list[[1]][models_list[[1]]$model_params[[1]][[1]]]` (for the first element) – akrun Jan 05 '23 at 19:58
  • I think that would do it, maybe imap it? – MCP_infiltrator Jan 05 '23 at 20:00
  • i.e. `models_list2 <- map2(tuned_params_list, models_list, ~ {.y$model_params[[1]][[1]] <- .x[.y$model_params[[1]][[1]]];.y}); models_list2[[1]]$model_params [[1]] [[1]][[1]] [[1]][[1]]$mode [1] "mode = lm" [[1]][[1]]$engine [1] "engine = regression" [[1]][[1]]$penalty [1] "penalty = tune::tune()" [[1]][[1]]$mixture [1] "mixture = tune::tune()"` – akrun Jan 05 '23 at 20:06
  • 1
    In case you need a flattened list `models_list2 <- map2(tuned_params_list, models_list, ~ {.y$model_params <- list(.x[.y$model_params[[1]][[1]]]);.y})` – akrun Jan 05 '23 at 20:09
  • let me fudge around with those @akrun – MCP_infiltrator Jan 05 '23 at 20:11
  • 1
    the flattened one works nice, can you post as answer with explanation of what is going on in the lambda please and also how to set them inside the actual model – MCP_infiltrator Jan 05 '23 at 20:15
  • 1
    I added some explanation, hope it helps – akrun Jan 05 '23 at 20:20

1 Answers1

2

As both lists have the same length and wants to match and replace the column 'model_params' in models_list with the corresponding list element from tuned_params_list, loop over the lists with map2, extract the model_params vector (which is stored as a nested list in models_list column), and use that vector to extract the named matching list element from the tuned_params_list element and assign it back to model_params column, return the data

library(purrr)
models_list2 <- map2(tuned_params_list, models_list, ~ {
     .y$model_params <- list(.x[.y$model_params[[1]][[1]]])
 .y})

It can be done in base R with Map

models_list2 <- Map(\(.x, .y) {
   .y$model_params <- list(.x[.y$model_params[[1]][[1]]])
   .y}, tuned_params_list, models_list)
akrun
  • 874,273
  • 37
  • 540
  • 662