1

I have a tsibble as shown below.

test.data <- structure(list(RSLITM = c("004", "004", "004", "004", "004", 
"004", "004", "004", "004", "004", "004", "004", "004", "004", 
"004", "004", "004", "004", "004", "004", "004", "004", "004", 
"004", "004", "004", "004", "004", "004", "004", "004", "004", 
"004", "004", "004", "004", "004", "005", "005", "005", "005", 
"005", "005", "005", "005", "005", "005", "005", "005", "005", 
"005", "005", "005", "005", "005", "005", "005", "005", "005", 
"005", "005", "005", "005", "005", "005", "005", "005", "005", 
"005", "005", "005", "005", "005", "005"), RSFMTH = structure(c(17713, 
17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 17987, 
18017, 18048, 18078, 18109, 18140, 18170, 18201, 18231, 18262, 
18293, 18322, 18353, 18383, 18414, 18444, 18475, 18506, 18536, 
18567, 18597, 18628, 18659, 18687, 18718, 18748, 18779, 18809, 
17713, 17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 
17987, 18017, 18048, 18078, 18109, 18140, 18170, 18201, 18231, 
18262, 18293, 18322, 18353, 18383, 18414, 18444, 18475, 18506, 
18536, 18567, 18597, 18628, 18659, 18687, 18718, 18748, 18779, 
18809), class = c("yearmonth", "vctrs_vctr")), RSFQTY = c(285600, 
352200, 273600, 282700, 175800, 138700, 177700, 245900, 165000, 
180100, 298000, 173800, 257300, 282800, 164500, 155100, 232300, 
175500, 226000, 287100, 221400, 270800, 286200, 394400, 336600, 
331000, 224600, 216800, 351600, 374700, 173500, 423700, 357700, 
245200, 454700, 361700, 381200, 79000, 58100, 66300, 52700, 68600, 
33000, 76600, 85600, 84100, 49000, 98000, 113500, 83800, 64000, 
116800, 72000, 65200, 49800, 33300, 79800, 48000, 81600, 125000, 
53500, 97600, 80000, 81900, 80000, 53800, 39000, 73800, 76600, 
33700, 60200, 84000, 66600, 32400), RSSEAS = c("A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A"), RSTREND = c("N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N"), RSMODE = c("EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP")), row.names = c(NA, 
-74L), key = structure(list(RSLITM = c("004", "005"), RSSEAS = c("A", 
"A"), RSTREND = c("N", "N"), RSMODE = c("EXP", "EXP"), .rows = structure(list(
    1:37, 38:74), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), index = structure("RSFMTH", ordered = TRUE), index2 = "RSFMTH", interval = structure(list(
    year = 0, quarter = 0, month = 1, week = 0, day = 0, hour = 0, 
    minute = 0, second = 0, millisecond = 0, microsecond = 0, 
    nanosecond = 0, unit = 0), .regular = TRUE, class = c("interval", 
"vctrs_rcrd", "vctrs_vctr")), class = c("tbl_ts", "tbl_df", "tbl", 
"data.frame"))

I would like to apply a modified ETS function using the saved parameters from the tsibble. For instance, whatever is in the RSSEAS and RSTREND columns will be used to estimate the ETS model.

The following works:

test.data %>% model(EXP = ETS(RSFQTY ~ trend("N") + season("A")))

However, when I try to use a function below to extract the parameters for each SKU (since presumably they could be different for each SKU), I get an error message.

ets.function <- function(tsib){
  season.param <- as.character(tsib[1, "RSSEAS"])
  trend.param <- as.character(tsib[1, "RSTREND"])
  tsib %>% model(EXP = ETS(RSFQTY ~ trend(trend.param) + season(season.param))) %>% forecast(h = "3 years")
}

If I call ets.function(test.data) R returns a fable but it is NULL/NA since the model is not being estimated with the specified parameters.

Calling map_dfr(test.data, ets.function) gives me the following error:

Error in tsib[1, "RSSEAS"] : incorrect number of dimensions

This doesn't make sense to me since if I run the code for season.param or trend.param in my console, I get "A" or "N" as appropriate, which is exactly the value the trend and season specials take inside the ETS function.

Basically I am trying to figure out a way to map ETS over my tsibble using prespecified parameters for each unique key combination. I am open to other ideas about how to achieve this (pmap_dfr for vectors of parameters, etc).

Anoushiravan R
  • 21,622
  • 3
  • 18
  • 41
LauraDR
  • 86
  • 9
  • Do you need to build by group i.e. what is the `SKU` here – akrun Aug 30 '21 at 23:07
  • it's the RSLITM column. So i would want a different model for each SKU. the actual tsibble has more columns but i am simplifying for the sake of the example. – LauraDR Aug 31 '21 at 13:30

1 Answers1

2

We could create the formula with glue or paste

library(fable)
ets.function <- function(tsib){
  season.param <- tsib[["RSSEAS"]][1]
  trend.param <- tsib[["RSTREND"]][1]
  fmla <- as.formula(glue::glue("RSFQTY ~ trend('{trend.param}') +",
            " season('{season.param}')"))
  print(fmla)
  tsib %>% 
     model(EXP = ETS(fmla)) %>% 
  forecast(h = "3 years")
}

-testing

> ets.function(test.data)
RSFQTY ~ trend("N") + season("A")
<environment: 0x7fface3d9778>
# A fable: 72 x 8 [1M]
# Key:     RSLITM, RSSEAS, RSTREND, RSMODE, .model [2]
   RSLITM RSSEAS RSTREND RSMODE .model   RSFMTH             RSFQTY   .mean
   <chr>  <chr>  <chr>   <chr>  <chr>     <mth>             <dist>   <dbl>
 1 004    A      N       EXP    EXP    2021 Aug  N(4e+05, 1.4e+10) 395706.
 2 004    A      N       EXP    EXP    2021 Sep N(279181, 8.6e+09) 279181.
 3 004    A      N       EXP    EXP    2021 Oct N(266837, 8.8e+09) 266837.
 4 004    A      N       EXP    EXP    2021 Nov N(349230, 1.4e+10) 349230.
 5 004    A      N       EXP    EXP    2021 Dec N(327811, 1.4e+10) 327811.
 6 004    A      N       EXP    EXP    2022 Jan N(265657, 1.2e+10) 265657.
 7 004    A      N       EXP    EXP    2022 Feb N(375557, 1.9e+10) 375557.
 8 004    A      N       EXP    EXP    2022 Mar  N(3e+05, 1.6e+10) 300908.
 9 004    A      N       EXP    EXP    2022 Apr N(318455, 1.8e+10) 318455.
10 004    A      N       EXP    EXP    2022 May  N(4e+05, 2.4e+10) 400250.
# … with 62 more rows


or may use sprintf as well

ets.function <- function(tsib){
  season.param <- tsib[["RSSEAS"]][1]
  trend.param <- tsib[["RSTREND"]][1]
  fmla <- as.formula(sprintf("RSFQTY ~ trend('%s') + season('%s')",  
           trend.param, season.param))
  
  print(fmla)
  tsib %>% 
     model(EXP = ETS(fmla)) %>% 
  forecast(h = "3 years")
}
ets.function(test.data)

If we need to apply based on group, we could split with group_split and apply the function in a loop using map

library(purrr)
test.data %>%
    group_split(RSLITM) %>% 
    map(~ ets.function(.x))

-output

RSFQTY ~ trend("N") + season("A")
<environment: 0x7ffac94171b8>
RSFQTY ~ trend("N") + season("A")
<environment: 0x7ffacf3f1950>
[[1]]
# A fable: 36 x 8 [1M]
# Key:     RSLITM, RSSEAS, RSTREND, RSMODE, .model [1]
   RSLITM RSSEAS RSTREND RSMODE .model   RSFMTH             RSFQTY   .mean
   <chr>  <chr>  <chr>   <chr>  <chr>     <mth>             <dist>   <dbl>
 1 004    A      N       EXP    EXP    2021 Aug  N(4e+05, 1.4e+10) 395706.
 2 004    A      N       EXP    EXP    2021 Sep N(279181, 8.6e+09) 279181.
 3 004    A      N       EXP    EXP    2021 Oct N(266837, 8.8e+09) 266837.
 4 004    A      N       EXP    EXP    2021 Nov N(349230, 1.4e+10) 349230.
 5 004    A      N       EXP    EXP    2021 Dec N(327811, 1.4e+10) 327811.
 6 004    A      N       EXP    EXP    2022 Jan N(265657, 1.2e+10) 265657.
 7 004    A      N       EXP    EXP    2022 Feb N(375557, 1.9e+10) 375557.
 8 004    A      N       EXP    EXP    2022 Mar  N(3e+05, 1.6e+10) 300908.
 9 004    A      N       EXP    EXP    2022 Apr N(318455, 1.8e+10) 318455.
10 004    A      N       EXP    EXP    2022 May  N(4e+05, 2.4e+10) 400250.
# … with 26 more rows

[[2]]
# A fable: 36 x 8 [1M]
# Key:     RSLITM, RSSEAS, RSTREND, RSMODE, .model [1]
   RSLITM RSSEAS RSTREND RSMODE .model   RSFMTH            RSFQTY   .mean
   <chr>  <chr>  <chr>   <chr>  <chr>     <mth>            <dist>   <dbl>
 1 005    A      N       EXP    EXP    2021 Aug N(67121, 4.1e+08)  67121.
 2 005    A      N       EXP    EXP    2021 Sep N(95706, 8.3e+08)  95706.
 3 005    A      N       EXP    EXP    2021 Oct N(73173, 4.9e+08)  73173.
 4 005    A      N       EXP    EXP    2021 Nov N(57981, 3.1e+08)  57981.
 5 005    A      N       EXP    EXP    2021 Dec N(42901, 1.7e+08)  42901.
 6 005    A      N       EXP    EXP    2022 Jan N(62766, 3.6e+08)  62766.
 7 005    A      N       EXP    EXP    2022 Feb N(79394, 5.7e+08)  79394.
 8 005    A      N       EXP    EXP    2022 Mar N(61960, 3.5e+08)  61960.
 9 005    A      N       EXP    EXP    2022 Apr N(60882, 3.4e+08)  60882.
10 005    A      N       EXP    EXP    2022 May  N(106257, 1e+09) 106257.
# … with 26 more rows
akrun
  • 874,273
  • 37
  • 540
  • 662
  • 2
    That's so funny. I don't even understand the question and you provide not one, no *three* answers. Good job! – Martin Gal Aug 30 '21 at 23:19
  • 1
    @MartinGal thanks, But, I am also not sure about the SKU's. – akrun Aug 30 '21 at 23:20
  • 1
    thank you @akrun for this solution. I was not aware of group_split and it works perfectly for my needs and is scalable for more keys than just RSLITM. Your third solution was what I needed since each key combination could have different params, which i may not have made clear in my example. – LauraDR Aug 31 '21 at 14:27