0

While using tidymodels and stacks, I got the exact same error as FatBertLee when trying to predict:

Error: Can't subset columns that don't exist.
x Location 3 doesn't exist.
i There are only 2 columns.
Run `rlang::last_error()` to see where the error occurred.

Sample from train data:

structure(list(id = c(24, 269, 316, 382, 424, 505, 551, 572, 
716, 794, 848, 971, 1016, 1076, 1133, 1189, 1234, 1312, 1365, 
1532), ticker = c("ABEV3", "ALPA4", "ALSC3", "ALUP11", "AMAR3", 
"ANIM3", "ARTR3", "ARZZ3", "BBRK3", "BEEF3", "BEMA3", "BPHA3", 
"BRAP4", "BRFS3", "BRKM5", "BRML3", "BRPR3", "BTOW3", "BVMF3", 
"CCRO3"), data = structure(c(16525, 16525, 16525, 16525, 16525, 
16525, 16525, 16525, 16525, 16525, 16525, 16525, 16525, 16525, 
16525, 16525, 16525, 16525, 16525, 16525), class = "Date"), quarter = c(2015.1, 
2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 
2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 
2015.1, 2015.1, 2015.1), ret3m = c(0.1648611076, 0.4624908206, 
0.0662460568, 0.1028571429, -0.0282563749, -0.5321228611, -0.3796526055, 
-0.0484496124, 0.1234567901, -0.2246835443, 0.0394890001, -0.7213114754, 
-0.2236070381, 0.0180122226, -0.3681792074, 0.0992861778, 0.2915019763, 
-0.0927694407, 0.1747368421, 0.1073369565), lret = c(0.0245911872, 
-0.1918554545, -0.2690459849, -0.1322073384, -0.293577729, -0.5385714286, 
-0.0052356021, -0.1671799162, -0.2081447964, 0.1805309735, 0.2878354861, 
-0.1428571429, -0.206195547, 0.1062529384, 0.2842835131, -0.280110117, 
0.0209923664, -0.2422233554, -0.0283757382, -0.1956378057), alvo = structure(c(2L, 
3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 3L, 2L, 
3L, 2L, 3L), .Label = c("buy", "keep", "sell"), class = "factor"), 
    p_l = c(22.7777943671, 17.013232239, 14.3857944343, 5.3290052757, 
    81.6607210863, 7.0093163352, 6.2590675835, 19.1883018103, 
    -20.310374125, -1.2175821514, 9.4653309443, -0.4779179664, 
    -4.6062594326, 22.7418227837, 16.3991693792, 18.9767524219, 
    16.2272067076, -32.6503400222, 20.0746301096, 24.3704146153
    ), vpa = c(2.9345399772, 4.4559425807, 11.9759963667, 19.1280966065, 
    6.3576040791, 7.9663703125, 6.3450343162, 6.6939288694, 2.9627168318, 
    -0.1624250847, 8.7869794656, 3.0582252548, 26.7151370268, 
    17.5274286146, 4.3957933828, 21.0059526254, 20.4579281623, 
    11.8652171379, 10.8462018391, 2.3057105308), lpa = c(0.8099994101, 
    0.5754344538, 1.1747700189, 3.6216890398, 0.1726656318, 2.1713957927, 
    1.1982615461, 1.2794253625, -0.1344140676, -6.0365536664, 
    1.0269054571, -1.4228383275, -2.2990454956, 2.7834180488, 
    0.67076568, 0.8926711812, 0.8054374505, -0.6110196704, 0.5559255607, 
    0.6688437705), roe = c(27.6022619, 12.9138659965, 9.8093718716, 
    18.9338704957, 2.7158915475, 27.2570280759, 18.8850286135, 
    19.1132201654, -4.5368516535, 3716.51563365, 11.6866718663, 
    -46.5249682066, -8.6057784141, 15.880355927, 15.2592631543, 
    4.249610561, 3.937043107, -5.1496712056, 5.1255321353, 29.0081413769
    ), payout = c(1.0195292727, 0.931375658, 0.270250729, 0.3893211108, 
    0.633884108, 0.2684079991, 0.4913832489, 0.4968062245, -5.7744104024, 
    0, 0.6504980525, 0, -0.7461901921, 0.340716886, 0.9037266218, 
    0.5255796718, 7.4942170294, 0, 0.7291198294, 1.1855942496
    ), dy12m = c(0.0447696477, 0.0547441923, 0.018785944, 0.0730569948, 
    0.0077624113, 0.0382930355, 0.0785074202, 0.0258910991, 0.2843084212, 
    0, 0.0687242798, 0, 0.1619948253, 0.0149819515, 0.0551080729, 
    0.0276959756, 0.4618303793, 0, 0.0363204615, 0.0486489158
    ), p_vpa = c(6.2871864562, 2.197066013, 1.4111560727, 1.0089869576, 
    2.2178166216, 1.9105313214, 1.1820267041, 3.667502371, 0.9214515443, 
    -45.2516310079, 1.1061821685, 0.2223511819, 0.39640448, 3.6114824023, 
    2.5023924107, 0.8064380751, 0.6388721231, 1.6813851587, 1.0289316173, 
    7.0694043257), ativo_circulante = c(19241017000, 2349169000, 
    458771000, 2256679000, 1935689000, 372126000, 1329203000, 
    668561000, 172137000, 4902444000, 280288000, 874442000, 944722000, 
    17774588000, 15339781000, 1190542000, 898780000, 3609719000, 
    3118127000, 3384242000), liq_corr = c(0.8839175747, 1.8308613158, 
    1.2004998037, 1.6443639188, 2.2566264077, 2.1538559489, 0.7579286675, 
    3.0155975841, 3.430185521, 2.038108597, 2.4949751204, 0.6588875853, 
    0.7662929788, 1.8775702843, 0.9387753091, 1.3318633571, 1.9499653954, 
    1.4177171092, 1.4075117803, 0.6784915709), divida_bruta = c(2691043000, 
    573308000, 1376110000, 4124207000, 1175307000, 121178000, 
    5855332000, 98138000, 0, 6395461000, 76266000, 814095000, 
    1199600000, 12721903000, 23126794000, 5363491000, 4338561000, 
    2302677000, 1982951000, 11538724000), quant_on = c(15713667000, 
    241609000, 159060920, 461243596, 185532000, 82865593, 344444000, 
    88682000, 184936000, 178002062, 50923870, 363051086, 122171000, 
    851501628, 451668652, 462653000, 298228000, 255484410, 1801392256, 
    1765587200), ibov3m = c(0.0543782982, 0.0543782982, 0.0543782982, 
    0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 
    0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 
    0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 
    0.0543782982, 0.0543782982), volume3m = c(194265323.885, 
    7227171.01639, 5639329.77049, 2949280.65574, 1908696.86885, 
    14730050.7541, 8171198.16393, 2894382.19672, 2480344.04918, 
    6642555.04918, 1400969.86885, 1363838.03279, 23186487.6721, 
    145235386.705, 31063969.0984, 39373887.7049, 31119377.2951, 
    9243986.29508, 134097842.066, 89693737.7049), volat3m = c(0.204997689, 
    0.3147291039, 0.3302444855, 0.3489084169, 0.3506794611, 0.9888233707, 
    0.702416568, 0.4027960265, 0.4375357642, 0.4431475177, 0.3459458746, 
    0.5925460944, 0.5092286168, 0.2317206403, 0.6327772099, 0.4529299407, 
    0.3838324421, 0.5354411807, 0.4211022767, 0.4499589031)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

Sample from test data:

structure(list(id = c(28, 183, 273, 320, 386, 428, 509, 576, 
607, 720, 798, 975, 1020, 1080, 1137, 1193, 1238, 1316, 1369, 
1536), ticker = c("ABEV3", "AGRO3", "ALPA4", "ALSC3", "ALUP11", 
"AMAR3", "ANIM3", "ARZZ3", "ATOM3", "BBRK3", "BEEF3", "BPHA3", 
"BRAP4", "BRFS3", "BRKM5", "BRML3", "BRPR3", "BTOW3", "BVMF3", 
"CCRO3"), data = structure(c(16891, 16891, 16891, 16891, 16891, 
16891, 16891, 16891, 16891, 16891, 16891, 16891, 16891, 16891, 
16891, 16891, 16891, 16891, 16891, 16891), class = "Date"), quarter = c(2016.1, 
2016.1, 2016.1, 2016.1, 2016.1, 2016.1, 2016.1, 2016.1, 2016.1, 
2016.1, 2016.1, 2016.1, 2016.1, 2016.1, 2016.1, 2016.1, 2016.1, 
2016.1, 2016.1, 2016.1), ret3m = c(0.1012222277, 0.0319634703, 
0.1027634922, 0.1883116883, 0.2222222222, 0.4556962025, -0.2316602317, 
0.182278481, -0.2413793103, 0.5338345865, -0.1048, 0.1645299145, 
0.3021276596, -0.0515214007, -0.1361623616, 0.3569457222, 0.0615199035, 
-0.0131124914, 0.4717703349, 0.1786774098), lret = c(0.03817432, 
-0.0520231214, -0.0473753747, 0.0996441281, 0.1307692308, 0.0386206897, 
-0.1059841384, -0.0144292093, 3.515625, 0.0429447853, 0.0549450549, 
0.7995867769, 0.0941704036, 0.1914686141, 0.3044155844, -0.0435458787, 
0.125, 0.722462203, -0.0545407775, 0.0029603316), alvo = structure(c(2L, 
2L, 2L, 2L, 1L, 2L, 3L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 
1L, 2L, 2L), .Label = c("buy", "keep", "sell"), class = "factor"), 
    p_l = c(23.0573185652, 3.3563383009, 12.8681457975, 14.9857549371, 
    5.7506300614, -29.5485112365, 15.148977643, 17.8311538826, 
    11494.45, -3.8616285071, -12.9081671576, -0.9445857551, -1.2627480655, 
    15.2860347064, 5.4145204372, 20.9079325197, -3.7834135118, 
    -7.3497358543, 12.1400903644, 29.8833296129), vpa = c(3.1233889525, 
    13.0105940304, 4.2986973442, 11.7880085771, 19.9345321137, 
    5.3605538842, 8.4874412011, 7.1659097312, -0.025481283, 2.2820475371, 
    2.4196835373, 7.5371197883, 23.0122143239, 17.1629328541, 
    4.5752795223, 21.5958806513, 16.1318320211, 10.0098335772, 
    10.4684534694, 1.970423211), lpa = c(0.8162267415, 3.3667643089, 
    0.6022623711, 0.8548117898, 2.582325735, -0.2335143028, 0.6568099996, 
    1.309505832, 1.91397e-05, -0.528274534, -0.8668930192, -5.7697249515, 
    -4.8465724613, 3.3566586094, 4.3235592647, 0.7054738667, 
    -2.3259418968, -1.9456481544, 1.2668768962, 0.4694925292), 
    roe = c(26.1327280688, 25.8770990857, 14.0103459935, 7.2515368839, 
    12.9540323309, -4.356159976, 7.7386103075, 18.2741044904, 
    -0.075112669, -23.1491467811, -35.8267106362, -76.5507927904, 
    -21.0608696456, 19.5576049728, 94.4982540111, 3.2667057117, 
    -14.4183369487, -19.4373676581, 12.1018534387, 23.8269893767
    ), payout = c(0.6493293751, 0.4151463755, 1.0411536668, 0.7851771571, 
    0.4646973787, -0.2545662897, 0.7198428773, 0.5905991163, 
    0, 0, 0, 0, -0.2132565767, 0.3937093446, 0.1402200562, 0.6486989038, 
    -0.9594607727, 0, 0.630799119, 1.8705547268), dy12m = c(0.0281615303, 
    0.1236902655, 0.0809093776, 0.0523949017, 0.0808080808, 0.0086151985, 
    0, 0.0331217553, 0, 0, 0, 0, 0.168882917, 0.0257561462, 0.0258970407, 
    0.0310264491, 0.253596592, 0, 0.051960002, 0.062595258), 
    p_vpa = c(6.0255063606, 0.8685229878, 1.8028717492, 1.0866975466, 
    0.7449384774, 1.28718042, 1.1723203454, 3.2584836923, -8.6337881823, 
    0.8939340512, 4.624571696, 0.7230878841, 0.265945724, 2.9895822839, 
    5.1166272762, 0.6830006258, 0.5455053083, 1.4285951799, 1.4691759432, 
    7.1202977723), ativo_circulante = c(19074487000, 299277000, 
    2150935000, 270778000, 2420634000, 1696592000, 443924000, 
    707625000, 577000, 82327000, 5478151000, 552019000, 382414000, 
    18648055000, 16421999000, 1217472000, 1365695000, 4099507000, 
    9070336000, 4236905000), liq_corr = c(0.8708215307, 2.7670608467, 
    1.817552289, 0.83880241, 1.3131298117, 2.3898018114, 1.7125640393, 
    3.0835617453, 3.7225806452, 2.3941315032, 1.6971953056, 0.499031352, 
    11.2484630997, 1.6280799129, 1.1349097628, 1.1491243801, 
    1.9005998107, 1.6616057257, 2.9185225036, 0.5096682763), 
    divida_bruta = c(3959840000, 102754000, 571852000, 0, 4726141000, 
    1050089000, 371280000, 114349000, 0, 0, 6596405000, 494431000, 
    1394158000, 17221384000, 25378755000, 5416594000, 3042997000, 
    4220957000, 2222227000, 14776601000), quant_on = c(15694525000, 
    56365989, 241609000, 162018121, 461243596, 204086000, 80878793, 
    88735000, 313485000, 184866000, 191993702, 113081127, 122171000, 
    806643545, 451668652, 465996000, 298228000, 257318364, 1786432452, 
    1765587200), ibov3m = c(0.1877981064, 0.1877981064, 0.1877981064, 
    0.1877981064, 0.1877981064, 0.1877981064, 0.1877981064, 0.1877981064, 
    0.1877981064, 0.1877981064, 0.1877981064, 0.1877981064, 0.1877981064, 
    0.1877981064, 0.1877981064, 0.1877981064, 0.1877981064, 0.1877981064, 
    0.1877981064, 0.1877981064), volume3m = c(275044574.417, 
    1368185.05, 4606640.51667, 4818698.68333, 2655531.03333, 
    410574.616667, 3624254.31667, 3090654.93333, 597455.25, 463022.95, 
    10915321.5833, 714718.383333, 15407212.8, 127378298.5, 56920489.7667, 
    48747632.1833, 6739975.55, 7690274.23333, 191011333.95, 66406667.5833
    ), volat3m = c(0.2896466961, 0.3203497572, 0.4300966906, 
    0.4741045624, 0.4899799943, 0.5525550441, 0.7499944464, 0.4746127812, 
    1.0172509458, 0.732537106, 0.3913906885, 1.6382527637, 0.8704214322, 
    0.3472589383, 0.4783225142, 0.4185663317, 0.3355137457, 0.626691248, 
    0.4644887741, 0.4557810968)), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

Code used:

pacman::p_load(tidyverse, tidymodels, xgboost, nnet, caret, stacks)

# Preprocessing recipe: `alvo` is the outcome and every other column of
# `train` starts out as a predictor. `train` must already exist in the
# session (see the sample data above).
rec <-
     # Close recipe() before piping; the original left the call unterminated,
     # so update_role() was parsed as part of the `data` argument and the
     # script failed to parse.
     recipe(alvo ~ ., data = train) %>%
     # Re-label these columns with the "ID" role so they stay in the data
     # but no longer carry the predictor role.
     update_role(id, ticker, data, quarter, ret3m, lret, ibov3m,
                 volat3m, new_role = "ID")

# Base workflow carrying only the recipe `rec`.
wflow <- add_recipe(workflow(), rec)

# Control object passed to both tune_grid() calls.
ctrl_grid <- control_stack_grid()

# xgboost model ----
# Boosted trees with a fixed number of trees; min_n and mtry are tuned.
xb_spec <-
  boost_tree(
    trees = 500,
    min_n = tune(),
    mtry  = tune()
  ) %>%
  set_engine("xgboost") %>%
  set_mode("classification")

# Base workflow plus the xgboost specification.
xb_wflow <- add_model(wflow, xb_spec)

# Fix the RNG kind and seed immediately before tuning.
RNGkind("L'Ecuyer-CMRG")
set.seed(1090943296)

# Tune over a grid of 10 candidates on the `cv_folds` resamples
# (`cv_folds` is expected to be defined elsewhere).
xb_res <- tune_grid(
  object    = xb_wflow,
  resamples = cv_folds,
  grid      = 10,
  control   = ctrl_grid
)

# nnet model ----
# Single-layer network; hidden units, penalty and epochs are all tuned.
nn_spec <-
  mlp(
    hidden_units = tune(),
    penalty      = tune(),
    epochs       = tune()
  ) %>%
  set_engine("nnet") %>%
  set_mode("classification")

# Same recipe as `rec`, with an added normalization step on the predictors.
nnet_rec <- step_normalize(rec, all_predictors())

# Stand-alone workflow: model first, then the normalizing recipe.
nn_wflow <- add_recipe(add_model(workflow(), nn_spec), nnet_rec)

# Re-apply the same RNG kind and seed used for the xgboost tuning run.
RNGkind("L'Ecuyer-CMRG")
set.seed(1090943296)

# Tune over a grid of 10 candidates on the `cv_folds` resamples
# (`cv_folds` is expected to be defined elsewhere).
nn_res <- tune_grid(
  object    = nn_wflow,
  resamples = cv_folds,
  grid      = 10,
  control   = ctrl_grid
)

# stack model ----
# Build the stack from the inside out: start an empty stack, add the xgboost
# and nnet tuning results as candidates, blend their predictions, then fit
# the members.
stack_model <-
  fit_members(
    blend_predictions(
      add_candidates(
        add_candidates(stacks(), xb_res),
        nn_res
      )
    )
  )

# predict ----
# Probability-type predictions for `test` (expected to be defined elsewhere).
stack_pred <- predict(stack_model, test, type = "prob")

Result from rlang::last_error():

<error/vctrs_error_subscript_oob>
Can't subset columns that don't exist.
x Location 3 doesn't exist.
i There are only 2 columns.
Backtrace:
     1. stats::predict(...)
9. tidyr:::pivot_wider.data.frame(...)
10. tidyr::build_wider_spec(...)
11. tidyselect::eval_select(enquo(values_from), data)
12. tidyselect:::eval_select_impl(...)
20. tidyselect:::vars_select_eval(...)
21. tidyselect:::loc_validate(pos, vars)
22. vctrs::vec_as_location(pos, n = length(vars))
24. vctrs:::stop_subscript_oob(...)
25. vctrs:::stop_subscript(...)
Run `rlang::last_trace()` to see the full context.

Result from rlang::last_trace():

<error/vctrs_error_subscript_oob>
     Can't subset columns that don't exist.
x Location 3 doesn't exist.
i There are only 2 columns.
Backtrace:
     x
  1. +-stats::predict(...)
  2. +-stacks::predict.model_stack(...)
  3. | \-`%>%`(...)
  4. +-rlang::eval_tidy(.)
  5. +-stacks:::predict_members_classification(...)
  6. | \-`%>%`(...)
  7. +-dplyr::select(., -rowid)
  8. +-tidyr::pivot_wider(., id_cols = rowid, names_from = name, values_from = 3:ncol(.))
  9. \-tidyr:::pivot_wider.data.frame(...)
 10.   \-tidyr::build_wider_spec(...)
 11.     \-tidyselect::eval_select(enquo(values_from), data)
 12.       \-tidyselect:::eval_select_impl(...)
 13.         +-tidyselect:::with_subscript_errors(...)
 14.         | +-base::tryCatch(...)
 15.         | | \-base:::tryCatchList(expr, classes, parentenv, handlers)
 16.         | |   \-base:::tryCatchOne(expr, names, parentenv, handlers[[1L]])
 17.         | |     \-base:::doTryCatch(return(expr), name, parentenv, handler)
 18.         | \-tidyselect:::instrument_base_errors(expr)
 19.         |   \-base::withCallingHandlers(...)
 20.         \-tidyselect:::vars_select_eval(...)
 21.           \-tidyselect:::loc_validate(pos, vars)
 22.             \-vctrs::vec_as_location(pos, n = length(vars))
 23.               \-(function () ...
 24.                 \-vctrs:::stop_subscript_oob(...)
 25.                   \-vctrs:::stop_subscript(...)

I'm running this code iteratively within a function (rolling the data with sliding_period(), from 1 to 53 periods). Sometimes the error appears at the beginning, sometimes towards the end.

  • I can't reproduce this error; I can predict on the `test` data just fine after fitting to the `train` data. – Julia Silge Aug 02 '21 at 03:44
  • 1
    Julia Silge herself answering me... wow! I don't even have proper clothes for this occasion. Lol. As soon as I saw your answer, I ran the code and it worked without a hitch! I remembered that since I posted the issue, I've updated tidymodels (and other libraries), so I believe that whatever caused the error has been resolved. I will try again with the seed of the sample I provided, just to make sure everything is really ok. Thank you so much for your reply Julia. – Marco Binda Aug 03 '21 at 15:58

0 Answers0