0

My data set is weekly data containing two variables, Production and Shipment. Production is the independent variable and Shipment is the dependent variable. First I'm trying to forecast the Production values, and then use them as a regressor to forecast the Shipment variable.

If I run the ARIMA model using the training set date range from "2018-12-31" to "2021-11-22", the model finishes within 10 minutes and I can see the model values.

Using the same model, if I extend the training set date range so that it runs from "2018-12-31" to "2021-12-27", the model runs for so long that it never finishes executing, and I can't view the model output.

Could you please help me with this problem? Thank you for your support.

# Sample data for the question: a plain data.frame with 160 weekly rows.
#   YearWeek   - week label as a "YYYYWW" string, "201901" through "202203"
#   Shipment   - numeric weekly shipment value (the dependent variable)
#   Production - numeric weekly production value (the independent variable)
Original.df<-structure(list(YearWeek = c("201901", "201902", "201903", "201904", 
"201905", "201906", "201907", "201908", "201909", "201910", "201911", 
"201912", "201913", "201914", "201915", "201916", "201917", "201918", 
"201919", "201920", "201921", "201922", "201923", "201924", "201925", 
"201926", "201927", "201928", "201929", "201930", "201931", "201932", 
"201933", "201934", "201935", "201936", "201937", "201938", "201939", 
"201940", "201941", "201942", "201943", "201944", "201945", "201946", 
"201947", "201948", "201949", "201950", "201951", "201952", "202001", 
"202002", "202003", "202004", "202005", "202006", "202007", "202008", 
"202009", "202010", "202011", "202012", "202013", "202014", "202015", 
"202016", "202017", "202018", "202019", "202020", "202021", "202022", 
"202023", "202024", "202025", "202026", "202027", "202028", "202029", 
"202030", "202031", "202032", "202033", "202034", "202035", "202036", 
"202037", "202038", "202039", "202040", "202041", "202042", "202043", 
"202044", "202045", "202046", "202047", "202048", "202049", "202050", 
"202051", "202052", "202053", "202101", "202102", "202103", "202104", 
"202105", "202106", "202107", "202108", "202109", "202110", "202111", 
"202112", "202113", "202114", "202115", "202116", "202117", "202118", 
"202119", "202120", "202121", "202122", "202123", "202124", "202125", 
"202126", "202127", "202128", "202129", "202130", "202131", "202132", 
"202133", "202134", "202135", "202136", "202137", "202138", "202139", 
"202140", "202141", "202142", "202143", "202144", "202145", "202146", 
"202147", "202148", "202149", "202150", "202151", "202152", "202201", 
"202202", "202203"), Shipment = c(399, 1336, 1018, 1126, 1098, 
1235, 1130, 1258, 897, 1333, 1221, 1294, 1628, 1611, 1484, 1238, 
1645, 1936, 1664, 1482, 2060, 1964, 1875, 1645, 2039, 1640, 733, 
1764, 1639, 1968, 1692, 1677, 1542, 1299, 1328, 1130, 1741, 1929, 
1843, 1427, 1467, 1450, 1041, 1238, 1721, 1757, 1813, 1001, 1208, 
1916, 1435, 540, 681, 1436, 1170, 938, 1206, 1648, 1169, 1311, 
1772, 1333, 1534, 1365, 1124, 846, 732, 753, 1266, 1652, 1772, 
1814, 1649, 1191, 1298, 986, 1296, 1066, 777, 1041, 1388, 1289, 
1097, 1356, 1238, 1732, 1109, 1104, 1155, 1334, 1094, 770, 1411, 
1304, 1269, 1093, 1096, 1121, 943, 695, 1792, 2033, 1586, 768, 
685, 993, 1406, 1246, 1746, 1740, 938, 160, 1641, 1373, 1023, 
1173, 1611, 928, 1038, 1009, 1274, 1369, 1231, 1053, 1163, 880, 
870, 1131, 882, 1143, 632, 394, 510, 543, 535, 824, 874, 591, 
512, 448, 247, 452, 470, 747, 545, 639, 326, 414, 604, 640, 458, 
272, 524, 589, 666, 217, 215, 348, 537, 466), Production = c(794, 
1400, 1505, 1055, 1396, 1331, 1461, 1623, 1513, 1667, 1737, 1264, 
1722, 1587, 2094, 1363, 2007, 1899, 1749, 1693, 1748, 1455, 2078, 
1702, 1736, 1885, 860, 1372, 1716, 1290, 1347, 1451, 1347, 1409, 
1203, 1235, 1397, 1557, 1406, 1451, 1704, 670, 1442, 1336, 1611, 
1401, 1749, 744, 1558, 1665, 1317, 41, 441, 1351, 1392, 1180, 
1447, 1265, 1485, 1494, 1543, 1581, 1575, 1597, 1191, 1386, 889, 
1002, 1573, 1380, 1346, 1243, 1009, 965, 1051, 905, 1094, 1194, 
891, 1033, 921, 880, 1135, 1058, 1171, 1022, 956, 880, 902, 983, 
1014, 945, 1021, 1058, 1191, 1139, 1292, 573, 1173, 514, 1292, 
1310, 1239, 41, 41, 1182, 1028, 1028, 1196, 1214, 1045, 256, 1451, 
1344, 1352, 1257, 1444, 786, 1369, 1185, 1262, 1025, 949, 1051, 
941, 727, 911, 951, 987, 1136, 884, 770, 959, 1102, 1109, 1098, 
988, 983, 1002, 904, 1147, 1149, 919, 1058, 1112, 479, 1028, 
1154, 1126, 1155, 1208, 536, 839, 1178, 1225, 539, 41, 862, 839, 
873)), row.names = c(NA, 160L), class = "data.frame")

# Add two helper columns so weekly observations line up with the ISO-8601
# calendar (which is what handles 53-week years such as 2020):
#   isoweek - "YYYYWW" rewritten as "YYYY-Www-1" (the trailing 1 is weekday 1)
#   date    - the calendar Date that ISOweek2date() returns for that string
Original.df <- mutate(
  Original.df,
  isoweek = stringr::str_replace(
    YearWeek,
    "^(\\d{4})(\\d{2})$",
    "\\1-W\\2-1"
  ),
  date = ISOweek::ISOweek2date(isoweek)
)

# Case 1 split: train on 2018-12-31 through 2021-11-22 (WK47) and hold out
# 2021-11-29 through 2021-12-27 as the test window.
Original.train.df <- filter(
  Original.df,
  date >= "2018-12-31" & date <= "2021-11-22"
)

Original.test.df <- filter(
  Original.df,
  date >= "2021-11-29" & date <= "2021-12-27"
)

# Test-window response: drop the label, regressor and date helper columns.
Shipment.Test.df <- as_tibble(
  dplyr::select(Original.test.df, -c(YearWeek, Production, date, isoweek))
)

# Training tsibble: add a yearweek index (Week.1) built from the ISO week of
# `date`, then drop the label and date helper columns.
Total.train.df <- Original.train.df %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  dplyr::select(-c(YearWeek, date, isoweek)) %>%
  as_tsibble(index = Week.1)

# Model 1: ARIMA with Fourier terms for Production, trained on the case 1
# data (through WK47, 2021-11-22).
# The Box-Cox lambda comes from the Guerrero feature. Each K in 1..25 is then
# fitted with a full (non-stepwise, non-approximated) ARIMA search, and the
# fit with the smallest AICc is kept.

lambda_production <- pull(
  features(Total.train.df, Production, features = guerrero),
  lambda_guerrero
)

bestfit.Prod.1.AICc <- Inf

for (K in seq_len(25)) {
  fit.Prod.1 <- model(
    Total.train.df,
    ARIMA(
      box_cox(Production, lambda_production) ~ fourier(K = K),
      stepwise = FALSE,
      approximation = FALSE
    )
  )

  # AICc of the candidate fit for this K.
  aicc.Prod.1 <- purrr::pluck(glance(fit.Prod.1), "AICc")

  if (aicc.Prod.1 < bestfit.Prod.1.AICc) {
    bestfit.Prod.1.AICc <- aicc.Prod.1
    bestfit.Prod.1 <- fit.Prod.1
    bestK.Prod.1 <- K
  }
}

# Show the winning K and the summary statistics of the winning fit.
bestK.Prod.1
glance(bestfit.Prod.1)

# Case 2 split: train on 2018-12-31 through 2021-12-27 (WK52) and hold out
# 2022-01-03 through 2022-01-17 as the test window.
Original.train.df_2 <- filter(
  Original.df,
  date >= "2018-12-31" & date <= "2021-12-27"
)

Original.test.df_2 <- filter(
  Original.df,
  date >= "2022-01-03" & date <= "2022-01-17"
)

# Test-window response: drop the label, regressor and date helper columns.
Shipment.Test.df_2 <- as_tibble(
  dplyr::select(Original.test.df_2, -c(YearWeek, Production, date, isoweek))
)

# Training tsibble: add a yearweek index (Week.1) built from the ISO week of
# `date`, then drop the label and date helper columns.
Total.train.df_2 <- Original.train.df_2 %>%
  mutate(Week.1 = yearweek(ISOweek::ISOweek(date))) %>%
  dplyr::select(-c(YearWeek, date, isoweek)) %>%
  as_tsibble(index = Week.1)


# Model 2: ARIMA with Fourier terms for Production, trained on the case 2
# data (through WK52, 2021-12-27).
# The Box-Cox lambda comes from the Guerrero feature on the case 2 training
# set. Each K in 1..25 is then fitted with a full (non-stepwise,
# non-approximated) ARIMA search, and the fit with the smallest AICc is kept.

lambda_production_2 <- Total.train.df_2 %>%
  features(Production, features = guerrero) %>%
  pull(lambda_guerrero)

bestfit.Prod.2.AICc <- Inf

for (K in seq_len(25)) {
  # Fit on the case 2 training set. The earlier version fitted on
  # Total.train.df, so the extended training range was never actually used.
  fit.Prod.2 <- Total.train.df_2 %>%
    model(
      ARIMA(
        box_cox(Production, lambda_production_2) ~ fourier(K = K),
        stepwise = FALSE,
        approximation = FALSE
      )
    )

  # AICc of the candidate fit for this K.
  aicc.Prod.2 <- purrr::pluck(glance(fit.Prod.2), "AICc")

  # Compare this model's own AICc with this model's running best. The earlier
  # version compared the model 1 objects here, so the best model 2 fit was
  # never tracked.
  if (aicc.Prod.2 < bestfit.Prod.2.AICc) {
    bestfit.Prod.2.AICc <- aicc.Prod.2
    bestfit.Prod.2 <- fit.Prod.2
    bestK.Prod.2 <- K
  }
}

# Show the winning K and the summary statistics of the winning fit.
bestK.Prod.2
glance(bestfit.Prod.2)

Model 2 above never finished executing; it is still running.

As you can see above, model 1 and model 2 are not meant to differ in anything other than the training data, so could you please let me know what I'm missing here? Thank you.

  • I get a Warning message: In sqrt(diag(best$var.coef)) : NaNs produced when running the second loop. You will have to investigate where and why this happens. – phiver Feb 02 '22 at 12:32
  • Could you please let me know whether the data you used contains any zeroes because I have edited the data two hours back to replace zeroes with a number and also used box_cox transformation on the model – Arvind Menon Feb 02 '22 at 12:41
  • Also I suppose this has to do with sample size because if I exclude first 5 weeks 2019 W01 to 2019 W05 and keep the training data from 2019 W06 to 2021 W52- then I could see the model gets completed. If I keep the sample size which has greater than 155 observations then the model is running indefinitely and never gets completed – Arvind Menon Feb 02 '22 at 12:46

0 Answers0