I want to replicate the below formula R using dplyr
+ lag function. The code works till the 2nd row of each group and then onward gives me 0s
forecast = lag(value,1)*(1-lag(Attrition)/52)
Conditions:
- the first value for forecast should be empty as we already have the Value.
- second row calculates from the previous values of Attrition and Value columns.
- third row onward the previous values should be picked from forecast(not Value column) and attrition columns respectively.
I am getting 0's from 3rd row onward. Below is my code for reproducing.
data <- data %>% group_by(Patch) %>% mutate(id = row_number())
data <- data %>% group_by(Patch) %>% mutate(forecast = lag(Value,1)*(1-lag(Attrition,1)/52))
tbl_df(data)
# A tibble: 12 x 6
Patch Week Value Attrition id forecast
<chr> <date> <dbl> <dbl> <int> <dbl>
1 11P11 2021-06-14 2 0.075 1 NA
2 11P11 2021-06-21 0 0.075 2 2.00
3 11P11 2021-06-28 0 0.075 3 0
4 11P12 2021-06-14 3 0.075 1 NA
5 11P12 2021-06-21 0 0.075 2 3.00
6 11P12 2021-06-28 0 0.075 3 0
7 11P12 2021-07-05 0 0.075 4 0
8 11P13 2021-06-14 1 0.075 1 NA
9 11P13 2021-06-21 0 0.075 2 0.999
10 11P13 2021-06-28 0 0.075 3 0
11 11P13 2021-07-05 0 0.075 4 0
12 11P13 2021-07-12 0 0.075 5 0
> dput(data)
structure(list(Patch = c("11P11", "11P11", "11P11", "11P12",
"11P12", "11P12", "11P12", "11P13", "11P13", "11P13", "11P13",
"11P13"), Week = structure(c(18792, 18799, 18806, 18792, 18799,
18806, 18813, 18792, 18799, 18806, 18813, 18820), class = "Date"),
Value = c(2, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0), Attrition = c(0.075,
0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075,
0.075, 0.075), id = c(1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 5L), forecast = c(NA, 1.99711538461538, 0, NA, 2.99567307692308,
0, 0, NA, 0.998557692307692, 0, 0, 0)), row.names = c(NA,
-12L), groups = structure(list(Patch = c("11P11", "11P12", "11P13"
), .rows = structure(list(1:3, 4:7, 8:12), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))