1
structure(list(Score = c(2, 5, 1, 36, 69, 8, 54, 25, 2, 2, 2, 
5, 5, 4, 1), ID = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 
2)), .Names = c("Score", "ID"), row.names = c(NA, -15L), class = 
"data.frame")

Suppose I take group_by (window=3), rolling average for above data.

new %>% group_by(ID) %>% mutate(roll_mean = rollapply(ID,3,mean,align ='right', 
                                                             fill=NA))

Output of above statement is,

structure(list(Score = c(2, 5, 1, 36, 69, 8, 54, 25, 2, 2, 2, 
5, 5, 4, 1), ID = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 
2), roll_mean = c(NA, NA, 1, 1, 1, 1, 1, NA, NA, 2, 2, 2, 2, 
2, 2)), .Names = c("Score", "ID", "roll_mean"), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -15L), vars = "ID", labels 
= structure(list(
ID = c(1, 2)), row.names = c(NA, -2L), class = "data.frame", vars = "ID", 
drop = TRUE, .Names = "ID"), indices = list(
0:6, 7:14), drop = TRUE, group_sizes = 7:8, biggest_group_size = 8L)

I want to fill in NA, by rolling mean of whatever window size is available. I want output as:

structure(list(Score = c(2L, 5L, 1L, 36L, 69L, 8L, 54L, 25L, 
2L, 2L, 2L, 5L, 5L, 4L, 1L), ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Roll3 = c(2, 3.5, 2.666666667, 
14, 35.33333333, 37.66666667, 43.66666667, 25, 13.5, 9.666666667, 
2, 3, 4, 4.666666667, 3.333333333)), .Names = c("Score", "ID", 
"Roll3"), class = "data.frame", row.names = c(NA, -15L))

[Note: In my original data, I have 20 million observations. If above operation can be performed using data.table, that would be really great]

Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
SriM
  • 11
  • 4

0 Answers0