4

For a matrix with three columns .

     ID     t      res
     1      1     -1.5
     1      2     -1.5
     1      3      0.5
     1      4      0.5
     2      1     -0.5
     2      2     -0.5
     2      3     -2.0
     2      4     -1.5
     2      5      1.5

My goal is to sum the values of column res by ID like this.

   (-1.5*(-1.5+0.5+0.5)     - 1.5*(0.5+0.5)      + 0.5*(0.5))/(4-1) +
   (-0.5*(-0.5-2.0-1.5+1.5) - 0.5*(-2.0-1.5+1.5) - 2.0*(-1.5+1.5)  -1.5*(1.5))/(5-1) 
   =  -0.167 

Any suggestions on how to sum and divide this by groups is much appreciated.

M--
  • 25,431
  • 8
  • 61
  • 93

3 Answers3

3
library(dplyr)

df1 %>% 
  group_by(ID) %>% 
  arrange(ID,  desc(t)) %>% 
  mutate(hlc = cumsum(lag(res, default =  0)) * res / (n()-1) ) %>% 
  pull(hlc) %>% sum()

#> [1] -0.1666667

Data:

read.table(text = "     ID     t      res
     1      1     -1.5
     1      2     -1.5
     1      3      0.5
     1      4      0.5
     2      1     -0.5
     2      2     -0.5
     2      3     -2.0
     2      4     -1.5
     2      5      1.5", stringsAsFactors = F, header = T) -> df1
M--
  • 25,431
  • 8
  • 61
  • 93
2

Here is a base R solution.

df1 <- "ID     t      res
     1      1     -1.5
     1      2     -1.5
     1      3      0.5
     1      4      0.5
     2      1     -0.5
     2      2     -0.5
     2      3     -2.0
     2      4     -1.5
     2      5      1.5"
df1 <- read.table(text = df1, header = TRUE)


out <- tapply(df1$res, df1$ID, FUN = \(x) {
  sum(sapply(seq_along(x), \(i) x[i]*sum(x[-(1:i)]))) / (length(x) - 1)
})
out <- sum(out)
out
#> [1] -0.1666667

Created on 2023-02-25 with reprex v2.0.2

Rui Barradas
  • 70,273
  • 8
  • 34
  • 66
1

Using data.table

library(data.table)
 setDT(df1)[order(ID, -t),  hlc := cumsum(shift(res, fill = 0)) * res/(.N-1), ID]

-output

> sum(df1$hlc)
[1] -0.1666667
akrun
  • 874,273
  • 37
  • 540
  • 662