0

I am able to standardize each observation of the variable "len" using the mean and standard deviation calculated from only the prior observations. However, I am unable to attach the result as a column to my data frame 'tg'. Can you please help me with this?

This is my data and the code below

tg<-structure(list(len = c(4.2, 11.5, 7.3, 5.8, 6.4, 10, 11.2, 11.2, 
                           5.2, 7, 16.5, 16.5, 15.2, 17.3, 22.5, 17.3, 13.6, 14.5, 18.8, 
                           15.5), supp = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("OJ", 
                                                                                                             "VC"), class = "factor"), dose = c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 

# Each iteration prints one standardized value (and assigns it to `a` inside
# print()), but a `for` loop itself evaluates to NULL, so the outer `a<-`
# leaves `a` as NULL once the loop has finished.
a<-for (i in 1:20) print((a=(tg$len[i]-mean(tg$len[1:i]))/sd(tg$len[1:i])))
a

How do I merge 'a' to dataframe 'tg'?

Mark
  • 7,785
  • 2
  • 14
  • 34
Es003
  • 21
  • 3

2 Answers

1

Since you're only manipulating the 'len' column, even though the data you provided has been cut off at the end, it doesn't matter.

# Reproducible copy of the `len` column from the question as a 20-row
# data frame (the other columns are not needed for this calculation).
tg <- data.frame(
  len = c(
    4.2, 11.5, 7.3, 5.8, 6.4, 10, 11.2, 11.2, 5.2, 7,
    16.5, 16.5, 15.2, 17.3, 22.5, 17.3, 13.6, 14.5, 18.8, 15.5
  )
)

library(cgwtools)
library(tidyverse)

# Standardize each `len` by the running mean and running standard deviation
# of the observations up to and including it. Row 1 comes out as NA because
# sd() of a single value is undefined.
mutate(
  tg,
  a = (len - cummean(len)) / as.numeric(cumfun(len, FUN = sd))
)

    len          a
1   4.2         NA
2  11.5  0.7071068
3   7.3 -0.1000786
4   5.8 -0.4467580
5   6.4 -0.2338041
6  10.0  0.9034338
7  11.2  1.1020545
8  11.2  0.9600072
9   5.2 -0.9995251
10  7.0 -0.3568003
11 16.5  2.1167875
12 16.5  1.7134116
13 15.2  1.2505815
14 17.3  1.5144804
15 22.5  2.0939917
16 17.3  1.0537874
17 13.6  0.3614600
18 14.5  0.5132783
19 18.8  1.2491686
20 15.5  0.6024895
Mark
  • 7,785
  • 2
  • 14
  • 34
1

You could use existing code from here to compute the results:

# Attach the standardized values as column `a`: each `len` minus the running
# mean, divided by the running standard deviation (second argument TRUE asks
# cumvar_cpp for the sd rather than the variance).
tg$a <- with(tg, (len - cummean(len)) / cumvar_cpp(len, TRUE))
tg

   len           a
1   4.2         NA
2  11.5  0.7071068
3   7.3 -0.1000786
4   5.8 -0.4467580
5   6.4 -0.2338041
6  10.0  0.9034338
7  11.2  1.1020545
8  11.2  0.9600072
9   5.2 -0.9995251
10  7.0 -0.3568003
11 16.5  2.1167875
12 16.5  1.7134116
13 15.2  1.2505815
14 17.3  1.5144804
15 22.5  2.0939917
16 17.3  1.0537874
17 13.6  0.3614600
18 14.5  0.5132783
19 18.8  1.2491686
20 15.5  0.6024895

Copying the code from the site given:

# Running mean of `x`: element i is the mean of x[1], ..., x[i].
cummean <- function(x) {
  running_total <- cumsum(x)
  running_total / seq_along(x)
}

# Cumulative sample variance (or standard deviation) of a numeric vector,
# compiled from C++ through Rcpp.
#
# Arguments:
#   x   numeric vector.
#   sd  if TRUE return running standard deviations, otherwise variances.
#
# Returns a numeric vector the same length as `x`; element i is the sample
# variance (n - 1 denominator) or standard deviation of x[1], ..., x[i].
# The first element is NA, as for sd() and var() of a single value, and an
# empty input gives an empty result.
#
# The data are shifted by the first observation before the sums are
# accumulated. This keeps the one-pass formula numerically stable without
# touching the C random number generator, so results are reproducible.
Rcpp::cppFunction('NumericVector cumvar_cpp(NumericVector x, bool sd) {
  int n = x.size();
  NumericVector v(n);
  if (n == 0) return v;               // nothing to accumulate

  // Deterministic shift: any value near the data works, the first is free.
  double pivot = x[0];
  double sum = 0.0, sum2 = 0.0;       // x[0] - pivot == 0 adds nothing

  // One observation has no sample variance.
  v[0] = NA_REAL;

  for (int i = 1; i < n; i++) {
    double xi = x[i] - pivot;
    sum += xi;
    sum2 += xi * xi;
    // i + 1 observations so far, hence the (i + 1) - 1 == i denominator.
    double vi = (sum2 - (sum * sum) / (i + 1)) / i;
    // Rounding can push a near-zero variance slightly below zero.
    if (vi < 0.0) vi = 0.0;
    if (sd) vi = sqrt(vi);
    v[i] = vi;
  }
  return v;
  }')
Onyambu
  • 67,392
  • 3
  • 24
  • 53