1

I've got this DF:

structure(list(Date = structure(c(18605, 18604, 18598, 18597, 
18590, 18584, 18583, 18578, 18570, 18569, 18563, 18562, 18557, 
18549, 18548, 18542, 18541, 18536, 18534, 18529, 18521, 18520, 
18515, 18508, 18500, 18499, 18493, 18492, 18486, 18485, 18479, 
18478, 18472, 18471, 18465, 18464, 18458, 18457, 18450, 18445, 
18444, 18437, 18436, 18430, 18429, 18424, 18416, 18415, 18410, 
18409, 18403, 18402, 18396, 18388, 18387, 18381, 18380, 18374, 
18373, 18368, 18367, 18360, 18359, 18354, 18340, 18338, 18331, 
18325, 18317, 18312, 18289, 18282, 18275, 18268), class = "Date"), 
    V1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0.3, 0, 0, 0, 0, 0.4, 0, 0, 0, 0, 0.2, 0, 0, 0, 0, 0.7, 0, 
    0, 0, 0, 0, 0.5, 0, 0, 0, 0, 0.3, 0, 0, 0, 0, 0, 0.4, 0, 
    0, 0, 0.3, 0, 0, 0, 0, 0, 0, 0, 0, 0.6, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0), V2 = c(0, 0, 0.1, 0, 0, 0.1, 0, 0.2, 0, 0.2, 
    0.1, 0, 0.2, 0.2, 0, 0.1, 0, 0, 0.1, 0, 0.2, 0, 0, 0.4, 0.2, 
    0, 0.3, 0, 0.2, 0, 0.3, 0, 0.6, 0, 0.4, 0, 0, 0.2, 0, 0.4, 
    0.6, 0, 0.3, 0, 0.2, 0.7, 0, 0.1, 0.3, 0, 0.2, 0, 0, 0, 0.3, 
    0, 0.1, 0.3, 0, 0, 0.3, 0.2, 0, 0, 0, 0, 0.6, 0, 0.4, 0, 
    0.2, 0, 0, 0.2), V3 = c(0, 0.3, 0, 0.3, 0.4, 0, 0.2, 0, 0.3, 
    0, 0, 0.2, 0, 0, 0.2, 0, 0.2, 0, 0, 0.1, 0, 0.2, 0, 0, 0, 
    0.3, 0, 0, 0, 0.4, 0, 0.3, 0, 0.7, 0, 0.2, 0.5, 0.4, 0, 0.4, 
    0, 0.8, 0.4, 0, 0.2, 0.6, 0.3, 0.2, 0, 0, 0, 0.4, 0.4, 0, 
    0.2, 0.3, 0, 0.2, 0.3, 0.4, 0, 0.7, 0, 0, 1.4, 0, 0, 1.4, 
    0, 1, 0, 0, 0.3, 0), V4 = c(0, 0.4, 0, 0.1, 0.1, 0, 0.1, 
    0, 0, 0.1, 0, 0.1, 0.2, 0, 0.2, 0, 0.2, 0.3, 0, 0, 0, 0.2, 
    0.3, 0.3, 0, 0, 0, 0.5, 0, 0.6, 0, 0.7, 0, 0, 0, 1.2, 1, 
    0, 0.3, 0, 1.1, 0, 0, 0.4, 0, 0, 0, 0, 0.2, 0.2, 0, 0, 0.2, 
    0, 0, 0.1, 0, 0, 0, 0.2, 0.3, 0, 0.2, 0.3, 0, 1.8, 0, 0, 
    0, 0, 0, 0.2, 0, 0)), row.names = c(NA, -74L), class = c("tbl_df", 
"tbl", "data.frame"))

I'd like to mutate columns V1, V2, V3 and V4 so that, instead of the current values posted here, each column shows the differences from its own mean. For example, the mean of V4 is 0.1635135, so the second value of V4 (0.4) should become 0.4 - 0.1635135 = 0.2364865.

I've tried doing it piecemeal (one column at a time) with the following, but I keep getting errors:

# NOTE(review): this attempt fails as written. The parentheses are unbalanced
# (the call to mutate( is never closed), `x` is not bound to anything, and
# mean() is called with no argument.
df <- df %>% mutate(across(2, x - mean())

How can I finish this?

halfer
  • 19,824
  • 17
  • 99
  • 186
alec22
  • 735
  • 2
  • 12

2 Answers

1

Solution 1: Use a purrr-style function in across()

# Subtract each column's own mean from every value in V1 through V4.
# Inside the lambda, .x is the column currently being processed by across().
df %>%
  mutate(across(V1:V4, ~ .x - mean(.x)))

# # A tibble: 74 × 5
#    Date          V1      V2       V3      V4
#    <date>     <dbl>   <dbl>    <dbl>   <dbl>
#  1 2020-12-09 -0.05 -0.128  -0.204   -0.164 
#  2 2020-12-08 -0.05 -0.128   0.0959   0.236 
#  3 2020-12-02 -0.05 -0.0284 -0.204   -0.164 
#  4 2020-12-01 -0.05 -0.128   0.0959  -0.0635
#  5 2020-11-24 -0.05 -0.128   0.196   -0.0635
# ...

Solution 2: Select variables with across() and pass them to scale(x, scale = FALSE)

# across(V1:V4) with no function simply selects those columns.
# scale(..., scale = FALSE) centers each column on its mean without dividing
# by the standard deviation. as_tibble() converts the result back to a tibble,
# whose columns (V1..V4) then replace the originals inside mutate().
df %>%
  mutate(as_tibble(scale(across(V1:V4), scale = FALSE)))

# # A tibble: 74 × 5
#    Date          V1      V2       V3      V4
#    <date>     <dbl>   <dbl>    <dbl>   <dbl>
#  1 2020-12-09 -0.05 -0.128  -0.204   -0.164 
#  2 2020-12-08 -0.05 -0.128   0.0959   0.236 
#  3 2020-12-02 -0.05 -0.0284 -0.204   -0.164 
#  4 2020-12-01 -0.05 -0.128   0.0959  -0.0635
#  5 2020-11-24 -0.05 -0.128   0.196   -0.0635
# ...
Darren Tsai
  • 32,117
  • 5
  • 21
  • 51
0

Please try the code below:

# Center every column whose name starts with "V" on its own mean.
dat2 <- dat %>%
  mutate(
    across(starts_with("V"), ~ .x - mean(.x))
  )
jkatam
  • 2,691
  • 1
  • 4
  • 12