Calculate pairwise variable-differences in a data.frame/tibble

Question

I have the following dataframe consisting of different variables (A, X, L) that were measured at two different times (t1 and t2). Of course, in the real data there are many more variables (just for the sake of the example)

n <- 100

df <- data.frame(A_t1 = runif(n, min = 1, max = 5),
                 A_t2 = runif(n, min = 1, max = 5),
                 X_t1 = runif(n, min = 1, max = 5),
                 X_t2 = runif(n, min = 1, max = 5),
                 L_t1 = runif(n, min = 1, max = 5),
                 L_t2 = runif(n, min = 1, max = 5)
) %>% pivot_longer(cols = starts_with(c("A", "X", "L")))

> df
# A tibble: 600 × 2
   name  value
   <chr> <dbl>
 1 A_t1   4.15
 2 A_t2   2.20
 3 X_t1   4.49
 4 X_t2   3.84
 5 L_t1   1.94
 6 L_t2   1.75
 7 A_t1   3.16
 8 A_t2   1.71
 9 X_t1   2.20
10 X_t2   4.47

What's the easiest way to compute the difference (A_delta, X_delta, L_delta) between t2 and t1 for each variable? (It can be either a long or in a wide format, but not absolute differences). I have been thinking about using group_by() and then lag(), but this seems not to be a very elegant solution.

Thanks for help!

TarJae · Accepted Answer · 2023-01-19T17:30:01.833

Here is one way without pivoting:

library(dplyr)
library(stringr)
df %>%
  mutate(across(ends_with('_t2'), ~ . -
                  get(str_replace(cur_column(), "t2$", "t1")), .names = "diff_{.col}")) %>%
  rename_at(vars(starts_with('diff')), ~ str_remove(., "\\_t2"))

    A_t1  A_t2  X_t1  X_t2  L_t1  L_t2 diff_A diff_X  diff_L
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>   <dbl>
 1  1.87  3.07  2.81  2.00  4.25  2.92  1.20  -0.809 -1.33  
 2  1.67  3.33  4.93  4.70  1.59  3.06  1.66  -0.225  1.47  
 3  2.51  2.85  2.88  1.12  4.27  2.63  0.338 -1.75  -1.64  
 4  2.92  3.48  3.04  2.85  2.14  3.66  0.565 -0.189  1.53  
 5  1.58  4.76  3.82  2.55  3.52  3.63  3.18  -1.27   0.104 
 6  3.49  3.95  3.58  4.32  1.65  3.67  0.458  0.738  2.02  
 7  1.10  2.67  3.91  4.96  3.08  2.08  1.57   1.06  -0.999 
 8  3.58  1.33  1.55  2.25  4.38  4.47 -2.26   0.704  0.0950
 9  3.05  4.37  3.39  1.71  2.98  4.69  1.32  -1.68   1.71  
10  1.17  3.83  4.92  2.15  1.74  4.00  2.67  -2.77   2.26  
# … with 90 more rows
# ℹ Use `print(n = ...)` to see more rows

score 2 · Answer 2 · answered Jan 19 '23 at 08:57

Adapting this answer from @Mael to your case one option would be to use a bit of glue and rlang::parse_exprs() like so:

n <- 10
set.seed(123)

library(dplyr, warn =FALSE)
library(glue)

cols         <- c("A", "X", "L")
exprs        <- glue("{cols}_t2 - {cols}_t1")
names(exprs) <- glue("{cols}_delta")

df |> 
  mutate(!!!rlang::parse_exprs(exprs))
#>        A_t1     A_t2     X_t1     X_t2     L_t1     L_t2    A_delta    X_delta
#> 1  2.150310 4.827333 4.558157 4.852097 1.571200 1.183325  2.6770233  0.2939397
#> 2  4.153221 2.813337 3.771214 4.609196 2.658185 2.768800 -1.3398839  0.8379826
#> 3  2.635908 3.710283 3.562027 3.762821 2.654897 4.195699  1.0743749  0.2007939
#> 4  4.532070 3.290534 4.977079 4.181870 2.475382 1.487597 -1.2415360 -0.7952094
#> 5  4.761869 1.411699 3.622823 1.098455 1.609779 3.243792 -3.3501704 -2.5243685
#> 6  1.182226 4.599300 3.834122 2.911184 1.555224 1.826126  3.4170739 -0.9229380
#> 7  3.112422 1.984351 3.176264 4.033838 1.932136 1.510127 -1.1280710  0.8575741
#> 8  4.569676 1.168238 3.376568 1.865632 2.863850 4.013231 -3.4014380 -1.5109363
#> 9  3.205740 2.311683 2.156639 2.272724 2.063891 4.580181 -0.8940572  0.1160851
#> 10 2.826459 4.818015 1.588455 1.926503 4.431311 2.497851  1.9915557  0.3380486
#>       L_delta
#> 1  -0.3878754
#> 2   0.1106150
#> 3   1.5408021
#> 4  -0.9877848
#> 5   1.6340129
#> 6   0.2709013
#> 7  -0.4220098
#> 8   1.1493817
#> 9   2.5162909
#> 10 -1.9334598

zx8754 · Answer 3 · 2023-01-19T10:42:31.807

Using base, get the alternating columns and subtract:

cbind(df, df[, c(FALSE, TRUE)] - df[, c(TRUE, FALSE)])

A safer method, in case there are other columns:

#get column names
t1 <- grep("*_t1$", colnames(df), value = TRUE)
t2 <- grep("*_t2$", colnames(df), value = TRUE)

#substract
delta <- df[, t2] - df[, t1]

#pretty column names, and bind to original dataframe
colnames(delta) <- gsub("t.*", "delta", colnames(delta))
res <- cbind(df, delta)

Calculate pairwise variable-differences in a data.frame/tibble

3 Answers3