2

I've got a data like below:

df <- structure(list(x1 = c(0.544341260178568, 0.412555423655238, -0.013600925280521, 
-0.947831642260442, -0.705819557090343, -0.440052278478676, 0.583360907624305, 
-0.548217106316072, -0.381271093402877, 1.66078031000975), x2 = c(-2.17595468838955, 
3.73045998213455, 7.88166053118859, 0.295257601073637, -0.503260811313588, 
0.118118179398699, 3.77037347523743, 2.92758197923041, 3.40618904087335, 
1.45012335878481), x3 = c(14.1085074738418, 9.46630939737492, 
7.30026032988652, 10.1473062197382, 11.0336174184083, 7.09744336163716, 
16.6871358354018, 13.5030856142587, 14.8384334167838, 1.82381360524456
), x4 = c(-2.78166486821977, -3.14368874900826, -3.70425316743753, 
-4.34268218961615, -3.03557313652054, -2.74059520574829, -4.10826186695405, 
-1.97243713944283, -3.88803755426516, -2.56315085425652), x5 = c(-0.279614449281486, 
-0.480466773938402, -1.43353886424161, 0.286937906279445, 0.701999608919316, 
0.591932833840325, 0.994266002713824, 1.03424778687263, 0.462618513817936, 
-3.08491622131441)), row.names = c(NA, -10L), class = c("tbl_df", 
"tbl", "data.frame"))

Now I want to create columns that are sums, products and differences of each pair of column. With sums it went easy:

combn(df, 2, function(x) {
  x %>% transmute(!!paste(names(.), collapse = '+') := rowSums(.))}, 
simplify = FALSE)

But now I need to calculate products and differences. As there are no equivalent to rowSums for difference or product my approach fails here. I was thinking about something like

combn(df, 2, function(x) {
  x %>% transmute(!!paste(names(.), collapse = '-') := apply(., 1, `-`)}, 
simplify = FALSE)

but it doesn't work.

jakes
  • 1,964
  • 3
  • 18
  • 50

1 Answers1

1

Here's one "tidy" approach. It relies on converting the data to a long format where each row in your original df gets assigned an id and the columns are gathered.

This allows us to do a full join of the data frame with itself. That way you get all pairwise combinations of your columns. Once in this format, applying the sums, products and differences becomes really easy.

Update: Reformat output

library(tidyverse)
df <-
  structure(
    list(
      x1 = c(
        0.544341260178568,
        0.412555423655238,
        -0.013600925280521,-0.947831642260442,
        -0.705819557090343,
        -0.440052278478676,
        0.583360907624305,-0.548217106316072,
        -0.381271093402877,
        1.66078031000975
      ),
      x2 = c(
        -2.17595468838955,
        3.73045998213455,
        7.88166053118859,
        0.295257601073637,
        -0.503260811313588,
        0.118118179398699,
        3.77037347523743,
        2.92758197923041,
        3.40618904087335,
        1.45012335878481
      ),
      x3 = c(
        14.1085074738418,
        9.46630939737492,
        7.30026032988652,
        10.1473062197382,
        11.0336174184083,
        7.09744336163716,
        16.6871358354018,
        13.5030856142587,
        14.8384334167838,
        1.82381360524456
      ),
      x4 = c(
        -2.78166486821977,
        -3.14368874900826,
        -3.70425316743753,-4.34268218961615,
        -3.03557313652054,
        -2.74059520574829,
        -4.10826186695405,-1.97243713944283,
        -3.88803755426516,
        -2.56315085425652
      ),
      x5 = c(
        -0.279614449281486,-0.480466773938402,
        -1.43353886424161,
        0.286937906279445,
        0.701999608919316,
        0.591932833840325,
        0.994266002713824,
        1.03424778687263,
        0.462618513817936,-3.08491622131441
      )
    ),
    row.names = c(NA,-10L),
    class = c("tbl_df",
              "tbl", "data.frame")
  )

# Add an id for each observation and covert to long format
df_wrangled <- df %>%
  mutate(id = 1:n()) %>%
  gather(col, val, -id)

pairs <- full_join(df_wrangled, df_wrangled, by = "id") %>%
  mutate(
    sum = val.x + val.y,
    prod = val.x * val.y,
    diff = val.x - val.y
  )

head(pairs)
#> # A tibble: 6 x 8
#>      id col.x val.x col.y   val.y     sum   prod    diff
#>   <int> <chr> <dbl> <chr>   <dbl>   <dbl>  <dbl>   <dbl>
#> 1     1 x1    0.544 x1      0.544   1.09   0.296   0    
#> 2     1 x1    0.544 x2     -2.18   -1.63  -1.18    2.72 
#> 3     1 x1    0.544 x3     14.1    14.7    7.68  -13.6  
#> 4     1 x1    0.544 x4     -2.78   -2.24  -1.51    3.33 
#> 5     1 x1    0.544 x5     -0.280   0.265 -0.152   0.824
#> 6     2 x1    0.413 x1      0.413   0.825  0.170   0

pairs_wrangled <- pairs %>%
  filter(col.x != col.y) %>%
  gather(operation, val, sum, prod, diff) %>%
  mutate(
    label = paste0(
      col.x,
      case_when(operation == "sum" ~ "+", operation == "diff" ~ "-", operation == "prod" ~ "*"),
      col.y
    )
  ) %>%
  select(id, label, val) %>%
  spread(label, val)

head(pairs_wrangled)
#> # A tibble: 6 x 61
#>      id `x1-x2` `x1-x3` `x1-x4` `x1-x5` `x1*x2` `x1*x3` `x1*x4` `x1*x5`
#>   <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1     1   2.72   -13.6     3.33   0.824 -1.18    7.68   -1.51   -0.152 
#> 2     2  -3.32    -9.05    3.56   0.893  1.54    3.91   -1.30   -0.198 
#> 3     3  -7.90    -7.31    3.69   1.42  -0.107  -0.0993  0.0504  0.0195
#> 4     4  -1.24   -11.1     3.39  -1.23  -0.280  -9.62    4.12   -0.272 
#> 5     5  -0.203  -11.7     2.33  -1.41   0.355  -7.79    2.14   -0.495 
#> 6     6  -0.558   -7.54    2.30  -1.03  -0.0520 -3.12    1.21   -0.260 
#> # … with 52 more variables: `x1+x2` <dbl>, `x1+x3` <dbl>, `x1+x4` <dbl>,
#> #   `x1+x5` <dbl>, `x2-x1` <dbl>, `x2-x3` <dbl>, `x2-x4` <dbl>,
#> #   `x2-x5` <dbl>, `x2*x1` <dbl>, `x2*x3` <dbl>, `x2*x4` <dbl>,
#> #   `x2*x5` <dbl>, `x2+x1` <dbl>, `x2+x3` <dbl>, `x2+x4` <dbl>,
#> #   `x2+x5` <dbl>, `x3-x1` <dbl>, `x3-x2` <dbl>, `x3-x4` <dbl>,
#> #   `x3-x5` <dbl>, `x3*x1` <dbl>, `x3*x2` <dbl>, `x3*x4` <dbl>,
#> #   `x3*x5` <dbl>, `x3+x1` <dbl>, `x3+x2` <dbl>, `x3+x4` <dbl>,
#> #   `x3+x5` <dbl>, `x4-x1` <dbl>, `x4-x2` <dbl>, `x4-x3` <dbl>,
#> #   `x4-x5` <dbl>, `x4*x1` <dbl>, `x4*x2` <dbl>, `x4*x3` <dbl>,
#> #   `x4*x5` <dbl>, `x4+x1` <dbl>, `x4+x2` <dbl>, `x4+x3` <dbl>,
#> #   `x4+x5` <dbl>, `x5-x1` <dbl>, `x5-x2` <dbl>, `x5-x3` <dbl>,
#> #   `x5-x4` <dbl>, `x5*x1` <dbl>, `x5*x2` <dbl>, `x5*x3` <dbl>,
#> #   `x5*x4` <dbl>, `x5+x1` <dbl>, `x5+x2` <dbl>, `x5+x3` <dbl>,
#> #   `x5+x4` <dbl>

Created on 2019-04-02 by the reprex package (v0.2.1)

Clemens Hug
  • 477
  • 4
  • 8
  • Neat, but no exactly what I was asking for. You can have a sense of the expected output by calling the working code for sum. – jakes Apr 02 '19 at 07:28
  • The values are identical as far as I can tell. I added a bit of code to make the format more similar to your example. If you really want every column in a separate list element you can split up the data frame. – Clemens Hug Apr 02 '19 at 14:01