2

I'd like to create a new data table which is the sum across rows from variables which contain a string. I have been trying to keep this within the tidyverse as a noob using new dplyr across. Help much appreciated.

dat<- data.frame("Image" = c(1,2,3,4), 
                 "A" = c(1,2,3,4),
                 "A:B"= c(5,6,7,8),
                 "A:B:C"= c(9,10,11,12))

to obtain the sums across the rows of variables containing "A", "B", or "C".

datsums<- data.frame("Image" = c(1,2,3,4),
                     "Asum"= c(15,18,21,24),
                     "Bsum"=c(14,16,18,20),
                     "Csum"=c(9,10,11,12))

I have been unsuccessful using the newer dplyr verbs:

datsums<- dat %>% summarise(across(str_detect("A")), sum, .names ="Asum",
across(str_detect("B")), sum, .names="Bsum",
across(str_detect("C")), sum, .names"Csum")
JMonk
  • 71
  • 5

3 Answers3

1

use rowwise and c_across:

library(tidyverse)

dat %>% 
  rowwise() %>% 
  summarise(
    Asum = sum(c_across(contains("A"))),
    Bsum = sum(c_across(contains("B"))),
    Csum = sum(c_across(contains("C")))
  )

Returns:

`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 4 x 3
   Asum  Bsum  Csum
  <dbl> <dbl> <dbl>
1    16    14     9
2    20    16    10
3    24    18    11
4    28    20    12

To add columns to the original data.frame, use mutate instead of summarise:

dat %>% 
  rowwise() %>% 
  mutate(
    Asum = sum(c_across(contains("A"))),
    Bsum = sum(c_across(contains("B"))),
    Csum = sum(c_across(contains("C")))
  )
# A tibble: 4 x 7
# Rowwise: 
  Image     A   A.B A.B.C  Asum  Bsum  Csum
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     1     5     9    16    14     9
2     2     2     6    10    20    16    10
3     3     3     7    11    24    18    11
4     4     4     8    12    28    20    12
Rich Pauloo
  • 7,734
  • 4
  • 37
  • 69
0

Since you want row-wise sum you could use :

library(dplyr)

dat %>% 
  transmute(Asum = rowSums(select(., contains('A', ignore.case = FALSE))), 
            Bsum = rowSums(select(., contains('B', ignore.case = FALSE))), 
            Csum = rowSums(select(., contains('C', ignore.case = FALSE))))

Or for many variables use :

cols <- c('A', 'B', 'C')
purrr::map_dfc(cols, ~dat %>% 
               transmute(!!paste0(.x, 'sum') := 
                  rowSums(select(., contains(.x, ignore.case = FALSE)))))

#  Asum Bsum Csum
#1   15   14    9
#2   18   16   10
#3   21   18   11
#4   24   20   12
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
0

use pivot_longer and pivot_wider

library(tidyverse)

dat %>% 
  pivot_longer(-Image) %>% 
  separate_rows(name, sep = "\\.") %>% 
  pivot_wider(Image,
              names_from = name,
              values_from = value, 
              values_fn = sum, 
              names_prefix = "sum")
#> # A tibble: 4 x 4
#>   Image  sumA  sumB  sumC
#>   <dbl> <dbl> <dbl> <dbl>
#> 1     1    15    14     9
#> 2     2    18    16    10
#> 3     3    21    18    11
#> 4     4    24    20    12

Created on 2020-12-07 by the reprex package (v0.3.0)

Yuriy Saraykin
  • 8,390
  • 1
  • 7
  • 14