3

I have some data that I would like to summarise. I would like to summarise across all of the columns, holding the YEAR column fixed. i.e. For one variable I can do:

df %>% 
  group_by(LG1, YEAR) %>% 
  summarise(Freq = n())

However I would like to do it across each of the variable. The below does not work as I want since it is not grouping by the YEAR variable. I have tried to include group_by(across(all_of(c(vars, YEAR)))) %>% which returns an error.

vars <- c("LG1", "AA1", "FNB1", "RE1", "PE1", "LG2", "AA2", "FNB2", "RE2", "PE2", "LG3", "AA3", "FNB3", "RE3", "PE3")
df %>% 
  select(c(all_of(vars), "YEAR")) %>% 
  group_by(across()) %>% 
  summarise(Freq = n())

Expected output would be a data frame with the frequencies of each variable by year.

Data:

df <- structure(list(ï..N.QUESTIONAIRE = c(119L, 122L, 137L, 59L, 121L, 
19L, 50L, 40L, 124L, 108L, 26L, 193L, 94L, 27L, 49L, 82L, 149L, 
88L, 133L, 150L, 5L, 28L, 175L, 91L, 151L, 97L, 70L, 42L, 21L, 
155L), LG1 = c(4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 3L, 4L, 5L, 
4L, 4L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 5L, 5L, 5L, 3L, 
3L, 3L), AA1 = c(1L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 3L, 3L, 
1L, 1L, 3L, 2L, 1L, 1L, 3L, 1L, 3L, 1L, 2L, 3L, 2L, 2L, 2L, 2L, 
2L, 4L, 1L), FNB1 = c(4L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 
4L, 5L, 4L, 4L, 4L, 5L, 4L, 4L, 4L, 5L, 5L, 4L, 4L, 4L, 5L, 2L, 
5L, 4L, 3L, 4L), RE1 = c(2L, 3L, 1L, 2L, 1L, 3L, 3L, 2L, 1L, 
3L, 3L, 4L, 1L, 2L, 3L, 2L, 2L, 2L, 3L, 4L, 2L, 2L, 3L, 2L, 5L, 
3L, 1L, 2L, 2L, 3L), PE1 = c(5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 
4L, 5L, 4L, 5L, 4L, 4L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 
5L, 4L, 4L, 4L, 4L, 4L), LG2 = c(4L, 3L, 5L, 5L, 2L, 4L, 3L, 
3L, 4L, 3L, 2L, 5L, 3L, 3L, 2L, 5L, 5L, 5L, 4L, 4L, 1L, 5L, 2L, 
4L, 1L, 5L, 5L, 4L, 4L, 5L), AA2 = c(4L, 5L, 5L, 4L, 3L, 4L, 
5L, 3L, 5L, 4L, 5L, 5L, 5L, 2L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 
3L, 5L, 5L, 5L, 5L, 5L, 4L, 4L), FNB2 = c(1L, 2L, 1L, 2L, 3L, 
1L, 3L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 2L, 3L, 2L, 2L, 1L, 2L, 1L, 
2L, 2L, 3L, 5L, 1L, 3L, 3L, 2L, 1L), RE2 = c(4L, 3L, 3L, 3L, 
3L, 4L, 3L, 3L, 4L, 3L, 2L, 5L, 4L, 3L, 4L, 4L, 5L, 3L, 2L, 2L, 
4L, 2L, 4L, 1L, 5L, 5L, 4L, 1L, 3L, 4L), PE2 = c(2L, 4L, 1L, 
3L, 3L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 4L, 
1L, 1L, 2L, 2L, 4L, 1L, 1L, 2L, 4L, 1L, 1L), LG3 = c(4L, 3L, 
3L, 4L, 2L, 4L, 4L, 2L, 5L, 3L, 3L, 4L, 4L, 2L, 4L, 3L, 3L, 4L, 
4L, 3L, 5L, 4L, 4L, 2L, 5L, 5L, 3L, 4L, 5L, 4L), AA3 = c(1L, 
3L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 1L, 
3L, 2L, 3L, 1L, 1L, 4L, 2L, 4L, 4L, 1L, 1L, 3L, 2L), FNB3 = c(5L, 
5L, 5L, 5L, 5L, 2L, 4L, 4L, 5L, 4L, 5L, 5L, 4L, 4L, 5L, 5L, 5L, 
4L, 5L, 5L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), RE3 = c(2L, 
2L, 2L, 2L, 3L, 4L, 4L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 3L, 2L, 1L, 
4L, 4L, 1L, 3L, 1L, 1L, 3L, 5L, 1L, 2L, 4L, 3L, 2L), PE3 = c(5L, 
3L, 4L, 4L, 4L, 4L, 3L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 
4L, 4L, 4L, 5L, 4L, 4L, 3L, 5L, 5L, 4L, 3L, 4L, 3L), YEAR = c(2L, 
2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 
1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L), NATIONALITY = c(2L, 
2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 1L, 2L, 1L, 1L, 
3L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 2L, 3L, 1L), GENDER = c("F", 
"F", "M", "M", "F", "M", "M", "F", "F", "M", "F", "M", "M", "F", 
"M", "F", "F", "F", "M", "F", "M", "F", "M", "M", "F", "M", "M", 
"F", "M", "F"), AGE = c(1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 
1L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 1L, 2L, 1L, 2L, 2L, 3L, 2L, 3L, 
2L, 2L, 1L, 2L)), class = "data.frame", row.names = c(NA, -30L
))
user113156
  • 6,761
  • 5
  • 35
  • 81

2 Answers2

5

We can use across in group_by to include all of vars columns along with YEAR.

library(dplyr)

df %>%  group_by(across(c(all_of(vars), "YEAR"))) %>% summarise(Freq = n())

Also we can use count here -

df %>% count(across(c(all_of(vars), "YEAR")))

If you wanted to count for each value in vars you can use map.

purrr::map(vars, ~df %>% count(YEAR, .data[[.x]]))
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
  • Thanks! But do we need to drop the `Freq = ` part? it creates an extra column and I assumed the frequencies would be displayed under each of the columns. – user113156 Jun 13 '21 at 12:32
  • Were you expecting something like this as output? `purrr::map(vars, ~df %>% count(YEAR, .data[[.x]]))` – Ronak Shah Jun 13 '21 at 12:34
  • I was expecting to compute the number of frequencies for each of the variables for both YEARS, so in year 1 have the frequency and in year 2 have the frequencies. I think the `purrrr` approach is giving me what I want. – user113156 Jun 13 '21 at 12:35
3

We can also specify YEAR as unquoted in count

library(dplyr)
df %>%
     count(across(c(all_of(vars), YEAR)))

Or another option is to to convert to symbols and then evaluate (!!!

df %>%
     group_by(!!! rlang::syms(vars), YEAR) %>%
     summarise(Freq = n(), .groups = 'drop')

Similar option in count

df %>%
     count(!!! rlang::syms(vars), YEAR)
akrun
  • 874,273
  • 37
  • 540
  • 662