I solved my problem by creating a character vector with the names of the group-columns in it. In order to use it within group_by()
, I had to use the syntax !!!syms()
.
group_v <- names(df)[rev(seq(1,ncol(df))[!as.logical(sapply(df, is.numeric))])]
df = df %>% group_by(!!!syms(group_v)) %>%
mutate(PERC_CH = EUR / EUR[which.min(TIME)]-1
,
ABS_CH = EUR - EUR[which.min(TIME)]
)
Other equivalent solutions, thanks to @Tom Hoel's and @Maël's comments:
library(dplyr)
library(magrittr)
# combine use of across() and where() to select columuns
df %>%
group_by(across(where(is.character))) %>%
mutate(PERC_CH = EUR / EUR[which.min(TIME)]-1, ABS_CH = EUR - EUR[which.min(TIME)])
#> # A tibble: 12 × 9
#> # Groups: SCENARIO, INSTITUTE, METHOD_DEC, CPTY_CLASS, Deaf [4]
#> SCENARIO INSTITUTE METHOD_DEC CPTY_CLASS Deaf TIME EUR PERC_CH ABS_CH
#> <chr> <chr> <chr> <chr> <chr> <int> <dbl> <dbl> <dbl>
#> 1 AC BCR BIL SME Y 2021 13447. 0 0
#> 2 AC BCR BIL SME Y 2022 16461. 0.224 3014.
#> 3 AC BCR BIL SME Y 2023 19510. 0.451 6064.
#> 4 AC BCR BIL SME N 2021 79951. 0 0
#> 5 AC BCR BIL SME N 2022 80847. 0.0112 896.
#> 6 AC BCR BIL SME N 2023 84940. 0.0624 4989.
#> 7 AC BCR CRL BANK Y 2021 0 NaN 0
#> 8 AC BCR CRL BANK Y 2022 1047. Inf 1047.
#> 9 AC BCR CRL BANK Y 2023 302. Inf 302.
#> 10 AC BCR CRL BANK N 2021 104609. 0 0
#> 11 AC BCR CRL BANK N 2022 107720. 0.0297 3110.
#> 12 AC BCR CRL BANK N 2023 103467. -0.0109 -1143.
df %>%
group_by(across(!where(is.numeric))) %>%
mutate(PERC_CH = EUR / EUR[which.min(TIME)]-1, ABS_CH = EUR - EUR[which.min(TIME)])
#> # A tibble: 12 × 9
#> # Groups: SCENARIO, INSTITUTE, METHOD_DEC, CPTY_CLASS, Deaf [4]
#> SCENARIO INSTITUTE METHOD_DEC CPTY_CLASS Deaf TIME EUR PERC_CH ABS_CH
#> <chr> <chr> <chr> <chr> <chr> <int> <dbl> <dbl> <dbl>
#> 1 AC BCR BIL SME Y 2021 13447. 0 0
#> 2 AC BCR BIL SME Y 2022 16461. 0.224 3014.
#> 3 AC BCR BIL SME Y 2023 19510. 0.451 6064.
#> 4 AC BCR BIL SME N 2021 79951. 0 0
#> 5 AC BCR BIL SME N 2022 80847. 0.0112 896.
#> 6 AC BCR BIL SME N 2023 84940. 0.0624 4989.
#> 7 AC BCR CRL BANK Y 2021 0 NaN 0
#> 8 AC BCR CRL BANK Y 2022 1047. Inf 1047.
#> 9 AC BCR CRL BANK Y 2023 302. Inf 302.
#> 10 AC BCR CRL BANK N 2021 104609. 0 0
#> 11 AC BCR CRL BANK N 2022 107720. 0.0297 3110.
#> 12 AC BCR CRL BANK N 2023 103467. -0.0109 -1143.
# create a vector of character containing the names of the columns
# and then use it to refer to the columns of df in group_by() using the syntax !!!syms()
grps <- names(df)[!sapply(df, is.numeric)]
df %>%
group_by(!!!syms(grps)) %>%
mutate(PERC_CH = EUR / EUR[which.min(TIME)]-1, ABS_CH = EUR - EUR[which.min(TIME)])
#> # A tibble: 12 × 9
#> # Groups: SCENARIO, INSTITUTE, METHOD_DEC, CPTY_CLASS, Deaf [4]
#> SCENARIO INSTITUTE METHOD_DEC CPTY_CLASS Deaf TIME EUR PERC_CH ABS_CH
#> <chr> <chr> <chr> <chr> <chr> <int> <dbl> <dbl> <dbl>
#> 1 AC BCR BIL SME Y 2021 13447. 0 0
#> 2 AC BCR BIL SME Y 2022 16461. 0.224 3014.
#> 3 AC BCR BIL SME Y 2023 19510. 0.451 6064.
#> 4 AC BCR BIL SME N 2021 79951. 0 0
#> 5 AC BCR BIL SME N 2022 80847. 0.0112 896.
#> 6 AC BCR BIL SME N 2023 84940. 0.0624 4989.
#> 7 AC BCR CRL BANK Y 2021 0 NaN 0
#> 8 AC BCR CRL BANK Y 2022 1047. Inf 1047.
#> 9 AC BCR CRL BANK Y 2023 302. Inf 302.
#> 10 AC BCR CRL BANK N 2021 104609. 0 0
#> 11 AC BCR CRL BANK N 2022 107720. 0.0297 3110.
#> 12 AC BCR CRL BANK N 2023 103467. -0.0109 -1143.
Created on 2022-09-01 with reprex v2.0.2
Reproducible dataset:
df=structure(list(SCENARIO = c("AC", "AC", "AC", "AC", "AC", "AC",
"AC", "AC", "AC", "AC", "AC", "AC"), INSTITUTE = c("BCR",
"BCR", "BCR", "BCR", "BCR", "BCR", "BCR", "BCR", "BCR", "BCR",
"BCR", "BCR"), METHOD_DEC = c("BIL", "BIL", "BIL",
"BIL", "BIL", "BIL", "CRL", "CRL", "CRL", "CRL", "CRL",
"CRL"), CPTY_CLASS = c("SME", "SME", "SME", "SME", "SME",
"SME", "BANK", "BANK", "BANK", "BANK", "BANK", "BANK"), Deaf = c("Y",
"Y", "Y", "N", "N", "N", "Y", "Y", "Y", "N", "N", "N"), TIME = c(2021L,
2022L, 2023L, 2021L, 2022L, 2023L, 2021L, 2022L, 2023L, 2021L,
2022L, 2023L), EUR = c(13446.7, 16460.6727685, 19510.2132,
79951.1120192, 80847.03547, 84940.1414854, 0, 1047.150372256,
302.308772901, 104609.421568, 107719.773397, 103466.689156
)), row.names = c(NA, -12L), class = c("tbl_df", "tbl", "data.frame"
))