How do I paste a string into a dplyr function, e.g. summarise(), and have it run as code?
library('tidyverse')
# Example data: a grouping key `ID` (some keys repeated) plus four numeric
# columns, each holding 10 uniform random draws on 0-100, rounded down.
# No seed is set, so the values differ from run to run.
df <- tibble(
  ID = c("a", "a", "b", "c", "c", "e", "e", "f", "g", "g"),
  var1 = floor(runif(n = 10, min = 0, max = 100)),
  var2 = floor(runif(n = 10, min = 0, max = 100)),
  var3 = floor(runif(n = 10, min = 0, max = 100)),
  var4 = floor(runif(n = 10, min = 0, max = 100))
)
Sample data:
> df
# A tibble: 10 x 5
ID var1 var2 var3 var4
<chr> <dbl> <dbl> <dbl> <dbl>
1 a 82 4 21 32
2 a 90 34 12 51
3 b 67 77 69 32
4 c 56 3 96 76
5 c 38 2 46 79
6 e 34 91 12 12
7 e 49 16 38 31
8 f 34 1 76 82
9 g 95 84 54 70
10 g 13 53 65 79
I want to replace this:
# Hand-written version: one `name = sum(name)` argument per column,
# computed within each ID group.
df %>%
  group_by(ID) %>%
  summarise(
    var1 = sum(var1),
    var2 = sum(var2),
    var3 = sum(var3)
  )
with this:
# Names of every column of df whose name starts with "var".
sum_var <- names(select(df, starts_with("var")))
# Text of the summarise() arguments: one "name = sum(name)" fragment per
# column, joined with ", " into a single string.
sum_var_str <- str_c(
  paste0(sum_var, " = sum(", sum_var, ")"),
  collapse = ", "
)
> sum_var
[1] "var1" "var2" "var3" "var4"
> sum_var_str
[1] "var1 = sum(var1), var2 = sum(var2), var3 = sum(var3), var4 = sum(var4)"
# Attempt: hand the assembled string straight to summarise().
# sum_var_str is a single character string, so summarise() is given a string
# value rather than the `name = sum(name)` arguments spelled out inside it;
# the per-column sums are not computed.
df %>%
group_by(ID) %>%
summarise(sum_var_str) # this line doesn't work: the string is not run as code
I have tried:
- summarise(!!parse_quosure(sum_var_str))
- summarise(parse(text =sum_var_str))
What am I missing?
Thanks,
#--------------- In case you wonder why I am doing this ---------
I want to use multidplyr, which does not yet support summarise_at. I have hundreds, if not thousands, of variables to summarise, so summarise_at is necessary, but unfortunately it is not available in multidplyr.
I am looking for an alternative to work around this.
library('multidplyr')
# Cluster used by the partition() examples; created with n = 5.
cluster <- new_cluster(n = 5)
# Works: plain dplyr without the cluster. The partition()/collect() steps are
# left commented out to show where they would go.
df %>%
  group_by(ID) %>%
  # partition(cluster) %>%
  summarise_at(.vars = vars(starts_with("var")), .funs = sum)
# collect()
# Works: grouped data partitioned across the cluster, with each summary
# written out by hand, then collected.
df %>%
  group_by(ID) %>%
  partition(cluster) %>%
  summarise(
    var1 = sum(var1),
    var2 = sum(var2),
    var3 = sum(var3)
  ) %>%
  collect()
# Does not work: the same summarise_at() call as in the plain dplyr example,
# but applied after partition(cluster). It fails with the `group_vars` error
# shown below for an object of class "multidplyr_party_df".
df %>%
group_by(ID) %>%
partition(cluster) %>%
summarise_at(.vars = vars(starts_with('var')),sum) %>%
collect()
Error in UseMethod("group_vars") :
no applicable method for 'group_vars' applied to an object of class "multidplyr_party_df"
# What I would like to get working: summarise() on the partitioned data,
# driven by the generated string sum_var_str instead of hand-written arguments.
# NOTE(review): parse() does not evaluate the text, and the comma-separated
# contents of sum_var_str are presumably not parseable as R code on their
# own - confirm.
df %>%
group_by(ID) %>%
partition(cluster) %>%
summarise(parse(text =sum_var_str)) %>% # incorrect line of code
collect()