6

I'm trying to generate a correlation matrix with significance stars. Take the following dataframe:

df <- tibble(stub = c(1,2,3,4),
             stub_pvalue = c(.00, .04, .07,.2))

I'd like to write a function that pastes any column (e.g. stub in this example) concatenated with "***" if stub_pvalue is less than .01, and otherwise simply pastes stub. Something like:

assign_stars <- function(var) {

    if (paste0(var,"_pvalue") < .01) {
      paste0(var, "***")
    } else {
      paste0(var)
    }

}

df %>% 
  mutate(col_with_stars = map_chr(col, assign_stars))

However, I can't figure out how to have the if's first logical condition evaluate on the var + "_pvalue". Can anyone help?

M--
  • 25,431
  • 8
  • 61
  • 93

4 Answers4

2
assign_stars <- function(df, var, threshold, marker) {

  require(dplyr)
  require(rlang)

  var <- sym(var)
  val <- sym(paste(var, "pvalue" , sep="_"))
  out <- sym(paste(var, "marker" , sep="_"))

  mutate(df, !!out := if_else(!!val < threshold, 
                              paste0(!!var, marker),
                              as.character(!!var)
                              )
         ) 
}

If we wanted to do this only for one column, then following works:

df %>% 
  assign_stars(., "stub", 0.01, "***")

# # A tibble: 4 x 5
#    stub stub_pvalue  stub_marker
#    <dbl>      <dbl>  <chr>      
# 1     1        0     1***       
# 2     2        0.04  2          
# 3     3        0.07  3          
# 4     4        0.2   4  

But if we want to pass multiple columns to this function, we need to use purrr:

#sample data with multiple sets of columns:
df <- tibble(stub = c(1,2,3,4),
             stub_pvalue = c(.00, .04, .07,.2),
             sho = c(8,7,6,5),
             sho_pvalue = c(.005, .03, .00,.24))
library(purrr)  

pmap_dfc(list(c("stub", "sho")), ~ assign_stars(df, ..1, 0.01, "***")) %>% 
  select(!! names(df), ends_with("marker"))

#> # A tibble: 4 x 6
#>    stub stub_pvalue   sho sho_pvalue stub_marker sho_marker
#>   <dbl>       <dbl> <dbl>      <dbl> <chr>       <chr>     
#> 1     1        0        8      0.005 1***        8***      
#> 2     2        0.04     7      0.03  2           7         
#> 3     3        0.07     6      0     3           6***      
#> 4     4        0.2      5      0.24  4           5

We can also use different threshold and marker for each column:

library(purrr)  

pmap_dfc(list(c("stub", "sho"), c(0.01, 0.04), c("*", "**")), 
         ~ assign_stars(df, ..1, ..2, ..3)) %>% 
   select(!! names(df), ends_with("marker"))

#> # A tibble: 4 x 6
#>    stub stub_pvalue   sho sho_pvalue stub_marker sho_marker
#>   <dbl>       <dbl> <dbl>      <dbl> <chr>       <chr>     
#> 1     1        0        8      0.005 1*          8**       
#> 2     2        0.04     7      0.03  2           7**       
#> 3     3        0.07     6      0     3           6**       
#> 4     4        0.2      5      0.24  4           5
M--
  • 25,431
  • 8
  • 61
  • 93
1

you may be over-thinking the issue, ifelse is vectorized function and you can just use it to perform the dynamic change.

df <- tibble(stub1 = c(1,2,3,4),
             stub1_pvalue = c(.00, .04, .07,.2),
             stub2 = c(1,2,3,4),
             stub2_pvalue = c(.00,.00,.02,.2))


for(x in paste0("stub",seq(1:2))){
 df[[paste0(x,"_with_star")]] <- ifelse(df[[paste0(x,"_pvalue")]]< .01, paste0(df[[x]],"***"),df[[x]])
}

df

# A tibble: 4 x 6
  stub1 stub1_pvalue stub2 stub2_pvalue stub1_with_star stub2_with_star
  <dbl>        <dbl> <dbl>        <dbl> <chr>           <chr>          
1     1         0        1         0    1***            1***           
2     2         0.04     2         0    2               2***           
3     3         0.07     3         0.02 3               3              
4     4         0.2      4         0.2  4               4         
Chuan
  • 667
  • 8
  • 22
1

Here's a non tidyverse solution but should work without specifying any columns. I'm using data of M-- from another answer.

threshold = 0.05
nms = names(df)[grepl("pvalue", names(df))]
cbind(df, data.frame(lapply(nms, function(nm){
    prefix = strsplit(nm, "_")[[1]][1]
    setNames(data.frame(ifelse(df[[nm]] < threshold,
                               paste0(df[[prefix]], "***"),
                               df[[prefix]]),
                        stringsAsFactors = FALSE),
             paste0(prefix, "_marker"))
})))
#  stub stub_pvalue sho sho_pvalue stub_marker sho_marker
#1    1        0.00   8      0.005        1***       8***
#2    2        0.04   7      0.030        2***       7***
#3    3        0.07   6      0.000           3       6***
#4    4        0.20   5      0.240           4          5
d.b
  • 32,245
  • 6
  • 36
  • 77
0

You could write a function in base R then use dplyr as show below:

assign_stars = function(var){
  pval = paste0(substitute(var),"_pvalue")
  tst = tryCatch(get(pval, parent.frame()), error = function(e) FALSE)
  if(length(unlist(tst))==1&&tst==FALSE) return(NULL)
  paste0(var, ifelse(tst<0.01,"***",""))
}

Then you could use it as:

for one variable:

   df%>%mutate(stub_marker = assign_stars(stub))
# A tibble: 4 x 5
   stub stub_pvalue   sho sho_pvalue stub_marker
  <dbl>       <dbl> <dbl>      <dbl> <chr>      
1     1        0        8      0.005 1***       
2     2        0.04     7      0.03  2          
3     3        0.07     6      0     3          
4     4        0.2      5      0.24  4      

for all the variables:

df%>%mutate_all(funs(marker=assign_stars))
# A tibble: 4 x 6
   stub stub_pvalue   sho sho_pvalue stub_marker sho_marker
  <dbl>       <dbl> <dbl>      <dbl> <chr>       <chr>     
1     1        0        8      0.005 1***        8***      
2     2        0.04     7      0.03  2           7         
3     3        0.07     6      0     3           6***      
4     4        0.2      5      0.24  4           5         
Onyambu
  • 67,392
  • 3
  • 24
  • 53