2

I'm trying to add significance annotations to an errorbar plot with a factor x-axis and dodged groups within each level of the x-axis. It is a similar but NOT identical use case to this

My base errorbar plot is:

library(ggplot2)
library(dplyr)

# Example data, pasted as dput() output: one row per language x group
# combination, holding the predicted value, its standard error and the
# confidence bounds (conf.low / conf.high). `top` repeats the largest
# conf.high found within each language. The object is a grouped tibble
# (grouped by `language`).
pres_prob_pd = structure(list(x = structure(c(1, 1, 1, 2, 2, 2, 3, 3, 3), labels = c(`1` = 1, 
`2` = 2, `3` = 3)), predicted = c(0.571584427222816, 0.712630712634987, 
0.156061969566517, 0.0162388386564817, 0.0371877245103279, 0.0165022541901018, 
0.131528946944238, 0.35927812866896, 0.0708662221985375), std.error = c(0.355802875027348, 
0.471253661425626, 0.457109887762665, 0.352871728451576, 0.442646879181155, 
0.425913568532558, 0.376552208691762, 0.48178172708116, 0.451758041335245
), conf.low = c(0.399141779923204, 0.496138837620712, 0.0701919316506831, 
0.00819832576725402, 0.0159620304815404, 0.00722904089045731, 
0.0675129352870401, 0.17905347369819, 0.030504893442457), conf.high = c(0.728233665534388, 
0.861980236164486, 0.311759350126477, 0.031911364587827, 0.0842227723261319, 
0.0372248587668487, 0.240584344249407, 0.590437963881823, 0.156035177669385
), group = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain", 
"neutral", "uncertain"), class = "factor"), group_col = structure(c(1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain", "neutral", 
"uncertain"), class = "factor"), language = structure(c(2L, 2L, 
2L, 1L, 1L, 1L, 3L, 3L, 3L), .Label = c("english", "dutch", "german"
), class = "factor"), top = c(0.861980236164486, 0.861980236164486, 
0.861980236164486, 0.0842227723261319, 0.0842227723261319, 0.0842227723261319, 
0.590437963881823, 0.590437963881823, 0.590437963881823)), row.names = c(NA, 
-9L), groups = structure(list(language = structure(1:3, .Label = c("english", 
"dutch", "german"), class = "factor"), .rows = structure(list(
    4:6, 1:3, 7:9), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

# Dodge specification shared by the points and their error bars, so both
# layers are shifted by the same amount within each language.
pd <- position_dodge(width = 0.75)

# Base plot: predicted value per language, one point (with its confidence
# interval) for each group, dodged side by side.
p <- ggplot(
  pres_prob_pd,
  aes(x = language, y = predicted, color = group, shape = group)
) +
  geom_point(size = 2, position = pd) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    position = pd,
    width = 0.125
  )
p

enter image description here

What I want to do is annotate the plot so that the pairwise contrasts between the levels of group, within each level of language, are marked for significance. So far I've plotted points representing the relevant contrasts, together with (toy) significance labels, as follows:

# bump function
#
# Staggers annotation heights: each element of `x` is raised by a growing
# offset so that successive annotations stack above one another instead of
# overlapping. The first element is raised by `bump`, and every following
# element by a further `step` on top of the previous offset.
#
# x:    numeric vector of baseline y positions.
# bump: offset added to the first element (default 0.025).
# step: extra offset added for each subsequent element (default 0.075).
#
# Returns a numeric vector the same length as `x`; empty input yields
# numeric(0).
f <- function(x, bump = 0.025, step = 0.075) {
  # Vectorized: the offset for element i is bump + step * (i - 1).
  x + bump + step * (seq_along(x) - 1)
}

# create contrasts
# Every unordered pair of group levels (columns X1 and X2), one row per
# pair, labelled cont_1 .. cont_3.
combs <- data.frame(
  gtools::combinations(
    3, 2,
    v = c("certain", "neutral", "uncertain"),
    set = FALSE,
    repeats.allowed = FALSE
  )
) %>%
  mutate(contrast = c("cont_1", "cont_2", "cont_3"))

# Repeat the contrast table once per language, then attach each language's
# `top` value from the plot data.
combs <- rbind(
  combs %>% mutate(language = "english"),
  combs %>% mutate(language = "dutch"),
  combs %>% mutate(language = "german")
) %>%
  left_join(
    select(pres_prob_pd, language:top) %>% distinct(),
    by = "language"
  ) %>%
  # Deliberately left grouped: later per-language computations on `combs`
  # rely on this grouping.
  group_by(language)

# long transform and calc y_pos
# `combs` is grouped by language, so f() restarts its offsets for every
# language. gather()/spread() live in tidyr, which this script never
# attaches, so they are called with an explicit namespace.
combs_long <- combs %>%
  mutate(y_pos = f(top)) %>%
  tidyr::gather(long, probability, X1:X2, factor_key = TRUE) %>%
  mutate(
    language = factor(language, levels = c("english", "dutch", "german"))
  ) %>%
  arrange(language, contrast)

# back to wide: one row per contrast again, with X1 / X2 as columns
combs_wide <- combs_long %>%
  tidyr::spread(long, probability)
# Toy significance labels, assigned purely by row position.
combs_wide$p <- rep(c("***", "*", "ns"), 3)

# plot
# Adds two annotation layers on top of the base plot `p`:
#   * points marking the two groups involved in each contrast, drawn at the
#     staggered height y_pos and dodged like the data points;
#   * the significance label of each contrast, slightly above those points.
p +
  geom_point(
    data = combs_long,
    aes(
      x = language,
      color = probability,
      shape = probability,
      y = y_pos
    ),
    inherit.aes = TRUE,
    position = pd,
    size = 2
  ) +
  geom_text(
    data = combs_wide,
    aes(
      x = language,
      label = p,
      y = y_pos + .025,
      group = X1
    ),
    color = "black",
    # same dodge as the other layers
    position = pd,
    inherit.aes = FALSE
  )

enter image description here

What I am failing to achieve is drawing a line that connects the two points of each group contrast within each level of language, as is standard when annotating significant group-wise differences. Any help much appreciated!

Axeman
  • 32,068
  • 8
  • 81
  • 94
Cole Robertson
  • 599
  • 1
  • 7
  • 18

0 Answers0