1

I'm making a bar chart, and want to add lines for p-values of pairwise comparisons of the x axis groups. I want to automatically assign the y positions of these horizontal p-value lines, in the same way as add_y_position() does.

Here is how to do it with add_y_position:

# Load required packages
library(rstatix)
library(ggplot2)

# Format data
df <- ToothGrowth %>%
  mutate(dose = as.factor(dose))

# Do t-test
stat.test <- df %>%
  t_test(len ~ dose) %>%
  add_y_position()

# Plot
ggplot(df, aes(x=dose, y=len)) +
  geom_boxplot() +
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01)

enter image description here

However, I'm not using rstatix to do my statistical test, and add_y_position requires an object of class rstatix_test. I've bodged it as below, but is there a more robust way to do it??

df <- structure(list(cells = c("125CAG_astrocyte", "125CAG_astrocyte", 
                               "125CAG_astrocyte", "125CAG_astrocyte", "125CAG_astrocyte", "125CAG_astrocyte", 
                               "125CAG_astrocyte", "125CAG_astrocyte", "125CAG_astrocyte", "125CAG_astrocyte", 
                               "180CAG_astrocyte", "180CAG_astrocyte", "180CAG_astrocyte", "180CAG_astrocyte", 
                               "180CAG_astrocyte", "180CAG_astrocyte", "180CAG_astrocyte", "180CAG_astrocyte", 
                               "180CAG_astrocyte", "180CAG_astrocyte", "180CAG_astrocyte", "QS1.23_astrocyte", 
                               "QS1.23_astrocyte", "QS1.23_astrocyte", "QS1.23_astrocyte", "QS1.23_astrocyte", 
                               "QS1.23_astrocyte", "QS1.23_astrocyte", "QS1.23_astrocyte", "QS2A_astrocyte", 
                               "QS2A_astrocyte", "QS2A_astrocyte", "QS2A_astrocyte", "QS2A_astrocyte", 
                               "QS2A_astrocyte", "QS2A_astrocyte", "QS2A_astrocyte", "QS2A_astrocyte"
), day = c(21, 21, 21, 21, 21, 21, 50, 50, 50, 80, 21, 21, 21, 
           21, 21, 50, 50, 50, 80, 80, 80, 21, 21, 21, 50, 50, 80, 80, 80, 
           21, 21, 21, 50, 50, 50, 80, 80, 80), dii.c = c(0.437236013145938, 
                                                          0.534377635220068, 0.691706217410442, -1.97988197201862, 1.21848774141036, 
                                                          -2.56524550094464, -0.323113360827943, 3.68031981243924, 3.12201343101798, 
                                                          4.12041645377106, -0.666504700306258, -1.0519761422485, 3.33152519321638, 
                                                          -1.46133065596105, -0.151713694700577, 8.18205330655683, 4.57982898309134, 
                                                          7.42919662040787, 6.77232116296189, 7.02581271991899, 8.19169219747535, 
                                                          -0.185821728247712, -0.00210775745503433, 0.187929485702747, 
                                                          0.476938765819182, 0.366202391144212, 0.393312571703907, 0.390413162193677, 
                                                          0.356374753429206, 0.451067071694077, -0.2770394052073, -0.122521797126516, 
                                                          0.317893728642077, -0.147581976420215, 0.0133741113853953, -0.108859062750234, 
                                                          0.0967729826487992, -0.172655588111051)), row.names = c(NA, -38L
                                                          ), class = c("tbl_df", "tbl", "data.frame"))


# Linear model
model.lm <- lm(dii.c ~ day * cells, data=df)

# Two way anova
aov <- anova(model.lm)

# Pairwise comparison
library(emmeans)
pwc <- emtrends(model.lm,
                pairwise ~ cells,
                var = "day", adjust = "tukey")

# Extract pairwise significance
contrasts <- data.frame(pwc$contrasts)
contrasts$p.signif <- symnum(contrasts$p.value, corr=FALSE, na=FALSE, legend=FALSE,
                             cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), 
                             symbols = c("***", "**", "*", ".", " "))
contrasts <- contrasts %>%
  mutate(grouped = contrast) %>%
  tidyr::separate(grouped, c("group1", "group2"), sep=" - ")
contrasts$p.value <- formatC(contrasts$p.value, format="e", digits=2)

# Extract rates
rates <- data.frame(pwc$emtrends) %>%
  mutate(cells = fct_reorder(cells, -day.trend))

# Plot rates
ggplot(rates, aes(x=cells, y=day.trend)) +
  geom_bar(stat="identity") +
  geom_errorbar(aes(ymin=day.trend-SE, 
                    ymax=day.trend+SE), width=0.25) +
  stat_pvalue_manual(contrasts,
                     label = "{p.signif} \n p = {p.value}",
                     y.position = 1.3*max(rates[,paste0(x.variable,".trend")]), 
                     step.increase = 0.3, vjust=-0.1) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

enter image description here

Mike
  • 921
  • 7
  • 26
  • You might be interested in [ggstatsplot](https://indrajeetpatil.github.io/ggstatsplot/) – bretauv Sep 02 '22 at 08:04
  • Thanks @bretauv, could you give a demo of how it would work with my data? – Mike Sep 02 '22 at 08:09
  • No idea, I just know this package by name and I saw that the plot you want to make look like the plots this package produces. But I've never used it – bretauv Sep 02 '22 at 08:13
  • @bretauv, unfortunately as far as I can see `ggstratsplot` can do the stats and make nice plots, but not add my independent stats to plot with autocalculated y positions – Mike Sep 02 '22 at 08:22

0 Answers0