I'm making a bar chart, and want to add lines for p-values of pairwise comparisons of the x axis groups. I want to automatically assign the y positions of these horizontal p-value lines, in the same way as add_y_position()
does.
Here is how to do it with add_y_position:
# Load required packages
library(rstatix)
library(ggplot2)
# Format data
df <- ToothGrowth %>%
mutate(dose = as.factor(dose))
# Do t-test
stat.test <- df %>%
t_test(len ~ dose) %>%
add_y_position()
# Plot
ggplot(df, aes(x=dose, y=len)) +
geom_boxplot() +
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01)
However, I'm not using rstatix
to do my statistical test, and add_y_position
requires an object of class rstatix_test
. I've bodged it as below, but is there a more robust way to do it??
df <- structure(list(cells = c("125CAG_astrocyte", "125CAG_astrocyte",
"125CAG_astrocyte", "125CAG_astrocyte", "125CAG_astrocyte", "125CAG_astrocyte",
"125CAG_astrocyte", "125CAG_astrocyte", "125CAG_astrocyte", "125CAG_astrocyte",
"180CAG_astrocyte", "180CAG_astrocyte", "180CAG_astrocyte", "180CAG_astrocyte",
"180CAG_astrocyte", "180CAG_astrocyte", "180CAG_astrocyte", "180CAG_astrocyte",
"180CAG_astrocyte", "180CAG_astrocyte", "180CAG_astrocyte", "QS1.23_astrocyte",
"QS1.23_astrocyte", "QS1.23_astrocyte", "QS1.23_astrocyte", "QS1.23_astrocyte",
"QS1.23_astrocyte", "QS1.23_astrocyte", "QS1.23_astrocyte", "QS2A_astrocyte",
"QS2A_astrocyte", "QS2A_astrocyte", "QS2A_astrocyte", "QS2A_astrocyte",
"QS2A_astrocyte", "QS2A_astrocyte", "QS2A_astrocyte", "QS2A_astrocyte"
), day = c(21, 21, 21, 21, 21, 21, 50, 50, 50, 80, 21, 21, 21,
21, 21, 50, 50, 50, 80, 80, 80, 21, 21, 21, 50, 50, 80, 80, 80,
21, 21, 21, 50, 50, 50, 80, 80, 80), dii.c = c(0.437236013145938,
0.534377635220068, 0.691706217410442, -1.97988197201862, 1.21848774141036,
-2.56524550094464, -0.323113360827943, 3.68031981243924, 3.12201343101798,
4.12041645377106, -0.666504700306258, -1.0519761422485, 3.33152519321638,
-1.46133065596105, -0.151713694700577, 8.18205330655683, 4.57982898309134,
7.42919662040787, 6.77232116296189, 7.02581271991899, 8.19169219747535,
-0.185821728247712, -0.00210775745503433, 0.187929485702747,
0.476938765819182, 0.366202391144212, 0.393312571703907, 0.390413162193677,
0.356374753429206, 0.451067071694077, -0.2770394052073, -0.122521797126516,
0.317893728642077, -0.147581976420215, 0.0133741113853953, -0.108859062750234,
0.0967729826487992, -0.172655588111051)), row.names = c(NA, -38L
), class = c("tbl_df", "tbl", "data.frame"))
# Linear model
model.lm <- lm(dii.c ~ day * cells, data=df)
# Two way anova
aov <- anova(model.lm)
# Pairwise comparison
library(emmeans)
pwc <- emtrends(model.lm,
pairwise ~ cells,
var = "day", adjust = "tukey")
# Extract pairwise significance
contrasts <- data.frame(pwc$contrasts)
contrasts$p.signif <- symnum(contrasts$p.value, corr=FALSE, na=FALSE, legend=FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
symbols = c("***", "**", "*", ".", " "))
contrasts <- contrasts %>%
mutate(grouped = contrast) %>%
tidyr::separate(grouped, c("group1", "group2"), sep=" - ")
contrasts$p.value <- formatC(contrasts$p.value, format="e", digits=2)
# Extract rates
rates <- data.frame(pwc$emtrends) %>%
mutate(cells = fct_reorder(cells, -day.trend))
# Plot rates
ggplot(rates, aes(x=cells, y=day.trend)) +
geom_bar(stat="identity") +
geom_errorbar(aes(ymin=day.trend-SE,
ymax=day.trend+SE), width=0.25) +
stat_pvalue_manual(contrasts,
label = "{p.signif} \n p = {p.value}",
y.position = 1.3*max(rates[,paste0(x.variable,".trend")]),
step.increase = 0.3, vjust=-0.1) +
scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))