How to adjust error box to violin plot using geom_violin?

Question

Edit (added the summary code and the output of `dput(dt)`):

# Summarise `score` for each level of `together`: the mean, its standard
# error, and the mean +/- 2 SE bounds used for the error bars.
errbar_lims <- dt %>%
  group_by(together) %>%
  dplyr::summarize(
    mean = mean(score),
    se = sd(score) / sqrt(n()),
    upper = mean + (2 * se),
    lower = mean - (2 * se)
  )




> dput(dt)
structure(list(ï..count = c(50L, 7L, 21L, 22L, 94L, 58L, 147L, 
4L, 30L, 67L, 91L, 75L, 143L, 15L, 64L, 141L, 39L, 18L, 27L, 
70L, 142L, 95L, 26L, 78L, 8L, 146L, 46L, 138L, 36L, 63L, 66L, 
97L, 56L, 25L, 19L, 59L, 99L, 5L, 33L, 17L, 55L, 98L, 31L, 42L, 
76L, 23L, 44L, 32L, 52L, 60L, 20L, 37L, 140L, 93L, 65L, 87L, 
13L, 68L, 51L, 16L, 152L, 81L, 54L, 35L, 149L, 77L, 90L, 38L, 
48L, 153L, 2L, 14L, 12L, 10L, 3L, 28L, 61L, 71L, 6L, 45L, 69L, 
43L, 53L, 47L, 34L, 92L, 9L, 57L, 145L, 11L, 62L, 49L, 148L, 
144L, 1L, 40L, 24L, 88L, 13L, 96L), condition = c(2L, 3L, 1L, 
2L, 2L, 2L, 3L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 1L, 3L, 2L, 3L, 
2L, 2L, 3L, 3L, 2L, 4L, 2L, 3L, 2L, 4L, 3L, 2L, 4L, 4L, 1L, 3L, 
3L, 3L, 1L, 1L, 1L, 3L, 2L, 4L, 2L, 4L, 3L, 4L, 1L, 4L, 4L, 4L, 
4L, 4L, 4L, 1L, 2L, 2L, 1L, 3L, 3L, 4L, 1L, 2L, 3L, 1L, 1L, 2L, 
2L, 4L, 2L, 1L, 2L, 4L, 2L, 3L, 4L, 1L, 3L, 2L, 2L, 1L, 4L, 1L, 
3L, 1L, 4L, 3L, 1L, 1L, 3L, 2L, 1L, 4L, 4L, 1L, 4L, 2L, 3L, 1L, 
1L), together = structure(c(2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 
2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L), .Label = c("Shared Negative", 
"Shared Positive"), class = "factor", label = "together"), second = structure(c(1L, 
2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 
1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 
2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 
2L, 1L, 1L), .Label = c("Negative Second", "Positive Second"), class = "factor"), 
    experimenter = c(1L, 1L, 4L, 4L, 4L, 1L, 1L, 2L, 4L, 2L, 
    2L, 2L, 1L, 2L, 1L, 4L, 3L, 4L, 2L, 4L, 2L, 4L, 2L, 4L, 1L, 
    2L, 4L, 4L, 3L, 3L, 2L, 2L, 4L, 3L, 4L, 1L, 2L, 2L, 1L, 2L, 
    2L, 2L, 2L, 3L, 2L, 1L, 2L, 3L, 1L, 2L, 4L, 2L, 1L, 4L, 2L, 
    2L, 4L, 4L, 1L, 4L, 4L, 3L, 2L, 3L, 2L, 4L, 2L, 4L, 2L, 4L, 
    2L, 2L, 1L, 3L, 3L, 2L, 2L, 4L, 2L, 2L, 3L, 4L, 1L, 2L, 2L, 
    4L, 2L, 4L, 1L, 3L, 3L, 1L, 2L, 1L, 3L, 3L, 4L, 3L, 1L, 4L
    ), age = structure(c(23L, 24L, 25L, 23L, 24L, 35L, 25L, 23L, 
    23L, 24L, 23L, 24L, 31L, 23L, 25L, 23L, 20L, 23L, 23L, 22L, 
    27L, 22L, 25L, 25L, 23L, 31L, 23L, 24L, 25L, 23L, 26L, 24L, 
    24L, 26L, 22L, 24L, 23L, 24L, 21L, 22L, 22L, 22L, 27L, 26L, 
    63L, 23L, 22L, 32L, 24L, 22L, 23L, 31L, 40L, 24L, 24L, 22L, 
    23L, 38L, 22L, 27L, 29L, 24L, 22L, 25L, 32L, 24L, 24L, 23L, 
    23L, 23L, 56L, 48L, 27L, 25L, 23L, 24L, 21L, 25L, 23L, 27L, 
    31L, 26L, 26L, 24L, 30L, 23L, 25L, 25L, 26L, 26L, 25L, 35L, 
    28L, 30L, 21L, 25L, 23L, 37L, 21L, 44L), label = "Age"), 
    sex = structure(c(2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 
    1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 
    1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
    1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
    1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L
    ), .Label = c("Female", "Male"), class = "factor", label = "Sex"), 
    q1 = structure(c(0L, 11L, 18L, 0L, 18L, 19L, 9L, 13L, 36L, 
    41L, 29L, 34L, 33L, 53L, 27L, 35L, NA, 40L, 49L, 34L, 38L, 
    48L, 35L, 29L, 23L, 47L, 35L, 69L, 50L, 45L, 60L, 49L, 34L, 
    NA, 43L, 44L, 51L, 20L, 37L, 69L, 36L, 41L, 45L, 60L, 47L, 
    62L, 58L, 47L, 18L, 30L, 52L, 48L, 60L, 51L, 53L, 54L, 42L, 
    52L, 47L, 51L, 56L, NA, 54L, 51L, 47L, 57L, 47L, 66L, 53L, 
    54L, NA, 54L, 57L, 49L, 67L, 80L, 49L, 36L, 58L, 57L, 50L, 
    87L, 51L, 55L, 59L, 70L, 65L, 59L, 61L, 67L, 50L, 63L, 60L, 
    73L, 70L, 88L, 88L, 83L, 100L, 100L), label = "Question 1"), 
    q2 = c(0L, 13L, 12L, 22L, 23L, 19L, 31L, 11L, 71L, 13L, 36L, 
    12L, 33L, 24L, 37L, 37L, 37L, 41L, 50L, 35L, 58L, 39L, 42L, 
    37L, 64L, 48L, 44L, NA, 40L, 57L, 46L, 49L, 37L, 42L, 67L, 
    53L, 51L, 35L, 49L, 65L, 49L, 58L, 51L, 49L, 46L, 59L, 40L, 
    47L, NA, 51L, 53L, 47L, 60L, 51L, 53L, NA, 63L, 52L, 41L, 
    49L, 50L, NA, 53L, 52L, 64L, 54L, 57L, 50L, 57L, 54L, 55L, 
    64L, 62L, 27L, 65L, 70L, 53L, 68L, 65L, 54L, 59L, 53L, 61L, 
    56L, 58L, 69L, 74L, 62L, 56L, 67L, 62L, 58L, 60L, 72L, 78L, 
    100L, 84L, 96L, 100L, 99L), q3 = c(0L, 14L, 18L, NA, 19L, 
    16L, 53L, 66L, 81L, 35L, 36L, 65L, 32L, 73L, 75L, 30L, 64L, 
    44L, 13L, 51L, 62L, 50L, 42L, 51L, 47L, 12L, 60L, 62L, 66L, 
    77L, 55L, 43L, 69L, 70L, 85L, 68L, 50L, 56L, 46L, 97L, 67L, 
    80L, 59L, 63L, 46L, 60L, 35L, 47L, 35L, 52L, 48L, 58L, 62L, 
    51L, 65L, 67L, 65L, 61L, 71L, 58L, 56L, 57L, 54L, 73L, 60L, 
    62L, 57L, 63L, 53L, 54L, 79L, 87L, 59L, 61L, 78L, 16L, 43L, 
    81L, 69L, 69L, 58L, 47L, 49L, 56L, 58L, 51L, 77L, 65L, 61L, 
    67L, 83L, 90L, 81L, 83L, 78L, 66L, 98L, 9L, 71L, 100L), q4 = c(0L, 
    10L, 23L, 33L, 20L, 17L, 7L, 20L, 3L, 41L, 29L, 17L, 32L, 
    0L, 39L, 50L, 22L, 42L, 52L, 43L, 18L, 24L, 46L, 53L, 31L, 
    14L, 31L, 43L, 24L, 41L, 19L, 42L, 38L, 42L, 37L, 69L, 33L, 
    57L, 51L, 7L, 49L, 10L, 44L, 29L, 50L, 24L, 59L, 48L, 63L, 
    46L, 9L, 49L, 44L, 51L, 44L, 43L, 38L, 45L, 12L, 52L, 49L, 
    NA, 53L, 42L, 40L, 46L, 68L, 46L, 53L, 54L, 33L, 41L, 39L, 
    42L, 32L, 41L, 66L, 36L, 21L, 55L, 44L, 61L, 47L, 56L, 61L, 
    57L, 68L, 41L, 39L, 67L, 23L, 47L, 68L, 34L, 61L, 25L, 68L, 
    92L, 70L, 100L), q5 = c(5L, 7L, 15L, 0L, 19L, 17L, 31L, 19L, 
    23L, 19L, 26L, 34L, 35L, 52L, 46L, 44L, NA, 42L, 34L, 18L, 
    34L, 47L, 51L, 34L, 47L, 39L, 57L, 46L, 48L, 43L, 49L, 41L, 
    41L, 42L, 48L, 43L, 50L, 49L, 68L, 43L, 38L, 46L, 46L, 48L, 
    61L, 57L, 50L, 49L, 47L, 51L, NA, 48L, 34L, 51L, 52L, 52L, 
    46L, 54L, 48L, 55L, 57L, NA, 53L, 51L, 58L, 48L, 50L, 64L, 
    57L, 54L, 52L, 53L, 61L, 79L, 58L, 78L, 51L, 64L, 68L, 58L, 
    55L, 59L, 64L, 62L, 60L, 58L, 67L, 63L, 66L, 68L, 72L, 65L, 
    72L, 69L, 75L, 27L, 70L, 95L, 100L, 100L), q6 = c(3L, 13L, 
    14L, 43L, 17L, 23L, 0L, 20L, 11L, 33L, 38L, 3L, 44L, 0L, 
    6L, 50L, 0L, 46L, 34L, 53L, 34L, 19L, 50L, 43L, 47L, 63L, 
    46L, 27L, 18L, 42L, 42L, 41L, 51L, 32L, 43L, 24L, 50L, 52L, 
    11L, 4L, 49L, 31L, 59L, 28L, 61L, 46L, 56L, 50L, 40L, 51L, 
    30L, 47L, 57L, 46L, 46L, 44L, 43L, 42L, 53L, 50L, 50L, 53L, 
    53L, 66L, 30L, 40L, 52L, 39L, 52L, 54L, 51L, 32L, 32L, 64L, 
    43L, 7L, 58L, 31L, 39L, 51L, 57L, 44L, 59L, 62L, 61L, 57L, 
    32L, 39L, 68L, 58L, 83L, 62L, 43L, 32L, 61L, 65L, 60L, 94L, 
    86L, 100L), q7 = c(40L, 20L, 31L, 67L, 18L, 27L, 50L, 49L, 
    29L, 38L, 45L, 53L, 53L, 53L, 53L, 56L, 70L, 45L, 43L, 53L, 
    42L, 69L, 53L, 47L, 47L, 48L, 53L, 48L, 80L, 66L, 46L, 48L, 
    61L, 62L, 37L, 69L, 49L, 61L, 69L, 86L, 50L, 68L, 49L, 50L, 
    35L, 44L, 43L, 50L, 62L, 51L, 53L, 50L, 46L, 51L, 53L, 51L, 
    71L, 53L, 87L, 57L, 56L, 54L, 53L, 42L, 69L, 61L, 45L, 47L, 
    53L, 54L, 68L, 73L, 66L, 65L, 56L, 72L, 69L, 71L, 71L, 60L, 
    57L, 63L, 75L, 58L, 61L, 52L, 24L, 69L, 71L, 53L, 94L, 81L, 
    43L, 91L, 61L, 57L, 46L, 95L, 84L, 100L), q8 = c(3L, 25L, 
    13L, 0L, 18L, 27L, 15L, 17L, 13L, 38L, 31L, 29L, 26L, 53L, 
    11L, 36L, 23L, 30L, 46L, 43L, 57L, 39L, 25L, 42L, 63L, 69L, 
    30L, 64L, 47L, 41L, 54L, 42L, 37L, 38L, 39L, 21L, 50L, 47L, 
    50L, 24L, 49L, 45L, 45L, 55L, 47L, 43L, 46L, 49L, 62L, 51L, 
    43L, 47L, 63L, 51L, 48L, 49L, 40L, 54L, 46L, 49L, 58L, 49L, 
    53L, 52L, 41L, 50L, 45L, 47L, 53L, 54L, 50L, 56L, 64L, 39L, 
    57L, 38L, 49L, 43L, 48L, 52L, 58L, 55L, 68L, 62L, 59L, 58L, 
    64L, 68L, 46L, 56L, 31L, 63L, 67L, 71L, 62L, 99L, 82L, 98L, 
    100L, 100L), q9 = c(0L, 13L, 5L, 0L, 18L, 25L, 0L, 19L, 0L, 
    15L, 22L, 64L, 26L, 0L, 51L, 37L, 60L, 43L, 43L, 50L, 17L, 
    38L, 51L, 49L, 28L, 32L, 40L, 13L, 16L, 19L, 36L, 51L, 55L, 
    46L, 35L, 26L, 41L, 48L, 31L, 21L, 43L, 61L, 39L, 40L, 46L, 
    49L, 50L, 50L, 52L, 53L, 87L, 55L, 36L, 51L, 48L, 52L, 42L, 
    53L, 59L, 50L, 41L, 53L, 53L, 52L, 66L, 60L, 56L, 54L, 53L, 
    54L, 25L, 34L, 37L, 50L, 38L, 73L, 46L, 72L, 56L, 52L, 59L, 
    53L, 40L, 62L, 60L, 58L, 64L, 63L, 67L, 59L, 79L, 63L, 95L, 
    39L, 66L, 75L, 72L, 91L, 79L, 100L), q10 = c(0L, 2L, 3L, 
    0L, 18L, 25L, 21L, 17L, 22L, 31L, 24L, 16L, 26L, 53L, 37L, 
    16L, 53L, 40L, 50L, 40L, 61L, 49L, 34L, 44L, 34L, 69L, 47L, 
    37L, 66L, 24L, 49L, 50L, 35L, 42L, 37L, 56L, 50L, 52L, 66L, 
    65L, 51L, 41L, 45L, 62L, 46L, 42L, 49L, 50L, 61L, 53L, 69L, 
    48L, 38L, 51L, 48L, 50L, 69L, 53L, 65L, 58L, 58L, 53L, 53L, 
    51L, 58L, 57L, 59L, 60L, 53L, 54L, 74L, 48L, 68L, 69L, 52L, 
    74L, 65L, 47L, 59L, 56L, 67L, 57L, 70L, 61L, 59L, 67L, 63L, 
    70L, 66L, 53L, 75L, 65L, 72L, 100L, 67L, 100L, 78L, 98L, 
    100L, 100L), score = structure(c(5.1, 12.8, 15.2, 18.33, 
    18.8, 21.5, 21.7, 25.1, 28.9, 30.4, 31.6, 32.7, 34, 36.1, 
    38.2, 39.1, 41.13, 41.3, 41.4, 42, 42.1, 42.2, 42.9, 42.9, 
    43.1, 44.1, 44.3, 45.44, 45.5, 45.5, 45.6, 45.6, 45.8, 46.22, 
    47.1, 47.3, 47.5, 47.7, 47.8, 48.1, 48.1, 48.1, 48.2, 48.4, 
    48.5, 48.6, 48.6, 48.7, 48.89, 48.9, 49.33, 49.7, 50, 50.5, 
    51, 51.33, 51.9, 51.9, 52.9, 52.9, 53.1, 53.17, 53.2, 53.2, 
    53.3, 53.5, 53.6, 53.6, 53.7, 54, 54.11, 54.2, 54.5, 54.5, 
    54.6, 54.9, 54.9, 54.9, 55.4, 56.4, 56.4, 57.9, 58.4, 59, 
    59.6, 59.7, 59.8, 59.9, 60.1, 61.5, 65.2, 65.7, 66.1, 66.4, 
    67.9, 70.2, 74.6, 85.1, 89, 99.9), label = "Evaluation Score")), row.names = c(NA, 
-100L), class = "data.frame")

I created a basic violin plot with a boxplot, and I want to add an error box. Here is some code:

  # Per-`together` summary of `score`: mean, standard error, and the
  # mean +/- 2 SE bounds. The `group_by(dt, together) %>%` head is required:
  # without it summarize() has no input and errbar_lims is never created.
  errbar_lims <- group_by(dt, together) %>%
    dplyr::summarize(mean = mean(score), se = sd(score) / sqrt(n()),
                     upper = mean + (2 * se), lower = mean - (2 * se))

  # The violins are split by `second`, but errbar_lims has only one row per
  # level of `together`, so one point and one error bar are drawn per x value.
  p <- ggplot() +
    geom_violin(data = dt,
                aes(x = together, y = score, fill = second, color = second)) +
    geom_point(data = errbar_lims, aes(x = together, y = mean), size = 3) +
    # Supply the summary through `data` rather than `errbar_lims$...` in aes().
    geom_errorbar(data = errbar_lims,
                  aes(x = together, ymax = upper, ymin = lower),
                  stat = "identity", width = .25) +
    theme_minimal()

  print(p)

This is what I get: error bars are shown only for the two `together` groups, not for each of the four violins.

How can I define the errors and display an error box on all four violins? Is there a way to overlay the error box on the violins, like these:

Any help would be appreciated!

Could you provide `dt` with `dput(dt)`? – Waldi, Jan 27 '22 at 07:09

score 3 · Accepted Answer · answered Jan 27 '22 at 09:10

You have a total of four violins on your plot, because you have placed the factor `together` along the x axis, and used the column `second` for your fill aesthetic. Each value of `together` therefore has two violins: one for "Negative Second" and one for "Positive Second".

The problem is that when you made the data frame errbar_lims, you only grouped by together, so if we examine it we will see it does not contain any information about second. It only has two rows, so can only produce two error bars:

errbar_lims
#> # A tibble: 2 x 5
#>   together         mean    se upper lower
#>   <fct>           <dbl> <dbl> <dbl> <dbl>
#> 1 Shared Negative  53.3  1.92  57.1  49.4
#> 2 Shared Positive  45.3  2.00  49.3  41.3

What you are looking for is a data frame with four rows to cover all 4 possible combinations of together and second. You could achieve that by doing this:

# One summary row per `together` x `second` combination: mean score, its
# standard error, and the mean +/- 2 SE bounds for the error bars.
errbar_lims <- dt %>%
  group_by(together, second) %>%
  summarize(
    mean = mean(score),
    se = sd(score) / sqrt(n()),
    upper = mean + (2 * se),
    lower = mean - (2 * se)
  )

If we examine this, we will see it has the four rows that we need for our four error bars:

errbar_lims
#> # A tibble: 4 x 6
#> # Groups:   together [2]
#>   together        second           mean    se upper lower
#>   <fct>           <fct>           <dbl> <dbl> <dbl> <dbl>
#> 1 Shared Negative Negative Second  55.7  3.35  62.4  49.0
#> 2 Shared Negative Positive Second  50.9  1.91  54.8  47.1
#> 3 Shared Positive Negative Second  45.0  3.02  51.1  39.0
#> 4 Shared Positive Positive Second  45.5  2.69  50.9  40.2

Now we can plot, but we need to remember to tell geom_point and geom_errorbar that they should be grouped according to the second column. We also need to tell them we want the geoms to be dodged so that they are not all plotted between the violins:

# Violins come from the raw data; the means and error bars come from the
# four-row summary in errbar_lims. Both overlay layers are grouped by
# `second` and dodged by 0.9 so that each one is shifted sideways per group
# instead of being drawn at the shared x position between the violins.
ggplot(data = dt, mapping = aes(x = together, fill = second)) +
  geom_violin(mapping = aes(y = score, color = second)) +
  geom_point(
    data = errbar_lims,
    mapping = aes(y = mean, group = second),
    position = position_dodge(width = 0.9),
    size = 3
  ) +
  geom_errorbar(
    data = errbar_lims,
    mapping = aes(ymin = lower, ymax = upper, group = second),
    position = position_dodge(width = 0.9),
    width = 0.25
  ) +
  theme_minimal()

How to adjust error box to violin plot using geom_violin?

1 Answer