4

I am trying to connect sets of (two) points at each level of x, in each facet. Here is a reproducible example:

# Reproducible example data (dput() output): a 100-row data.frame with an
# integer `frequency` column and three factor columns --
# `band` (levels "1"-"4"), `test` (levels "1"-"2") and
# `knowledge` (levels "1"-"3").
datum <- structure(list(frequency = c(8L, 7L, 6L, 18L, 5L, 11L, 16L, 15L, 
9L, 8L, 8L, 10L, 2L, 20L, 14L, 3L, 6L, 2L, 2L, 11L, 10L, 6L, 
15L, 19L, 18L, 18L, 8L, 2L, 10L, 15L, 12L, 17L, 1L, 18L, 7L, 
8L, 16L, 4L, 9L, 2L, 7L, 3L, 16L, 7L, 18L, 20L, 9L, 10L, 13L, 
2L, 15L, 7L, 3L, 20L, 4L, 15L, 5L, 7L, 9L, 16L, 5L, 8L, 10L, 
10L, 7L, 10L, 10L, 17L, 7L, 8L, 13L, 13L, 16L, 5L, 20L, 18L, 
13L, 19L, 3L, 8L, 14L, 12L, 20L, 2L, 9L, 13L, 7L, 2L, 5L, 5L, 
13L, 9L, 13L, 7L, 9L, 4L, 4L, 20L, 1L, 4L), band = structure(c(2L, 
4L, 2L, 3L, 2L, 1L, 4L, 1L, 2L, 1L, 3L, 4L, 2L, 4L, 3L, 4L, 3L, 
2L, 3L, 2L, 2L, 4L, 2L, 1L, 1L, 2L, 1L, 4L, 4L, 1L, 4L, 4L, 2L, 
1L, 4L, 4L, 3L, 4L, 1L, 1L, 3L, 4L, 1L, 3L, 4L, 1L, 2L, 1L, 1L, 
2L, 2L, 1L, 3L, 4L, 2L, 1L, 2L, 4L, 2L, 2L, 4L, 4L, 2L, 4L, 4L, 
1L, 1L, 4L, 2L, 3L, 4L, 1L, 2L, 4L, 1L, 2L, 4L, 1L, 1L, 3L, 4L, 
4L, 2L, 2L, 2L, 1L, 3L, 2L, 2L, 2L, 3L, 3L, 1L, 3L, 4L, 3L, 3L, 
1L, 3L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"), 
test = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L
), .Label = c("1", "2"), class = "factor"), knowledge = structure(c(2L, 
3L, 1L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 2L, 2L, 1L, 1L, 
1L, 1L, 3L, 3L, 1L, 2L, 3L, 1L, 1L, 2L, 2L, 1L, 1L, 3L, 2L, 
3L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 3L, 1L, 1L, 2L, 3L, 
3L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 1L, 2L, 
1L, 1L, 2L, 3L, 1L, 1L, 1L, 1L, 3L, 2L, 2L, 1L, 2L, 3L, 2L, 
1L, 2L, 3L, 3L, 2L, 1L, 3L, 1L, 3L, 2L, 1L, 3L, 2L, 2L, 3L, 
1L, 1L, 2L, 1L, 2L, 3L, 1L, 3L, 1L), .Label = c("1", "2", 
"3"), class = "factor")), .Names = c("frequency", "band", 
"test", "knowledge"), row.names = c(NA, -100L), class = "data.frame")

Here is the code I have so far:

# Plot the mean `frequency` for each knowledge level and test as dodged
# points, with one facet per band.
library(ggplot2)

ggplot(datum, aes(knowledge, frequency, color = test)) +
  # `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0.
  stat_summary(
    fun = "mean", geom = "point",
    position = position_dodge(width = 0.9), size = 3
  ) +
  facet_grid(~band) +
  labs(y = "number of words (max = 20)", x = "self-report knowledge") +
  # The knowledge levels "1", "2", "3" are relabelled in that order.
  scale_x_discrete(labels = c("none", "form", "meaning"))

Looking at the left-most facet ('1') in the graph, I would like a line to connect the pretest to posttest in the none column, another line connecting pretest to posttest in the form column, and a line connecting the pretest to the posttest in the meaning column. I would like this done in each facet.

I hope that makes sense, and thanks!

jazzurro
  • 23,179
  • 35
  • 66
  • 76
Tjuggles
  • 43
  • 3

2 Answers

1

I find relying on ggplot too much for data manipulation/summarizing can hurt more than it helps. I have no idea how to connect the position-dodged points with a line. Instead, I'd do something like this:

library(dplyr)
library(ggplot2)

# Mean frequency for every band x knowledge x test cell, plus a labelled copy
# of `knowledge` (levels relabelled in order) to use for the facet strips.
datsum <- datum %>%
  group_by(band, knowledge, test) %>%
  summarize(mean = mean(frequency)) %>%
  ungroup() %>%
  mutate(
    knowledge_fac = factor(knowledge, labels = c("none", "form", "meaning"))
  )

# One panel per band (rows) and knowledge level (columns); within a panel the
# test means are joined by a line.
ggplot(datsum, aes(x = test, y = mean)) +
  geom_path(aes(group = band:knowledge)) +
  # `test` is already a factor, so it can be mapped to colour directly.
  geom_point(aes(color = test)) +
  facet_grid(band ~ knowledge_fac) +
  # The x axis shows `test`; knowledge is shown in the facet strips.
  labs(y = "number of words (max = 20)", x = "test")
Gregor Thomas
  • 136,190
  • 20
  • 167
  • 294
  • Thanks a lot for the help. I was thinking that I shouldn't be relying on ggplot as much as I was, so thanks for confirming it. I will mark this as solved, even though the issue of connecting position-dodged points with a line wasn't solved. – Tjuggles May 21 '15 at 01:55
  • @Tjuggles thanks. Yes, if you want to learn how to do that, you should ask a question where that is in the title and it's minimal for that problem (simple simulated data, no faceting or other complications). – Gregor Thomas May 21 '15 at 02:49
1

Borrowing from Gregor's work in munging the data, I think this does what was requested. The mutate() chunk creates Test to be a numeric offset of -0.1 for test 1 and 0.1 for test 2. This is then added to the numeric value of knowledge. The result is the numeric x passed to ggplot2. Gregor correctly defined the groups, so the rest is straightforward.

library(dplyr)
library(ggplot2)

# Mean frequency for every band x knowledge x test cell, with a numeric x
# position (`Knowledge`) that shifts the two tests apart around each
# knowledge level.
datsum <- datum %>%
  group_by(band, knowledge, test) %>%
  summarize(mean = mean(frequency)) %>%
  ungroup() %>%
  mutate(
    # Horizontal offset: -0.1 for test "1", +0.1 for test "2". `test` is a
    # factor, so compare against the level label rather than a number.
    Test = 0.1 * (2 * (test == "2") - 1),
    # as.numeric() on the factor yields its integer codes (1, 2, 3).
    Knowledge = as.numeric(knowledge) + Test
  )

ggplot(datsum, aes(x = Knowledge, y = mean, color = test)) +
  # One black segment per band x knowledge pair, joining the offset points.
  geom_path(aes(group = band:knowledge), color = "black") +
  geom_point(size = 3) +
  facet_wrap(~ band, nrow = 1) +
  labs(y = "number of words (max = 20)", x = "self-report knowledge") +
  scale_color_manual(values = c("orange", "blue")) +
  # Put the category names back on the now-continuous x axis.
  scale_x_continuous(
    limits = c(0.5, 3.5), breaks = 1:3,
    labels = c("none", "form", "meaning")
  )
Dennis
  • 732
  • 4
  • 4