0

I'd like to use ggplot2 to draw 3 pairs of boxplots (factor x0), one box per condition (factor cond0, 2 levels), with custom quantile limits for the boxes and whiskers.

The 2 problems are:

  1. There are 3 groups in condition A (red), but only 2 groups in condition B (blue). Since group 1B is missing, the boxplot of group 1A takes over its space on the graph and is drawn twice as wide. I would like it to be as narrow as the others, with the space of the missing group 1B preserved even though it is empty.

  2. Since the group 3B has only one value (and therefore no outlier), the outliers of group 3A are located in the middle of the pair instead of being aligned with boxplot 3A.

Would there be a solution to these problems?

Thanks for your help.

library(dplyr)
library(ggplot2)

# dataframe
# Condition A: three groups (x0 = 1, 2, 3) with 60 values each.
x1 <- rep(1:3, each = 60)
y1 <- rnorm(180, mean = rep(c(20, 35, 50), each = 60), sd = 10)
cond1 <- rep("A", times = 180)
dat1 <- data.frame(x0 = factor(x1), y0 = y1, cond0 = factor(cond1))

# Condition B: no group 1; `each = 179` truncated to 180 elements gives
# 179 values in group 2 and a single value in group 3.
# (The stray `; y2` / `; x2` were dropped: `y2` was read before it existed,
# which raises "object 'y2' not found" in a fresh session.)
x2 <- rep(2:3, each = 179, length.out = 180)
y2 <- rnorm(180, mean = rep(c(30, 60), each = 90), sd = 7)
cond2 <- rep("B", times = 180)
dat2 <- data.frame(x0 = factor(x2), y0 = y2, cond0 = factor(cond2))

# Stack both conditions; rbind() merges the factor levels of x0 and cond0.
dat <- rbind(dat1, dat2)


# define boxplots limits
# Returns the 10/40/50/80/90% quantiles of `x`, labelled with the five
# names passed to the boxplot geom (ymin, lower, middle, upper, ymax).
dat_boxlim <- function(x) {
  box_probs <- c(0.1, 0.4, 0.5, 0.8, 0.9)
  box_names <- c("ymin", "lower", "middle", "upper", "ymax")
  setNames(quantile(x, probs = box_probs), box_names)
}

# define outliers limits
# Returns the values of `x` lying strictly below the 10% quantile or strictly
# above the 90% quantile. The result is empty when no value qualifies
# (e.g. when `x` holds a single value).
dat_boxout <- function(x) {
  low_cut <- quantile(x, 0.1)[[1]]
  high_cut <- quantile(x, 0.9)[[1]]
  is_outside <- x < low_cut | x > high_cut
  x[which(is_outside)]
}

# figure
# One box per x0/cond0 combination, drawn from the custom quantile limits,
# with the out-of-range values overlaid as points using the same dodge width.
ggplot(
  data = dat,
  mapping = aes(x = x0, y = y0, group = interaction(cond0, x0), fill = cond0)
) +
  stat_summary(
    fun.data = dat_boxlim,
    geom = "boxplot",
    position = position_dodge(width = 0.7),
    width = 0.5,
    show.legend = TRUE
  ) +
  stat_summary(
    fun = dat_boxout,
    geom = "point",
    size = 2,
    position = position_dodge(width = 0.7),
    show.legend = FALSE
  )

enter image description here

stefan
  • 90,330
  • 6
  • 25
  • 51
denis
  • 199
  • 1
  • 8

1 Answer

0

The first problem is solved by passing `position = position_dodge2(preserve = "single")` to the `stat_summary()` call that uses `geom = "boxplot"`.

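The second problem is solved by the revised `dat_boxout()` function below, which returns `NA` rather than an empty result for a group that has a single value.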

The complete working code is:

library(dplyr)
library(ggplot2)

# dataframe
# Condition A: three groups (x0 = 1, 2, 3) with 60 values each.
x1 <- rep(1:3, each = 60)
y1 <- rnorm(180, mean = rep(c(20, 35, 50), each = 60), sd = 10)
cond1 <- rep("A", times = 180)
dat1 <- data.frame(x0 = factor(x1), y0 = y1, cond0 = factor(cond1))

# Condition B: no group 1; `each = 179` truncated to 180 elements gives
# 179 values in group 2 and a single value in group 3.
# (The stray `; y2` / `; x2` were dropped: `y2` was read before it existed,
# which raises "object 'y2' not found" in a fresh session.)
x2 <- rep(2:3, each = 179, length.out = 180)
y2 <- rnorm(180, mean = rep(c(30, 60), each = 90), sd = 7)
cond2 <- rep("B", times = 180)
dat2 <- data.frame(x0 = factor(x2), y0 = y2, cond0 = factor(cond2))

# Stack both conditions; rbind() merges the factor levels of x0 and cond0.
dat <- rbind(dat1, dat2)

# define boxplots limits
# Computes the 10/40/50/80/90% quantiles of `x` and labels them with the
# five names passed to the boxplot geom (ymin, lower, middle, upper, ymax).
dat_boxlim <- function(x) {
  limits <- quantile(x, probs = c(0.1, 0.4, 0.5, 0.8, 0.9))
  setNames(limits, c("ymin", "lower", "middle", "upper", "ymax"))
}

# define outliers limits
# Returns the values of `x` lying strictly below the 10% quantile or strictly
# above the 90% quantile (both tails).
#
# When no value qualifies -- a single-value group, an empty group, or a group
# whose values all lie within the limits (e.g. constant values) -- a single
# NA_real_ is returned instead of an empty vector, so the function always
# yields at least one value of numeric type for the group.
dat_boxout <- function(x) {
  limits <- quantile(x, probs = c(0.1, 0.9))
  outliers <- x[which(x < limits[[1]] | x > limits[[2]])]
  if (length(outliers) == 0) {
    return(NA_real_)
  }
  outliers
}

# figure
# Boxes use position_dodge2(preserve = "single") so a box whose partner is
# missing keeps the width of a single box; the outlier points are dodged
# separately with position_dodge(preserve = "total").
ggplot(
  data = dat,
  mapping = aes(x = x0, y = y0, group = interaction(cond0, x0), fill = cond0)
) +
  stat_summary(
    fun.data = dat_boxlim,
    geom = "boxplot",
    position = position_dodge2(width = 0.7, preserve = "single", padding = 0.1),
    width = 0.5,
    show.legend = TRUE
  ) +
  stat_summary(
    fun = dat_boxout,
    geom = "point",
    size = 2,
    position = position_dodge(width = 0.5, preserve = "total"),
    show.legend = FALSE
  )

enter image description here

denis
  • 199
  • 1
  • 8