0

I am creating faceted box plots that are grouped by a variable. Instead of having the x-axis text be the factors for the x-axis variable I'd like the x-axis text to be the grouping variable.

However, I don't just want to use the grouping variable as my x-axis variable because I'd like the boxplots to cluster. It's hard to explain well, but I think it's clear from the code and comments below.

Let me know if you have any suggestions or can help and thanks in advance!

    library(ggplot2) 
    library(scales)
    # Line and background colours used by the plot themes.
    ln_clr <- "black"
    bk_clr <- "white"
    # Fixed seed so sample() and rnorm() below give the same data on every run.
    set.seed(1)

    # Creates the variables for the dataset (120 values each).
    donor <- rep(paste0("Donor", 1:3), each = 40)
    machine <- sample(rep(paste0("Machine", 1:4), times = 30))
    gene <- rep(paste0("Gene", LETTERS[1:5]), each = 24)
    value <- rnorm(24 * 5,
                   mean = rep(c(0.5, 10, 1000, 25000, 8000), each = 24),
                   sd = rep(c(0.5, 8, 900, 9000, 3000), each = 24))

    # Makes all values positive (vectorized; no element-wise loop needed).
    value <- abs(value)

    # Creates a data frame from the variables.
    df <- data.frame(donor, machine, gene, value)

    # Adds a clone variable: the donor name followed by a letter that cycles
    # A, B, C, D down the rows. The earlier loop header `1:nrow(df)/4` parsed
    # as `(1:nrow(df))/4`, so it ran four times too long and failed when it
    # tried to write row 121; recycling the letters over nrow(df) rows gives
    # the intended labels without indexing past the end.
    clns <- LETTERS[1:4]
    df$clone <- as.factor(paste0(df$donor, rep(clns, length.out = nrow(df))))


#*************************************************************************************************************************************
# Creates the facet of the machine but what I want on the x-axis is clone, not donor. 
# However, if I set x to clone it doesn't group the boxplots and its harder to read 
# the graph.
    # Donor on the x axis, one dodged box per clone, one panel per machine.
    bp1 <- ggplot(data = df, mapping = aes(x = donor, y = value, group = clone)) +
        # Whisker end caps, dodged by the same amount as the boxes.
        stat_boxplot(
            geom = "errorbar",
            coef = 1,
            width = 0.25,
            size = 0.7,
            position = position_dodge(width = 0.83)
        ) +
        # Boxes without outlier markers; the raw points are drawn separately.
        geom_boxplot(
            coef = 1,
            outlier.shape = NA,
            lwd = 0.3,
            alpha = 1,
            colour = ln_clr,
            position = position_dodge(width = 0.83)
        ) +
        # Raw observations, dodged so they sit on their own clone's box.
        geom_point(
            mapping = aes(group = clone),
            size = 1.8,
            alpha = 0.9,
            position = position_dodge(width = 0.83)
        ) +
        facet_wrap(~ machine, scales = "free_x", ncol = 2)

    # Print the plot with a log10 y axis and tilted x-axis labels.
    bp1 +
        scale_y_log10(expand = c(0, 0)) +
        theme(
            axis.text.x = element_text(
                size = rel(1), colour = "black", angle = 45, hjust = 1
            ),
            strip.background = element_rect(
                colour = ln_clr, fill = bk_clr, size = 1
            )
        )

# Creates the facet of the Donor and clusters the clones but doesn't facet the  
# machine. This could be okay if I could put spaces in between the different  
# machine values but not the donors and could remove the donor facet labels, and 
# only have the machine values show up once.
    # Clone on the x axis, one panel per machine/donor combination.
    bp2 <- ggplot(data = df, mapping = aes(x = clone, y = value)) +
        # Whisker end caps for each box.
        stat_boxplot(
            geom = "errorbar",
            coef = 1,
            width = 0.25,
            size = 0.7,
            position = position_dodge(width = 0.83)
        ) +
        # Boxes without outlier markers; the raw points are drawn separately.
        geom_boxplot(
            coef = 1,
            outlier.shape = NA,
            lwd = 0.3,
            alpha = 1,
            colour = ln_clr,
            position = position_dodge(width = 0.83)
        ) +
        # Raw observations on top of the boxes.
        geom_point(
            size = 1.8,
            alpha = 0.9,
            position = position_dodge(width = 0.83)
        ) +
        facet_wrap(machine ~ donor, ncol = 6, scales = "free_x")

    # Print with a log10 y axis, tilted x labels and no gap between panels.
    bp2 +
        scale_y_log10(expand = c(0, 0)) +
        theme(
            axis.text.x = element_text(
                size = rel(1), colour = "black", angle = 45, hjust = 1
            ),
            strip.background = element_rect(
                colour = ln_clr, fill = bk_clr, size = 1
            ),
            panel.spacing = unit(0, "lines")
        )

Below is an example comparing what I'd like in an ideal world (Top two facets) as compared to what I'm getting (bottom two facets).

enter image description here

Nathan
  • 323
  • 3
  • 13

2 Answers2

1

I'm not sure I understand exactly what you're trying to do, so let me know if this is on the right track:

library(dplyr)

# One dodge object shared by the boxes and the points so they stay aligned.
pd <- position_dodge(width = 0.83)

# Remove the "DonorN" prefix from clone and the "Donor" prefix from donor,
# then put clone on the x axis and colour/dodge the boxes by donor.
ggplot(
  df %>%
    mutate(
      clone = gsub("Donor[1-3]", "", clone),
      donor = gsub("Donor", "", donor)
    ),
  aes(
    x = clone,
    y = value,
    color = donor,
    group = interaction(clone, donor)
  )
) +
  geom_boxplot(coef = 1, outlier.shape = NA, lwd = 0.3, position = pd) +
  geom_point(size = 1.8, alpha = 0.9, position = pd) +
  facet_wrap(~ machine, scales = "free_x", ncol = 2) +
  scale_y_log10(expand = c(0.02, 0)) +
  theme(
    strip.background = element_rect(colour = ln_clr, fill = bk_clr, size = 1)
  )

enter image description here

How about this:

# Full clone name on the x axis, one box per clone/donor combination,
# one panel per machine.
ggplot(
  data = df,
  mapping = aes(x = clone, y = value, group = interaction(clone, donor))
) +
  geom_boxplot(coef = 1, outlier.shape = NA, lwd = 0.3) +
  geom_point(size = 1.8, alpha = 0.9) +
  facet_wrap(~ machine, scales = "free_x", ncol = 2) +
  scale_y_log10(expand = c(0.02, 0)) +
  theme(
    axis.text.x = element_text(
      size = rel(1), colour = "black", angle = 45, hjust = 1
    ),
    strip.background = element_rect(colour = ln_clr, fill = bk_clr, size = 1)
  )

enter image description here

eipi10
  • 91,525
  • 24
  • 209
  • 285
  • Hey Eipi10. Thanks for the quick response again. Unfortunately, it looks like you've just reparameterized the clones to group the donors and that's not really what I was trying to do. I realized I could edit the graph in photoshop to display what I was hoping for, and I've added the example image above. Does it make more sense with that example? – Nathan Apr 08 '17 at 05:28
  • Hey Eipi10, I might be wrong but isn't that the exact same plot as if you just facet the plot of x=clone, y=value, with no group=interaction(clone,donor)? I mean visually, to me, I couldn't tell the difference, but is there a difference? What I'd like to do is have the clones cluster by donor, in the above graph all clones are equally spaced apart. Thank you again for taking the time to think about it, I really appreciate it. – Nathan Apr 10 '17 at 17:50
0

I found a workaround for this problem, but it's not very elegant. I'd be super happy if someone came up with a better solution. Using the code to create a function for a "multiplot" found here, and adding the code below, I was able to do what I wanted. However, this is a slightly wonky solution in that I can't really format my titles with boxes around them, and there are still two "clone" titles on the x axis that I can't easily replace with a single x-axis title. Also, had I had many "machines" in my example, this solution would have been painful to scale. All in all, not ideal but passable for what I need. Special thanks to Eipi10 for their help, I appreciate it.

# Draws several plot objects on a single page, arranged in a grid.
#
# Arguments:
#   ...       plot objects to draw; each one is drawn with print().
#   plotlist  optional list of further plot objects, appended after `...`.
#   file      not used by this function; kept so existing calls still work.
#   cols      number of columns in the grid; only used when `layout` is NULL.
#   layout    optional matrix of plot indices: plot i is drawn in the cell(s)
#             whose value is i. When NULL it is built from `cols`, filled
#             column by column.
#
# With no plots at all nothing is drawn and NULL is returned invisibly.
# A single plot is printed directly without setting up a grid.
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
    # Make a list from the ... arguments and plotlist
    plots <- c(list(...), plotlist)

    numPlots <- length(plots)

    # Nothing to draw: return before touching the graphics device, instead
    # of failing later on plots[[1]].
    if (numPlots == 0) {
        return(invisible(NULL))
    }

    # If layout is NULL, then use 'cols' to determine layout
    if (is.null(layout)) {
        # Number of rows needed, calculated from the number of columns
        nRows <- ceiling(numPlots / cols)
        layout <- matrix(seq_len(cols * nRows), ncol = cols, nrow = nRows)
    }

    if (numPlots == 1) {
        print(plots[[1]])

    } else {
        # Set up the page. grid functions are namespace-qualified so the
        # function does not attach a package as a side effect.
        grid::grid.newpage()
        grid::pushViewport(
            grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
        )

        # Make each plot, in the correct location
        for (i in seq_len(numPlots)) {
            # Get the row/col positions of the cells that contain this subplot
            matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

            print(plots[[i]], vp = grid::viewport(layout.pos.row = matchidx$row,
                                                  layout.pos.col = matchidx$col))
        }
    }
}

    # Call multiplot function after storing each of the below plots as variables
    # Line and background colours shared by all four per-machine plots.
    ln_clr <- "black"
    bk_clr <- "white"

    # Theme settings that are the same for every per-machine plot.
    machine_theme <- theme(
        axis.text.x = element_text(size = rel(1), colour = ln_clr, angle = 45, hjust = 1),
        panel.spacing = unit(0.25, "lines"),
        plot.title = element_text(hjust = 0.5),
        strip.text.x = element_text(size = rel(1), face = "bold", colour = ln_clr),
        strip.background = element_rect(colour = ln_clr, fill = bk_clr, size = 1),
        axis.line.x = element_line(size = 1.25, colour = ln_clr),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = bk_clr),
        panel.border = element_blank(),
        plot.background = element_rect(fill = bk_clr)
    )

    # Per-plot variations, kept separate so each plot states what it adds.
    with_y_axis <- theme(axis.line.y = element_line(size = 1.25, colour = ln_clr))
    without_y_axis <- theme(
        axis.line.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.y = element_blank()
    )
    without_x_title <- theme(axis.title.x = element_blank())

    # Builds the boxplot for one machine: clone on the x axis, one facet per
    # donor, log10 y axis with limits expanded to include 0.001 and 10^5.
    #   data         data frame with machine, clone, donor and value columns
    #   machine_id   value of `machine` whose rows are plotted
    #   plot_title   title shown above the plot
    #   extra_theme  theme settings added on top of `machine_theme`
    machine_plot <- function(data, machine_id, plot_title, extra_theme) {
        ggplot(data[data$machine == machine_id, ], aes(x = clone, y = value)) +
            geom_boxplot(coef = 1, outlier.shape = NA, lwd = 0.3) +
            geom_point(size = 1.8, alpha = 0.9) +
            ggtitle(plot_title) +
            expand_limits(y = c(0.001, 10^5)) +
            facet_wrap(~ donor, nrow = 1, scales = "free_x") +
            scale_y_log10(expand = c(0, 0)) +
            machine_theme +
            extra_theme
    }

    # Machines 1 and 2 keep their y axis; machines 3 and 4 drop it.
    # Machines 1 and 3 drop the x-axis title; machines 2 and 4 keep it.
    bp3 <- machine_plot(df, "Machine1", "Machine 1", with_y_axis + without_x_title)
    bp4 <- machine_plot(df, "Machine2", "Machine 2", with_y_axis)
    bp5 <- machine_plot(df, "Machine3", "Machine 3", without_y_axis + without_x_title)
    bp6 <- machine_plot(df, "Machine4", "Machine 4", without_y_axis)

# Plots all 4 graphs on one page and saves them to "graph3.png"
# (9 x 7.5 inches at 600 dpi). multiplot() builds its layout matrix column
# by column, so with cols=2 bp3 and bp4 form the left column and bp5 and bp6
# the right column. dev.off() closes the png device, which writes the file.
    png(filename="graph3.png", width= 9, height= 7.5, units = "in", res=600)
    multiplot(bp3, bp4, bp5, bp6, cols=2)
    dev.off()

enter image description here

Alternatively, if I set "strip.text.x =" and "strip.background =" to element_blank(), I can generate the plot below:

enter image description here

Nathan
  • 323
  • 3
  • 13