0

I'm trying to make a ggupset plot, with a violin plot and intersections manually ordered.

I would like to order the intersections (the x axis, i.e. the gene combinations) in the following order: Gene_A, Gene_B, Gene_D, Gene_E, Gene_D with Gene_H, Gene_C with Gene_E with Gene_G with Gene_I, Gene_B with Gene_C with Gene_F with Gene_J.

I've tried to use scale_x_upset but this doesn't seem to work.

I've tried to generate a toy dataset, but for some reason it generates a horizontal violin plot and the y axis is not in the correct order. This doesn't happen with my actual dataset, so probably best to ignore this flaw.

I've found this link (https://github.com/const-ae/ggupset/issues/19) that might be helpful, but have no idea how to implement the code and use forcats::fct_infreq() to order intersections.

I'm new to R, so any advice would be really appreciated.


library(ggplot2)
library(tidyverse, warn.conflicts = FALSE)
library(ggupset)

# Toy dataset: one row per (sample, gene) observation. The six vectors below
# are parallel (18 entries each) and are combined column-wise into `df`.

# Sample identifier; the same sample appears once per gene it carries.
sample <- c(
  "NK_H1_053", "Sample_A", "T7-488", "5567625262", "Sample_A", "50509588",
  "ABC_AJDH_GH_2002_56789_678", "505312347654633", "5567625262", "Sample_A",
  "50509588", "505312347654633", "Sample_A", "505312347654633", "5567625262",
  "50806829", "505312347654633", "5567625262"
)

Phenotype <- c(
  "low", "high", "low", "high", "high", "high", "high", "high", "high",
  "high", "high", "high", "high", "high", "high", "high", "high", "high"
)

# Size must be numeric: as character strings ggplot2 treats it as a discrete
# variable, which leaves the y axis in string order and makes geom_violin()
# pick a horizontal orientation.
Size <- c(23, 6, 24, 6, 6, 6, 15, 6, 6, 6, 6, 6, 6, 6, 6, 16, 6, 6)

Speed <- c(
  "Fast", "Fast", "Slow", "Fast", "Fast", "Fast", "Fast", "Slow", "Fast",
  "Fast", "Fast", "Slow", "Fast", "Slow", "Fast", "Fast", "Slow", "Fast"
)

# Multiple mutations for one row are separated by ";".
AGDe_mutations <- c(
  "No_AGDe_mutation", "AGDe35", "No_AGDe_mutation", "No_AGDe_mutation",
  "AGDe35", "AGDe35;AGDe12GD", "No_AGDe_mutation", "AGDe35;AGDe12GD",
  "No_AGDe_mutation", "AGDe35", "AGDe35;AGDe12GD", "AGDe35;AGDe12GD",
  "AGDe35", "AGDe35;AGDe12GD", "No_AGDe_mutation", "No_AGDe_mutation",
  "AGDe35;AGDe12GD", "No_AGDe_mutation"
)

genes <- c(
  "Gene_A", "Gene_E", "Gene_E", "Gene_F", "Gene_G", "Gene_D", "Gene_D",
  "Gene_C", "Gene_C", "Gene_C", "Gene_H", "Gene_I", "Gene_I", "Gene_B",
  "Gene_B", "Gene_B", "Gene_J", "Gene_J"
)

df <- data.frame(sample, Phenotype, Size, Speed, AGDe_mutations, genes)


# UpSet-style violin plot of Size per gene combination.
#
# Each (sample, Phenotype, Size) group is collapsed to a single row whose
# `gene_list` list-column holds all genes of that group; scale_x_upset() then
# replaces the x axis with a combination matrix.

# Gene combinations in the exact left-to-right order wanted on the x axis.
# Every name here has to match a value of `genes` exactly ("Gene_E", not
# "GeneE"), otherwise the combination cannot be matched to the data.
intersection_order <- list(
  "Gene_A",
  "Gene_B",
  "Gene_D",
  "Gene_E",
  c("Gene_D", "Gene_H"),
  c("Gene_C", "Gene_E", "Gene_G", "Gene_I"),
  c("Gene_B", "Gene_C", "Gene_F", "Gene_J"),
  c("Gene_B", "Gene_C", "Gene_I", "Gene_J")
)

# Rows of the combination matrix drawn below the plot.
gene_sets <- c(
  "Gene_A", "Gene_B", "Gene_C", "Gene_D", "Gene_E",
  "Gene_F", "Gene_G", "Gene_H", "Gene_I", "Gene_J"
)

df %>%
  group_by(sample, Phenotype, Size) %>%
  # Drop the grouping once each group is reduced to one row; this also
  # silences summarize()'s regrouping message.
  summarize(gene_list = list(genes), .groups = "drop") %>%
  ggplot(aes(gene_list, Size)) +
  # alpha has to lie in [0, 1]; 0.5 is a placeholder, adjust to taste.
  geom_violin(width = 2.1, alpha = 0.5) +
  geom_jitter(aes(color = Phenotype), width = 0.001, height = 0.001) +
  # Dashed reference line at Size = 18 (only meaningful on a numeric y axis).
  geom_hline(yintercept = 18, color = "pink", size = 1, linetype = "dashed") +
  labs(x = "Genes", y = "size") +
  scale_x_upset(intersections = intersection_order, sets = gene_sets) +
  # One theme_combmatrix() call for all settings, so that a later call's
  # argument defaults cannot override values set by an earlier one.
  theme_combmatrix(
    combmatrix.panel.line.size = 0.8,
    combmatrix.label.text = element_text(size = 12),
    combmatrix.label.extra_spacing = 5
  )

0 Answers0