I have a big table, available here as an R environment. It looks something like this:
gene | s1 | s2 | s3 | s4 | s5 | s6 | s7 | s8 | s9 | s10 | s11 | s12 | type |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TRAM2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | proeteinCoding |
CLIC5 | 0 | 0 | 1 | 0 | 1 | 0.2 | 0 | 0 | 1.3 | 1 | 0 | 0.7 | proeteinCoding |
GAL3ST2 | 0 | 0.5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | trna |
UHRF1BP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | trna |
OSTM1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | trna |
IMPG2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | miRNA |
OXCT1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.3 | miRNA |
CPNE3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | miRNA |
PPP1R15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | miRNA |
ADAM11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | snoRNA |
PTHLH | 0 | 0 | 0 | 0 | 0.1 | 0.5 | 0 | 0 | 0.1 | 0.2 | 0 | 0.5 | snoRNA |
Using the following code, I can count the unique elements of TYPE (last column) across all cells:
# Count how many expressed genes fall into each annotation TYPE and draw the
# counts as a horizontal bar chart.
library(ggplot2)

# Load the count table and the annotation table (gene ID -> TYPE).
table <- read.delim(
  "smalRNAseq/counts_molc_NoLengthCut_mirnaCollapsed2.txt",
  header = TRUE, sep = "\t"
)
genecodev22 <- read.table("genecodev22.csv")

# Assign new names to the key columns so the two tables can be merged.
colnames(genecodev22)[colnames(genecodev22) %in% c("V1", "V2")] <-
  c("ENSEMBLE", "TYPE")
colnames(table)[colnames(table) %in% c("X", "X.1")] <-
  c("ENSEMBLE1", "ENSEMBLE")

# Merge by ENSEMBLE so that TYPE is appended after the sample columns.
# merge() has no `match` argument (the old `match = "first"` was silently
# swallowed by `...`), so it is omitted here; the result is unchanged.
mergetable <- merge(table, genecodev22, by = "ENSEMBLE")

# Use the first column (the merge key) as row names, then drop the two
# identifier columns so only the sample columns and TYPE remain.
row.names(mergetable) <- mergetable[[1]]
mergetable2 <- mergetable[, -(1:2)]

# Keep only the genes with a non-zero total across the sample columns.
# NOTE(review): the number of sample columns is hard-coded; confirm it still
# matches the input file.
n_samples <- 95
mergetable3 <- mergetable2[rowSums(mergetable2[, seq_len(n_samples)]) > 0, ]

# Number of genes per TYPE: `value` holds the type, `count` its occurrences.
list_of_elements <- aggregate(
  data.frame(count = mergetable3$TYPE),
  list(value = mergetable3$TYPE),
  length
)
row.names(list_of_elements) <- list_of_elements[[1]]

# Keep the `value` column: the plot below maps x and fill to it. Dropping it
# left a one-column data frame, which `[rows, ]` then collapsed to a bare
# vector, so neither `value` nor `count` was available to ggplot.
list_of_elementsOrd <-
  list_of_elements[order(list_of_elements$count, decreasing = TRUE), ]

# Plot it.
ggplot(
  list_of_elementsOrd,
  aes(x = reorder(value, -count), y = count, fill = value)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  geom_text(aes(label = count), vjust = -1, color = "black", size = 3.5) +
  theme(axis.text.x = element_text(angle = 90), legend.position = "none")
WHAT I WANT: for each s# column, I would like to draw a stacked bar plot of the unique types by occurrence (0s should not be counted).
Many thanks
EDIT: I managed to create a list of all the 'aggregate' results with the following loop:
# For every sample column of `mergetable2`, count how many genes of each TYPE
# have a non-zero value in that sample. The result is a list with one
# (value, count) data frame per sample, sorted by decreasing count.
#
# The previous loop contained an orphaned fragment of an apply() call (a
# syntax error), never used the loop index, so every iteration aggregated all
# rows, and permanently replaced the zeros in `mergetable2` with NA.

# Select at most the first two columns, so this also works when
# `list_of_elements` has only one column.
mergedElements <- list_of_elements[seq_len(min(2L, ncol(list_of_elements)))]

# Sample columns are the numeric columns; TYPE is the annotation column.
sample_cols <- names(mergetable2)[vapply(mergetable2, is.numeric, logical(1))]
sample_cols <- setdiff(sample_cols, "TYPE")

list_of_elementsOrd <- lapply(sample_cols, function(sample_col) {
  values <- mergetable2[[sample_col]]
  types <- mergetable2$TYPE
  # Treat both 0 and NA as "not expressed" so `mergetable2` is left untouched.
  expressed <- !is.na(values) & values != 0
  if (!any(expressed)) {
    # aggregate() stops on zero rows, so return an empty result instead.
    return(data.frame(value = types[0], count = integer(0)))
  }
  counts <- aggregate(
    data.frame(count = types[expressed]),
    list(value = types[expressed]),
    length
  )
  counts[order(counts$count, decreasing = TRUE), ]
})
names(list_of_elementsOrd) <- sample_cols

# Long format (one row per sample/type pair). Stacking the rows avoids the
# value.70 / count.70 ... columns produced by binding the list side by side,
# and it can be plotted directly, e.g.
#   ggplot(elements_long, aes(x = sample, y = count, fill = value)) +
#     geom_bar(stat = "identity")
elements_long <- do.call(
  rbind,
  Map(
    function(sample_col, counts) {
      data.frame(sample = rep(sample_col, nrow(counts)), counts)
    },
    sample_cols, list_of_elementsOrd
  )
)
if (!is.null(elements_long)) {
  row.names(elements_long) <- NULL
}
But of course I cannot get the plot done. When I convert it to a data frame, I get column names such as:
value.70 count.70 value.71 count.71