Given the iris
data set as an example, I do Pearson clusters using dendextend
in the following way:
library(RColorBrewer)
library(dendextend)
data(iris)
newmat <- iris[,1:4]
rownames(newmat) <- paste(iris$Species, rownames(iris))
dmat <- 1 - cor(t(newmat), method="pearson")
dmat <- as.dist(dmat)
clust.obj <- hclust(dmat, method="complete")
dend.obj <- as.dendrogram(clust.obj)
numsamples <- length(rownames(newmat))
maxdist <- max(get_nodes_attr(dend.obj, "height"))
groups <- levels(iris$Species)
cols <- colorRampPalette(brewer.pal(length(groups), "Set1"))
myPal <- cols(length(groups))
vals1 <- grep(groups[1], labels(dend.obj), value=TRUE)
vals2 <- grep(groups[2], labels(dend.obj), value=TRUE)
vals3 <- grep(groups[3], labels(dend.obj), value=TRUE)
vals1B <- grepl(groups[1], labels(dend.obj))
vals2B <- grepl(groups[2], labels(dend.obj))
vals3B <- grepl(groups[3], labels(dend.obj))
dend.obj <- dend.obj %>%
set("leaves_pch", 19) %>%
set("leaves_cex", 1) %>%
set("branches_lty", 2) %>%
set("by_labels_branches_col", value = vals1, TF_values = c(myPal[1],Inf)) %>%
set("by_labels_branches_lwd", value = vals1, TF_values = c(2,Inf)) %>%
set("by_labels_branches_lty", value = vals1, TF_values = c(1,Inf)) %>%
set("by_labels_branches_col", value = vals2, TF_values = c(myPal[2],Inf)) %>%
set("by_labels_branches_lwd", value = vals2, TF_values = c(2,Inf)) %>%
set("by_labels_branches_lty", value = vals2, TF_values = c(1,Inf)) %>%
set("by_labels_branches_col", value = vals3, TF_values = c(myPal[3],Inf)) %>%
set("by_labels_branches_lwd", value = vals3, TF_values = c(2,Inf)) %>%
set("by_labels_branches_lty", value = vals3, TF_values = c(1,Inf)) %>%
set("labels_colors", ifelse(vals1B, myPal[1], ifelse(vals2B, myPal[2], myPal[3]))) %>%
set("leaves_col", ifelse(vals1B, myPal[1], ifelse(vals2B, myPal[2], myPal[3])))
png(filename="test.png", height=1200, width=400)
mar.default <- c(5,4,4,2) + 0.1
par(mar = mar.default + c(0, 0, 0, 4))
plot(dend.obj, main="test cluster", xlab="Distance", horiz=TRUE, cex.main=1, cex.axis=1, cex.lab=1)
legend(maxdist, numsamples, groups, cex=1, pch=19, col=myPal)
dev.off()
Which produces this cluster, which I find really useful with the colors and all:
The problem is that now I want to encapsulate this into a function. And the length of groups
can vary. So I need to do the set
part of dend.obj
inside a for
loop or something.
Something like:
for (i in 1:length(groups)){
set("by_labels_branches_col", value=grep(groups[i],labels(dend.obj),value=TRUE), TF_values=c(myPal[i],Inf)) %>%
set("by_labels_branches_lwd", value=grep(groups[i],labels(dend.obj),value=TRUE), TF_values=c(2,Inf)) %>%
set("by_labels_branches_lty", value=grep(groups[i],labels(dend.obj),value=TRUE), TF_values=c(1,Inf))
}
Which obviously does not work... And the same goes for the ifelse
in there, which would be really tricky.
Any help would be appreciated! I have no idea how to tackle this. Thanks!