1

Issue:

I have a data frame called Sub_Whistle_Count (see below). I'm trying to run a hierarchical cluster analysis using hclust(), but I can't get the label names from Whistle_Type_Sub (28 different whistle subtypes produced by dolphins) to appear in the dendrogram; only the numerical row identifiers are shown (see below).

 #Data frame structure

'data.frame':   62 obs. of  3 variables:
 $ Country         : Factor w/ 3 levels "Italy","Turkey",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Whistle_Type_Sub: Factor w/ 28 levels "A","AA","AA1",..: 1 24 25 11 2 18 7 5 9 13 ...
 $ N               : int  25 64 31 5 4 5 3 10 2 1 ...

I'm trying to change the labels in the fan-type dendrogram created with the ape package so that they show the values of Whistle_Type_Sub, and to cluster N by Country. I'm also attempting to add colour to the edges (lines) of the phylogeny-type plot, using the Dark2 palette from the RColorBrewer package, via plot(as.phylo(), type = "fan").

#Desired labels: 62 values, one per row of Sub_Whistle_Count, in row order.
#For the data shown under "Data" they are identical to
#as.character(Sub_Whistle_Count$Whistle_Type_Sub), so the same subtype name
#appears more than once (once per country in which it was recorded).

mylabels<- c("A", "E", "EA", "BE", "AA","D", "B", "AD", "BC", "CA", "AA1", "DD1", "ED", "DC", "C", "AC", "ADC", "DE", 
             "EA",  "A", "E", "DE", "F", "BE", "D", "EE1", "B", "CA", "DB",  "BB", "AA1", "ED", "AD",  "DD1", "AA", 
             "A","ED", "E","DD", "DD1","CDC", "C", "AC", "D",  "F", "EE1","BCB", "DC", "ADC", "DE",  "CA", "AA",  "BE",  
             "CBC", "B", "EA",  "AA1", "AD",  "BB",  "CD", "CB",  "DB")

I'm also trying to add colour to the edges (lines), branches and leaf tips to highlight the different clusters of whistle subtypes between countries (Country). I want to colour the `Whistle_Type_Sub` names in the dendrogram in accordance with the group they cluster with, i.e. Turkey, Montenegro or Italy, and for the edges to match the terminal tip colours when that branch within the dendrogram is associated with a given group.

Adding colour to the tips (labels) based on a set of groups (Whistle Sub Type and Country) appears to not be a big problem using the tip.color command, although, I am not one hundred per cent sure that I've done this part correctly.

However, when I try to customise the Whistle_Type_Sub labels for the dendrogram, I cannot solve the problem, as the show.tip.label argument only accepts a TRUE or FALSE value (please see my desired output below).

Would anyone be able to lend a hand?

Any help is always appreciated

I tried:

#Attempt 1
#Create labels 
labels <- def(Sub_Whistle_Count$tip.label, Sub_Whistle_Count$Whistle_Type_Sub)
(mylabels<-brewer.pal(3, "Dark2")[labels])
character(0)

#Attempt 2
labels <- def(dend$tip.label, Sub_Whistle_Count$Whistle_Type_Sub)
Warning messages:
1: In get(results[[i]], packages[[i]]) :
  restarting interrupted promise evaluation
2: In get(results[[i]], packages[[i]]) :
  internal error -3 in R_decompress1

R-Code:

library(data.table)
library(cluster) #agnes function
library(usedist) #change label names
library(ape) #create fan data frame
library('dendextend')
library(RColorBrewer) #Customise the colour palette
library(phytools)

#DENDROGRAM 
#Build the pairwise distance object that hclust() will cluster.
#Column 1 is Sub_Whistle_Count[1, 3], i.e. the single cell in row 1, column 3,
#which cbind() recycles against column 2; column 2 is the integer level code
#of Whistle_Type_Sub, shifted by 2 and doubled.
#NOTE(review): `[1, 3]` was probably meant to be `[, 3]` (the whole N column)
#- confirm with the author.
Cluster.Country.Dist<-dist(cbind(Sub_Whistle_Count[1, 3], 2*(as.numeric(Sub_Whistle_Count$Whistle_Type_Sub)-2)))
Cluster.Country.Dist

#Hierarchical clustering with hclust()'s default settings.
#The distance object carries no row labels here, which is consistent with
#the dendrogram showing numerical identifiers as described in the question.
Cluster.Country.hcl<-hclust(Cluster.Country.Dist)
Cluster.Country.hcl

#Convert the hclust result to a dendrogram object and print it
dend = as.dendrogram(Cluster.Country.hcl) 
dend 
#Connecting label color with the country label
#geo has only three elements, so mycol holds exactly three colours; it is
#not derived from Sub_Whistle_Count$Country.
#NOTE(review): presumably these three colours are recycled over the tips
#rather than matched to each row's country - confirm against ?plot.phylo.
geo <- factor(c("Montenegro", "Turkey", "Italy"))
(mycol<-brewer.pal(3, "Dark2")[geo])

#Select the colour of the branch
X <- brewer.pal(3, "Dark2")

#Open a new graphics window
dev.new()

#Plot the dendrogram
#edge.color and edge.width are drawn with sample(), so they change on every
#run and carry no information about the clusters.
#show.tip.label is given a numeric vector from sample(2:3, ...); the question
#text notes that this argument expects TRUE/FALSE.
plot(as.phylo(dend), type="fan", cex=0.9, label.offset = 0.8,
     edge.color = sample(X, length(Sub_Whistle_Count$Country)/2, replace = TRUE),
     edge.width = sample(2:3, length(Sub_Whistle_Count$Country)/2, replace = TRUE),
     show.tip.label = sample(2:3, length(Sub_Whistle_Count$Whistle_Type_Sub)/2, replace = TRUE), 
     show.node.label = TRUE, 
     tip.color=mycol, lwd=1)

Output from R Code

enter image description here

Desired Output

enter image description here

Data

structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Italy", "Turkey", 
"Montenegro"), class = "factor"), Whistle_Type_Sub = structure(c(1L, 
24L, 25L, 11L, 2L, 18L, 7L, 5L, 9L, 13L, 3L, 22L, 26L, 20L, 12L, 
4L, 6L, 23L, 25L, 1L, 24L, 23L, 28L, 11L, 18L, 27L, 7L, 13L, 
19L, 8L, 3L, 26L, 5L, 22L, 2L, 1L, 26L, 24L, 21L, 22L, 17L, 12L, 
4L, 18L, 28L, 27L, 10L, 20L, 6L, 23L, 13L, 2L, 11L, 15L, 7L, 
25L, 3L, 5L, 8L, 16L, 14L, 19L), .Label = c("A", "AA", "AA1", 
"AC", "AD", "ADC", "B", "BB", "BC", "BCB", "BE", "C", "CA", "CB", 
"CBC", "CD", "CDC", "D", "DB", "DC", "DD", "DD1", "DE", "E", 
"EA", "ED", "EE1", "F"), class = "factor"), N = c(25L, 64L, 31L, 
5L, 4L, 5L, 3L, 10L, 2L, 1L, 2L, 3L, 2L, 1L, 1L, 3L, 2L, 4L, 
26L, 54L, 20L, 10L, 18L, 7L, 7L, 10L, 2L, 3L, 2L, 2L, 2L, 1L, 
1L, 1L, 1L, 23L, 1L, 13L, 10L, 5L, 4L, 8L, 9L, 9L, 20L, 1L, 1L, 
9L, 1L, 9L, 2L, 6L, 3L, 1L, 10L, 9L, 2L, 3L, 1L, 2L, 2L, 3L)), row.names = c(NA, 
-62L), class = "data.frame")
Alice Hobbs
  • 1,021
  • 1
  • 15
  • 31

1 Answers1

1

R Code

library(data.table)
library(cluster) #agnes function
library(usedist) #change label names
library(ape) #create fan data frame
library('dendextend')
library(RColorBrewer) #Customise the colour palette
library(phytools)

# Fan dendrogram of whistle subtypes: tips are labelled with Whistle_Type_Sub
# and tips/branches are coloured by the Country each observation belongs to.

#Create a data frame object
# NOTE(review): Yeo.Whistle.Count_Reorder is not defined in this snippet; it is
# assumed to hold the Country / Whistle_Type_Sub / N data shown in the
# question - confirm.
Sub_Whistle_Count <- as.data.frame(Yeo.Whistle.Count_Reorder)
Sub_Whistle_Count

#Check the structure of Sub_Whistle_Count
str(Sub_Whistle_Count)
stopifnot(
  all(c("Country", "Whistle_Type_Sub", "N") %in% names(Sub_Whistle_Count)),
  nrow(Sub_Whistle_Count) >= 2
)

# Work with explicit factors so level codes and level names are available
# even if the columns were read in as character.
country <- as.factor(Sub_Whistle_Count$Country)
whistle_type <- as.factor(Sub_Whistle_Count$Whistle_Type_Sub)

#DENDROGRAM
# dist() needs a purely numeric matrix. Passing the factor columns directly
# (as in Sub_Whistle_Count[1:2]) turns them into NA with a coercion warning,
# so the features are built explicitly from numeric values instead.
# NOTE(review): using N plus the scaled whistle-type code follows the
# question's stated aim of clustering the counts N - confirm this is the
# intended feature set.
whistle_features <- cbind(
  N = Sub_Whistle_Count$N,
  type_code = 2 * (as.numeric(whistle_type) - 2)
)
Cluster.Country.Dist <- dist(whistle_features)
Cluster.Country.Dist

##tip labels
# Derived from the data (one label per row, in row order) instead of a
# hand-typed vector, so they cannot drift out of sync with the data frame.
mylabels <- as.character(whistle_type)

#Change the rows and column label names in the dist object
NewLabels <- dist_setNames(Cluster.Country.Dist, mylabels)
NewLabels

#Hierarchical Clustering algorithm on the dataset using hclust()
Cluster.Country.hcl <- hclust(NewLabels)
Cluster.Country.hcl

#Create a dendrogram object
dend <- as.dendrogram(Cluster.Country.hcl)
dend

#Connecting label color with the country label
# One colour per Country level; brewer.pal() needs n >= 3, hence the max().
n_country <- nlevels(country)
country_palette <- brewer.pal(max(3L, n_country), "Dark2")[seq_len(n_country)]
names(country_palette) <- levels(country)

# One colour per row/tip, looked up from that row's own country (a 3-element
# colour vector would merely be recycled across the 62 tips).
mycol <- unname(country_palette[as.character(country)])

#Select the color of the branch
X <- unname(country_palette)

# Convert the hclust object directly: as.phylo() on an hclust result keeps
# the observation order, so tip i corresponds to row i of the data frame.
tree <- as.phylo(Cluster.Country.hcl)
stopifnot(identical(tree$tip.label, mylabels))

# Classify every node by the countries found among its descendant tips:
# k (> 0) = all descendants belong to country level k, 0 = mixed.
# Walking the edges in postorder guarantees a node's children are final
# before the node itself is used as a child.
postorder_edges <- reorder(tree, order = "postorder")$edge
node_state <- c(as.integer(country), rep(NA_integer_, tree$Nnode))
for (i in seq_len(nrow(postorder_edges))) {
  parent <- postorder_edges[i, 1]
  child <- postorder_edges[i, 2]
  if (is.na(node_state[parent])) {
    node_state[parent] <- node_state[child]
  } else if (node_state[parent] != node_state[child]) {
    node_state[parent] <- 0L
  }
}

# An edge takes the colour of the node it leads to; mixed branches are grey.
# The lookup uses tree$edge so the colours are in the order plot() expects.
edge_state <- node_state[tree$edge[, 2]]
edge_col <- unname(c("grey50", country_palette)[edge_state + 1L])

#Open a new graphics window
dev.new()

#Plot the dendrogram
plot(tree, type = "fan", cex = 0.9, label.offset = 0.8,
     edge.color = edge_col,
     edge.width = 2,
     show.tip.label = TRUE,
     tip.color = mycol)

# Key linking each colour to its country
legend("topleft", legend = levels(country), col = country_palette,
       lwd = 2, bty = "n", title = "Country")

Diagram

enter image description here

Alice Hobbs
  • 1,021
  • 1
  • 15
  • 31