3

I have clustered xy data which I'd like to plot using R's plotly where the points are colored by their cluster membership and there's text annotating the cluster number and the fraction of the points out of the total. I'm also trying to get this text annotation to be in the same colors as the points, i.e., colored by cluster.

Here's my code:

library(dplyr)
library(plotly)
# Fix the RNG seed so the simulated data are reproducible.
set.seed(1)
# Simulate five clusters (i = 1, 5, 9, 13, 17) of 50 points each: x and y are
# drawn from normal distributions with mean i and sd 1; `cluster` records i.
df <- do.call(rbind,lapply(seq(1,20,4),function(i) data.frame(x=rnorm(50,mean=i,sd=1),y=rnorm(50,mean=i,sd=1),cluster=i)))
# Treat the cluster id as a categorical variable.
df$cluster <- factor(df$cluster)
# One hex color per cluster.
cluster.colors <- c("#FF6C91","#EF7F47","#D39300","#AAA300","#6BB100")
# Per-cluster summary: mutate() overwrites x/y with the group means and adds the
# group size n; unique() then drops the rows that have become duplicates.
cluster.centers.df <- dplyr::group_by(df,cluster) %>%
  dplyr::mutate(x=mean(x),y=mean(y),n=n()) %>% unique()
# NOTE(review): this is a fraction (n / total), not multiplied by 100, yet the
# annotation text below appends "%" -- confirm which one is intended.
cluster.centers.df$percentage <- cluster.centers.df$n/sum(cluster.centers.df$n)
cluster.centers.df$cluster <- as.character(cluster.centers.df$cluster)


# Scatter plot colored by cluster, with one arrowed text annotation placed at
# each cluster center and both axes stripped of zero lines and tick labels.
# NOTE(review): font$color receives the whole cluster.colors vector here; the
# labels are reported not to pick up the per-cluster colors with this call.
plot_ly(marker=list(size=12),type='scatter',mode="markers",x=df$x,y=df$y,color=df$cluster,colors=cluster.colors,showlegend=F) %>%
  add_annotations(text=sprintf("<b>%s</b>",paste0(cluster.centers.df$cluster,"\n",gsub("\\.00","",as.character(cluster.centers.df$percentage)),"%")),showarrow=T,arrowhead=1,arrowsize=1,x=cluster.centers.df$x,y=cluster.centers.df$y,ax=20,ay=-30,font=list(size=14,color=cluster.colors)) %>%
  layout(xaxis=list(title="X",zeroline=F,showticklabels=F),yaxis=list(title="Y",zeroline=F,showticklabels=F))

This, however, does not color the text according to cluster.colors.

enter image description here

Any idea?

dan
  • 6,048
  • 10
  • 57
  • 125

1 Answer

2

I was not able to figure out why the add_annotations() method didn't work, so I created the annotations manually, setting the color of each one in a separate iteration of a for loop. Please refer to the code below and let me know if this fixes your issue.

library(dplyr)
library(plotly)
# Reproducible example: scatter plot of clustered points in which every cluster
# label is drawn in the same color as that cluster's markers.
set.seed(1)

# Simulate five clusters (i = 1, 5, 9, 13, 17) of 50 points each: x and y are
# drawn from normal distributions with mean i and sd 1; `cluster` records i.
df <- do.call(rbind, lapply(seq(1, 20, 4), function(i) {
  data.frame(
    x = rnorm(50, mean = i, sd = 1),
    y = rnorm(50, mean = i, sd = 1),
    cluster = i
  )
}))
df$cluster <- factor(df$cluster)
cluster.colors <- c("#FF6C91", "#EF7F47", "#D39300", "#AAA300", "#6BB100")

# One row per cluster holding its center (mean x / mean y) and its size.
# summarise() returns the rows in factor-level order, which is the order in
# which plotly maps the levels of `color` onto `colors`.
cluster.centers.df <- df %>%
  dplyr::group_by(cluster) %>%
  dplyr::summarise(x = mean(x), y = mean(y), n = dplyr::n()) %>%
  dplyr::ungroup()
# Share of all points as a true percentage (0-100), because the label adds "%".
cluster.centers.df$percentage <-
  100 * cluster.centers.df$n / sum(cluster.centers.df$n)
cluster.centers.df$cluster <- as.character(cluster.centers.df$cluster)

# Label text: bold cluster id, a line break, then the percentage with at most
# two decimals (a trailing ".00" is dropped). Plotly annotation text is a small
# HTML subset, so the line break has to be "<br>" rather than "\n".
percentage.labels <- sub(
  "\\.00$", "",
  sprintf("%.2f", cluster.centers.df$percentage)
)
annotation.text <- sprintf(
  "<b>%s<br>%s%%</b>",
  cluster.centers.df$cluster,
  percentage.labels
)

# Build one annotation spec per cluster so that each can carry its own scalar
# font color. seq_len() keeps this safe when there are no clusters at all.
a <- lapply(seq_len(nrow(cluster.centers.df)), function(i) {
  list(
    text = annotation.text[i],
    showarrow = TRUE,
    arrowhead = 1,
    arrowsize = 1,
    x = cluster.centers.df$x[i],
    y = cluster.centers.df$y[i],
    ax = 20,
    ay = -30,
    font = list(size = 14, color = cluster.colors[i])
  )
})

# Markers colored by cluster; the annotations are attached through layout().
plot_ly(
  x = df$x,
  y = df$y,
  color = df$cluster,
  colors = cluster.colors,
  type = "scatter",
  mode = "markers",
  marker = list(size = 12),
  showlegend = FALSE
) %>%
  layout(
    annotations = a,
    xaxis = list(title = "X", zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(title = "Y", zeroline = FALSE, showticklabels = FALSE)
  )
Naren Murali
  • 19,250
  • 3
  • 27
  • 54