1

I want to add the column names as labels on my Sankey plot in R, one at the bottom of each column of the diagram. How can I do that? My code and data are below. Any help is appreciated!

    library(tidyr)
    library(dplyr)
    library(networkD3)
    # Build the link table: one row per (source, target) pair of nodes in
    # adjacent columns of `df`, with `value` = number of rows of `df` that
    # pass through that pair.
    links <-
      df %>%
      mutate(row = row_number()) %>%  # add a row id
      pivot_longer(-row, names_to = "col", values_to = "source") %>%
      mutate(col = match(col, names(df))) %>%  # convert col names to col nums
      mutate(source = paste0(source, "_", col)) %>%  # add col num to node names
      group_by(row) %>%
      arrange(col) %>%
      mutate(target = lead(source)) %>%  # get target from following node in row
      ungroup() %>%
      filter(!is.na(target)) %>%  # remove links from last column in original data
      # count() on ungrouped data returns an ungrouped result, so the later
      # assignments to links$source / links$target do not touch a grouping column
      count(source, target, name = "value") %>%
      as.data.frame()

    # One node per distinct "<status>_<column number>" id; `name` is the id
    # with the trailing "_<column number>" removed and is used as the label.
    nodes <- data.frame(id = unique(c(links$source, links$target)),
                        stringsAsFactors = FALSE)
    nodes$name <- sub("_[0-9]*$", "", nodes$id)

    # Put the nodes in a fixed status order. Ordering (instead of binding
    # hand-picked subsets) keeps any node whose name is not listed here at the
    # end, rather than dropping it and leaving NA indices in `links`.
    status_order <- c("Achiever", "Regular", "Disheartened", "Underachiever")
    nodes <- nodes[order(match(nodes$name, status_order)), , drop = FALSE]

    # sankeyNetwork() expects zero-based row indices into `nodes`.
    links$source <- match(links$source, nodes$id) - 1
    links$target <- match(links$target, nodes$id) - 1

    # iterations = 0 is passed so the layout keeps the node order set above
    # (per the networkD3 documentation; confirm for your version).
    sankeyNetwork(Links = links, Nodes = nodes, Source = "source",
                  Target = "target", Value = "value", NodeID = "name",
                  iterations = 0)

Data:

# Example data: a 35-row data.frame with one character column per month
# (March, April, May, June). Each value is a status label; the labels that
# occur here are "Achiever", "Regular" and "Disheartened".
df <- structure(list(March = c("Disheartened", "Regular", "Regular", 
"Achiever", "Achiever", "Achiever", "Achiever", "Achiever", "Regular", 
"Achiever", "Regular", "Achiever", "Disheartened", "Achiever", 
"Regular", "Regular", "Regular", "Regular", "Achiever", "Achiever", 
"Achiever", "Regular", "Achiever", "Disheartened", "Achiever", 
"Achiever", "Regular", "Achiever", "Achiever", "Regular", "Achiever", 
"Achiever", "Regular", "Achiever", "Regular"), April = c("Disheartened", 
"Regular", "Regular", "Regular", "Achiever", "Achiever", "Achiever", 
"Achiever", "Regular", "Achiever", "Regular", "Achiever", "Disheartened", 
"Achiever", "Regular", "Disheartened", "Regular", "Regular", 
"Achiever", "Achiever", "Achiever", "Regular", "Achiever", "Disheartened", 
"Achiever", "Achiever", "Regular", "Achiever", "Achiever", "Regular", 
"Regular", "Achiever", "Achiever", "Achiever", "Regular"), May = c("Disheartened", 
"Regular", "Regular", "Regular", "Achiever", "Achiever", "Achiever", 
"Achiever", "Regular", "Achiever", "Regular", "Achiever", "Disheartened", 
"Achiever", "Regular", "Disheartened", "Regular", "Regular", 
"Achiever", "Achiever", "Achiever", "Achiever", "Achiever", "Disheartened", 
"Achiever", "Achiever", "Regular", "Achiever", "Achiever", "Regular", 
"Regular", "Achiever", "Regular", "Achiever", "Regular"), June = c("Disheartened", 
"Regular", "Regular", "Regular", "Achiever", "Achiever", "Achiever", 
"Achiever", "Regular", "Achiever", "Regular", "Achiever", "Disheartened", 
"Achiever", "Regular", "Disheartened", "Regular", "Regular", 
"Achiever", "Achiever", "Achiever", "Achiever", "Achiever", "Disheartened", 
"Achiever", "Achiever", "Regular", "Achiever", "Achiever", "Regular", 
"Regular", "Achiever", "Regular", "Achiever", "Regular")), class = "data.frame", row.names = c(NA, 
-35L))
Ester Silva
  • 670
  • 6
  • 24

1 Answer

0

If possible, you can try (and I recommend) the very nice ggsankey package.

library(ggsankey)
library(tidyverse)
# Reshape the four month columns with make_long() and draw the Sankey diagram.
# The aesthetics use the x / next_x / node / next_node columns that
# make_long() produces.
df %>%
  make_long(March, April, May, June) %>%
  ggplot(aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    fill = factor(node),  # one fill colour per status
    label = node
  )) +
  geom_sankey(
    flow.alpha = .3,
    node.color = "gray30",
    show.legend = FALSE  # spelled out: `F` is an ordinary, reassignable variable
  ) +
  geom_sankey_label(size = 3, color = "white", fill = "gray40") +
  theme_sankey(base_size = 18) +
  xlab("")  # blank out the x-axis title

enter image description here

Roman
  • 17,008
  • 3
  • 36
  • 49