1

I have a sankey graph that I created in ggplot, but I want the nodes on the right to relate to the left node... so if there are 4 colours from the left node entering the right node, the right node should have all 4 colours.

Here is my dataset

# Example data in the long format the ggsankey aesthetics expect: 50 rows,
# alternating between a "Method_Group" row (x = 1, linking to a "Topic" node
# via next_x / next_node) and a terminal "Topic" row (x = 2, with next_x and
# next_node set to NA).
# NOTE(review): "Fisheries Managemenet" looks like a misspelling of
# "Fisheries Management"; it is left unchanged because it is a node label.
df2 <- structure(list(x = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), levels = c("Method_Group",
"Topic"), class = "factor"), node = c("BRUV + Both", "Behavioural Ecology",
"BRUV + Both", "Conservation Methods", "BRUV + Both", "Other Drivers",
"Animal Borne + No Receiver", "Behavioural Ecology", "Controlled + Receiver",
"Behavioural Ecology", "Controlled + Receiver", "Reproductive Ecology",
"Controlled + Receiver", "Other Drivers", "Controlled + Receiver",
"Behavioural Ecology", "Controlled + Receiver", "Methodological",
"Animal Borne + No Receiver", "Behavioural Ecology", "Animal Borne + No Receiver",
"Methodological", "Stationary + No Receiver", "Reproductive Ecology",
"Stationary + No Receiver", "Landuse Management", "Stationary + No Receiver",
"Other Drivers", "Animal Borne + No Receiver", "Behavioural Ecology",
"Animal Borne + No Receiver", "Methodological", "Animal Borne + No Receiver",
"Reproductive Ecology", "Stationary + Receiver", "Behavioural Ecology",
"Stationary + Receiver", "Fisheries Managemenet", "Stationary + Receiver",
"Behavioural Ecology", "Stationary + Receiver", "Methodological",
"Stationary + Receiver", "Fisheries Managemenet", "BRUV + Both",
"Behavioural Ecology", "BRUV + Both", "Methodological", "BRUV + Both",
"Conservation Methods"), next_x = structure(c(2L, NA, 2L, NA,
2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA,
2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA,
2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA), levels = c("Method_Group",
"Topic"), class = "factor"), next_node = c("Behavioural Ecology",
NA, "Conservation Methods", NA, "Other Drivers", NA, "Behavioural Ecology",
NA, "Behavioural Ecology", NA, "Reproductive Ecology", NA, "Other Drivers",
NA, "Behavioural Ecology", NA, "Methodological", NA, "Behavioural Ecology",
NA, "Methodological", NA, "Reproductive Ecology", NA, "Landuse Management",
NA, "Other Drivers", NA, "Behavioural Ecology", NA, "Methodological",
NA, "Reproductive Ecology", NA, "Behavioural Ecology", NA, "Fisheries Managemenet",
NA, "Behavioural Ecology", NA, "Methodological", NA, "Fisheries Managemenet",
NA, "Behavioural Ecology", NA, "Methodological", NA, "Conservation Methods",
NA)), row.names = c(NA, -50L), class = c("tbl_df", "tbl", "data.frame"
))

Here is the code that I'm using

devtools::install_github("davidsjoberg/ggsankey")
library(ggsankey)
library(dplyr)
library(ggplot2)

# Two-stage Sankey diagram built from df2. Flows and node boxes are filled by
# node name, and every node gets a white label showing its name.
ggplot(
  df2,
  aes(
    x = x, next_x = next_x,
    node = node, next_node = next_node,
    fill = factor(node), label = node
  )
) +
  geom_sankey(
    flow.alpha = 1, node.color = "black",
    show.legend = FALSE, width = 0.40
  ) +
  geom_sankey_label(size = 5, color = "black", fill = "white") +
  theme_bw() +
  # Strip axes and grid lines so only the diagram itself remains.
  theme(
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  # Alternative palette: scale_fill_viridis_d(option = "inferno")
  # scale_fill_viridis_d() ships with ggplot2; the former
  # scale_fill_viridis(discrete = TRUE) needs the viridis package, which this
  # script never loads.
  scale_fill_viridis_d()

I want the right node of my graph to look like the right node in this image, enter image description here

That is, all the colours from the left nodes should show up in the right node they flow into. Is this possible with Sankey plots and, if so, how do I do it?

Kristen Cyr
  • 629
  • 5
  • 16

1 Answer

2

A bit hacky, but one option would be to overplot the boxes on the right, reusing the computed x and y coordinates that geom_sankey uses to draw the flows (which are drawn via geom_polygon).

To this end I first get the data using layer_data, filter it for the endpoints via filter(x == 2 - width / 2) and keep only the data for one of the endpoints per polygon or flow using distinct(fill, flow_end_ymax, .keep_all = TRUE).

This dataset could then be used in a geom_rect to extend the flows and overplot the rectangles drawn by geom_sankey:

library(ggsankey)
library(dplyr)
library(ggplot2)

# Node width, shared by geom_sankey() and the overlay rectangles below.
width <- 0.4

# Base Sankey plot; kept in `p` so its computed layer data can be queried and
# further layers can be added on top.
p <- ggplot(
  df2,
  aes(
    x = x, next_x = next_x,
    node = node, next_node = next_node,
    fill = factor(node), label = node
  )
) +
  geom_sankey(
    flow.alpha = 1, node.color = "black", show.legend = FALSE,
    width = width, linewidth = 2
  ) +
  theme_void() +
  theme(
    plot.margin = unit(rep(5.5, 4), "pt")
  ) +
  scale_fill_viridis_d()

# Left edge of the right-hand boxes: second x position minus half the width.
right_edge <- 2 - width / 2

# Get the data from the flows layer: keep the points where a flow reaches the
# right-hand boxes, one row per flow. `p` is passed explicitly instead of
# relying on last_plot(), and near() avoids exact `==` on computed doubles.
dat <- layer_data(p, 1) |>
  filter(near(x, right_edge)) |>
  distinct(fill, flow_end_ymax, .keep_all = TRUE)

# Get the data from the boxes layer: only the boxes in the right-hand column.
dat1 <- layer_data(p, 2) |>
  filter(near(xmin, right_edge))

p +
  # Extend each flow across its target box, overplotting the single-colour
  # rectangles drawn by geom_sankey().
  geom_rect(data = dat, aes(
    xmin = x, xmax = x + width,
    ymin = flow_end_ymin, ymax = flow_end_ymax,
    fill = label
  ), inherit.aes = FALSE) +
  # Draw the outline
  geom_rect(data = dat1, aes(
    xmin = xmin, xmax = xmax,
    ymin = ymin, ymax = ymax
  ), inherit.aes = FALSE, fill = NA, color = "black", linewidth = 2) +
  geom_sankey_label(size = 5, color = "black", fill = "white") +
  guides(fill = "none")

enter image description here

stefan
  • 90,330
  • 6
  • 25
  • 51
  • Wow, this is great. Where in your code can I increase the width of the black outline around the boxes? – Kristen Cyr Mar 21 '23 at 19:24
  • 1
    Hi Kristen. I guess you mean the outline of the outer box? In that case IMHO the cleanest approach would be to draw another rect on top. :D Basically the same hacky approach. See my edit. – stefan Mar 21 '23 at 20:17
  • 1
    It may be hacky, but this is exactly what I'm looking for! – Kristen Cyr Mar 21 '23 at 21:11