0

How can I create a Sankey plot that shows the change in land use class over the years, starting from a data frame that holds the proportion of area for each land use class in each year?

# Percentage of total area covered by each land-use class (one row per
# class) in each survey year (one column per year).
lu_dt <- structure(
  list(
    LU_class = c(
      "Cropland", "Forest", "Grassland", "Other Land",
      "Settlement", "Water", "Wetland"
    ),
    lu2000p = c(27.79, 22.92, 0.78, 0.05, 47.66, 0.34, 0.46),
    lu2005p = c(27.86, 22.51, 0.78, 0.05, 48, 0.34, 0.46),
    lu2010p = c(23.29, 17.37, 0.69, 0.03, 57.86, 0.34, 0.42),
    lu2015p = c(21.36, 16.95, 0.66, 0.03, 60.24, 0.34, 0.42),
    lu2020p = c(21.07, 16.81, 0.65, 0.03, 60.68, 0.34, 0.41)
  ),
  row.names = c(NA, -7L),
  class = c("tbl_df", "tbl", "data.frame")
)

Here is the code I tried, but the output is not what I am looking for:

library(tidyverse)
library(networkD3)
# Reshape to long form: one row per (year column, land-use class) pair.
lu_dt_l <- lu_dt %>%
  pivot_longer(cols = 2:6, names_to = "yrs", values_to = "val") %>%
  dplyr::select(yrs, LU_class, val)

# Every year column is a link source and every land-use class a link target.
links <- dplyr::rename(lu_dt_l, source = yrs, target = LU_class, value = val)

# Node table: all distinct sources first, followed by all distinct targets.
nodes <- data.frame(
  name = c(unique(links$source), unique(links$target))
)

# Translate node names into zero-based positions within the node table.
links <- links %>%
  mutate(
    IDsource = match(source, nodes$name) - 1,
    IDtarget = match(target, nodes$name) - 1
  )

sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "name",
  fontSize = 20,
  nodeWidth = 20,
  LinkGroup = "source",
  NodeGroup = "name"
)

I want something like the image below from here. enter image description here

Lily Nature
  • 613
  • 7
  • 18

1 Answers1

0

Reference site: https://r-graph-gallery.com/321-introduction-to-interactive-sankey-diagram-2.html

library(tidyverse)
library(networkD3)

# Percentage of total area per land-use class (rows) for each survey year
# (columns lu2000p ... lu2020p).
lu_dt <- structure(list(LU_class = c("Cropland", "Forest", "Grassland","Other Land", "Settlement", "Water", "Wetland"), 
                        lu2000p = c(27.79, 22.92, 0.78, 0.05, 47.66, 0.34, 0.46), 
                        lu2005p = c(27.86, 22.51, 0.78, 0.05, 48, 0.34, 0.46), 
                        lu2010p = c(23.29, 17.37, 0.69, 0.03, 57.86, 0.34, 0.42), 
                        lu2015p = c(21.36, 16.95, 0.66, 0.03, 60.24, 0.34, 0.42), 
                        lu2020p = c(21.07, 16.81, 0.65, 0.03, 60.68, 0.34, 0.41)), 
                   row.names = c(NA, -7L), class = c("tbl_df", "tbl", "data.frame"))

# Column order defines the time axis; row order defines the class order.
# Deriving both from the data avoids hard-coded row windows and group vectors.
year_cols <- setdiff(names(lu_dt), "LU_class")
lu_classes <- lu_dt$LU_class

# One row per (year, class) node, ordered by year and then by class, with a
# unique node label such as "Cropland_lu2000p".
lu_long <- lu_dt %>%
  pivot_longer(
    cols = all_of(year_cols),
    names_to = "year",
    values_to = "value"
  ) %>%
  arrange(match(year, year_cols), match(LU_class, lu_classes)) %>%
  mutate(name = paste(LU_class, year, sep = "_"))

# Link each class to itself in the following year. The link value is the
# class's share in the *target* year; links without a successor year or with
# a non-positive value are dropped.
final_link_lu_dt <- lu_long %>%
  group_by(LU_class) %>%
  mutate(target = lead(name), target_value = lead(value)) %>%
  ungroup() %>%
  filter(!is.na(target), target_value > 0) %>%
  select(source = name, target, value = target_value) %>%
  as.data.frame()

# Node table: every label that appears in a link, sources first. The group
# is one letter per land-use class so that a class keeps a single colour
# across all years.
node_names <- unique(c(final_link_lu_dt$source, final_link_lu_dt$target))
node_class <- lu_long$LU_class[match(node_names, lu_long$name)]
nodes_lu_dt <- data.frame(
  name = node_names,
  group = factor(letters[match(node_class, lu_classes)])
)

# Translate node names into zero-based positions within the node table.
final_link_lu_dt$IDsource <- match(final_link_lu_dt$source, nodes_lu_dt$name) - 1
final_link_lu_dt$IDtarget <- match(final_link_lu_dt$target, nodes_lu_dt$name) - 1

sankeyNetwork(Links = final_link_lu_dt, Nodes = nodes_lu_dt,
              Source = "IDsource", Target = "IDtarget",
              Value = "value", NodeID = "name",
              sinksRight = FALSE, NodeGroup = "group")


Result
enter image description here

Ssong
  • 184
  • 1
  • 10
  • This is not what I am looking for. I want to show the change by year, so 2000, 2005, 2010, etc. will be the nodes and the change between them will be the links. – Lily Nature Nov 10 '22 at 16:57
  • @LilyNature Okay, I understand your question and edited the previous code. But I recommend using other charts for visualization. As you can see, the source and target bar lengths in your question (i.e., Cropland_2001, Forest_2001, ..., Bare_2018) are the same despite the different values per node. Also, it is hard to define the value between source and target in your code. To show the pattern by time, it will be better to use a connected scatter plot setting the x-axis to year and the y-axis to value. – Ssong Nov 11 '22 at 08:16