0

I am trying to make a Sankey diagram with six levels in total in RStudio using the googleVis package. With the help of the question "How to make a googleVis multiple Sankey from a data.frame?" I got it working for three levels with the code presented there. Here it is:

'''

# Three-level Sankey example: source -> mid -> destination.
library(dplyr)
library(googleVis) # provides gvisSankey()

# Simulate 100 observations, each carrying one label per level.
source <- sample(
  c("NorthSrc", "SouthSrc", "EastSrc", "WestSrc"), 100, replace = TRUE
)
mid <- sample(
  c("NorthMid", "SouthMid", "EastMid", "WestMid"), 100, replace = TRUE
)
destination <- sample(
  c("NorthDes", "SouthDes", "EastDes", "WestDes"), 100, replace = TRUE
)
dummy <- rep(1, 100) # For aggregation

dat <- data.frame(source, mid, destination, dummy)
# Full three-way aggregation; not used by the plot below.
aggdat <- aggregate(dummy ~ source + mid + destination, dat, sum)

# Sum the weights of every source -> mid link.
datSM <- dat %>%
  group_by(source, mid) %>%
  summarise(toMid = sum(dummy)) %>%
  ungroup()

# Sum the weights of every mid -> destination link.
datMD <- dat %>%
  group_by(mid, destination) %>%
  summarise(toDes = sum(dummy)) %>%
  ungroup()

# Give both link tables the same column names so they can be stacked.
colnames(datSM) <- colnames(datMD) <- c("From", "To", "Dummy")

datVis <- rbind(datSM, datMD)

# `weight` has to name the column exactly as it was renamed above
# ("Dummy", not "dummy").
p <- gvisSankey(datVis, from = "From", to = "To", weight = "Dummy")
plot(p)

'''

This results in the following diagram (image not shown here):

My question now is: how can I insert additional levels, and how do I have to adapt the code so that multiple mid-levels are accepted? Here is the example dataset:

'''

# Example data for a six-level Sankey:
# source -> mid_one -> mid_two -> mid_three -> mid_four -> destination.
# Every level uses its own set of labels: a Sankey node is identified by its
# label alone, so a label repeated in two levels would merge into one node and
# create a cycle that the chart cannot render.
source <- sample(
  c("NorthSrc", "SouthSrc", "EastSrc", "WestSrc"), 100, replace = TRUE
)
mid_one <- sample(c("North", "South", "East", "West"), 100, replace = TRUE)
mid_two <- sample(
  c("WestNorth", "WestSouth", "SouthEast", "NorthWest"), 100, replace = TRUE
)
mid_three <- sample(
  c("NorthMid", "SouthMid", "EastMid", "WestMid"), 100, replace = TRUE
)
# "WestCis"/"EastCis" rather than "West"/"East", which mid_one already uses.
mid_four <- sample(
  c("WestCis", "EastCis", "NorthCis", "SouthCis"), 100, replace = TRUE
)
destination <- sample(
  c("NorthDes", "SouthDes", "EastDes", "WestDes"), 100, replace = TRUE
)
dummy <- rep(1, 100) # For aggregation

dat <- data.frame(
  source, mid_one, mid_two, mid_three, mid_four, destination, dummy
)
# Weight of every complete path through all six levels.
aggdat <- aggregate(
  dummy ~ source + mid_one + mid_two + mid_three + mid_four + destination,
  dat,
  sum
)

'''

HOX
  • 1
  • 1
  • 1

1 Answer

0

I just found the solution myself (in case someone runs into a similar problem):


    # Build the link table for gvisSankey(): one row per From -> To pair of
    # adjacent levels, with Dummy holding the summed `dummy` of that link.
    level_cols <- c(
      "source", "mid_one", "mid_two", "mid_three", "mid_four", "destination"
    )

    # Sankey nodes are identified by label only, so a label used in more than
    # one level (e.g. "West" in both mid_one and mid_four) would merge into a
    # single node and create a cycle, which the chart cannot render. Suffix
    # only those shared labels with their level name; unique labels are kept
    # unchanged.
    level_labels <- lapply(
      dat[level_cols],
      function(x) unique(as.character(x))
    )
    all_labels <- unlist(level_labels, use.names = FALSE)
    shared_labels <- unique(all_labels[duplicated(all_labels)])

    # Labels of one level column, disambiguated where necessary.
    node_labels <- function(col) {
      x <- as.character(dat[[col]])
      reused <- x %in% shared_labels
      x[reused] <- paste0(x[reused], " (", col, ")")
      x
    }

    # Aggregated links between the level columns `from` and `to`.
    link_table <- function(from, to) {
      data.frame(
        From = node_labels(from),
        To = node_labels(to),
        Dummy = dat$dummy,
        stringsAsFactors = FALSE
      ) %>%
        group_by(From, To) %>%
        summarise(Dummy = sum(Dummy)) %>%
        ungroup()
    }

    # One link table per pair of neighbouring levels.
    links <- Map(link_table, head(level_cols, -1), tail(level_cols, -1))

    # Keep the per-level tables available under their original names.
    datSM1 <- links[[1]]
    datSM2 <- links[[2]]
    datSM3 <- links[[3]]
    datSM4 <- links[[4]]
    datMD <- links[[5]]

    # unname() so the level names are not passed to rbind() as argument names.
    datVis <- do.call(rbind, unname(links))


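Then plot via gvisSankey, using the column names assigned above, e.g. `plot(gvisSankey(datVis, from = "From", to = "To", weight = "Dummy"))`.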

Waldi
  • 39,242
  • 6
  • 30
  • 78
HOX
  • 1
  • 1
  • 1