1

I am investigating Sankey diagrams via R's riverplot package. However, my output is not legible at all; I am sure I am doing something wrong, but I am still quite new to R. Essentially, the data (https://raw.githubusercontent.com/kilimba/data/master/infection_flows.csv) represents the flow of infections from particular age groups to particular age groups (by sex). My R program looks like this (data acquired via curl):

library(riverplot)
library(curl)

# Download the infection-flow table and give its three columns the names
# used below for flows: N1 (origin), N2 (destination), Value.
edges <- read.csv(
  curl("https://raw.githubusercontent.com/kilimba/data/master/infection_flows.csv")
)
colnames(edges) <- c("N1", "N2", "Value")

# Suffix the endpoints so that the same age/sex group becomes two distinct
# nodes: one as a source of infection and one as a target.
edges$N1 <- paste0(edges$N1, " -source")
edges$N2 <- paste0(edges$N2, " -target")

# One row per distinct node ID appearing on either side of an edge.
nodes <- data.frame(
  ID = unique(c(edges$N1, edges$N2)),
  stringsAsFactors = FALSE
)

# Horizontal tier of each node: male sources -> 1, any female node -> 2,
# male targets -> 3; IDs matching none of the patterns stay NA.
# Masks are applied from lowest to highest priority so a later assignment
# overrides an earlier one.
nodes$x <- rep(NA_real_, nrow(nodes))
nodes$x[grepl("Female", nodes$ID)] <- 2
nodes$x[grepl("Male -source", nodes$ID)] <- 1
nodes$x[grepl("Male -target", nodes$ID)] <- 3

# Split the nodes into the three horizontal tiers and give every node a
# vertical slot y = 1, 2, ... within its own tier.
# seq_len() replaces the former `for (i in 1:nrow(...))` loops: 1:nrow()
# counts 1, 0 when a tier is empty, whereas seq_len(0) is simply empty.
# The slots are stored as doubles, as the loops produced.

# males who are infection sources
msource <- subset(nodes, nodes$x == 1)
msource$y <- as.numeric(seq_len(nrow(msource)))

# females (both sources and targets). The wish is that these form the
# central tier of the chart, as both a target of infection and a source
female <- subset(nodes, nodes$x == 2)
female$y <- as.numeric(seq_len(nrow(female)))

# males who are infection targets
mtarget <- subset(nodes, nodes$x == 3)
mtarget$y <- as.numeric(seq_len(nrow(mtarget)))

# Recombine the tiers (targets, then sources, then females) and index the
# rows by node ID.
nodes <- rbind(mtarget, msource, female)

rownames(nodes) <- nodes$ID

# Four "Set1" hues with the hex alpha suffix "60" appended.
# brewer.pal() comes from RColorBrewer, which this script never attaches, so
# it is called through its namespace.
# NOTE(review): riverplot appears to import RColorBrewer, so it should already
# be installed alongside it -- confirm.
palette <- paste0(RColorBrewer::brewer.pal(4, "Set1"), "60")

# One style per node, coloured by its vertical slot. The palette has only
# four entries while y runs well past 4, so the index is wrapped with %%;
# plain `palette[n + 1]` yields NA for every node whose y is 4 or more.
styles <- lapply(nodes$y, function(n) {
  list(col = palette[n %% length(palette) + 1], lty = 0, textcol = "black")
})

names(styles) <- nodes$ID

# Assemble the list that plot() is given and tag it with the "riverplot"
# class.
rp <- list(nodes = nodes, edges = edges, styles = styles)
class(rp) <- c(class(rp), "riverplot")
plot(rp, plot_area = 0.95, yscale = 0.06)


# Second attempt: renumber the vertical slot y within each tier and plot
# without custom styles.
# seq_len() replaces `for (i in 1:nrow(...))`, which iterates over c(1, 0)
# when a tier has no rows; the slots are stored as doubles.

# male infection sources (tier x = 1)
msource <- subset(nodes, nodes$x == 1)
msource$y <- as.numeric(seq_len(nrow(msource)))

# female nodes, sources and targets alike (tier x = 2)
female <- subset(nodes, nodes$x == 2)
female$y <- as.numeric(seq_len(nrow(female)))

# male infection targets (tier x = 3)
mtarget <- subset(nodes, nodes$x == 3)
mtarget$y <- as.numeric(seq_len(nrow(mtarget)))

# Recombine the tiers in the order targets, sources, females.
nodes <- rbind(mtarget, msource, female)

# Assemble the list that plot() is given and tag it with the "riverplot"
# class.
rp <- list(nodes = nodes, edges = edges)
class(rp) <- c(class(rp), "riverplot")
plot(rp, plot_area = 0.95, yscale = 0.06)

As you see when you run it, the output is unintelligible, whereas I was looking for something along the lines of:

||              ||                                ||
==============>>  ==============================>>||
||              ||==============================>>||
15-19 Male   15-19 Female                     15-19 Male

dput of the nodes dataframe is:

structure(list(ID = c("15-24 Male -target", "20-24 Male -target", 
"25-29 Male -target", "30-34 Male -target", "35-39 Male -target", 
"40-44 Male -target", "45-49 Male -target", "50-54 Male -target", 
"55-59 Male -target", "60+ Male -target", "15-19 Male -source", 
"20-24 Male -source", "25-29 Male -source", "30-34 Male -source", 
"35-39 Male -source", "40-44 Male -source", "45-49 Male -source", 
"50-54 Male -source", "55-59 Male -source", "60+ Male -source", 
"15-19 Female -source", "20-24 Female -source", "25-29 Female -source", 
"30-34 Female -source", "35-39 Female -source", "40-44 Female -source", 
"45-49 Female -source", "50-54 Female -source", "55-59 Female -source", 
"15-19 Female -target", "20-24 Female -target", "25-29 Female -target", 
"30-34 Female -target", "35-39 Female -target", "40-44 Female -target", 
"45-49 Female -target", "50-54 Female -target", "55-59 Female -target", 
"60+ Female -target"), x = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2), y = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17, 18, 19)), .Names = c("ID", "x", "y"), row.names = c("15-24 Male -target", 
"20-24 Male -target", "25-29 Male -target", "30-34 Male -target", 
"35-39 Male -target", "40-44 Male -target", "45-49 Male -target", 
"50-54 Male -target", "55-59 Male -target", "60+ Male -target", 
"15-19 Male -source", "20-24 Male -source", "25-29 Male -source", 
"30-34 Male -source", "35-39 Male -source", "40-44 Male -source", 
"45-49 Male -source", "50-54 Male -source", "55-59 Male -source", 
"60+ Male -source", "15-19 Female -source", "20-24 Female -source", 
"25-29 Female -source", "30-34 Female -source", "35-39 Female -source", 
"40-44 Female -source", "45-49 Female -source", "50-54 Female -source", 
"55-59 Female -source", "15-19 Female -target", "20-24 Female -target", 
"25-29 Female -target", "30-34 Female -target", "35-39 Female -target", 
"40-44 Female -target", "45-49 Female -target", "50-54 Female -target", 
"55-59 Female -target", "60+ Female -target"), class = "data.frame")

dput() of the edges dataframe is:

structure(list(N1 = c("15-19 Male -source", "15-19 Male -source", 
"15-19 Male -source", "20-24 Male -source", "20-24 Male -source", 
"20-24 Male -source", "20-24 Male -source", "20-24 Male -source", 
"25-29 Male -source", "25-29 Male -source", "25-29 Male -source", 
"25-29 Male -source", "25-29 Male -source", "25-29 Male -source", 
"25-29 Male -source", "30-34 Male -source", "30-34 Male -source", 
"30-34 Male -source", "30-34 Male -source", "30-34 Male -source", 
"30-34 Male -source", "30-34 Male -source", "35-39 Male -source", 
"35-39 Male -source", "35-39 Male -source", "35-39 Male -source", 
"35-39 Male -source", "35-39 Male -source", "35-39 Male -source", 
"35-39 Male -source", "35-39 Male -source", "40-44 Male -source", 
"40-44 Male -source", "40-44 Male -source", "40-44 Male -source", 
"40-44 Male -source", "40-44 Male -source", "40-44 Male -source", 
"40-44 Male -source", "40-44 Male -source", "45-49 Male -source", 
"45-49 Male -source", "45-49 Male -source", "45-49 Male -source", 
"45-49 Male -source", "45-49 Male -source", "45-49 Male -source", 
"45-49 Male -source", "45-49 Male -source", "45-49 Male -source", 
"50-54 Male -source", "50-54 Male -source", "50-54 Male -source", 
"50-54 Male -source", "50-54 Male -source", "50-54 Male -source", 
"50-54 Male -source", "50-54 Male -source", "50-54 Male -source", 
"50-54 Male -source", "55-59 Male -source", "55-59 Male -source", 
"55-59 Male -source", "55-59 Male -source", "55-59 Male -source", 
"55-59 Male -source", "55-59 Male -source", "55-59 Male -source", 
"55-59 Male -source", "60+ Male -source", "60+ Male -source", 
"60+ Male -source", "60+ Male -source", "60+ Male -source", "60+ Male -source", 
"60+ Male -source", "60+ Male -source", "60+ Male -source", "60+ Male -source", 
"15-19 Female -source", "15-19 Female -source", "15-19 Female -source", 
"15-19 Female -source", "15-19 Female -source", "15-19 Female -source", 
"20-24 Female -source", "20-24 Female -source", "20-24 Female -source", 
"20-24 Female -source", "20-24 Female -source", "20-24 Female -source", 
"20-24 Female -source", "20-24 Female -source", "20-24 Female -source", 
"20-24 Female -source", "25-29 Female -source", "25-29 Female -source", 
"25-29 Female -source", "25-29 Female -source", "25-29 Female -source", 
"25-29 Female -source", "25-29 Female -source", "25-29 Female -source", 
"25-29 Female -source", "25-29 Female -source", "30-34 Female -source", 
"30-34 Female -source", "30-34 Female -source", "30-34 Female -source", 
"30-34 Female -source", "30-34 Female -source", "30-34 Female -source", 
"30-34 Female -source", "30-34 Female -source", "35-39 Female -source", 
"35-39 Female -source", "35-39 Female -source", "35-39 Female -source", 
"35-39 Female -source", "35-39 Female -source", "35-39 Female -source", 
"35-39 Female -source", "35-39 Female -source", "40-44 Female -source", 
"40-44 Female -source", "40-44 Female -source", "40-44 Female -source", 
"40-44 Female -source", "40-44 Female -source", "40-44 Female -source", 
"40-44 Female -source", "45-49 Female -source", "45-49 Female -source", 
"45-49 Female -source", "45-49 Female -source", "45-49 Female -source", 
"45-49 Female -source", "45-49 Female -source", "50-54 Female -source", 
"50-54 Female -source", "50-54 Female -source", "50-54 Female -source", 
"50-54 Female -source", "50-54 Female -source", "55-59 Female -source", 
"55-59 Female -source"), N2 = c("15-19 Female -target", "20-24 Female -target", 
"25-29 Female -target", "15-19 Female -target", "20-24 Female -target", 
"25-29 Female -target", "30-34 Female -target", "35-39 Female -target", 
"15-19 Female -target", "20-24 Female -target", "25-29 Female -target", 
"30-34 Female -target", "35-39 Female -target", "40-44 Female -target", 
"45-49 Female -target", "15-19 Female -target", "20-24 Female -target", 
"25-29 Female -target", "30-34 Female -target", "35-39 Female -target", 
"40-44 Female -target", "45-49 Female -target", "15-19 Female -target", 
"20-24 Female -target", "25-29 Female -target", "30-34 Female -target", 
"35-39 Female -target", "40-44 Female -target", "45-49 Female -target", 
"50-54 Female -target", "55-59 Female -target", "15-19 Female -target", 
"20-24 Female -target", "25-29 Female -target", "30-34 Female -target", 
"35-39 Female -target", "40-44 Female -target", "45-49 Female -target", 
"50-54 Female -target", "55-59 Female -target", "15-19 Female -target", 
"20-24 Female -target", "25-29 Female -target", "30-34 Female -target", 
"35-39 Female -target", "40-44 Female -target", "45-49 Female -target", 
"50-54 Female -target", "55-59 Female -target", "60+ Female -target", 
"15-19 Female -target", "20-24 Female -target", "25-29 Female -target", 
"30-34 Female -target", "35-39 Female -target", "40-44 Female -target", 
"45-49 Female -target", "50-54 Female -target", "55-59 Female -target", 
"60+ Female -target", "20-24 Female -target", "25-29 Female -target", 
"30-34 Female -target", "35-39 Female -target", "40-44 Female -target", 
"45-49 Female -target", "50-54 Female -target", "55-59 Female -target", 
"60+ Female -target", "15-19 Female -target", "20-24 Female -target", 
"25-29 Female -target", "30-34 Female -target", "35-39 Female -target", 
"40-44 Female -target", "45-49 Female -target", "50-54 Female -target", 
"55-59 Female -target", "60+ Female -target", "15-24 Male -target", 
"20-24 Male -target", "25-29 Male -target", "30-34 Male -target", 
"35-39 Male -target", "40-44 Male -target", "15-24 Male -target", 
"20-24 Male -target", "25-29 Male -target", "30-34 Male -target", 
"35-39 Male -target", "40-44 Male -target", "45-49 Male -target", 
"50-54 Male -target", "55-59 Male -target", "60+ Male -target", 
"15-24 Male -target", "20-24 Male -target", "25-29 Male -target", 
"30-34 Male -target", "35-39 Male -target", "40-44 Male -target", 
"45-49 Male -target", "50-54 Male -target", "55-59 Male -target", 
"60+ Male -target", "20-24 Male -target", "25-29 Male -target", 
"30-34 Male -target", "35-39 Male -target", "40-44 Male -target", 
"45-49 Male -target", "50-54 Male -target", "55-59 Male -target", 
"60+ Male -target", "20-24 Male -target", "25-29 Male -target", 
"30-34 Male -target", "35-39 Male -target", "40-44 Male -target", 
"45-49 Male -target", "50-54 Male -target", "55-59 Male -target", 
"60+ Male -target", "25-29 Male -target", "30-34 Male -target", 
"35-39 Male -target", "40-44 Male -target", "45-49 Male -target", 
"50-54 Male -target", "55-59 Male -target", "60+ Male -target", 
"30-34 Male -target", "35-39 Male -target", "40-44 Male -target", 
"45-49 Male -target", "50-54 Male -target", "55-59 Male -target", 
"60+ Male -target", "35-39 Male -target", "40-44 Male -target", 
"45-49 Male -target", "50-54 Male -target", "55-59 Male -target", 
"60+ Male -target", "45-49 Male -target", "60+ Male -target"), 
    Value = c(0.653, 0.05, 0.005, 4.752, 4.29, 0.171, 0.065, 
    0.076, 2.772, 12.659, 3.636, 0.242, 0.019, 0.019, 0.001, 
    0.523, 3.943, 6.486, 1.655, 0.192, 0.074, 0.059, 0.286, 1.424, 
    3.276, 3.686, 1.349, 0.397, 0.065, 0.455, 0.008, 0.208, 0.763, 
    0.938, 1.567, 2.14, 1.054, 0.102, 0.083, 0.01, 0.023, 0.066, 
    0.1, 0.207, 0.535, 0.338, 0.262, 0.033, 0.292, 0.076, 0.003, 
    0.012, 0.017, 0.116, 0.082, 0.241, 0.358, 0.093, 0.017, 0.05, 
    0.015, 0.078, 0.039, 0.176, 0.119, 0.181, 0.098, 0.029, 0.3, 
    0.002, 0.009, 0.005, 0.008, 0.035, 0.034, 0.03, 0.024, 0.05, 
    0.526, 0.988, 1.894, 0.431, 0.061, 0.024, 0.02, 0.212, 4.804, 
    5.526, 1.29, 0.341, 0.206, 0.039, 0.009, 0.012, 0.02, 0.032, 
    0.28, 2.32, 3.103, 1.146, 0.37, 0.087, 0.018, 0.09, 0.017, 
    0.108, 0.156, 0.803, 1.307, 0.628, 0.183, 0.12, 0.045, 0.026, 
    0.12, 0.011, 0.088, 0.453, 0.812, 0.448, 0.08, 0.195, 0.105, 
    0.008, 0.023, 0.088, 0.264, 0.187, 0.157, 0.087, 0.068, 0.009, 
    0.007, 0.012, 0.07, 0.112, 0.064, 0.029, 0.037, 0.008, 0.007, 
    0.023, 0.027, 0.018, 0.019, 0.012)), .Names = c("N1", "N2", 
"Value"), row.names = c(NA, -146L), class = "data.frame")

Any help would be greatly appreciated.

Thanks, Tumaini

www
  • 38,575
  • 12
  • 48
  • 84
Tumaini Kilimba
  • 195
  • 2
  • 15
  • Can you dput() the nodes data frame and the edges data frame (or some subset of them if they are too large)? That will make it much easier for others to answer your question. – lawyeR Jun 05 '15 at 13:17
  • @lawyeR sorry if I am misunderstanding your question, but both my nodes data frame and edges data frame can be recreated through the above code, as I have linked to the data am working with (hosted on github). I have no idea what dput() does (have tried reading it but not getting it) but if its the dataframes you need I think you can access them after running the first few lines of the code. Let me know if I have completely misunderstood your question. Nodes dataframe in the code is called NODES, and edges is called EDGES – Tumaini Kilimba Jun 05 '15 at 17:06
  • Yes, your code presumably creates the data frames. But dput(NODES) and dput(EDGES) will provide us with the clean data frames as you are working with them. The easier you make it for people to answer, the more likely they will do so. I am very interested in riverplots and have created them, but it is easier to start with clean data. – lawyeR Jun 05 '15 at 17:31
  • @lawyeR ok thanks have done so.. must admit had never come across that particular method of data sharing, but learn something new everyday :) – Tumaini Kilimba Jun 05 '15 at 22:29

0 Answers0