2

Following on from this question.

I would like to sum the values of all of the nodes 'upstream' of each node. Unlike the answer posted to the question above, which accumulates from parent towards child along the shortest path, I would like to sum ALL values from all the children towards the parent. In a river context: for each catchment, accumulate the values of ALL catchments upstream of it.

My input data

# Example input: a tibble-classed data frame with 23 rows and the columns
# ZHYD, NextDown, count, Exutoire, Outlet, EcrRiv_km, EcrRivCoun, DFLS,
# density and dendritic_r.
# NOTE(review): ZHYD looks like the catchment id and NextDown the id of the
# catchment it drains into, with "OUTLET" as the terminal target -- confirm
# against the data source.
input <- structure(list(ZHYD = c("B030000156", "B030000159", "B030000165", 
"B030000167", "B030000170", "B030000171", "B030000175", "B030000177", 
"B030000181", "B030000183", "B030000184", "B030000190", "B030000192", 
"B030000193", "B030000195", "B030000196", "B030000197", "B030000198", 
"B030000199", "B030000201", "B030000202", "B030000133", "B030000191"
), NextDown = c("B030000133", "B030000133", "B030000159", "B030000159", 
"B030000167", "B030000167", "B030000170", "B030000175", "B030000175", 
"B030000171", "B030000170", "B030000171", "B030000184", "B030000191", 
"B030000197", "B030000197", "B030000191", "B030000190", "B030000190", 
"B030000199", "B030000199", "OUTLET", "B030000184"), count = c(2, 
0, 2, 0, 0, 0, 2, 3, 0, 0, 1, 0, 1, 2, 1, 0, 7, 0, 0, 0, 0, 5, 
0), Exutoire = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), Outlet = c("BSO0000016", 
"BSO0000016", "BSO0000016", "BSO0000016", "BSO0000016", "BSO0000016", 
"BSO0000016", "BSO0000016", "BSO0000016", "BSO0000016", "BSO0000016", 
"BSO0000016", "BSO0000016", "BSO0000016", "BSO0000016", "BSO0000016", 
"BSO0000016", "BSO0000016", "BSO0000016", "BSO0000016", "BSO0000016", 
"BSO0000016", "BSO0000016"), EcrRiv_km = c(54.91, 5.14, 37.71, 
8.28, 17.22, 5.6, 45.87, 84.1, 26.22, 43.29, 32.49, 43.85, 35.1, 
11.09, 67.88, 32.66, 102.71, 18.21, 0.81, 14.05, 16.27, 45.44, 
3.47), EcrRivCoun = c(20, 3, 18, 5, 9, 3, 29, 44, 16, 18, 18, 
19, 16, 10, 30, 19, 56, 12, 3, 11, 10, 13, 5), DFLS = c(0.5, 
1, 0.5, 1, 1, 1, 0.5, 0.333333333333333, 1, 1, 1, 1, 1, 0.5, 
1, 1, 0.142857142857143, 1, 1, 1, 1, 0.2, 1), density = c(27.455, 
0, 18.855, 0, 0, 0, 22.935, 28.0333333333333, 0, 0, 32.49, 0, 
35.1, 5.545, 67.88, 0, 14.6728571428571, 0, 0, 0, 0, 9.088, 0
), dendritic_r = c(2.7455, 1.71333333333333, 2.095, 1.656, 1.91333333333333, 
1.86666666666667, 1.58172413793103, 1.91136363636364, 1.63875, 
2.405, 1.805, 2.30789473684211, 2.19375, 1.109, 2.26266666666667, 
1.71894736842105, 1.83410714285714, 1.5175, 0.27, 1.27727272727273, 
1.627, 3.49538461538462, 0.694)), class = c("tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -23L), .Names = c("ZHYD", "NextDown", 
"count", "Exutoire", "Outlet", "EcrRiv_km", "EcrRivCoun", "DFLS", 
"density", "dendritic_r"))

This code accumulates the count on the shortest 'downstream' path

# Edge list for the network: one row per ZHYD -> NextDown link, carrying the
# row's `count` as an edge attribute.
df <- data.frame(
  parent_id = input$ZHYD,
  id = input$NextDown,
  count = input$count
)
g <- graph_from_data_frame(df)
plot(g)

# Pull the edges (endpoints plus the count attribute) back out of the graph
# and hand them to data.tree to build the tree.
df <- get.data.frame(g, what = "edges")
dtr <- FromDataFrameNetwork(df)

# Seed every node with 0, then let each non-root node add its own count to
# whatever its parent has accumulated so far.
dtr$countcum <- 0
dtr$Do(
  function(node) {
    node$countcum <- node$parent$countcum + node$count
  },
  filterFun = isNotRoot
)
print(dtr, "count", "countcum")

Answer

This answer does the job perfectly

# Recursively accumulate `count` over a node and all of its descendants.
#
# For the given data.tree node, the totals of its children are computed first
# (by recursion), added to the node's own `count`, and the result is stored on
# the node as `uscum`. Missing counts are ignored via `na.rm = TRUE`.
#
# @param node A tree node exposing `$children` and `$count`.
# @return The accumulated total for `node`, invisibly; called mainly for the
#   side effect of setting `uscum` on every visited node.
myApply <- function(node) {
  child_totals <- purrr::map_dbl(node$children, myApply)
  total <- sum(c(node$count, child_totals), na.rm = TRUE)
  node$uscum <- total
  invisible(total)
}
# Start from the root of the tree that is printed below; the original call
# referenced an undefined object `tree`.
myApply(dtr)
print(dtr, "count", "uscum")
Josh J
  • 395
  • 1
  • 3
  • 13

1 Answer

2

I think you may be looking for subcomponent:

> subcomponent(g,"B030000156","out")
+ 3/24 vertices, named, from 8540f89:
[1] B030000156 B030000133 OUTLET    

> subcomponent(g,"B030000196","out")
+ 9/24 vertices, named, from 8540f89:
[1] B030000196 B030000197 B030000191 B030000184 B030000170 B030000167 B030000159 B030000133 OUTLET   

You can also use `"in"` or `"all"` as the `mode` if you want to go in the other (or both) directions. If you use sapply you can iterate over all nodes:

> sapply(V(g),subcomponent,graph=g,mode="out")
$B030000156
+ 3/24 vertices, named, from 8540f89:
[1] B030000156 B030000133 OUTLET    

$B030000159
+ 3/24 vertices, named, from 8540f89:
[1] B030000159 B030000133 OUTLET    

$B030000165
+ 4/24 vertices, named, from 8540f89:
[1] B030000165 B030000159 B030000133 OUTLET
... the rest are truncated

You can sum up all the weights along a path like this:

> E(g)$weight=as.numeric(df[,3])
> sum(E(g,path=c("B030000159","B030000133","OUTLET"))$weight)
[1] 5

Here's a circuitous way to get sums of weights along paths after extracting the node names from the igraph object:

library(stringr)
# For every vertex, the vertices reachable by following outgoing edges.
# lapply() keeps the result a list even if all paths have equal length.
paths <- lapply(V(g), subcomponent, graph = g, mode = "out")
# One "[1] id id ..." string per vertex, in V(g) order, built directly from
# the vertex names. This replaces scraping the printed igraph output, where
# the regex "[1] " (a character class) matched any line containing "1 " and
# long vertex sequences wrapped over several printed lines were truncated.
pathlist <- unname(vapply(
  paths,
  function(p) str_c("[1] ", str_c(as_ids(p), collapse = " ")),
  character(1)
))

For your first and last vertices in pathlist, the total lengths are:

> sum(E(g,path=unlist(strsplit(pathlist[1],"\\s+"))[2:length(unlist(strsplit(pathlist[1],"\\s+")))])$weight)
[1] 5     

> sum(E(g,path=unlist(strsplit(pathlist[13],"\\s+"))[2:length(unlist(strsplit(pathlist[13],"\\s+")))])$weight)
[1] 6

You can also extract all the downstream paths into a data frame:

> library(stringi)
> paths.df <- as.data.frame(stri_extract_all_words(pathlist, simplify = TRUE))
> head(paths.df)
  V1         V2         V3         V4         V5         V6         V7         V8         V9    V10
1  1 B030000171 B030000167 B030000159 B030000133     OUTLET                                        
2  1 B030000181 B030000175 B030000170 B030000167 B030000159 B030000133 OUTLET                  
3  1 B030000183 B030000171 B030000167 B030000159 B030000133     OUTLET                             
4  1 B030000190 B030000171 B030000167 B030000159 B030000133     OUTLET                             
5  1 B030000193 B030000191 B030000184 B030000170 B030000167 B030000159 B030000133     OUTLET       
6  1 B030000195 B030000197 B030000191 B030000184 B030000170 B030000167 B030000159 B030000133 OUTLET
mysteRious
  • 4,102
  • 2
  • 16
  • 36
  • A good first start but it doesn't work in instances where pathlist spans more than 2 lines. e.g. `paths <- sapply(V(g),subcomponent,graph=g,mode="in")` this problem will occur when mode is in or out with the full dataset – Josh J Mar 26 '18 at 07:16