0

I am trying to convert a sociomatrix (using the sna package in r) into an edgelist, but keeping the vertex names. Unfortunately, I seem to be at a dead end.

For context, I'm using the sna package because I need to symmetrize the data, which I am doing before putting it in edgelist format. Essentially, I need to take a network matrix, symmetrize it, turn it into an edgelist, and then write it to .csv. The step of converting it to the edgelist is where I'm running into the problem. Here's an example of what I'm running into:

library(sna)

# Random graph on 6 vertices; the vertex count is passed as a number,
# not as the string "6".
testmat <- rgraph(6)

# Label rows and columns with the same vertex names.
colnames(testmat) <- c("153", "154", "155", "156", "157", "158")
rownames(testmat) <- c("153", "154", "155", "156", "157", "158")

# Symmetrize, then re-apply the vertex names to the symmetrized matrix.
maxsymmetrizedfile <- symmetrize(testmat, rule = "weak")
rownames(maxsymmetrizedfile) <- rownames(testmat)
colnames(maxsymmetrizedfile) <- colnames(testmat)

# Convert to an edgelist once, keep the result, and print it.
maxsymm_edge <- as.edgelist.sna(maxsymmetrizedfile)
maxsymm_edge

When I do this, this is what I get:

      snd rec val
 [1,]   2   1   1
 [2,]   3   1   1
 [3,]   4   1   1
 [4,]   5   1   1
 [5,]   6   1   1
 [6,]   1   2   1
 [7,]   3   2   1
 [8,]   5   2   1
 [9,]   1   3   1
[10,]   2   3   1
[11,]   4   3   1
[12,]   5   3   1
[13,]   6   3   1
[14,]   1   4   1
[15,]   3   4   1
[16,]   6   4   1
[17,]   1   5   1
[18,]   2   5   1
[19,]   3   5   1
[20,]   1   6   1
[21,]   3   6   1
[22,]   4   6   1

When what I really want is the names ("153","154", "155", "156", "157", "158") instead of 1-6.

(The actual matrix/edgelist will be different when you run the code because the graph is randomly generated, but it should do the same thing)

Do you have any suggestions as to what I need to do to make this work? I have used the network and igraph packages in the past, but when I convert the sociomatrix to a graph/network object and attempt to use the other packages to convert to edgelist I get other, different errors (having trouble passing the "n" argument).

1 Answer

0

Here's your data:

# Vertex labels, used for both the rows and the columns of the sociomatrix.
vertex_names <- as.character(153:158)

# Random graph with one vertex per label, labelled on both dimensions.
testmat <- sna::rgraph(length(vertex_names))
rownames(testmat) <- vertex_names
colnames(testmat) <- vertex_names
print(testmat)
#>     153 154 155 156 157 158
#> 153   0   0   0   1   1   0
#> 154   1   0   0   1   0   1
#> 155   1   1   0   0   0   1
#> 156   1   0   1   0   1   1
#> 157   1   0   1   1   0   0
#> 158   0   1   1   1   1   0

# Symmetrize under the "weak" rule, then copy the row and column labels
# over from the original matrix.
maxsymmetrizedfile <- sna::symmetrize(testmat, rule = "weak")
rownames(maxsymmetrizedfile) <- rownames(testmat)
colnames(maxsymmetrizedfile) <- colnames(testmat)
print(maxsymmetrizedfile)
#>     153 154 155 156 157 158
#> 153   0   1   1   1   1   0
#> 154   1   0   1   1   0   1
#> 155   1   1   0   1   1   1
#> 156   1   1   1   0   1   1
#> 157   1   0   1   1   0   1
#> 158   0   1   1   1   1   0

maxsymm_edge has an attribute named "vnames" that's missing from your example.

# Wrapping the assignment in parentheses stores the edgelist and prints it.
(maxsymm_edge <- sna::as.edgelist.sna(maxsymmetrizedfile))
#>       snd rec val
#>  [1,]   2   1   1
#>  [2,]   3   1   1
#>  [3,]   4   1   1
#>  [4,]   5   1   1
#>  [5,]   1   2   1
#>  [6,]   3   2   1
#>  [7,]   4   2   1
#>  [8,]   6   2   1
#>  [9,]   1   3   1
#> [10,]   2   3   1
#> [11,]   4   3   1
#> [12,]   5   3   1
#> [13,]   6   3   1
#> [14,]   1   4   1
#> [15,]   2   4   1
#> [16,]   3   4   1
#> [17,]   5   4   1
#> [18,]   6   4   1
#> [19,]   1   5   1
#> [20,]   3   5   1
#> [21,]   4   5   1
#> [22,]   6   5   1
#> [23,]   2   6   1
#> [24,]   3   6   1
#> [25,]   4   6   1
#> [26,]   5   6   1
#> attr(,"n")
#> [1] 6
#> attr(,"vnames")
#> [1] "153" "154" "155" "156" "157" "158" # *********

We can index into the "vnames" included in the edgelist (which is the same as vertex_names).

# Vertex names carried on the edgelist as the "vnames" attribute.
(vnames <- attr(maxsymm_edge, "vnames"))
#> [1] "153" "154" "155" "156" "157" "158"

# Sender indices, translated to names by indexing into `vnames`.
(snd_indices <- maxsymm_edge[, "snd"])
#>  [1] 2 3 4 5 1 3 4 6 1 2 4 5 6 1 2 3 5 6 1 3 4 6 2 3 4 5
vnames[snd_indices]
#>  [1] "154" "155" "156" "157" "153" "155" "156" "158" "153" "154" "156" "157"
#> [13] "158" "153" "154" "155" "157" "158" "153" "155" "156" "158" "154" "155"
#> [25] "156" "157"

# Receiver indices come from the "rec" column (not "snd").
(rec_indices <- maxsymm_edge[, "rec"])
#>  [1] 1 1 1 1 2 2 2 2 3 3 3 3 3 4 4 4 4 4 5 5 5 5 6 6 6 6
vnames[rec_indices]
#>  [1] "153" "153" "153" "153" "154" "154" "154" "154" "155" "155" "155" "155"
#> [13] "155" "156" "156" "156" "156" "156" "157" "157" "157" "157" "158" "158"
#> [25] "158" "158"

So we can build a data frame directly like so:

# Look the vertex names up once, then translate both index columns with them.
# `local()` keeps the helper variable out of the global environment.
el_df <- local({
  labels <- attr(maxsymm_edge, "vnames")
  data.frame(
    snd = labels[maxsymm_edge[, "snd"]],
    rec = labels[maxsymm_edge[, "rec"]],
    val = maxsymm_edge[, "val"],
    stringsAsFactors = FALSE # the default if R.Version()$major >= 4
  )
})
el_df
#>    snd rec val
#> 1  154 153   1
#> 2  155 153   1
#> 3  156 153   1
#> 4  157 153   1
#> 5  153 154   1
#> 6  155 154   1
#> 7  156 154   1
#> 8  158 154   1
#> 9  153 155   1
#> 10 154 155   1
#> 11 156 155   1
#> 12 157 155   1
#> 13 158 155   1
#> 14 153 156   1
#> 15 154 156   1
#> 16 155 156   1
#> 17 157 156   1
#> 18 158 156   1
#> 19 153 157   1
#> 20 155 157   1
#> 21 156 157   1
#> 22 158 157   1
#> 23 154 158   1
#> 24 155 158   1
#> 25 156 158   1
#> 26 157 158   1

Why a data frame instead of a matrix? Because the vertex names and "val" are of different types (character versus double), so attempting to combine them in a single matrix will (at best) coerce "val" to a bunch of strings.

str(el_df)
#> 'data.frame':    26 obs. of  3 variables:
#>  $ snd: chr  "154" "155" "156" "157" ...
#>  $ rec: chr  "153" "153" "153" "153" ...
#>  $ val: num  1 1 1 1 1 1 1 1 1 1 ...

But, this only matters if you're going to use "val". The network isn't weighted, so you could index into "vnames" to build a matrix edge list instead (or use as.matrix(el_df[, 1:2]) to drop that column and go from the data frame to a matrix).

With all that in mind, we can go a step further and build a function that handles the whole operation:

#' Convert an adjacency matrix to an edge-list data frame
#'
#' Every cell of `adj_mat` that is not zero becomes one row of the result,
#' in column-major order. Cells that are `NA` are kept as rows whose `val`
#' is `NA`.
#'
#' @param adj_mat A matrix. Cell `[i, j]` is reported with `snd` taken from
#'   row `i` and `rec` taken from column `j`.
#' @param use_vertex_names If `TRUE` (the default) and `adj_mat` has
#'   dimnames, `snd` and `rec` hold vertex names; otherwise they hold the
#'   row and column indices.
#' @return A data frame with columns `snd`, `rec` and `val`, one row per
#'   retained cell (zero rows if there are none).
as_edge_list_df <- function(adj_mat, use_vertex_names = TRUE) {
  # 3 col matrix of row index, col index, and `adj_mat`'s values
  melted <- do.call(
    cbind,
    lapply(list(row(adj_mat), col(adj_mat), adj_mat), as.vector)
  )

  # Build an NA-free mask: indexing with a logical NA would yield rows that
  # are NA in every column, losing the endpoints of the missing tie.
  cell_vals <- melted[, 3L]
  keep <- is.na(cell_vals) | cell_vals != 0
  # drop = FALSE keeps a matrix even when exactly one row survives.
  filtered <- melted[keep, , drop = FALSE]

  if (use_vertex_names && !is.null(dimnames(adj_mat))) {
    row_names <- rownames(adj_mat)
    col_names <- colnames(adj_mat)
    # Only compare when both sets of names exist; identical() also copes
    # with NA names and with differing lengths.
    if (!is.null(row_names) && !is.null(col_names) &&
          !identical(row_names, col_names)) {
      stop("row names do not match column names.")
    }
    # Fall back to the column names when only those were supplied.
    vertex_names <- if (is.null(row_names)) col_names else row_names
    data.frame(
      snd = vertex_names[filtered[, 1L]],
      rec = vertex_names[filtered[, 2L]],
      val = filtered[, 3L],
      stringsAsFactors = FALSE # keep names as character on R < 4 too
    )
  } else {
    data.frame(
      snd = filtered[, 1L],
      rec = filtered[, 2L],
      val = filtered[, 3L]
    )
  }
}

Then, take it for a test drive...

# Wrapping the assignment in parentheses stores the result and prints it.
(el_df2 <- as_edge_list_df(maxsymmetrizedfile))
#>    snd rec val
#> 1  154 153   1
#> 2  155 153   1
#> 3  156 153   1
#> 4  157 153   1
#> 5  153 154   1
#> 6  155 154   1
#> 7  156 154   1
#> 8  158 154   1
#> 9  153 155   1
#> 10 154 155   1
#> 11 156 155   1
#> 12 157 155   1
#> 13 158 155   1
#> 14 153 156   1
#> 15 154 156   1
#> 16 155 156   1
#> 17 157 156   1
#> 18 158 156   1
#> 19 153 157   1
#> 20 155 157   1
#> 21 156 157   1
#> 22 158 157   1
#> 23 154 158   1
#> 24 155 158   1
#> 25 156 158   1
#> 26 157 158   1

... and verify it does exactly the same thing as what we did to build el_df.

# Raises an error unless the two data frames are exactly identical.
stopifnot(identical(el_df, el_df2))
knapply
  • 647
  • 1
  • 5
  • 11