Here's your data:
# Vertex labels; applied below as both the row and the column names.
vertex_names <- c("153","154", "155", "156", "157", "158")
# One row/column per vertex label.
# NOTE(review): sna::rgraph() presumably draws a random graph and no seed is
# set here, so the matrix printed below would differ between runs -- confirm.
testmat <- sna::rgraph(length(vertex_names))
dimnames(testmat) <- list(vertex_names, vertex_names)
testmat
#> 153 154 155 156 157 158
#> 153 0 0 0 1 1 0
#> 154 1 0 0 1 0 1
#> 155 1 1 0 0 0 1
#> 156 1 0 1 0 1 1
#> 157 1 0 1 1 0 0
#> 158 0 1 1 1 1 0
# Symmetrize with rule = "weak": comparing the two printouts, a tie is kept
# whenever it is present in either direction of the original matrix.
maxsymmetrizedfile <- sna::symmetrize(testmat, rule = "weak")
# Copy the vertex labels from the original matrix onto the symmetrized one.
dimnames(maxsymmetrizedfile) <- dimnames(testmat)
maxsymmetrizedfile
#> 153 154 155 156 157 158
#> 153 0 1 1 1 1 0
#> 154 1 0 1 1 0 1
#> 155 1 1 0 1 1 1
#> 156 1 1 1 0 1 1
#> 157 1 0 1 1 0 1
#> 158 0 1 1 1 1 0
maxsymm_edge (created below) has an attribute named "vnames" that's missing from your example.
maxsymm_edge <- sna::as.edgelist.sna(maxsymmetrizedfile)
maxsymm_edge
#> snd rec val
#> [1,] 2 1 1
#> [2,] 3 1 1
#> [3,] 4 1 1
#> [4,] 5 1 1
#> [5,] 1 2 1
#> [6,] 3 2 1
#> [7,] 4 2 1
#> [8,] 6 2 1
#> [9,] 1 3 1
#> [10,] 2 3 1
#> [11,] 4 3 1
#> [12,] 5 3 1
#> [13,] 6 3 1
#> [14,] 1 4 1
#> [15,] 2 4 1
#> [16,] 3 4 1
#> [17,] 5 4 1
#> [18,] 6 4 1
#> [19,] 1 5 1
#> [20,] 3 5 1
#> [21,] 4 5 1
#> [22,] 6 5 1
#> [23,] 2 6 1
#> [24,] 3 6 1
#> [25,] 4 6 1
#> [26,] 5 6 1
#> attr(,"n")
#> [1] 6
#> attr(,"vnames")
#> [1] "153" "154" "155" "156" "157" "158" # *********
We can index into the "vnames" attribute included in the edge list (which is the same as vertex_names).
(vnames <- attr(maxsymm_edge, "vnames"))
#> [1] "153" "154" "155" "156" "157" "158"
(snd_indices <- maxsymm_edge[, "snd"])
#> [1] 2 3 4 5 1 3 4 6 1 2 4 5 6 1 2 3 5 6 1 3 4 6 2 3 4 5
vnames[snd_indices]
#> [1] "154" "155" "156" "157" "153" "155" "156" "158" "153" "154" "156" "157"
#> [13] "158" "153" "154" "155" "157" "158" "153" "155" "156" "158" "154" "155"
#> [25] "156" "157"
# Receiver endpoints live in the "rec" column (the sender lookup above uses
# "snd"); each index is a position in `vnames`.
(rec_indices <- maxsymm_edge[, "rec"])
#> [1] 1 1 1 1 2 2 2 2 3 3 3 3 3 4 4 4 4 4 5 5 5 5 6 6 6 6
vnames[rec_indices]
#> [1] "153" "153" "153" "153" "154" "154" "154" "154" "155" "155" "155" "155"
#> [13] "155" "156" "156" "156" "156" "156" "157" "157" "157" "157" "158" "158"
#> [25] "158" "158"
So we can build a data frame directly like so:
el_df <- data.frame(
snd = attr(maxsymm_edge, "vnames")[maxsymm_edge[, "snd"]],
rec = attr(maxsymm_edge, "vnames")[maxsymm_edge[, "rec"]],
val = maxsymm_edge[, "val"],
stringsAsFactors = FALSE # the default if R.Version()$major >= 4
)
el_df
#> snd rec val
#> 1 154 153 1
#> 2 155 153 1
#> 3 156 153 1
#> 4 157 153 1
#> 5 153 154 1
#> 6 155 154 1
#> 7 156 154 1
#> 8 158 154 1
#> 9 153 155 1
#> 10 154 155 1
#> 11 156 155 1
#> 12 157 155 1
#> 13 158 155 1
#> 14 153 156 1
#> 15 154 156 1
#> 16 155 156 1
#> 17 157 156 1
#> 18 158 156 1
#> 19 153 157 1
#> 20 155 157 1
#> 21 156 157 1
#> 22 158 157 1
#> 23 154 158 1
#> 24 155 158 1
#> 25 156 158 1
#> 26 157 158 1
Why a data frame instead of a matrix? Because the vertex names and "val" are of different types (character versus double), and a matrix can hold only one type: putting them in the same matrix would (at best) coerce "val" to a bunch of strings.
str(el_df)
#> 'data.frame': 26 obs. of 3 variables:
#> $ snd: chr "154" "155" "156" "157" ...
#> $ rec: chr "153" "153" "153" "153" ...
#> $ val: num 1 1 1 1 1 1 1 1 1 1 ...
But, this only matters if you're going to use "val"
. The network isn't weighted, so you could index into "vnames"
to build a matrix edge list instead (or use as.matrix(el_df[, 1:2])
to drop that column and go from the data frame to a matrix).
With all that in mind, we can go a step further and build a function that handles the whole operation:
#' Convert an adjacency matrix into an edge-list data frame
#'
#' Every cell of `adj_mat` whose value is not zero becomes one row of the
#' result. Rows are emitted in column-major order, i.e. all senders of the
#' first receiver, then all senders of the second receiver, and so on.
#'
#' @param adj_mat A matrix. Rows are treated as senders and columns as
#'   receivers.
#' @param use_vertex_names If `TRUE` (the default) and `adj_mat` has
#'   dimnames, `snd` and `rec` hold the row names of `adj_mat`; otherwise
#'   they hold row/column positions.
#' @return A data frame with columns `snd`, `rec` and `val`, one row per
#'   non-zero cell. `NA` cells are kept, with their endpoints and an `NA`
#'   `val`.
#' @details Stops with an error when vertex names are requested and the row
#'   names of `adj_mat` are not identical to its column names.
as_edge_list_df <- function(adj_mat, use_vertex_names = TRUE) {
  # 3-column matrix of row index, column index and cell value. Going through
  # cbind() keeps all three columns in one common type.
  melted <- cbind(
    as.vector(row(adj_mat)),
    as.vector(col(adj_mat)),
    as.vector(adj_mat)
  )

  # Drop the zero cells. The is.na() term stops an NA cell from becoming an
  # NA subscript (which would yield an all-NA row); `drop = FALSE` keeps a
  # single surviving row a matrix so the column indexing below still works.
  vals <- melted[, 3L]
  keep <- is.na(vals) | vals != 0
  filtered <- melted[keep, , drop = FALSE]

  snd <- filtered[, 1L]
  rec <- filtered[, 2L]

  if (use_vertex_names && !is.null(dimnames(adj_mat))) {
    vertex_names <- rownames(adj_mat)
    # identical() also rejects a matrix that names only one dimension or has
    # row/column name vectors of different lengths.
    if (!identical(vertex_names, colnames(adj_mat))) {
      stop("row names do not match column names.")
    }
    snd <- vertex_names[snd]
    rec <- vertex_names[rec]
  }

  data.frame(
    snd = snd,
    rec = rec,
    val = filtered[, 3L],
    stringsAsFactors = FALSE # the default if R.Version()$major >= 4
  )
}
Then, take it for a test drive...
el_df2 <- as_edge_list_df(maxsymmetrizedfile)
el_df2
#> snd rec val
#> 1 154 153 1
#> 2 155 153 1
#> 3 156 153 1
#> 4 157 153 1
#> 5 153 154 1
#> 6 155 154 1
#> 7 156 154 1
#> 8 158 154 1
#> 9 153 155 1
#> 10 154 155 1
#> 11 156 155 1
#> 12 157 155 1
#> 13 158 155 1
#> 14 153 156 1
#> 15 154 156 1
#> 16 155 156 1
#> 17 157 156 1
#> 18 158 156 1
#> 19 153 157 1
#> 20 155 157 1
#> 21 156 157 1
#> 22 158 157 1
#> 23 154 158 1
#> 24 155 158 1
#> 25 156 158 1
#> 26 157 158 1
... and verify it produces exactly the same result as el_df, which we built by hand above.
stopifnot(identical(el_df, el_df2))