3

I have a list of 2 elements, each of which contains 3 elements. Each of those 3 elements is itself a list of length 2, and each of its elements is a tibble. The tibbles all have the same number of rows, but the number of columns differs after the third column.

Here is the structure:

# Example data: a list of 2 elements, each holding 3 inner lists; every inner
# list carries two tibble-classed data frames named by their ENSG id.
# All six inner lists are identical, so the pieces are built once and reused.
# Everything is assembled inside local() so only `my_list` is created.
my_list <- local({
  sample_ids <- c("GTEX-1122O", "GTEX-11EM3", "GTEX-11EQ9")
  expr_values <- c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03")

  # Variant columns (stored as character), in the order they appear.
  variant_cols <- list(
    `1_43222779_A_G_b37` = c("1", "1", "2"),
    `1_43222856_A_G_b37` = c("0", "0", "0"),
    `1_43223126_C_T_b37` = c("0", "1", "0"),
    `1_43223317_T_C_b37` = c("1", "0", "0")
  )

  # Assemble one tibble-classed data frame: the three leading id/expression
  # columns followed by the supplied variant columns.
  make_gene_tbl <- function(gene_id, variants) {
    leading <- list(
      name = sample_ids,
      ENSG = rep(gene_id, length(sample_ids)),
      expr = expr_values
    )
    structure(
      c(leading, variants),
      # NOTE(review): row.names declares 6 rows although every column holds
      # 3 values; kept exactly as originally posted -- confirm this is intended.
      row.names = c(NA, -6L),
      class = c("tbl_df", "tbl", "data.frame")
    )
  }

  # ENSG0000014 carries all four variant columns, ENSG0000015 only the
  # first two.
  inner <- list(
    ENSG0000014 = make_gene_tbl("ENSG0000014", variant_cols),
    ENSG0000015 = make_gene_tbl("ENSG0000015", variant_cols[1:2])
  )

  middle <- rep(list(inner), 3L)
  list(middle, middle)
})

For each of the 3 elements, I first need to extract those tibbles, dropping the first three columns, and create a new data.matrix from them. Then I want to automate this, presumably in a loop, to get the same kind of data.matrix for each of the 3 elements. So the output will be three matrices for each of the 2 main elements of the list.

I'd appreciate it if you can help.

Maya_Cent
  • 471
  • 4
  • 10

1 Answers1

2

As it is nested, an option is rrapply

library(rrapply)
library(dplyr)
# Visit every data.frame leaf of the nested list and keep all but its first
# three columns; the result is returned as a nested list.
out <- rrapply(
  my_list,
  f = function(tbl) dplyr::select(tbl, -(1:3)),
  classes = "data.frame",
  how = "list"
)

-output

out
[[1]]
[[1]][[1]]
[[1]][[1]]$ENSG0000014
# A tibble: 6 × 4
  `1_43222779_A_G_b37` `1_43222856_A_G_b37` `1_43223126_C_T_b37` `1_43223317_T_C_b37`
  <chr>                <chr>                <chr>                <chr>               
1 1                    0                    0                    1                   
2 1                    0                    1                    0                   
3 2                    0                    0                    0                   
4 1                    0                    0                    1                   
5 1                    0                    1                    0                   
6 2                    0                    0                    0                   

[[1]][[1]]$ENSG0000015
# A tibble: 6 × 2
  `1_43222779_A_G_b37` `1_43222856_A_G_b37`
  <chr>                <chr>               
1 1                    0                   
2 1                    0                   
3 2                    0                   
4 1                    0                   
5 1                    0                   
6 2                    0       
...
akrun
  • 874,273
  • 37
  • 540
  • 662
  • Thanks for giving me a hand. The suggested solution drops the first 3 columns correctly. But, as I pointed out, the final output should be a data.matrix prepared for each ENSG in every 3 elements, since that is what the next step, the cv.glmnet function, requires. I'm going to mark the answer as solved and raise another question. – Maya_Cent Feb 25 '22 at 09:39
  • @Maya_Cent I initially used different code to return a single data.frame, but got confused by the structure you wanted, especially because it is a nested structure. Can you update your post with the expected output structure? – akrun Feb 25 '22 at 16:01
  • Thanks. I would like to get, for each of the five elements (folds), a data.matrix where the rows are ENSGs and the columns are variations, all automated through a function in a loop or another approach. – Maya_Cent Feb 25 '22 at 16:29
  • @Maya_Cent My previous solution was `out <- rrapply(my_list, classes = "data.frame", f = function(x) x[-(1:3)], how = "flatten") |> rbindlist(fill = TRUE) |> type.convert(as.is = TRUE) |> as.matrix()` – akrun Feb 25 '22 at 16:36