0

The gmt variable is a list of lists, each of which holds character vectors. My goal is to replace the name for each list in gmt (i.e., names(gmt)) with its corresponding id, removing everything up to and including the first occurrence of _ in the process. The output should also retain the GMT format.

# Set each gene set's `name` to its `id` with the leading prefix (everything up
# to and including the first underscore) removed.
# NOTE: lapply() returns a plain list, so the class attribute of `gmt` is lost.
gmt <- lapply(gmt, function(gene_set) {
  gene_set$name <- sub("^[^_]+_", "", gene_set$id)
  gene_set
})

My code above was adapted from an answer that I received to a previous question. However, the output is coerced into a plain list and does not retain the original GMT format.

Data:

# Example data: a named list of four gene sets, tagged with the S3 class "GMT"
# via structure(). Each element is itself a list with three fields:
#   id    - the gene-set identifier (identical to the element's name)
#   name  - a URL string that ends in the gene-set identifier
#   genes - a character vector of gene symbols
gmt <- list(
  REACTOME_INTERLEUKIN_6_SIGNALING = list(
    id = "REACTOME_INTERLEUKIN_6_SIGNALING",
    name = "http://www.gsea-msigdb.org/gsea/msigdb/human/geneset/REACTOME_INTERLEUKIN_6_SIGNALING",
    genes = c(
      "JAK2", "TYK2", "CBL", "STAT1", "IL6ST", "IL6", "IL6R", "JAK1",
      "STAT3", "PTPN11", "SOCS3"
    )
  ),
  REACTOME_APOPTOSIS = list(
    id = "REACTOME_APOPTOSIS",
    name = "http://www.gsea-msigdb.org/gsea/msigdb/human/geneset/REACTOME_APOPTOSIS",
    genes = c(
      "BAD", "CFLAR", "PSMB1", "PSMC4", "BID", "VIM", "FAS", "BAK1",
      "DAPK2", "CDH1", "PSMA4", "DSG2", "CASP8", "PRKCQ", "ROCK1",
      "PSME4", "ARHGAP10", "TP63", "TP73", "PKP1", "BAX", "PSMC5",
      "ADD1", "DNM1L", "PPP1R13B", "DYNLL1", "PSME1", "CLSPN", "PSMD5",
      "DSP", "PSMD8", "MAPK1", "GZMB", "PSMC6", "PSMA3", "PSMC1", "PSMB5",
      "ACIN1", "PSMA6", "PSME2", "PSMA7", "E2F1", "PSMD10", "XIAP",
      "BMX", "STK24", "TRADD", "MAPK3", "PSMD7", "TJP1", "BMF", "GSDMD",
      "TNFRSF10A", "AKT2", "BBC3", "CARD8", "GSDME", "PSMA2", "MAPK8",
      "UNC5B", "PSMD3", "SEPTIN4", "KPNB1", "C1QBP", "PSMD11", "YWHAE",
      "BIRC2", "PSMD9", "LMNB1", "UNC5A", "KPNA1", "TFDP2", "PSMD14",
      "AKT3", "FASLG", "TJP2", "APAF1", "TNFRSF10B", "PPP3CC", "TNFSF10",
      "H1-3", "H1-1", "PSMF1", "PSMB2", "TRAF2", "TICAM1", "SEM1",
      "YWHAH", "PSMA1", "PSME3", "CASP9", "YWHAQ", "STK26", "DSG3",
      "DSG1", "APC", "DBNL", "NMT1", "TLR4", "PSMB7", "RIPK1", "UACA",
      "CASP6", "TP53", "PMAIP1", "AKT1", "PSMB6", "PSMA5", "TP53BP2",
      "RPS27A", "CDKN2A", "GSN", "GAS2", "APIP", "UBC", "BCL2L11",
      "LY96", "PSMA8", "APPL1", "PSMD4", "PSMB4", "DFFA", "LMNA", "PSMC2",
      "OMA1", "PSMD6", "PRKCD", "HMGB2", "CASP3", "YWHAZ", "CASP7",
      "PSMC3", "YWHAB", "DAPK3", "CTNNB1", "FADD", "H1-4", "FNTA",
      "STAT3", "PTK2", "DFFB", "AVEN", "YWHAG", "UBB", "CD14", "BCL2L1",
      "BCL2", "CYCS", "PSMD1", "PSMD2", "SFN", "PLEC", "MAGED1", "PAK2",
      "SATB1", "DIABLO", "H1-5", "PSMD13", "BCAP31", "MAPT", "DCC",
      "H1-2", "H1-0", "HMGB1", "DAPK1", "PSMD12", "SPTAN1", "OCLN",
      "TFDP1", "OPA1", "PSMB8", "PSMB10", "PPP3R1", "UBA52", "PSMB11",
      "PSMB9", "TICAM2", "DYNLL2", "PSMB3"
    )
  ),
  REACTOME_HEMOSTASIS = list(
    id = "REACTOME_HEMOSTASIS",
    name = "http://www.gsea-msigdb.org/gsea/msigdb/human/geneset/REACTOME_HEMOSTASIS",
    genes = c(
      "FGR", "CD99", "TFPI", "KDM1A", "PRKAR2B", "ITGAL", "ITGA3",
      "LAMP2", "ITGA2B", "TBXA2R", "NOS2", "SELE", "CD9", "FYN", "PLAUR",
      "EHD3", "IGF1", "CD74", "HGF", "SLC7A9", "EHD2", "CD44", "PRKCH",
      "VCL", "RAB27B", "LCP2", "HSPA5", "BCAR1", "PIK3CB", "KIF1B",
      "ITIH4", "F7", "ATP2B4", "DGKG", "GNA15", "GUCY1B1", "GPC1",
      "HMG20B", "GNAI3", "DGKA", "PRKCQ", "PPP2R5A", "CD84", "KIF26A",
      "RHOA", "PRKCZ", "ATP2B3", "KIF2A", "RASGRP2", "PPP2R5B", "TRPC7",
      "ATP1B3", "GNB5", "CDC42", "ATP2B1", "WDR1", "PRKACA", "SPP2",
      "ACTN1", "IRAG1", "ATP2A3", "MGLL", "SRI", "ACTB", "TUBA3D",
      "KIFAP3", "STXBP2", "DGKD", "APBB1IP", "ACTN2", "CAPZB", "PPP2R5C",
      "GNB1", "RAPGEF3", "CEACAM1", "KIF22", "CARMIL1", "P2RX5", "APLP2",
      "APOB", "KIF3C", "CEACAM6", "GNAS", "GP6", "GNA11", "DOCK9",
      "DOCK3", "KIF9", "F11", "SIRPG", "P2RX7", "KIF16B", "NOS1", "RCOR1",
      "THPO", "GLG1", "KIF4A", "ITGA6", "RAPGEF4", "TF", "APOH", "ANGPT2",
      "SLC7A8", "TGFB2", "CBX5", "BRPF3", "ITPR3", "JAK2", "ABL1",
      "SERPIND1", "P2RX6", "PPIL2", "MAPK1", "PICK1", "SLC16A8", "TIMP3",
      "PDGFB", "VTI1B", "ITPK1", "SERPINA4", "SLC8A3", "ABHD12", "PROCR",
      "TUBB1", "ANGPT4", "KIF3B", "F9", "GATA1", "CD99L2", "TIMP1",
      "MAGED2", "SYTL4", "DGKH", "MAPK3", "SLC7A6", "SLC7A5", "STX4",
      "CSK", "SCG3", "PLAT", "TNFRSF10A", "PPP2CB", "TUBB4A", "KLC3",
      "TGFB1", "CEACAM5", "PPP2R1A", "PIK3R2", "MAG", "PIK3CG", "CAV1",
      "SERPINE1", "RARRES2", "CDC37L1", "DOCK8", "GATA3", "RAD51C",
      "P2RX1", "PFN1", "C1QBP", "AKAP10", "LGALS3BP", "COL1A1", "PRKAR1A",
      "PF4V1", "P2RX3", "EHD1", "VWF", "SELPLG", "SH2B3", "GNB3", "PTPN6",
      "MAPK14", "PHACTR2", "PDE10A", "PPP2R5D", "VEGFA", "KIF20A",
      "SPARC", "PPP2CA", "KNG1", "HRG", "PRKAR2A", "GNAI2", "GNB4",
      "ITGA4", "PDE1A", "GRB14", "FN1", "PROC", "SDC1", "SOS1", "PLEK",
      "QSOX1", "STXBP3", "RAP1A", "HDAC1", "CAPZA1", "DOCK7", "MFN2",
      "PLA2G4A", "CD58", "CD2", "KIF21B", "CD48", "MPL", "PIK3R3",
      "F3", "SERPINC1", "APOA1", "SLC8A2", "MYB", "TGFB3", "EPCAM",
      "GNA13", "TEK", "IFNA6", "IFNA8", "CLU", "TNFRSF10B", "AKAP1",
      "A1BG", "KIF18A", "PIK3CA", "PLG", "CD244", "PLAU", "SRGN", "ITPR2",
      "PDE1B", "CDK2", "NFE2", "TUBA1B", "SDC4", "PLCG1", "PSG8", "CEACAM8",
      "F13A1", "SERPINB6", "TREM1", "ABCC4", "KIF25", "IRF1", "KLC1",
      "F10", "PRKCG", "RAP1B", "F2RL3", "GNG13", "TUBA4A", "AAMP",
      "GNG11", "GNGT1", "GNAI1", "VPREB3", "IGLL1", "RAC2", "DOCK4",
      "CALU", "PDE11A", "ISLR", "ATP1B2", "KIF1C", "DOCK6", "KIF1A",
      "ACTN4", "GATA5", "SLC7A10", "HABP4", "F12", "CAP1", "RBSN",
      "KIF3A", "RAF1", "JCHAIN", "H3-3B", "KRAS", "VAV3", "CABLES1",
      "DOCK2", "DAGLA", "P2RX4", "CD36", "PHF21A", "CD63", "MICAL1",
      "KCNMB4", "DOCK10", "SERPINE2", "RAC1", "DGKB", "GATA4", "VPS45",
      "GYPC", "KIF12", "TLN1", "IFNA21", "KLC4", "TUBB2A", "TUBB2B",
      "ARRB1", "PRCP", "TRPC6", "PPP2R1B", "THBS1", "KIF23", "KIF11",
      "KIF20B", "OLA1", "ITGAV", "PRKG2", "MMRN1", "PDE5A", "TRPC3",
      "CENPE", "EGF", "KIF21A", "ITGAX", "PDPK1", "KIF2B", "GATA6",
      "ARRB2", "PIK3R5", "TP53", "SLC16A3", "GRB7", "VAV1", "SOD1",
      "APP", "AKT1", "PRKACB", "KIF2C", "ITGA10", "ATP1B1", "F13B",
      "ECM1", "SCCPDH", "LEFTY2", "RHOB", "RAB5A", "MANF", "AHSG",
      "DGKQ", "PIK3R1", "LHFPL2", "PCYOX1L", "KCNMB1", "GNA12", "DOCK11",
      "DOK2", "DOCK5", "AK3", "IFNA5", "IFNA16", "DGKZ", "SERPING1",
      "ENDOD1", "ESAM", "CABLES2", "FERMT3", "ALDOA", "ITGB1", "ADRA2A",
      "VEGFC", "DOCK1", "ITPR1", "SLC7A11", "TUBA3E", "GUCY1A2", "MERTK",
      "CYRIB", "DGKE", "PPP2R5E", "ANGPT1", "PRKCA", "CXADR", "JAM2",
      "APOOL", "SLC16A1", "SLC7A7", "TTN", "KIF5A", "GNA14", "GNAQ",
      "KCNMA1", "TSPAN7", "CD109", "ATP2B2", "LRP8", "DGKI", "PAFAH2",
      "TAGLN2", "F11R", "FCER1G", "CYB5R1", "PTGIR", "PDE9A", "ITGB2",
      "VAV2", "JAML", "SHC1", "ORAI2", "SH2B2", "ITGA5", "RACGAP1",
      "GNG3", "ITIH3", "PDPN", "SDC3", "KIF26B", "FCAMR", "H3-3A",
      "ALB", "ABHD6", "PPBP", "PF4", "KIF15", "CLEC3B", "PRKCD", "TEX264",
      "ANXA5", "GUCY1A1", "ITGA2", "F2RL2", "KLKB1", "DAGLB", "KIF6",
      "COL1A2", "NOS3", "CDK5", "YWHAZ", "SYK", "PRKACG", "KIF27",
      "VEGFD", "CLEC1B", "JAM3", "SERPINB8", "TMX3", "WEE1", "PRKCB",
      "GNGT2", "CRK", "IGF2", "STIM1", "GNG8", "TUBA1A", "TUBA1C",
      "KIFC2", "SERPINF2", "SLC3A2", "GNG4", "IRF2", "INPP5D", "P2RY12",
      "SIN3A", "PTK2", "SDC2", "VPREB1", "GP9", "PCDH7", "P2RY1", "ITGAM",
      "ZFPM2", "GYPA", "KIF5B", "PSG6", "CEACAM3", "MFN1", "KCNMB3",
      "PRKCE", "FGG", "FGA", "FGB", "IFNB1", "JMJD1C", "BSG", "GNB2",
      "GNG12", "GTPBP2", "CTSW", "RASGRP1", "CFL1", "TUBB8B", "OLR1",
      "VEGFB", "TNFRSF10D", "GNG5", "SELP", "ATP2A2", "HRAS", "KLC2",
      "A2M", "TUBB6", "YES1", "GNG7", "RHOG", "CHID1", "GRB2", "SH2B1",
      "TUBAL3", "THBD", "GP5", "PTPN11", "GATA2", "ZFPM1", "F2", "F2R",
      "RAD51B", "ANXA2", "LCK", "SLC8A1", "GAS6", "H3C13", "PSG9",
      "TUBA8", "ADRA2C", "PROS1", "F8", "MAFF", "GP1BA", "PRKG1", "KIF18B",
      "GNG2", "PDE2A", "IFNA10", "P2RX2", "PRKAR1B", "TUBB4B", "IFNA2",
      "SELL", "SERPINA5", "SERPINA3", "KIF19", "PPIA", "ATP2A1", "PTPN1",
      "PRTN3", "HBG2", "HDAC2", "MMP1", "CD47", "NHLRC2", "FLNA", "FAM3C",
      "MAFG", "SRC", "H3C12", "SERPINA1", "H3C4", "PDGFA", "SPN", "KCNMB2",
      "SERPINB2", "PSAP", "S100A10", "CFD", "KIF13B", "IFNA1", "PLCG2",
      "TUBA3C", "SIRPA", "TOR4A", "MAFK", "CALM1", "F5", "CAPZA2",
      "L1CAM", "GP1BB", "H3C14", "H3C15", "KIFC1", "MPIG6B", "LY6G6F",
      "CD177", "PSG5", "TMSB4X", "IGKV4-1", "IGKV5-2", "IGKV3D-20",
      "IGLV6-57", "IGLV1-51", "IGLV1-47", "IGLV1-44", "IGLV7-43", "IGLV1-40",
      "IGLV3-27", "IGLV3-25", "IGLV2-23", "IGLV3-21", "IGLV3-19", "IGLV2-14",
      "IGLV2-11", "IGLV3-1", "IGLC2", "IGLC3", "IGHA2", "IGHA1", "IGHM",
      "IGHV1-2", "IGHV2-5", "IGHV3-7", "IGHV3-11", "IGHV3-13", "IGHV3-23",
      "IGHV3-33", "IGHV4-34", "IGHV4-39", "IGHV1-46", "IGHV3-48", "IGHV3-53",
      "IGHV1-69", "NRAS", "LAT", "HBE1", "HBG1", "ITGA1", "IFNA7",
      "GNAT3", "PSG3", "PSG7", "HBD", "IGHV4-59", "KIF4B", "IFNA14",
      "ORM2", "ORM1", "PSG1", "IFNA13", "IFNA17", "IFNA4", "IGKV2D-30",
      "IGKV3-20", "IGKV1D-33", "IGKV1-17", "IGKV1-16", "MIF", "IGKV1D-16",
      "IGKV3-11", "IGKV1-33", "PSG2", "IGKV1-39", "IGKV2D-28", "GNG10",
      "PSG11", "PSG4", "IGKV2-30", "IGKV1-12", "IGKV1-5", "TUBA4B",
      "IGKV2-28", "IGKV3-15", "HBB", "GYPB", "SELENOP", "IGKV2D-40",
      "IGKV1D-39", "LYN", "TUBB3", "ITGB3", "PECAM1", "TUBB8", "IGHV3-30",
      "H3C8", "ADRA2B", "IGHV2-70", "DGKK", "H3C6", "H3C11", "H3C1",
      "ORAI1", "PIK3R6", "H3C7", "IGLV2-8", "H3C10", "IGKV1D-12", "H3C2",
      "H3C3"
    )
  ),
  REACTOME_INTRINSIC_PATHWAY_FOR_APOPTOSIS = list(
    id = "REACTOME_INTRINSIC_PATHWAY_FOR_APOPTOSIS",
    name = "http://www.gsea-msigdb.org/gsea/msigdb/human/geneset/REACTOME_INTRINSIC_PATHWAY_FOR_APOPTOSIS",
    genes = c(
      "BAD", "BID", "BAK1", "CASP8", "TP63", "TP73", "BAX", "PPP1R13B",
      "DYNLL1", "MAPK1", "GZMB", "E2F1", "XIAP", "MAPK3", "BMF", "GSDMD",
      "AKT2", "BBC3", "CARD8", "GSDME", "MAPK8", "SEPTIN4", "C1QBP",
      "YWHAE", "TFDP2", "AKT3", "APAF1", "PPP3CC", "YWHAH", "CASP9",
      "YWHAQ", "NMT1", "UACA", "TP53", "PMAIP1", "AKT1", "TP53BP2",
      "CDKN2A", "APIP", "BCL2L11", "CASP3", "YWHAZ", "CASP7", "YWHAB",
      "STAT3", "AVEN", "YWHAG", "BCL2L1", "BCL2", "CYCS", "SFN", "DIABLO",
      "TFDP1", "PPP3R1", "DYNLL2"
    )
  )
) |>
  structure(class = "GMT")
moodymudskipper
  • 46,417
  • 11
  • 121
  • 167
melolili
  • 1,237
  • 6
  • 16
  • 3
    You should consider doing `gmt[] <- lapply(....)` instead. Note that the `[]` operator is used for in-place replacement. This will of course work, assuming your `lapply` code works as intended – Onyambu May 04 '23 at 20:29
  • @Onyambu the `lapply` is already changing the `name` vectors. So `gmt[]` is not needed – melolili May 04 '23 at 20:40
  • 1
    lapply is not changing the name but rather creating a copy with different names. If it were indeed changing the name, there would be no need to use the assignment operator. E.g., note that if you run `a <- lapply(gmt ...)` your `gmt` object is still unchanged. That's why I stated that you need to use the in-place assignment, i.e. the `[]` operator: `gmt[] <- your_lapply_code_here` – Onyambu May 04 '23 at 20:43
  • This post might provide more clarity: https://stackoverflow.com/a/45104831/14000041 – Joe Robert May 05 '23 at 00:41

1 Answer

0

This is a situation where you should consider preferring a for loop to lapply. One of the reasons the apply family of functions is used is that they avoid side effects. But here you want the side effect of changing something in the calling scope. That is the kind of use case where for loops can shine.

# Walk over the gene sets by position and overwrite each one's `name` with its
# `id` minus the leading prefix (everything up to and including the first
# underscore). Assigning into `gmt[[...]]` modifies `gmt` itself rather than
# building a new list.
for (set_idx in seq_along(gmt)) {
  gmt[[set_idx]]$name <- sub(
    pattern = "^[^_]+_",
    replacement = "",
    x = gmt[[set_idx]]$id
  )
}
dww
  • 30,425
  • 5
  • 68
  • 111