I know this subject is widely covered, but I couldn't find code that works for my case... I have a data frame of this type:
V1 V2 V3
1: label1 alias_fr alias_fr
2: label1 triplet triplet
3: label1 Q9327 Q3122270
4: label2 NULL NULL
5: label3 alias_fr NULL
6: label3 triplet NULL
7: label3 Q678 NULL
This data frame is generated by mapping the JSON output of a query to the corresponding input values taken from a data frame:
df <- Map(rbind, originalDF$input,out) #I first used Map(c,..) but it seems to be more difficult to reshape than rbind
df <- rbind.fill(lapply(df,function(y){as.data.frame(t(y),stringsAsFactors=FALSE)}))
class(df)
[1] "data.frame"
The example given is simplified, though: I have more than 3 columns, and some values are lists. Nevertheless, when a label has non-NULL values, every column always has the same number of rows for that label (3 in my example: alias_fr, triplet, Qxx).
I would like the values of each of the V2 and V3 columns to be laid out as one row per V1 value:
V1 var1 var2 var3
label1 alias_fr triplet Q9327
label1 alias_fr triplet Q3122270
label2 NULL NULL NULL
label3 alias_fr triplet Q678
I tried to start with melt: melt(df,id="V1")
, but then I got stuck.
I also tried reshape, cast, and dcast, without any success, and I am getting more and more confused by all the reshaping functions... If a reshape master is around, I would be very grateful ;)
[Edit]: real objects to clarify my issue
Ok so this is an extract of the real dataset I’m working with:
#original dataset (actually it’s one column of the dataset)
originalDF <- c("Guy de Maupassant", "J.-J. Goldman", "Poitou-Charentes")
#output of the API query built from the text in originalDF
out <- list(structure(list(`_index` = c("alias_fr", "alias_fr"), `_type` = c("triplet",
"triplet"), `_id` = c("Q9327", "Q3122270"), `_score` = c(NA,
NA), sort = list(-4.95263021255079, -6.65910164747673), `_source.types` = list(
structure(list(id = c("Q5", "dbPedia.Person"), value = c("être humain",
"personne")), .Names = c("id", "value"), class = "data.frame", row.names = 1:2),
structure(list(id = c("Q11424", "dbPedia.Film"), value = c("film",
"film")), .Names = c("id", "value"), class = "data.frame", row.names = 1:2)),
`_source.pageRank` = c(-4.95263021255079, -6.65910164747673
), `_source.subTypes` = list(structure(list(id = c("Q1930187",
"Q36180", "Q15949613", "Q6625963", "Q214917"), value = c("journaliste",
"écrivain", "nouvelliste", "romancier", "dramaturge")), .Names = c("id",
"value"), class = "data.frame", row.names = c(NA, 5L)), NULL),
`_source.label` = c("Guy de Maupassant", "Guy de Maupassant"
), `_source.id` = c("Q9327", "Q3122270")), .Names = c("_index",
"_type", "_id", "_score", "sort", "_source.types", "_source.pageRank",
"_source.subTypes", "_source.label", "_source.id"), class = "data.frame", row.names = 1:2),
list(), structure(list(`_index` = "alias_fr", `_type` = "triplet",
`_id` = "Q17009", `_score` = NA, sort = list(-5.0448283638424),
`_source.types` = list(structure(list(id = "Q22670030",
value = "ancienne région française"), .Names = c("id",
"value"), class = "data.frame", row.names = 1L)), `_source.pageRank` = -5.0448283638424,
`_source.label` = "Poitou-Charentes", `_source.id` = "Q17009"), .Names = c("_index",
"_type", "_id", "_score", "sort", "_source.types", "_source.pageRank",
"_source.label", "_source.id"), class = "data.frame", row.names = 1L))
#df object (generated from Map, then rbind.fill)
df <- structure(list(V1 = list("Guy de Maupassant", "Guy de Maupassant",
"Guy de Maupassant", "Guy de Maupassant", "Guy de Maupassant",
"Guy de Maupassant", "Guy de Maupassant", "Guy de Maupassant",
"Guy de Maupassant", "Guy de Maupassant", "J.-J. Goldman",
"Poitou-Charentes", "Poitou-Charentes", "Poitou-Charentes",
"Poitou-Charentes", "Poitou-Charentes", "Poitou-Charentes",
"Poitou-Charentes", "Poitou-Charentes", "Poitou-Charentes"),
V2 = list("alias_fr", "triplet", "Q9327", NA_character_,
-4.95263021255079, structure(list(id = c("Q5", "dbPedia.Person"
), value = c("être humain", "personne")), .Names = c("id",
"value"), class = "data.frame", row.names = 1:2), "-4.95263021255079",
structure(list(id = c("Q1930187", "Q36180", "Q15949613",
"Q6625963", "Q214917"), value = c("journaliste", "écrivain",
"nouvelliste", "romancier", "dramaturge")), .Names = c("id",
"value"), class = "data.frame", row.names = c(NA, 5L)),
"Guy de Maupassant", "Q9327", NULL, "alias_fr", "triplet",
"Q17009", NA_character_, -5.0448283638424, structure(list(
id = "Q22670030", value = "ancienne région française"), .Names = c("id",
"value"), class = "data.frame", row.names = 1L), "-5.0448283638424",
"Poitou-Charentes", "Q17009"), V3 = list("alias_fr",
"triplet", "Q3122270", NA_character_, -6.65910164747673,
structure(list(id = c("Q11424", "dbPedia.Film"), value = c("film",
"film")), .Names = c("id", "value"), class = "data.frame", row.names = 1:2),
"-6.65910164747673", NULL, "Guy de Maupassant", "Q3122270",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL), V4 = list(NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL)), .Names = c("V1", "V2",
"V3", "V4"), row.names = c(NA, 20L), class = "data.frame")