I'm trying to wrangle a JSON file into a data frame, but I'm having trouble unnesting to a wider format because the columns contain lists of unequal sizes. In addition, it appears that one list-column contains what should be the names of the new columns, while the other list-column contains the data for them.
I apologize for not including a reprex, but I don't know how to create a data frame with list-columns. I've included a screenshot to hopefully highlight the issue.
When I attempt to unnest with `raw_json2 %>% unnest_wider(value)`,
I get the following error:
Error: Can't combine `..1$...1` <character> and `..27$...1` <list>.
It appears that the column `type` contains what should be the column names, and that `value` contains the values for those columns. So essentially, new columns would be added to my data frame with names taken from `type` and data taken from `value`.
EDIT: Sample data. I included the first 30 rows, as there are some tricky lists at the end.
> dput(head(raw_json2, n=30))
structure(list(id = c(112357710L, 112357713L, 112357714L, 112357717L,
112357719L, 112357723L, 112357727L, 112357730L, 112357732L, 112357736L,
112357737L, 112357738L, 112357744L, 112357745L, 112357746L, 112357747L,
112357759L, 112357760L, 112357761L, 112357764L, 112357765L, 112357766L,
112357767L, 112357775L, 112357777L, 112357780L, 112357782L, 112357783L,
112357784L, 112357791L), start_sec = c("00:00:19.000000", "00:45:34.000000",
"00:02:19.000000", "00:00:49.000000", "00:00:46.000000", "00:00:59.000000",
"00:01:17.000000", "00:01:29.000000", "00:01:43.000000", "00:02:02.000000",
"00:02:04.000000", "00:02:06.000000", "00:02:14.000000", "00:02:36.000000",
"00:02:22.000000", "00:02:46.000000", "00:02:52.000000", "00:02:48.000000",
"00:03:00.000000", "00:03:02.000000", "00:03:10.000000", "00:03:12.000000",
"00:04:01.000000", "00:03:27.000000", "00:04:15.000000", "00:03:53.000000",
"00:04:00.000000", "00:04:03.000000", "00:05:05.000000", "00:05:06.000000"
), end_sec = c("00:00:19.000000", "00:45:34.000000", "00:02:19.000000",
"00:00:49.000000", "00:00:46.000000", "00:00:59.000000", "00:01:17.000000",
"00:01:29.000000", "00:01:43.000000", "00:02:02.000000", "00:02:04.000000",
"00:02:06.000000", "00:02:14.000000", "00:02:36.000000", "00:02:22.000000",
"00:02:46.000000", "00:02:52.000000", "00:02:48.000000", "00:03:00.000000",
"00:03:02.000000", "00:03:10.000000", "00:03:12.000000", "00:04:01.000000",
"00:03:27.000000", "00:04:15.000000", "00:03:53.000000", "00:04:00.000000",
"00:04:03.000000", "00:05:05.000000", "00:05:06.000000"), type = list(
c("teamNames", "list"), "list", "teamNames", "teamNames",
c("teamNames", "list"), "teamNames", "teamNames", "teamNames",
"teamNames", c("teamNames", "list"), "teamNames", "teamNames",
"teamNames", "teamNames", "teamNames", "teamNames", "teamNames",
"teamNames", "teamNames", "teamNames", "teamNames", "teamNames",
"teamNames", c("teamNames", "list"), "teamNames", c("teamNames",
"list"), c("teamNames", "list", "chartPoint", "chartPoint"
), "teamNames", "teamNames", "teamNames"), value = list(c("Real Madrid",
"kickoff"), "1", "Real Madrid", "Real Madrid", c("Real Madrid",
"throw in"), "Real Madrid", "Barcelona", "Real Madrid", "Barcelona",
c("Real Madrid", "free kick"), "Real Madrid", "Real Madrid",
"Real Madrid", "Real Madrid", "Barcelona", "Barcelona", "Barcelona",
"Real Madrid", "Real Madrid", "Barcelona", "Barcelona", "Real Madrid",
"Real Madrid", c("Real Madrid", "throw in"), "Real Madrid",
c("Real Madrid", "corner kick"), list("Real Madrid", "save",
list(x = 483.51837158203, y = 397.89303588867, x2 = 0L,
y2 = 0L, type = "point", sector = 1L, orientation = "left"),
list(x = 274.94967651367, y = 404.6828918457, x2 = 0L,
y2 = 0L, type = "point", sector = 3L, orientation = "left")),
"Barcelona", "Barcelona", "Barcelona")), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))