5

Question

I find myself using reshape2::melt to obtain a single "long" data.frame from a hierarchical list of data.frame objects. However, the column names of the result has the list hierarchy levels labeled as "L1", "L2", etc. However, since those levels have meaning, I want to give those columns meaningful names instead. What's the best way to do this? Can it be done using the single call to melt?

I am not wed to melt or reshape2, so I am open to other approaches or packages.

Current setup

Let's suppose we have a hierarchical list of data.frame objects such as this:

library(reshape2)
x <- structure(list(cyl_6 = structure(list(gear_3 = structure(list( mpg = 1:2, qsec = 3:4), .Names = c("mpg", "qsec"), row.names = c(NA, -2L), class = "data.frame"), gear_4 = structure(list(mpg = 5:6, qsec = 7:8), .Names = c("mpg", "qsec"), row.names = c(NA, -2L), class = "data.frame")), .Names = c("gear_3", "gear_4")), cyl_8 = structure(list(gear_3 = structure(list(mpg = 9:10, qsec = 11:12), .Names = c("mpg", "qsec"), row.names = c(NA, -2L), class = "data.frame"), gear_4 = structure(list(mpg = 13:14, qsec = 15:16), .Names = c("mpg", "qsec"), row.names = c(NA, -2L), class = "data.frame")), .Names = c("gear_3", "gear_4"    ))), .Names = c("cyl_6", "cyl_8"))

When I use melt(x), I get the column name of "L1" for the cylinder count and "L2" for the gear count. I would like the column to say "cylinders" and "gears", respectively, instead.

mx <- melt(x)

Here's the output of head(mx). I do not want it to just say "L1" and "L2":

1> head(mx)
  variable value     L2    L1
1      mpg     1 gear_3 cyl_6
2      mpg     2 gear_3 cyl_6
3     qsec     3 gear_3 cyl_6
4     qsec     4 gear_3 cyl_6
5      mpg     5 gear_4 cyl_6
6      mpg     6 gear_4 cyl_6

So, I resort to setting "L1" and "L2" manually:

names(mx)[3:4] <- c("gears", "cylinders")

Desired output

Here's the desired final column name setup. I would like to be able to achieve this without resetting the "names" of mx manually as a separate step.

1> head(mx)
   variable value  gears cylinders
1       mpg     1 gear_3     cyl_6
2       mpg     2 gear_3     cyl_6
3      qsec     3 gear_3     cyl_6
4      qsec     4 gear_3     cyl_6
5       mpg     5 gear_4     cyl_6
6       mpg     6 gear_4     cyl_6
Kalin
  • 1,691
  • 2
  • 16
  • 22

2 Answers2

3

You could try

library(tidyr)
res <- unnest(x, sex)
head(res)
#   sex  Hair   Eye value
#1 MALE Black Brown    32
#2 MALE Brown Brown    53
#3 MALE   Red Brown    10
#4 MALE Blond Brown     3
#5 MALE Black  Blue    11
#6 MALE Brown  Blue    50

For the updated question, it is not easier with unnest. Repeated calls to unnest is needed

library(dplyr)
dN <- unnest(lapply(x, unnest, gear), cylinders) %>%
                         gather(variable, value, mpg:qsec)
head(dN,3)
#  cylinders   gear variable value
#1     cyl_6 gear_3      mpg     1
#2     cyl_6 gear_3      mpg     2
#3     cyl_6 gear_4      mpg     5
akrun
  • 874,273
  • 37
  • 540
  • 662
  • That looks like it would work. I hadn't asked in my original question, but would unnest work for a hierarchy of lists? E.g. `unnest(x, sex, location, topic)` (assuming a list structure organized that way)? I can't test try it just yet since I can't upgrade my R to allow `tidyr` (I'm stuck with Ubuntu 14.04 LTS for now with R 3.02). – Kalin Apr 15 '15 at 19:21
  • @user29020 I haven't tested it that way. Could you please update your post with a similar data structure. – akrun Apr 15 '15 at 19:24
2
library('reshape2')
x <- structure(list(cyl_6 = structure(list(gear_3 = structure(list( mpg = 1:2, qsec = 3:4), .Names = c("mpg", "qsec"), row.names = c(NA, -2L), class = "data.frame"), gear_4 = structure(list(mpg = 5:6, qsec = 7:8), .Names = c("mpg", "qsec"), row.names = c(NA, -2L), class = "data.frame")), .Names = c("gear_3", "gear_4")), cyl_8 = structure(list(gear_3 = structure(list(mpg = 9:10, qsec = 11:12), .Names = c("mpg", "qsec"), row.names = c(NA, -2L), class = "data.frame"), gear_4 = structure(list(mpg = 13:14, qsec = 15:16), .Names = c("mpg", "qsec"), row.names = c(NA, -2L), class = "data.frame")), .Names = c("gear_3", "gear_4"    ))), .Names = c("cyl_6", "cyl_8"))

Looking at the source code, the Ls are hard-coded, so you can't change them on the fly without doing an extra step like you're doing now.

reshape2:::melt.list
# function (data, ..., level = 1) 
# {
#   parts <- lapply(data, melt, level = level + 1, ...)
#   result <- rbind.fill(parts)
#   names <- names(data) %||% seq_along(data)
#   lengths <- vapply(parts, nrow, integer(1))
#   labels <- rep(names, lengths)
#   label_var <- attr(data, "varname") %||% paste("L", level, sep = "")
#   result[[label_var]] <- labels
#   result
# }

What you could do is change this function slightly and add a new argument so you can name them whatever you want:

meltList <- function (data, ..., level = 1, nn) {
  require('reshape2')
  '%||%' <- function (a, b) if (!is.null(a)) a else b
  parts <- lapply(data, melt, level = level + 1, ...)
  result <- plyr::rbind.fill(parts)
  names <- names(data) %||% seq_along(data)
  lengths <- vapply(parts, nrow, integer(1))
  labels <- rep(names, lengths)
  label_var <- attr(data, "varname") %||% paste("L", level, sep = "")
  result[[label_var]] <- labels
  if (!missing(nn))
    names(result)[grep('^L\\d+', names(result))] <- nn
  result
}

ex

head(meltList(x, nn = c('gears','cylinders')))
#   variable value  gears cylinders
# 1      mpg     1 gear_3     cyl_6
# 2      mpg     2 gear_3     cyl_6
# 3     qsec     3 gear_3     cyl_6
# 4     qsec     4 gear_3     cyl_6
# 5      mpg     5 gear_4     cyl_6
# 6      mpg     6 gear_4     cyl_6

This will also work on more nested lists:

y <- list(x = x, y = x)
head(melt(y))
#   variable value     L3    L2 L1
# 1      mpg     1 gear_3 cyl_6  x
# 2      mpg     2 gear_3 cyl_6  x
# 3     qsec     3 gear_3 cyl_6  x
# 4     qsec     4 gear_3 cyl_6  x
# 5      mpg     5 gear_4 cyl_6  x
# 6      mpg     6 gear_4 cyl_6  x

vs

head(meltList(y, nn = c('gears','cylinders','variable')))
#   variable value  gears cylinders variable
# 1      mpg     1 gear_3     cyl_6        x
# 2      mpg     2 gear_3     cyl_6        x
# 3     qsec     3 gear_3     cyl_6        x
# 4     qsec     4 gear_3     cyl_6        x
# 5      mpg     5 gear_4     cyl_6        x
# 6      mpg     6 gear_4     cyl_6        x
rawr
  • 20,481
  • 4
  • 44
  • 78