6

Problem:

I create a table using the gridExtra package:

require("gridExtra")

# Prepare data frame
col1 = c(rep("A", 3), rep("B", 2), rep("C", 5))
col2 = c(rep("1", 4), rep("2", 3), rep("3", 3))
col3 = c(1:10)
df = data.frame(col1, col2, col3)

# Create table
grid.arrange(tableGrob(df, show.rownames=F))

The output:

Default output

Question:

I would like to get rid of the repeating row entries and achieve spanning entries which look like this (this image is a mockup made with Photoshop):

Desired Output

Any ideas how to achieve this programmatically in R?

user2030503
  • 3,064
  • 2
  • 36
  • 53
  • Curiosity: In what application you'd like to use this? – Ferdinand.kraft Sep 28 '13 at 17:32
  • @Ferdinand.kraft: the table is generated as part of a a knitr code chunk. – user2030503 Sep 28 '13 at 17:40
  • I don't think that's what @Ferdinand.kraft meant. I think the question was more of "why do you want to do this?" – A5C1D2H2I1M1N2O1R2T1 Sep 28 '13 at 17:54
  • @AnandaMahto: the Knitr code chunk is part of LaTeX report. The report contains the table. For easier reading experience I prefer to skip repeating row entries. I am aware that there are also other table related packages for LaTeX output, but I prefer in this case extraGrid's table feature. Hope this clarifies the "why"? – user2030503 Sep 28 '13 at 21:37
  • i would use gtable for this. [This gist](https://gist.github.com/baptiste/5561717) has a few examples commented out. – baptiste Sep 28 '13 at 22:49
  • @baptiste: Really like your package, I use it a lot! Thanks for this hint, but to tweak the code to solve my question overstretches my R programming skills by far. Even reading the gist carefully few times, I have no idea what to do. Can you give me a pointer for the example in the question? – user2030503 Sep 29 '13 at 07:26
  • See flextable, can do something very similar: https://davidgohel.github.io/flextable/articles/overview.html – user2030503 Apr 26 '17 at 03:50

2 Answers2

5

I would use gtable, and take advantage of its more flexible framework,

enter image description here

require(gtable)
require(plyr)

## build a rectGrob with parameters
cellRect <- function(fill=NA) 
  rectGrob(gp=gpar(fill=fill, col=NA))

cellText <- function(label, colour="black", 
                     hjust=c("left", "center", "right"), ...) {
  hjust <- match.arg(hjust)
  x <- switch(hjust,
              "left" = 0,
              "center"=0.5,
              "right"=1)
  textGrob(label, x=x, hjust=x, gp=gpar(col=colour, ...))
}


rowMax_units <- function(m){
  do.call(unit.c, apply(m, 1, function(l)
    max(do.call(unit.c, lapply(l, grobHeight)))))
}

colMax_units <- function(m){
  do.call(unit.c, apply(m, 2, function(l)
    max(do.call(unit.c, lapply(l, grobWidth)))))
}

findHeights <- function(l)
  do.call(unit.c, lapply(l,grobHeight))
findWidths <- function(l)
  do.call(unit.c, lapply(l,grobWidth))

## NAs are used to indicate grobs that span multiple cells
gtable_colheader <- function(header, n = NULL, 
                             padding=unit(rep(5,5),"mm"), ...){

  type <- 2L
  if(is.null(n)) n <- max(apply(header, type, length))

  start <- alply(header, type, function(s) which(!is.na(s), TRUE))
  end <- llply(start, function(s) c(s[-1], n+1) - 1 )

  fixed <- rep(seq_along(start), sapply(start, length)) # t,b for rows, l,r for cols

  label <- header[!is.na(header)]

  d <- data.frame(label =  label,
                  start=unlist(start), end=unlist(end), fixed, fixed,
                  stringsAsFactors=FALSE)

  names(d) <- c("label","t","b","l","r")

  ## make grobs
  d$grobs <- lapply(d$label, cellText, hjust="center")
  d$widths <- lapply(d$grobs, grobWidth)
  d$heights <- lapply(d$grobs, grobHeight)

  widths <- dlply(d, names(d)[4], # t if type==1, l if type==2
                  function(d) width=do.call(unit.c, d$widths))
  heights <- dlply(d, names(d)[4],
                   function(d) heights=do.call(unit.c, d$heights))

  ## extract widths and heights relevant to the layout
  attr(d, "widths") <- do.call(unit.c, lapply(widths, max))
  attr(d, "heights") <- heights[[which(sapply(heights, length) == n)]]

  ## create gtable
  g <- gtable()
  g <- gtable_add_cols(g, attr(d,"widths") + padding[1])
  g <- gtable_add_rows(g, attr(d,"heights")+ padding[2])

  ## vertical/horizontal separators
  sgh <- segmentsGrob(x0 = unit(0, "npc"), y0 = unit(0, "npc"),
                      x1 = unit(1, "npc"), y1 = unit(0, "npc"),
                      gp=gpar(lwd=2, col="white"))
  sgv <- segmentsGrob(x0 = unit(1, "npc"), y0 = unit(0, "npc"),
                      x1 = unit(1, "npc"), y1 = unit(1, "npc"),
                      gp=gpar(lwd=2, col="white"))
  d2 <- subset(d, b < n)
  g <- with(d2, gtable_add_grob(g, replicate(length(d2$grobs), sgh, simplify=FALSE),
                                t, l, b, r, z=1, name="seph"))
  g <- gtable_add_grob(g, replicate(ncol(g)-1, sgv, simplify=FALSE),
                       t=1, b=nrow(g),l=seq.int(ncol(g)-1), z=1, name="sepv")
  g <- with(d, gtable_add_grob(g, grobs, t, l, b, r, z=0, name="text"))
  g <- gtable_add_grob(g, rectGrob(gp=gpar(fill="grey90", col="white")), t=1, l=1, 
                       b=nrow(g), r=ncol(g), z=-Inf, name="rect")
  g
}

v <- cbind(c("A", NA, NA, "B", NA, "C", NA, NA, NA, NA),
           c(1, NA, NA, NA, 2, NA, NA, 3, NA, NA),
           seq(1,10))
g2 <- gtable_colheader(v)
header <- paste0("col #",1:3)
head <- lapply(header, textGrob, gp=gpar(fontface="bold"))
w <- do.call(unit.c, lapply(header, stringWidth)) + unit(5, "mm")
h <- max(do.call(unit.c, lapply(head, grobHeight))) + unit(5, "mm")
hg <- gtable_matrix("header",  widths=w, heights=h,
                      grobs=matrix(head, nrow=1))

grid.newpage()
grid.draw(gtable:::rbind_gtable(hg, g2, size="first"))
baptiste
  • 75,767
  • 19
  • 198
  • 294
  • wow this is esthetically a perfect solution. Just one last step is open to me: how do I convert my data frame `df` to your object `v` which you have used for the answer. Can you please modify your answer so that it works with my data frame `df` ? – user2030503 Sep 29 '13 at 17:00
  • i couldn't think of a clever trick, so you might want to ask a separate question – baptiste Sep 29 '13 at 17:10
  • @user2030503 I'll post the code to do that but you had better give the checkmark to baptiste or all the coders who have passed on will haunt you and slip in extra keystrokes into your brain. – IRTFM Sep 29 '13 at 22:18
  • Yes, the table layout related answer is based on the code of baptiste while the data related code comes from DWin - thanks a lot! – user2030503 Sep 30 '13 at 04:23
  • attr(d, "heights") <- heights[[which(sapply(heights, length) == n)]] ----> attr(d, "heights") <- heights[[max(which(sapply(heights, length) == n))]] makes it handle the situation where there are multiple columns without NAs. Great answer! – Sonia Sep 27 '15 at 15:36
4
 require(grid)
 require(gridExtra)
   Loading required package: gridExtra

 df = data.frame(col1, col2, col3, stringsAsFactors=FALSE)
df2 <- df
df2[] <- lapply(df2, function(col) col <- ifelse( !duplicated(col, fromLast=TRUE), col, ""))
df2
#---------------
   col1 col2 col3
1               1
2               2
3     A         3
4          1    4
5     B         5
6               6
7          2    7
8               8
9               9
10    C    3   10
#-------------
 grid.arrange(tableGrob(df2, show.rownames=F))  # works

The two step process of copying and assignment to df2[] preserves the dataframe structure. The duplicated parameter fromLast changes the "hits" to be the last in a series rather than the first.

With the clarified request, here's the code to calculate the positions for the first column:

> tapply(df[[1]], df[[1]], FUN=function(x) mean(seq_along(x)))
  A   B   C 
2.0 1.5 3.0 

Here's the code to create the v-matrix from your data:

v <- as.matrix( as.data.frame( lapply(df,function(col) 
             ifelse(!duplicated(col), as.character(col), NA)))  )
v
      col1 col2 col3
 [1,]    1    1    1
 [2,]   NA   NA    2
 [3,]   NA   NA    3
 [4,]    2   NA    4
 [5,]   NA    2    5
 [6,]    3   NA    6
 [7,]   NA   NA    7
 [8,]   NA    3    8
 [9,]   NA   NA    9
[10,]   NA   NA   10
  g2 <- gtable_colheader(v)
 header <- colnames(v)
 head <- lapply(header, textGrob, gp=gpar(fontface="bold"))
 w <- do.call(unit.c, lapply(header, stringWidth)) + unit(5, "mm")
 h <- max(do.call(unit.c, lapply(head, grobHeight))) + unit(5, "mm")
 hg <- gtable_matrix("header",  widths=w, heights=h,
                       grobs=matrix(head, nrow=1))

 grid.newpage()
 grid.draw(gtable:::rbind_gtable(hg, g2, size="first"))
IRTFM
  • 258,963
  • 21
  • 364
  • 487
  • Thanks. Your code has 2 issues. 1st it overrides the letters in col1 to appear as numbers and 2nd the entry after spanning appear in the bottom. I'd prefer either in the middle (my question) or on top. – user2030503 Sep 28 '13 at 17:38
  • Oh heck! Bitten by the f-ing factors again. Just use stringsAsFactors=FALSE in the dataframe call. (Or use `as.character(col)`), But, I have no idea what "middle" you are referring to. – IRTFM Sep 28 '13 at 17:45
  • Sorry I was a bit sloppy in explanation: Example: The "C" shall appear not at the bottom of the combined cells in row 10, but in the middle. See my mockup image - there it appears in row 8, not 10. – user2030503 Sep 28 '13 at 17:53
  • You might offer the code that you used to construct that table. I'm not planning to pursue this since it's seems to require a bunch of 'grid' hacking that you have already done. – IRTFM Sep 28 '13 at 18:06
  • The table layout related answer is based on the code of baptiste while the data related code comes from DWin - thanks a lot! – user2030503 Sep 30 '13 at 04:21