1

In the following code, I'm trying to show, for each feature of a data frame, its relation to the rest of the features.

Thus, using Peter Haschke's multiplot approach, I've been able to show all the combinations.

suppressMessages(require(ggplot2))
suppressMessages(require(grid))

# Draw several plot objects on one page, arranged in a grid.
#
# Args:
#   ...:      Plot objects passed individually.
#   plotlist: Optional list of plot objects; appended after those in `...`.
#   file:     Unused by this function; kept so existing calls that pass it
#             keep working.
#   cols:     Number of columns in the automatically generated layout.
#             Ignored when `layout` is supplied.
#   layout:   Optional matrix of plot indices. The cell(s) holding value i
#             determine where plot i is drawn.
#
# Returns:
#   invisible(NULL) when there is nothing to draw; otherwise the value of the
#   last evaluated expression, exactly as before (the result of print() for a
#   single plot, NULL from the loop for several plots).
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  plots <- c(list(...), plotlist)

  num_plots <- length(plots)

  # Nothing to draw. Without this guard `1:num_plots` would iterate over
  # c(1, 0) and fail on plots[[1]].
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  if (is.null(layout)) {
    # Fill a cols-wide matrix column by column with the plot indices.
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    print(plots[[1]])

  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    for (i in seq_len(num_plots)) {
      # Row/column position(s) of plot i inside the layout matrix.
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}


# Example data: 100 rows of random normal values in four columns.
# No seed is set, so the values differ from run to run.
df <- data.frame(matrix(rnorm(400), nrow=100))
colnames(df) <- c("F1","F2","F3","F4")

# Open a PDF device; the grid drawn by multiplot() below goes to example.pdf.
pdf("example.pdf")
plots <- list()
count <- 1
# Build one panel per ordered pair of columns (i_feature on x, j_feature on y).
for(i_feature in colnames(df)) {
  for(j_feature in colnames(df)) {
    if(i_feature == j_feature) {
      # Diagonal cell: an empty plot intended to show only the column name.
      empty_df <- data.frame()
      blank_plot <- ggplot(empty_df) +
                    geom_point() +
                    xlim(0, 100) +
                    ylim(0, 100) +
                    theme(panel.grid.major = element_blank(),
                          panel.grid.minor = element_blank(),
                          axis.ticks=element_blank(),
                          axis.text.x=element_blank(),
                          axis.text.y=element_blank(),
                          axis.title.x=element_blank(),
                          axis.title.y=element_blank()
                    )
      # NOTE: aes() does not store the current value of j_feature. The name is
      # looked up when the plot is drawn, which happens in multiplot() after
      # the loops have finished, so every diagonal cell shows the last column.
      blank_plot <- blank_plot + geom_text(aes(x=45,y=50,label=j_feature), size=6,fontface="bold")
      plots[[count]] <- blank_plot
    } else {
      # Off-diagonal cell: scatter plot of the two columns without any axis
      # decoration. aes_string() receives the column names as strings.
      plots[[count]] <- ggplot(df,aes_string(x=i_feature,y=j_feature)) +
                          geom_point(size = 1) +
                          theme(panel.grid.major = element_blank(),
                                panel.grid.minor = element_blank(),
                                axis.ticks=element_blank(),
                                axis.text.x=element_blank(),
                                axis.text.y=element_blank(),
                                axis.title.x=element_blank(),
                                axis.title.y=element_blank()
                          ) +
                          theme(legend.position="none")

    }
    # Next slot in the plot list.
    count <- count + 1
  }
}

# One layout column per data column.
multiplot(plotlist = plots, cols = ncol(df))
# Close the PDF device; assigning the result keeps it from being printed.
garbage <- dev.off()

As you can see, instead of plotting one column against itself, I show a label with the name of the column. The problem with this implementation is that this line:

blank_plot <- blank_plot + geom_text(aes(x=45,y=50,label=j_feature), size=6,fontface="bold")

doesn't properly refresh the label name, so the last feature to be processed is the one that is always shown, as can be seen in the screenshot.

Relation between features

Can anybody help me to solve this issue?

asuka
  • 2,249
  • 3
  • 22
  • 25

1 Answer

2

ggplot2 doesn't evaluate the aesthetics until the plot is actually drawn. As this happens after the loop has finished, it finds "F4" as the value of j_feature. You can test this by doing

# Overwrite the loop variable after the plots have been built, then redraw.
# If the label is resolved at draw time, the diagonal cells now read "test".
j_feature <- "test"
multiplot(plotlist = plots, cols = ncol(df))

The solution is relatively simple: create a dataframe with the relevant information used to generate the blank plot, so that the label is stored in the plot object.

# Decoration-free theme shared by every panel: no grid lines, ticks,
# axis text or axis titles.
panel_theme <- theme(panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     axis.ticks = element_blank(),
                     axis.text.x = element_blank(),
                     axis.text.y = element_blank(),
                     axis.title.x = element_blank(),
                     axis.title.y = element_blank())

feature_names <- colnames(df)

# One slot per ordered pair of columns, filled in row-of-pairs order.
plots <- vector("list", length(feature_names)^2)
slot <- 0
for (i_feature in feature_names) {
  for (j_feature in feature_names) {
    slot <- slot + 1

    if (i_feature != j_feature) {
      # Off-diagonal cell: scatter plot of the two columns.
      plots[[slot]] <- ggplot(df, aes_string(x = i_feature, y = j_feature)) +
        geom_point(size = 1) +
        panel_theme +
        theme(legend.position = "none")
      next
    }

    # Diagonal cell: keep position and label text inside the plot's own data
    # so the label travels with the plot object instead of being looked up
    # from the loop variable when the plot is finally drawn.
    label_df <- data.frame(x = 45, y = 50, label = j_feature)
    plots[[slot]] <- ggplot(label_df) +
      xlim(0, 100) +
      ylim(0, 100) +
      panel_theme +
      geom_text(aes(x = x, y = y, label = label), fontface = "bold")
  }
}
multiplot(plotlist = plots, cols = ncol(df))

enter image description here

Heroka
  • 12,889
  • 1
  • 28
  • 38