Thanks again to @Allan Cameron for his excellent answer. I used his code to write a function that mimics `GGally::ggparcoord()`, but with individual y-axes. Here, the axis breaks and the data are normalised in the same way, so that all of the axes have identical heights.
I also added an argument `truth`, which is an optional `data.frame` containing points to plot for each variable; in the context of my application, the lines correspond to parameter estimates, and the `truth` points are the true values we are trying to estimate.
Here is the function:
#' Parallel-coordinates plot with an individual y-axis for each variable
#'
#' Draws one line per row of `data` across the selected variables. Each
#' variable gets its own axis whose tick labels are on the original scale;
#' internally the breaks and the data are rescaled with the same per-variable
#' minimum and range, so that every axis spans the same height.
#'
#' @param data A data frame containing the variables to plot.
#' @param truth Optional data frame with one column per plotted variable;
#'   its rows are drawn as red points on the corresponding axes.
#' @param truthPointSize Point size used for the `truth` points.
#' @param columns Names or positions of the columns of `data` to plot, in
#'   plotting order. These columns must be numeric (or logical).
#' @param groupColumn Optional name (a single string) of the column of `data`
#'   mapped to line colour. It must not be one of the plotted columns.
#' @param alphaLines Alpha (opacity) of the lines.
#' @param nbreaks Desired number of axis breaks, passed to `pretty()` as `n`.
#' @param axis_font_size Text size of the axis tick labels.
#' @return A ggplot object.
ggparcoord_ind_yaxis <- function(
  data,
  truth = NULL,
  truthPointSize = 2,
  columns = 1:ncol(data),
  groupColumn = NULL,
  alphaLines = 1,
  nbreaks = 4,
  axis_font_size = 3
) {
  # Select the variables to plot. all_of() makes sure `columns` is read as an
  # external vector even when `data` itself has a column called "columns".
  lines_df <- data %>% select(all_of(columns))
  col_names <- names(lines_df)

  # Fail early, with a readable message, on inputs that cannot be plotted.
  if (!is.null(groupColumn) && any(groupColumn %in% col_names)) {
    stop("`groupColumn` must not be one of the plotted `columns`.",
         call. = FALSE)
  }
  plottable <- vapply(
    lines_df,
    function(x) is.numeric(x) || is.logical(x),
    logical(1)
  )
  if (!all(plottable)) {
    stop("All plotted columns must be numeric; offending column(s): ",
         paste(col_names[!plottable], collapse = ", "), call. = FALSE)
  }

  # Re-order truth to match the plotted columns.
  if (!is.null(truth)) {
    truth <- truth %>% select(all_of(col_names))
  }

  # Calculate the axis breaks for each variable on the *original* scale.
  # The truth values are included when computing the breaks, because a true
  # value can easily fall outside the range of the data (e.g. when comparing
  # parameter estimates to the values being estimated).
  # `truth[[col]]` is NULL when `truth` is NULL, so c() just returns the data.
  breaks_list <- lapply(col_names, function(col) {
    pretty(c(lines_df[[col]], truth[[col]]), n = nbreaks)
  })
  min_break <- vapply(breaks_list, function(b) as.numeric(min(b)), numeric(1))
  max_break <- vapply(breaks_list, function(b) as.numeric(max(b)), numeric(1))
  span <- max_break - min_break
  # Guard against a degenerate axis (a single break): avoid dividing by zero.
  span[span == 0] <- 1

  # Normalise the breaks to lie between exactly 0 and 1 (so that all axes have
  # the same height) and set the coordinates of the tick marks and labels.
  n_per_axis <- lengths(breaks_list)
  axis_pos <- rep(seq_along(col_names), times = n_per_axis)
  axis_breaks <- unlist(breaks_list, use.names = FALSE)
  axis_df <- data.frame(
    ind = factor(rep(col_names, times = n_per_axis), levels = col_names),
    breaks = axis_breaks,
    yval = (axis_breaks - min_break[axis_pos]) / span[axis_pos],
    xmin = axis_pos - 0.05,
    xmax = axis_pos,
    x_text = axis_pos - 0.2
  )

  # Calculate the co-ordinates for our axis lines:
  axis_line_df <- axis_df %>%
    group_by(ind) %>%
    summarize(min = min(yval), max = max(yval), .groups = "drop")

  # Normalise the original data (and the truth points, if any) in exactly the
  # same way as the breaks, so that lines and points match the tick labels.
  for (j in seq_along(col_names)) {
    col <- col_names[j]
    lines_df[[col]] <- (lines_df[[col]] - min_break[j]) / span[j]
    if (!is.null(truth)) {
      truth[[col]] <- (truth[[col]] - min_break[j]) / span[j]
    }
  }

  # Each observation needs an id to group its line. geom_line() orders the
  # data by group before drawing, so shuffling the *rows* has no visible
  # effect; instead we assign the ids in random order, which randomises the
  # drawing order and prevents one group from dominating over another.
  lines_df$row <- sample.int(nrow(lines_df))

  # Reshape original data to long format:
  lines_df <- lines_df %>%
    # need groupColumn for the colour aesthetic
    bind_cols(data[, groupColumn, drop = FALSE]) %>%
    reshape2::melt(id.vars = c("row", groupColumn),
                   value.name = "values", variable.name = "ind")

  # Reshape truth, as above
  if (!is.null(truth)) {
    truth <- truth %>%
      mutate(row = row_number()) %>%
      reshape2::melt(id.vars = "row",
                     value.name = "values", variable.name = "ind")
  }

  # Colour is only mapped when a grouping column was requested.
  line_mapping <- if (is.null(groupColumn)) {
    aes(x = ind, y = values, group = row)
  } else {
    aes(x = ind, y = values, group = row, colour = .data[[groupColumn]])
  }

  # Now plot:
  gg <- ggplot() +
    geom_line(data = lines_df, mapping = line_mapping, alpha = alphaLines) +
    geom_segment(data = axis_line_df,
                 aes(x = ind, xend = ind, y = min, yend = max),
                 inherit.aes = FALSE) +
    geom_segment(data = axis_df,
                 aes(x = xmin, xend = xmax, y = yval, yend = yval),
                 inherit.aes = FALSE) +
    geom_text(data = axis_df, aes(x = x_text, y = yval, label = breaks),
              inherit.aes = FALSE, size = axis_font_size)

  if (!is.null(groupColumn)) {
    # Keep the legend titled with the column name.
    gg <- gg + labs(colour = groupColumn)
  }

  if (!is.null(truth)) {
    gg <- gg + geom_point(data = truth, aes(x = ind, y = values),
                          inherit.aes = FALSE, colour = "red",
                          size = truthPointSize)
  }

  # The default panel decorations are meaningless here: every variable has
  # its own hand-drawn axis.
  gg + theme_bw() +
    theme(panel.grid = element_blank(),
          panel.border = element_blank(),
          axis.title = element_blank(),
          axis.ticks = element_blank(),
          axis.text.y = element_blank())
}
An example using the `iris` data set:
library("ggplot2")
library("dplyr")
library("tibble")
# Use the per-variable medians of the four measurements (taken in reverse
# column order) as the illustrative "true" values: a one-row data frame.
truth <- data.frame(lapply(iris[4:1], median))

# Plot the same four variables, coloured by species, with the medians
# overlaid as the truth points.
ggparcoord_ind_yaxis(
  iris,
  truth = truth,
  columns = 4:1,
  groupColumn = "Species",
  alphaLines = 0.5
)
