Although there has been discussion around this before, I thought I would post an answer here that shows how to ensure that a particular proportion of points are included within each contour line.
To do this, we can get the 2d density with MASS::kde2d
, then convert to a raster using terra
. We can then order the points according to the density in the associated 2d density grid and find the density at which a quantile is passed with approx
density_quantiles <- function(x, y, quantiles) {
dens <- MASS::kde2d(x, y, n = 500)
df <- cbind(expand.grid(x = dens$x, y = dens$y), z = c(dens$z))
r <- terra::rast(df)
ind <- sapply(seq_along(x), function(i) cellFromXY(r, cbind(x[i], y[i])))
ind <- ind[order(-r[ind][[1]])]
vals <- r[ind][[1]]
ret <- approx(seq_along(ind)/length(ind), vals, xout = quantiles)$y
replace(ret, is.na(ret), max(r[]))
}
This means that if we can specify a vector of quantiles:
quantiles <- c(0, 0.2, 0.4, 0.6, 0.8)
We can get an easy-to-interpret plot showing the areas enclosing 20%, 40%, 60% and 80% of our points like this:
ggplot(dat, aes(X, Y)) +
geom_density2d_filled(
aes(fill = after_stat(level)),
contour_var = "density",
breaks = density_quantiles(dat$X, dat$Y, quantiles)) +
geom_point(alpha = 0.1) +
coord_equal() +
scale_fill_viridis_d('Quantiles', l
abels = scales::percent(quantiles[-1]),
direction = -1)

And quartiles would be like this:
quartiles <- c(0, 0.25, 0.5, 0.75)
ggplot(dat, aes(X, Y)) +
geom_density2d_filled(
aes(fill = after_stat(level)),
contour_var = "density",
breaks = density_quantiles(dat$X, dat$Y, quartiles)) +
geom_point(alpha = 0.1) +
coord_equal() +
scale_fill_viridis_d('Quartiles', labels = scales::percent(quartiles[-1]),
direction = -1)

Note that this is very different from the levels in the ndensity
plot, which are proportions of the maximum density, not areas containing a fixed proportion of points. In other words, if you looked sideways at a 3-D representation of the ndensity
plot, the bands would all have equal height, as the following animation shows (see footnote for the code that produces this):

The code seems to give "strange" results when there are pockets of high density, such as this:
set.seed(123)
dat <-
data.frame(
X = c(rnorm(300, 3, 2.5), rnorm(150, 7, 2), rnorm(450, 4, 0.5)),
Y = c(rnorm(300, 6, 2.5), rnorm(150, 2, 2), rnorm(450, 5, 0.5)))
ggplot(dat, aes(X, Y)) +
geom_density2d_filled(
aes(fill = after_stat(level)),
contour_var = "density",
breaks = density_quantiles(dat$X, dat$Y, quantiles)) +
geom_point(alpha = 0.1) +
coord_equal() +
scale_fill_viridis_d('Quartiles', labels = scales::percent(quantiles[-1]),
direction = -1)

Where we can see that the 80th centile contains "islands", or non-contiguous areas. However, this is simply because we are still plotting density, and these are the areas above a threshold density value that contain the correct number of points. There is no guarantee that the density bands will be single contiguous areas, wherever one sets the thresholds.
We can see this clearly using a 3D plot of the quantiles
version of density_quantiles
, where little 'lumps' of low density break our thresholds here and there.
dens <- MASS::kde2d(dat$X, dat$Y, n = 1000)
levels <- as.character(cut(dens$z[-1, -1],
breaks = density_quantiles(dat$X, dat$Y, quantiles),
labels = c(scales::viridis_pal()(4))))
persp(dens, col = levels, phi = 20, theta = -20, axes = FALSE, border = NA)

If instead you want the areas to be contiguous, then your problem becomes poorly defined. For example, it's not clear how the following density plot should look if you want contiguous areas:
set.seed(123)
dat <-
data.frame(
X = c(rnorm(150, 3, 2), rnorm(150, 10, 2)),
Y = c(rnorm(150, 3, 2), rnorm(150, 10, 2)))
ggplot(dat, aes(X, Y)) +
geom_density2d_filled(
aes(fill = after_stat(level)),
contour_var = "density",
breaks = density_quantiles(dat$X, dat$Y, quartiles)) +
geom_point(alpha = 0.1) +
coord_equal() +
scale_fill_viridis_d('Quartiles', labels = scales::percent(quartiles[-1]),
direction = -1)

In this example, to have contiguous areas, one would have to select one of these clusters as being the central point from which the number of dots is counted. This would give very artificial and misleading results which would not be reflective of density at all, but would have to be a distance function from the point of highest density, and would therefore always be a set of nested circles, like a bullseye.
Code for animation
library(magick)
p1 <- ggplot(dat, aes(X, Y)) +
geom_density2d_filled(
aes(fill = after_stat(level)),
contour_var = "ndensity",
breaks = seq(0.25, 1, length.out = 4),
show.legend = FALSE
) +
geom_point(alpha = 0.1) +
coord_fixed(0.95, expand = FALSE) +
theme(plot.margin = margin(75, 50, 60, 50))
ggsave('gg.png', p1, width = 480, height = 480, units = 'px', dpi = 72)
dens <- MASS::kde2d(dat$X, dat$Y, n = 1000)
df2 <- data.frame(x = dens$x, y = apply(dens$z, 1, max)/max(dens$z))
p2 <- ggplot(df2, aes(x, y)) +
geom_area(fill = scales::viridis_pal()(3)[3]) +
geom_area(fill = scales::viridis_pal()(3)[2],
aes(y = ifelse(y > 0.75, 0.75, y))) +
geom_area(fill = scales::viridis_pal()(3)[1],
aes(y = ifelse(y > 0.5, 0.5, y))) +
geom_area(fill = 'gray92',
aes(y = ifelse(y > 0.25, 0.25, y))) +
coord_fixed(diff(range(dens$x))) +
geom_hline(yintercept = c(0, 0.25, 0.5, 0.75, 1)) +
theme_classic() +
theme(plot.margin = margin(58, 50, 50, 45))
ggsave('gg2.png', p2, width = 480, height = 480, units = 'px', dpi = 72)
levels <- as.character(cut(dens$z[-1, -1],
breaks = c(0, 0.25, 0.5, 0.75, 1) * max(dens$z),
labels = c('gray92', scales::viridis_pal()(3))))
for(i in seq(0, 90, 3)) {
ragg::agg_png(paste0("persp", sprintf("%02d", i), ".png"))
persp(dens, col = levels, phi = i, d = 1000, axes = FALSE,
box = FALSE, border = NA)
dev.off()
}
f <- list.files(pattern = 'persp\\d+\\.png', full.names = TRUE)
c(c(rep(f[31], 10), rev(f), rep(f[1], 10),
rep('gg2.png', 10), rep(f[1], 10), f, rep(f[31], 5),
rep('gg.png', 10))
) %>%
rev(.) %>%
image_read() %>%
image_join() %>%
image_animate(fps = 10) %>%
image_write("D:\\persp.gif")