I'm trying to reproduce this chart from Our World in Data.
I'm searching for methods that will get the line labels to look as close as possible to the original. Here's what I've got so far (shows the ggrepel()
version, see commented out line for alternate):
library(tidyverse)
library(ggrepel)
keep <- c("Israel", "United Arab Emirates", "United Kingdom",
"United States", "Chile", "European Union", "China",
"Russia", "Brazil", "World", "Mexico", "Indonesia",
"Bangladesh")
owid <- read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv") %>%
filter(location %in% keep) %>%
filter(date >= "2021-01-01" & date <= "2021-02-12") %>%
select(location, date, total_vaccinations_per_hundred) %>%
arrange(location, date) %>%
group_by(location) %>%
complete(date = seq.Date(as.Date("2021-01-01"),
as.Date("2021-02-12"),
by="day")) %>%
fill(total_vaccinations_per_hundred) %>%
ungroup() %>%
mutate(location = factor(location),
location = fct_reorder2(location, total_vaccinations_per_hundred,
total_vaccinations_per_hundred)) %>%
mutate(label = if_else(date == max(date),
as.character(location),
NA_character_))
owid %>%
ggplot(aes(x=date, y=total_vaccinations_per_hundred, group=location,
color=location)) +
geom_point() +
geom_line() +
scale_y_continuous(breaks=c(seq(0, 70, 10))) +
theme_minimal() +
labs(title = "Cumulative COVID-19 vaccination doses administered per 100 people",
subtitle = "This is counted as a single dose, and may not equal the total number of people vaccinated, depending on the specific dose regime (e.g. people receive multiple doses).",
caption = "Source: Official data collected by Our World in Data — Last updated 13 February, 11:40 (London time)",
y="",
x="") +
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dashed"),
panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
plot.title.position = "plot",
plot.title = element_text(face="bold"),
legend.position = "none") +
geom_label_repel(aes(label = label),
nudge_x = 1,
hjust = "left", direction="y",
na.rm = TRUE) +
#geom_label(aes(label = label), hjust=0, nudge_x = 1) +
scale_x_date(breaks = as.Date(c("2021-01-01",
"2021-01-10",
"2021-01-15",
"2021-01-20",
"2021-01-25",
"2021-01-30",
"2021-02-04",
"2021-02-12")),
labels = scales::date_format("%b %d"),
limits = as.Date(c("2021-01-01",
"2021-03-01")))