Add labels to scatter plot

Question

I cannot seem to add labels to my scatter plot.

I want to label each point in the scatter plot with the value of the variable `states`. It is currently not working, either because of a conflict with annotate or because of some other geom function.

Here is my data:

# Example data as produced by dput(): a plain data.frame with 23 rows, one per
# two-letter code in `states`. `states` is character, `Total` is integer and
# every other column is numeric.
# NOTE(review): the plot axis labels in this thread describe `SLT_preval`,
# `smoking_preval` and `urbanization` as percentages; the units of the other
# columns are not stated here -- confirm before using them.
datatrials <- structure(list(states = c(
  "AP", "AR", "AS", "BR", "CH", "GJ",
  "HR", "HP", "JK", "JH", "KA", "KL", "MP", "NL", "OR", "PY", "PB",
  "RJ", "SK", "TG", "TR", "UP", "UT"
), cured = c(
  60.44117647, 2.631578947,
  24.70095694, 47.31207289, 71.09634551, 67.46961326, 36.86526743,
  41.78272981, 35.24676234, 42.68617021, 37.26310608, 43.57429719,
  63.40242198, 0, 59.29648241, 30.48780488, 85.3956229, 69.87152922,
  0, 51.52317881, 36.96581197, 25.99078341, 59.29659755
), Total = c(
  4080L,
  38L, 1672L, 4390L, 301L, 18100L, 2954L, 359L, 2857L, 752L, 4063L,
  1494L, 8588L, 58L, 2388L, 82L, 2376L, 9652L, 2L, 3020L, 468L,
  1085L, 8729L
), deaths = c(
  1.666666667, 0, 0.23923445, 0.569476082,
  1.661129568, 6.198895028, 0.778605281, 1.39275766, 1.190059503,
  0.664893617, 1.304454836, 0.736278447, 4.319981369, 0, 0.293132328,
  0, 1.978114478, 2.165354331, 0, 3.278145695, 0, 0.737327189,
  2.623439111
), SLT_preval = c(
  7.1, 39.3, 41.7, 23.5, 6.1, 19.2,
  6.3, 3.1, 4.3, 35.4, 16.3, 5.4, 28.1, 39, 42.9, 4.7, 8, 14.1,
  9.7, 10.1, 48.5, 29.4, 12.4
), smoking_preval = c(
  14.2, 22.7,
  13.3, 5.1, 9.4, 7.7, 19.7, 14.2, 20.8, 11.1, 8.8, 9.3, 10.2,
  13.2, 7, 7.2, 7.3, 13.2, 10.9, 8.3, 27.7, 13.5, 18.1
), density_Population = c(
  330.7605972,
  18.75330475, 453.9513884, 1325.360556, 10162.04386, 325.839688,
  637.9420067, 133.8522264, 108.3866651, 484.1552049, 352.2724528,
  918.5972004, 276.9192201, 135.6954581, 297.7151573, 2951.02714,
  598.4943608, 236.7722235, 97.27325254, 351.2115064, 397.6534427,
  987.360228, 210.3632556
), avg_tobacco_use = c(
  10.65, 31, 27.5,
  14.3, 7.75, 13.45, 13, 8.65, 12.55, 23.25, 12.55, 7.35, 19.15,
  26.1, 24.95, 5.95, 7.65, 13.65, 10.3, 9.2, 38.1, 21.45, 15.25
), urbanization = c(
  29.47, 22.94, 14.1, 11.29, 97.25, 42.6, 34.88,
  10.03, 26.11, 24.05, 38.67, 47.7, 27.63, 28.86, 16.69, 68.33,
  37.48, 24.87, 25.15, 38.88, 26.17, 22.27, 30.23
), gats2_tobacco_india = c(
  20,
  45.5, 48.2, 25.9, 13.7, 25.1, 23.6, 16.1, 23.7, 38.9, 22.8, 12.7,
  34.2, 43.3, 45.6, 11.2, 13.4, 24.7, 17.9, 17.8, 64.5, 35.5, 26.5
), Cases_per_pop = c(
  7.56909681, 2.419676298, 4.695700757, 3.517630291,
  25.98247866, 28.33774883, 10.4734347, 4.817527749, 20.99759524,
  1.948492028, 6.013674471, 4.184939244, 10.06104045, 2.578127257,
  5.151399591, 5.80103032, 7.882852583, 11.91124239, 0.289749671,
  7.672231694, 11.22357603, 0.456107101, 77.58519395
)), class = "data.frame", row.names = c(
  NA,
  -23L
))

This is my code:

library(ggplot2)
library(ggExtra)
library(gridExtra)

# working plot
# Plot 1: urbanization against smokeless-tobacco prevalence, with a linear fit
# (95% confidence band) and a fixed text box of summary statistics.
plot1 <- ggplot(datatrials, aes(SLT_preval,urbanization)) + geom_point(color = '#CC9933') +
  geom_smooth(fullrange=TRUE,method = "lm", level=0.95) +
  ylab("Urbanization %") +
  xlab("Smokeless Tobacco Use %") +
  theme(axis.text=element_text(size=14),
        axis.title=element_text(size=14)) +
  # Scale limits are wider than the window shown by coord_cartesian() below.
  scale_x_continuous(expand=c(0,0), limits=c(0,100)) +
  scale_y_continuous(expand=c(0,0), limits=c(-50,100)) +
  coord_cartesian(xlim = c(0, 70), ylim = c(0, 100)) +
  theme(axis.title.y = element_text(margin=margin (t=0, r=5, b=0, l=0))) +
  # NOTE(review): x and y are passed as plain arguments here, not inside aes(),
  # so `SLT_preval` and `urbanization` are looked up as ordinary variables
  # rather than as columns of `datatrials`.
  geom_label(x = 0.95*max(SLT_preval), y = 0.92*max(urbanization), size = 4.3, label = "n = 32; p-value = 0.015; \n CI = -0.799:-0.050; rho = -0.426")

# Plot 2: urbanization against smoking prevalence, with a linear fit drawn
# without a confidence band (se = FALSE) and its own statistics box.
plot2 <- ggplot(datatrials, aes(smoking_preval,urbanization)) + geom_point(color = '#615513') +
  geom_smooth(fullrange=TRUE,method = "lm", se=FALSE) +
  ylab("Urbanization %") +
  xlab("Smoking %") +
  theme(axis.text=element_text(size=14),
        axis.title=element_text(size=14)) +
  scale_x_continuous(expand=c(0,0), limits=c(0,100)) +
  scale_y_continuous(expand=c(0,0), limits=c(-50,100)) +
  coord_cartesian(xlim = c(0, 70), ylim = c(0, 100)) +
  # NOTE(review): same pattern as in plot1 -- `smoking_preval` and
  # `urbanization` are referenced outside aes().
  geom_label(x = 1.35*max(smoking_preval), y = 0.92*max(urbanization), size = 4.3, label = "n = 32; p-value = 0.186; \n CI = -0.641:0.165; rho = -0.239")

# Add marginal histograms to each scatter plot.
p1 <- ggMarginal(plot1, type="histogram", colour = '#FF0000', fill = '#FAC95F')
p2 <- ggMarginal(plot2, type="histogram", colour = '#FF0000', fill = '#615513')
# Draw the two combined plots side by side (two columns).
grid.arrange(p1, p2, ncol=2)

Can you add the data frame structure to make it reproducible? You can do that with dput(dataframe.name). If you don't want to share the entire data, you can share just the first 10 rows: dput(dataframe.name[1:10,]). Sometimes, if there is a conflict, you need to specify the package before the function name, e.g. ggplot2::geom_text_repel (I am not sure whether it is from ggplot2 or not; I am just showing the syntax here) — yuliaUU, Jun 23 '20 at 18:31
Welcome to Stack Overflow (SO)! I think we can help you with this, but there are a couple of things different about SO from a forum that you should be aware of. One is that we don't allow or strongly discourage cloud links because (a) they tend to break over time and (b) they may contain malware, so if you could `dput` or otherwise reproduce your data or fake data instead that would be better. Also, we have more of a structured, minimalist format so you don't have to worry about asking for help in the question title or body; you can just describe your problem and it won't sound rude. — Hack-R, Jun 23 '20 at 18:33

score 0 · Accepted Answer · answered Jun 23 '20 at 18:35

Try this. First, I added geom_text to label the points. Next, there was an error in your use of geom_label, which lacked the datatrials$ prefix on the column names. I also switched to annotate, which works fine if you add geom = "label".

library(ggplot2)
library(ggExtra)

# Plot 1: urbanization against smokeless-tobacco prevalence.
plot1 <- ggplot(datatrials, aes(x = SLT_preval, y = urbanization)) +
  geom_point(colour = "#CC9933") +
  # Linear fit with a 95% confidence band, extended over the full x scale.
  geom_smooth(method = "lm", level = 0.95, fullrange = TRUE) +
  # Label every point with its state code.
  geom_text(aes(label = states)) +
  labs(x = "Smokeless Tobacco Use %", y = "Urbanization %") +
  scale_x_continuous(limits = c(0, 100), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-50, 100), expand = c(0, 0)) +
  # Visible window; the scale limits above stay wider than this.
  coord_cartesian(xlim = c(0, 70), ylim = c(0, 100)) +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    axis.title.y = element_text(margin = margin(t = 0, r = 5, b = 0, l = 0))
  ) +
  # Fixed statistics box. annotate() takes plain values rather than aes()
  # mappings, so the columns are pulled from the data frame explicitly.
  annotate(
    geom = "label",
    x = 0.95 * max(datatrials[["SLT_preval"]]),
    y = 0.92 * max(datatrials[["urbanization"]]),
    size = 4.3,
    label = "n = 32; p-value = 0.015; \n CI = -0.799:-0.050; rho = -0.426"
  )

# Plot 2: urbanization against smoking prevalence.
plot2 <- ggplot(datatrials, aes(x = smoking_preval, y = urbanization)) +
  geom_point(colour = "#615513") +
  # Linear fit without a confidence band.
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE) +
  # Label every point with its state code.
  geom_text(aes(label = states)) +
  labs(x = "Smoking %", y = "Urbanization %") +
  scale_x_continuous(limits = c(0, 100), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-50, 100), expand = c(0, 0)) +
  coord_cartesian(xlim = c(0, 70), ylim = c(0, 100)) +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  ) +
  # Fixed statistics box, positioned from the data frame columns.
  annotate(
    geom = "label",
    x = 1.35 * max(datatrials[["smoking_preval"]]),
    y = 0.92 * max(datatrials[["urbanization"]]),
    size = 4.3,
    label = "n = 32; p-value = 0.186; \n CI = -0.641:0.165; rho = -0.239"
  )

# Attach marginal histograms to each scatter plot.
p1 <- ggMarginal(plot1, type = "histogram", fill = "#FAC95F", colour = "#FF0000")
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
p2 <- ggMarginal(plot2, type = "histogram", fill = "#615513", colour = "#FF0000")
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'

# par(mfrow = c(1, 2))
# Draw both combined plots side by side in two columns.
gridExtra::grid.arrange(grobs = list(p1, p2), ncol = 2)

Created on 2020-06-23 by the reprex package (v0.3.0)

Thank you - this worked perfectly! I am still looking at the code trying to grasp but I will learn if I stick around. — SamV, Jun 23 '20 at 21:04
I have just made one change - replaced geom_text with geom_text_repel. I was wondering if there is a way I can automatically generate the label "n=32....." based on the data rather than typing it in? That would be cooooooollll ;-) Thanks anyway; cheers!! — SamV, Jun 25 '20 at 13:27
Sure, this is possible. (; Put your regression output with all stats in a list, e.g. `stats <- lm(urbanization ~ SLT_preval, data = datatrials) %>% broom::tidy(conf.int = TRUE) %>% filter(term == "SLT_preval") %>% as.list()` gives me a list which I can use to make the label like so: `paste0("n = ", nrow(datatrials), "; p-value = ", scales::number(stats$p.value, accuracy = .001), ";\nCI = ", scales::number(stats$conf.low, accuracy = .001), ":", scales::number(stats$conf.high, accuracy = .001))` — stefan, Jun 25 '20 at 14:05
I want to appreciate the help provided by all above users in this thread and the stackoverflow community for your assistance in visualization of data in one of my upcoming publications. Kindly contact me if you do not consent to using your user ID in the acknowledgements. Thank you and all the best! Stay safe and healthy!! — SamV, Oct 21 '20 at 13:27

Add labels to scatter plot

1 Answer