0

EDITED: In response to the comments, I have added the dput() output below the code.

I'm learning graphing with ggplot and have made quite a bit of progress in getting the visualization I need. However, I haven't been able to find references to two specific problems:

  1. The data labels of the geom_point layer come with some extra lines that contaminate the visualization and that I can't remove. I mean, I can make them disappear by changing the category of the variable "Sexo" used in the label condition, because I see that there are hidden categories that occupy "ghost space" in the graph (I put a comment on that line of the code), but that only hides the problem.

  2. This is the most serious problem: the values of geom_point have nothing in common with the scale on the secondary y axis. I think this is because the secondary axis needs to refer to the primary one, but you'll understand that this makes the visualization misleading.

Do you have any idea where the problem could be?

Thank you very much

# Grouped bar chart of ing_medio by Year and Sexo, with the gender gap
# (brecha) overlaid as a red line, points and labels, plus a secondary y axis.
# This is the code that shows the two problems described above.
graf1 %>%
  # Put the Sexo levels in the order Mujeres, Total, Hombres.
  mutate(Sexo = fct_relevel(Sexo,"Mujeres", "Total", "Hombres")) %>% 
ggplot(aes(fill=Sexo, y=ing_medio, x=Year)) + 
  # One dodged bar per Sexo within each Year; bar height is ing_medio as is.
  geom_bar(position="dodge", stat="identity")+
  # Limit the visible y range to 0-800 without dropping data.
  coord_cartesian(ylim = c(0,800))+
  # Value label above each bar, dodged by the same width as the bars.
  geom_text(aes(label=paste0("$",round(ing_medio,1))),
            vjust=-0.5,size=3,check_overlap = F,
            position = position_dodge(0.9))+
  scale_fill_brewer(palette="Paired")+
  # Gap line: a constant string is mapped to colour so the line gets a legend
  # entry. No `data` is given, so every row is used; brecha is repeated on the
  # three Sexo rows of each year.
  geom_line(aes( y=brecha, x=Year, group=1,color ="Brecha de género (%)"))+
  # NULL removes the colour legend title; the single colour level is red.
  scale_color_manual(NULL, values = "red") +
  geom_point(aes( y=brecha, x=Year, group=1,color ="Brecha de género (%)"),inherit.aes = F)+
  # A label is built for every row: "-" + brechap where Sexo is "Total", and
  # the empty string "" for the other rows (problem 1).
  geom_label(aes( y=brecha, x=Year,label=(ifelse(Sexo=="Total",paste0("-",brechap),""))),
             size=3,inherit.aes = F,vjust=-1)+            #### If "Total" is replaced by "Hombres" or "Mujeres" the contamination disappears, but only visually
  scale_x_discrete("Año")+
  # The secondary axis shows the primary axis value divided by 2000, formatted
  # as a percentage. NOTE(review): brecha is drawn in primary-axis units, so a
  # value of about 197 reads as about 9.9% here while its label says 29.7%
  # (problem 2).
  scale_y_continuous(name="Ingreso medio en miles de pesos ($)",
                     sec.axis = sec_axis( trans=~./2000, 
                     name="Brecha (%)",
                     labels = scales::label_percent(),
                     breaks = seq(0, 1, by = 0.05)))+
  theme(panel.grid.minor.y = element_line(color="lightgrey"),
        panel.grid.major.y = element_line(color="lightgrey"),
        # NOTE(review): the first element_blank() on the next line is passed
        # without a name; the theme in the dput() output has no entry for it,
        # so it appears to have no effect - confirm.
        element_blank(),panel.background = element_blank(),legend.position="bottom",
        legend.text=element_text(size=8),
        plot.title = element_text(face = "bold",size=9, hjust=0.5),
        axis.title=element_text(size=8,face="bold"),
        axis.title.y.right = element_text(angle = 90))+
  labs(title="Ingreso medio mensual de las personas ocupadas y brecha de género en el ingreso,\n según sexo y año, 2017-2021")


enter image description here

dput(head(graf1))



list(data = structure(list(Sexo = structure(c(1L, 2L, 3L, 1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), levels = c("Mujeres", 
"Total", "Hombres"), class = "factor"), ing_medio = c(467.060109828569, 
581.086381783187, 664.363627171407, 499.37645299215, 606.399207890318, 
685.007895112625, 506.650544840556, 620.528397683238, 704.273922502823, 
551.32710782737, 635.133967009455, 692.288992114432, 586.178446433092, 
681.039367826197, 749.045509202275), Year = c("2017", "2017", 
"2017", "2018", "2018", "2018", "2019", "2019", "2019", "2020", 
"2020", "2020", "2021", "2021", "2021"), brecha = c(197.303517342838, 
197.303517342838, 197.303517342838, 185.631442120475, 185.631442120475, 
185.631442120475, 197.623377662267, 197.623377662267, 197.623377662267, 
140.961884287062, 140.961884287062, 140.961884287062, 162.867062769183, 
162.867062769183, 162.867062769183), brechap = c("29.7%", "29.7%", 
"29.7%", "27.1%", "27.1%", "27.1%", "28.1%", "28.1%", "28.1%", 
"20.4%", "20.4%", "20.4%", "21.7%", "21.7%", "21.7%")), row.names = c(3L, 
1L, 2L, 6L, 4L, 5L, 9L, 7L, 8L, 12L, 10L, 11L, 15L, 13L, 14L), class = "data.frame"), 
    layers = list(<environment>, <environment>, <environment>, 
        <environment>, <environment>), scales = <environment>, 
    mapping = structure(list(x = ~Year, y = ~ing_medio, fill = ~Sexo), class = "uneval"), 
    theme = structure(list(axis.title = structure(list(family = NULL, 
        face = "bold", colour = NULL, size = 8, hjust = NULL, 
        vjust = NULL, angle = NULL, lineheight = NULL, margin = NULL, 
        debug = NULL, inherit.blank = FALSE), class = c("element_text", 
    "element")), axis.title.y.right = structure(list(family = NULL, 
        face = NULL, colour = NULL, size = NULL, hjust = NULL, 
        vjust = NULL, angle = 90, lineheight = NULL, margin = NULL, 
        debug = NULL, inherit.blank = FALSE), class = c("element_text", 
    "element")), legend.text = structure(list(family = NULL, 
        face = NULL, colour = NULL, size = 8, hjust = NULL, vjust = NULL, 
        angle = NULL, lineheight = NULL, margin = NULL, debug = NULL, 
        inherit.blank = FALSE), class = c("element_text", "element"
    )), legend.position = "bottom", panel.background = structure(list(), class = c("element_blank", 
    "element")), panel.grid.major.y = structure(list(colour = "lightgrey", 
        size = NULL, linetype = NULL, lineend = NULL, arrow = FALSE, 
        inherit.blank = FALSE), class = c("element_line", "element"
    )), panel.grid.minor.y = structure(list(colour = "lightgrey", 
        size = NULL, linetype = NULL, lineend = NULL, arrow = FALSE, 
        inherit.blank = FALSE), class = c("element_line", "element"
    )), plot.title = structure(list(family = NULL, face = "bold", 
        colour = NULL, size = 9, hjust = 0.5, vjust = NULL, angle = NULL, 
        lineheight = NULL, margin = NULL, debug = NULL, inherit.blank = FALSE), class = c("element_text", 
    "element"))), complete = FALSE, validate = TRUE), coordinates = <environment>)
  • I can't run your code to try changes to it and see what might help, because you haven't provided input data. Here is a good guide you can follow to learn how to give example data. https://community.rstudio.com/t/faq-how-to-do-a-minimal-reproducible-example-reprex-for-beginners/23061 – Nir Graham Sep 02 '22 at 17:34

1 Answers1

0

The issue is a kind of overplotting: by using ifelse() you add empty labels for the male and female categories, which results in the "ghost space". To prevent that, filter your data for the total category and use the filtered data in geom_label (and in geom_line and geom_point). Additionally, as vjust = -1 does not center the labels inside the "box", I would suggest switching to nudge_y to shift the labels upwards:

library(ggplot2)
library(dplyr)
library(forcats)

# Fix the order of the Sexo levels once (Mujeres, Total, Hombres) so that the
# bars and the legend use it.
graf2 <- graf1 %>%
  mutate(Sexo = fct_relevel(Sexo, "Mujeres", "Total", "Hombres"))

# brecha and brechap are repeated on every Sexo row of a year. Keep a single
# row per year so the line, points and labels are drawn once per year and no
# empty labels are created.
graf_line <- graf2 %>%
  filter(Sexo == "Total")

ggplot(graf2, aes(fill = Sexo, y = ing_medio, x = Year)) +
  # Dodged bars whose height is ing_medio as is.
  geom_col(position = "dodge") +
  # Limit the visible y range without dropping data.
  coord_cartesian(ylim = c(0, 800)) +
  # Value label above each bar, dodged by the same width as the bars.
  geom_text(
    aes(label = paste0("$", round(ing_medio, 1))),
    vjust = -0.5, size = 3, check_overlap = FALSE,
    position = position_dodge(0.9)
  ) +
  # Gap layers use the one-row-per-year data. Mapping a constant string to
  # colour gives the line its own legend entry.
  geom_line(
    data = graf_line,
    aes(y = brecha, x = Year, group = 1, color = "Brecha de género (%)")
  ) +
  geom_point(
    data = graf_line,
    aes(y = brecha, x = Year, group = 1, color = "Brecha de género (%)"),
    inherit.aes = FALSE
  ) +
  # nudge_y shifts the whole label (text and box) upwards, in y-axis units.
  geom_label(
    data = graf_line,
    aes(y = brecha, x = Year, label = paste0("-", brechap)),
    size = 3, inherit.aes = FALSE, nudge_y = 25
  ) +
  scale_fill_brewer(palette = "Paired") +
  # NULL removes the colour legend title; the single colour level is red.
  scale_color_manual(NULL, values = "red") +
  scale_x_discrete("Año") +
  # NOTE: brecha is still drawn in primary-axis units, so the secondary axis
  # (primary value / 2000) does not match the percentage labels. Matching them
  # requires rescaling the plotted values to the primary axis.
  scale_y_continuous(
    name = "Ingreso medio en miles de pesos ($)",
    sec.axis = sec_axis(
      trans = ~ . / 2000,
      name = "Brecha (%)",
      labels = scales::label_percent(),
      breaks = seq(0, 1, by = 0.05)
    )
  ) +
  theme(
    panel.grid.minor.y = element_line(color = "lightgrey"),
    panel.grid.major.y = element_line(color = "lightgrey"),
    panel.background = element_blank(),
    legend.position = "bottom",
    legend.text = element_text(size = 8),
    plot.title = element_text(face = "bold", size = 9, hjust = 0.5),
    axis.title = element_text(size = 8, face = "bold"),
    axis.title.y.right = element_text(angle = 90)
  ) +
  labs(title = "Ingreso medio mensual de las personas ocupadas y brecha de género en el ingreso,\n según sexo y año, 2017-2021")

enter image description here

UPDATE: Personally I'm not a fan of secondary axes and would suggest using a second plot to show the "gender gap". Be that as it may, using a secondary axis also requires rescaling the data to the range of the primary axis:

library(ggplot2)
library(dplyr)
library(forcats)

# Fix the order of the Sexo levels once (Mujeres, Total, Hombres) so that the
# bars and the legend use it.
graf2 <- graf1 %>%
  mutate(Sexo = fct_relevel(Sexo, "Mujeres", "Total", "Hombres"))

# Ranges of the primary (income) and secondary (gap share) axes. The same two
# ranges drive the visible y limits, the rescaling of the data and the
# secondary-axis transformation, so all three stay consistent.
scale_prim <- c(0, 800)
scale_sec <- c(0, 0.8)

graf_line <- graf2 %>%
  # One row per year; on the Hombres rows ing_medio is the men's mean income.
  filter(Sexo == "Hombres") %>%
  mutate(
    # Compute brechap from the data as a numeric share (replaces the
    # pre-formatted character column).
    brechap = brecha / ing_medio,
    # Rescale brechap to the scale of the primary axis for plotting.
    brechap_scale = scales::rescale(brechap, to = scale_prim, from = scale_sec)
  )

ggplot(graf2, aes(fill = Sexo, y = ing_medio, x = Year)) +
  # Dodged bars whose height is ing_medio as is.
  geom_col(position = "dodge") +
  # Visible y range is the primary-axis range used for the rescaling.
  coord_cartesian(ylim = scale_prim) +
  # Value label above each bar, dodged by the same width as the bars.
  geom_text(
    aes(label = paste0("$", round(ing_medio, 1))),
    vjust = -0.5, size = 3, check_overlap = FALSE,
    position = position_dodge(0.9)
  ) +
  # For the gap layers map brechap_scale (primary-axis units) on y.
  geom_line(
    data = graf_line,
    aes(y = brechap_scale, x = Year, group = 1, color = "Brecha de género (%)")
  ) +
  geom_point(
    data = graf_line,
    aes(y = brechap_scale, x = Year, group = 1, color = "Brecha de género (%)"),
    inherit.aes = FALSE
  ) +
  # The label text uses the unscaled share, shown as a negative percentage.
  geom_label(
    data = graf_line,
    aes(y = brechap_scale, x = Year, label = scales::percent(-brechap)),
    size = 3, inherit.aes = FALSE, nudge_y = 25
  ) +
  scale_fill_brewer(palette = "Paired") +
  # NULL removes the colour legend title; the single colour level is red.
  scale_color_manual(NULL, values = "red") +
  scale_x_discrete("Año") +
  scale_y_continuous(
    name = "Ingreso medio en miles de pesos ($)",
    sec.axis = sec_axis(
      # Backward transform from the primary scale to the secondary scale.
      trans = ~ scales::rescale(., to = scale_sec, from = scale_prim),
      name = "Brecha (%)",
      labels = scales::label_percent(prefix = "-"),
      breaks = seq(0, 1, by = 0.1)
    )
  ) +
  theme(
    panel.grid.minor.y = element_line(color = "lightgrey"),
    panel.grid.major.y = element_line(color = "lightgrey"),
    panel.background = element_blank(),
    legend.position = "bottom",
    legend.text = element_text(size = 8),
    plot.title = element_text(face = "bold", size = 9, hjust = 0.5),
    axis.title = element_text(size = 8, face = "bold"),
    axis.title.y.right = element_text(angle = 90)
  ) +
  labs(title = "Ingreso medio mensual de las personas ocupadas y brecha de género en el ingreso,\n según sexo y año, 2017-2021")

enter image description here

DATA

# Example data: 15 rows, one per Sexo (Mujeres, Total, Hombres) and Year
# (2017-2021). ing_medio differs per row; brecha and brechap hold one value
# per year, repeated on the three Sexo rows of that year. brecha equals the
# Hombres minus Mujeres ing_medio of the year (e.g. 664.36 - 467.06 = 197.30);
# brechap is the same gap as a pre-formatted percentage string.
graf1 <- structure(
  list(
    Sexo = factor(
      rep(c("Mujeres", "Total", "Hombres"), times = 5),
      levels = c("Mujeres", "Total", "Hombres")
    ),
    ing_medio = c(
      467.060109828569, 581.086381783187, 664.363627171407, # 2017
      499.37645299215, 606.399207890318, 685.007895112625, # 2018
      506.650544840556, 620.528397683238, 704.273922502823, # 2019
      551.32710782737, 635.133967009455, 692.288992114432, # 2020
      586.178446433092, 681.039367826197, 749.045509202275 # 2021
    ),
    Year = rep(c("2017", "2018", "2019", "2020", "2021"), each = 3),
    brecha = rep(
      c(
        197.303517342838, 185.631442120475, 197.623377662267,
        140.961884287062, 162.867062769183
      ),
      each = 3
    ),
    brechap = rep(c("29.7%", "27.1%", "28.1%", "20.4%", "21.7%"), each = 3)
  ),
  # Row names are kept exactly as in the original dump (rows were reordered).
  row.names = c(
    3L, 1L, 2L,
    6L, 4L, 5L,
    9L, 7L, 8L,
    12L, 10L, 11L,
    15L, 13L, 14L
  ),
  class = "data.frame"
)
stefan
  • 90,330
  • 6
  • 25
  • 51
  • Thanks for your answer. It fixed the "ghost space" problem, but there still remains the problem of the values not matching the scale of the secondary Y axis. Do you have any idea about it? Thanks – Sebastián Valladares Sep 05 '22 at 01:52
  • Aw. Sorry. I overlooked that part. See my update for one approach to fix the issue with the secondary axis. – stefan Sep 05 '22 at 06:15