1

I've been trying for hours to get rid of the "a" in the legend. I saw solutions here for that, but they all got rid of the legend altogether, and I want to keep it. Apart from that, I want the values to start with the mandatory school at the bottom of each column and have the highest educational attainment at the top. How can I do this? Please do not close my question. I find it disrespectful since I need support in doing this. Thank you!

# Literal snapshot of `earnings_data` so the example can be run without the
# raw `df_final` survey data: 72 rows = 4 age ranges x 2 genders x 9
# education levels. The column names match what `aggregate()` produces for an
# unnamed `by` list (Group.1, Group.2, Group.3, x).
earnings_data <- tibble::tibble(
  # Age range, written as the interval labels produced by
  # cut(age, breaks = c(25, 34, 44, 54, 65)).
  Group.1 = factor(rep(c("(25,34]", "(34,44]", "(44,54]", "(54,65]"), 18)),
  # Gender code (1 or 2), plotted on the x axis.
  # NOTE(review): presumably 1 = male, 2 = female, going by the plot
  # footnote -- confirm against the raw data.
  Group.2 = rep(rep(1:2, 9), each = 4L),
  # Education level code, 1 (mandatory school) to 9 (university degree),
  # in the order of the legend labels used when plotting.
  Group.3 = rep(1:9, each = 8L),
  # Mean earnings of each age range x gender x education cell.
  x = c(
    5.06818181818182, 5.80811808118081, 5.90760869565217, 6.00348432055749,
    4.35483870967742, 4.66666666666667, 4.72625698324022, 5.08411214953271,
    4.70833333333333, 5.38095238095238, 5.86842105263158, 6.46428571428571, 4,
    4.92857142857143, 5.31578947368421, 5.28571428571429, 5.3, 6.3,
    5.95652173913043, 6.66666666666667, 4.66666666666667, 4.69230769230769,
    5.54545454545455, 5.73333333333333, 5, 7.26666666666667, 7.13636363636364,
    7.54545454545455, 5, 5.9, 6.82608695652174, 5.83333333333333,
    6.08465608465608, 6.83566878980892, 7.28323197219809, 7.28296438883542,
    5.68085106382979, 6.04819277108434, 6.26519337016575, 6.38515901060071,
    5.74358974358974, 6.57692307692308, 7.23478260869565, 7.52631578947368,
    6.04347826086957, 6.43181818181818, 6.74324324324324, 6.20338983050847,
    6.1031746031746, 7.01630434782609, 7.37894736842105, 7.72950819672131,
    5.70114942528736, 6.46938775510204, 6.73913043478261, 7.19230769230769,
    7.23032069970845, 7.96165644171779, 8.35185185185185, 8.23263327948304,
    6.52486187845304, 7.21951219512195, 7.40825688073395, 7.74803149606299,
    7.59722222222222, 8.47927656367747, 8.64701436130007, 8.67477592829706,
    6.93237410071942, 7.87311178247734, 8.11830357142857, 8.28571428571429
  ),
  # Percentage contributed by each education level to the summed `x` of its
  # age range x gender group (x / sum(x) * 100 within Group.1 and Group.2).
  share = c(
    9.59238368157303, 9.42485144608869, 9.26469454935699, 9.07884380703652,
    8.90483213980824, 8.60539148301954, 8.19283090969065, 8.803490414879,
    8.91131010179019, 8.73168832315526, 9.20323794967356, 9.77569644399565,
    8.17925322471276, 9.08834712747471, 9.21476857137597, 9.15257840139337,
    10.0311384508647, 10.2230297801189, 9.34140315188156, 10.081749555502,
    9.54246209549822, 8.65267385380536, 9.61288638544082, 9.92766161736722,
    9.46333816119313, 11.7916428151637, 11.1917076248986, 11.4107074514545,
    10.2240665308909, 10.8796735178176, 11.8328259356068, 10.1008185060422,
    11.5162316247324, 11.0922612071819, 11.4220360607478, 11.0137534484817,
    11.6162798457357, 11.1529428342749, 10.8605329926627, 11.0563426797082,
    10.8707064005501, 10.6723937264978, 11.3460546311905, 11.3817646297641,
    12.3577847634247, 11.8603528718889, 11.6892187349767, 10.7415968228904,
    11.5512810253294, 11.3853790943181, 11.5721431438945, 11.6890448739816,
    11.6577862053377, 11.9296316331656, 11.6820892994844, 12.4539762239334,
    13.6845939590431, 12.9194049051995, 13.0979149627558, 12.4499020359058,
    13.3421243900356, 13.3128704475982, 12.8420007999215, 13.4162502721425,
    14.379016594924, 13.7593487022761, 13.5608079256008, 13.1185377538782,
    14.1754108045561, 14.5181162309553, 14.0728463708406, 14.3472850616437
  ),
)
# Bin every respondent's age into the intervals (25,34], (34,44], (44,54]
# and (54,65].
age_ranges <- cut(df_final[["age"]], breaks = c(25, 34, 44, 54, 65))

# Mean earnings per age range x gender x education level. The unnamed `by`
# list makes aggregate() call the key columns Group.1, Group.2 and Group.3
# and the value column `x`.
earnings_data <- aggregate(
  df_final[["earnings"]],
  by = list(age_ranges, df_final[["gender"]], df_final[["education"]]),
  FUN = mean
)

# Within each age range x gender group, express every education level's mean
# as a percentage of the group's summed means. The result stays grouped by
# Group.1 and Group.2.
earnings_data <- earnings_data %>%
  group_by(Group.1, Group.2) %>%
  mutate(share = x / sum(x) * 100)

# Lookup table with one row per distinct education level code, in the order
# the codes first appear in `earnings_data`.
test_data <- data.frame(
  Group.3 = unique(earnings_data[["Group.3"]])
)

# Attach one fill colour per education level (row order = level order).
test_data[["color"]] <- c(
  "#2c7bb6", "#abd9e9", "#ffffbf", "#fdae61", "#d7191c",
  "#FF0000", "#00FF00", "#0000FF", "#FFFF00"
)

# Show the lookup table so the level/colour pairing can be checked by eye.
print(test_data)

# Create plot with facets and text labels for education share.
#
# One facet per age range (Group.1), one stacked bar per gender (Group.2),
# one segment per education level (Group.3). Each segment is labelled with
# its `share` percentage.
#
# Notes on the choices below:
# * The palette comes from `test_data$color`. There is no `colors` object in
#   this script, so `values = colors` would pick up the function
#   grDevices::colors() and fail.
# * The education legend is driven by `fill` only. Mapping `color` inside
#   geom_text() is what drew the letter "a" in the legend keys and also made
#   the labels take the bar colours, so the text now uses the default colour
#   and is kept out of the legend with `show.legend = FALSE`.
# * position_stack(reverse = TRUE) puts the first level (mandatory school)
#   at the bottom of each column and the highest level at the top;
#   guide_legend(reverse = TRUE) lists the legend in the same visual order.
p <- ggplot(
  earnings_data,
  # `group` is given explicitly so bars and labels are stacked identically.
  aes(x = Group.2, y = x, fill = factor(Group.3), group = Group.3)
) +
  geom_col(position = position_stack(reverse = TRUE)) +
  geom_text(
    aes(label = paste(round(share, 1), "%")),
    # vjust = 0.5 centres each label inside its own segment.
    position = position_stack(vjust = 0.5, reverse = TRUE),
    size = 3.5,
    show.legend = FALSE
  ) +
  facet_wrap(~Group.1, nrow = 1) +
  scale_fill_manual(
    name = "Education Level",
    # Named by education code so each colour follows its level.
    values = setNames(test_data$color, test_data$Group.3),
    labels = c(
      "Mandatory School", "Pre-vocational education",
      "Short apprenticeship commercial shool", "Diploma middle school",
      "Apprenticeship", "Full-time vocational school", "Teacher diploma",
      "Higher professional education", "university degree"
    )
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  labs(
    title = "Mean Earnings by Age Range and Gender",
    x = "Age Range",
    y = "Mean Earnings"
  ) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

# Add footnote using ggtext. Caption and caption theme are added in a single
# step; assigning `plot` twice from `p` would discard the first addition.
footnote <- "Note: The first column represents males and the second column represents females."
plot <- p +
  labs(caption = footnote) +
  theme(plot.caption = ggtext::element_markdown())
plot

enter image description here

moodymudskipper
  • 46,417
  • 11
  • 121
  • 167
TFT
  • 129
  • 10
  • Your code is not reproducible, can you post the output of `dput(earnings_data)`? The `a` in the legend comes from the color aesthetic. Why do you have `geom_text` with a color and then `scale_color_manual`? As far as I understand the code you should keep `scale_fill_manual` only. – Rui Barradas Apr 14 '23 at 13:46
  • @RuiBarradas Thanks for your comment. I've added the data to my main posting. Sorry for that! – TFT Apr 14 '23 at 13:59
  • Rui is right. You can skip or use the colour argument in `geom_bar`, but it does not make a lot of sense in `geom_text`, especially because (from what I have to assume) it changes your text colour to the background colour of your plot. You probably want to replace your `interaction(Group.3)` with the Education level labels in the `data.frame`. Then you can use `scale_fill_manual` to assign the right colours. Also, you need contrast for the colour of the `geom_text`, otherwise it will be invisible as you have it now. Then remove the `color` legend and just use the `fill` legend. – dparthier Apr 14 '23 at 14:39

1 Answers1

1

Here is a way.
Instead of creating a data.frame with the colors, create a named vector of color codes, with names equal to the unique Group.3 values and a vector of legend text values. Coerce Group.3 to factor with the same levels as the colors' names reversing the levels from the highest to the lowest. Also reverse the legend text.

suppressPackageStartupMessages({
  library(dplyr)
  library(ggplot2)
})

# Education level codes present in the data, in ascending order.
fill_levels <- sort(unique(earnings_data[["Group.3"]]))

# Palette named by education code, so each colour is matched to its level by
# name rather than by position.
fill_colors <- setNames(
  c(
    "#2c7bb6", "#abd9e9", "#ffffbf", "#fdae61", "#d7191c",
    "#FF0000", "#00FF00", "#0000FF", "#FFFF00"
  ),
  fill_levels
)

# Legend text, listed from the lowest to the highest education level.
fill_labels <- c(
  "Mandatory School",
  "Pre-vocational education",
  "Short apprenticeship commercial shool",
  "Diploma middle school",
  "Apprenticeship",
  "Full-time vocational school",
  "Teacher diploma",
  "Higher professional education",
  "university degree"
)

# Caption shown underneath the plot.
footnote <- "Note: The first column represents males and the second column represents females."

# Stacked bars of mean earnings: one facet per age range (Group.1), one bar
# per gender (Group.2), one segment per education level (Group.3).
#
# Group.3 becomes a factor whose levels run from the highest to the lowest
# education level. With the default stacking order the first factor level
# ends up on top, so mandatory school sits at the bottom of every column and
# the legend lists the levels in the same top-to-bottom order.
earnings_data %>%
  mutate(Group.3 = factor(Group.3, levels = rev(fill_levels))) %>%
  ggplot(aes(Group.2, y = x, fill = Group.3)) +
  geom_col(position = "stack") +
  facet_wrap(~ Group.1, nrow = 1) +
  scale_fill_manual(
    # The fill encodes the education level, so the legend is titled
    # accordingly (gender is the x position, see the caption).
    name = "Education Level",
    # Colours are matched to the factor levels by name ...
    values = fill_colors,
    # ... whereas labels are positional, hence reversed like the levels.
    labels = rev(fill_labels)
  ) +
  labs(
    title = "Mean Earnings by Age Range and Gender",
    x = "Age Range",
    y = "Mean Earnings",
    caption = footnote
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    plot.caption = ggtext::element_markdown()
  )

Created on 2023-04-14 with reprex v2.0.2

Rui Barradas
  • 70,273
  • 8
  • 34
  • 66