0

I am trying to add the total count (Sum in the dataframe, supplied in the code below) above each bar in a paired, stacked bar graph in ggplot2. I've attached an image of the graph, which was produced in RStudio. For instance, above the "Female" and "Male" bars in "Overall", there should be 1892 and 13334, respectively.

Also, if you look at the image, the label "2.7%" is too big for the bar, and I'd like to just get that one removed. I've tried a lot of things, but nothing is working. Below are both the image and the code to fully reproduce what I have.

R-plot

# ---------------- Building the data frame -----------------------
# Row layout: 2 genders x 4 periods x 4 productivity bands = 32 rows,
# ordered by gender, then period, then band.

# The four bands cycle once per bar (8 bars in total).
Productivity <- rep(c("<1", "1-5", "6-10", ">10"), times = 8)

# Each period covers the 4 bands of one bar; the 4 periods repeat per gender.
Period <- rep(
  rep(
    c("Overall", "Starting at 1980", "Starting at 1990", "Starting at 2000"),
    each = 4
  ),
  times = 2
)

# First 16 rows are female, the remaining 16 are male.
Gender <- rep(c("Female", "Male"), each = 16)

# Raw counts, one line per bar (bands in the order <1, 1-5, 6-10, >10).
Frequency <- c(
  1316,  261,  156,  159,  # Female, Overall
   152,   17,    5,   14,  # Female, Starting at 1980
   324,   52,   24,   65,  # Female, Starting at 1990
   829,  189,  127,   80,  # Female, Starting at 2000
  7663, 2041, 1412, 2218,  # Male, Overall
   962,  161,  107,  411,  # Male, Starting at 1980
  2101,  487,  303,  925,  # Male, Starting at 1990
  4332, 1345,  973,  748   # Male, Starting at 2000
)

# Share of each band within its bar, in percent (same row layout as above).
Percentage <- c(
  69.6, 13.8,  8.2,  8.4,
  80.9,  9,    2.7,  7.4,
  69.7, 11.2,  5.2, 14,
  67.7, 15.4, 10.4,  6.5,
  57.5, 15.3, 10.6, 16.6,
  58.6,  9.8,  6.5, 25,
  55.1, 12.8,  7.9, 24.2,
  58.6, 18.2, 13.2, 10.1
)

# Bar totals, repeated on each of the 4 rows that make up a bar.
Sum <- rep(c(1892, 188, 465, 1225, 13334, 1641, 3816, 7398), each = 4)

# Text printed inside each bar segment: the percentage followed by "%".
Label <- paste0(Percentage, "%")

d <- data.frame(Productivity, Period, Gender, Frequency, Percentage, Sum, Label)

#--------------Code to produce ggplot graph------------------------------

# ggplot2 provides every plotting function used below. Without this call the
# script is not reproducible in a fresh session: it stops at ggplot() with
# "could not find function".
library(ggplot2)

# Reordering labels: an explicit factor fixes the order of the productivity
# bands in the stack and in the legend.
o <- c("<1", "1-5", "6-10", ">10")
d$ReOrder <- factor(d$Productivity, levels = o)

# Producing plot: one 100%-stacked bar per gender, one facet per period.
p <- ggplot(
  data = d,
  aes(x = Gender, y = Frequency, fill = ReOrder, label = Label)
) +
  # position_fill() rescales every bar to a total height of 1, so the y axis
  # shows proportions; reverse = TRUE flips the stacking order.
  geom_bar(
    stat = "identity",
    color = "black",
    position = position_fill(reverse = TRUE)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  # Same position as the bars, with vjust = 0.5 so each percentage label sits
  # in the middle of its own segment.
  geom_text(size = 4, position = position_fill(vjust = 0.5, reverse = TRUE)) +
  facet_grid(~Period) +
  labs(
    title = "Research productivity",
    x = "",
    y = "Percent",
    fill = "Research longevity (years)"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(size = 25, margin = margin(t = 20, b = 20))) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  # Fixed aspect ratio: one unit on the y axis is drawn 6 times as long as
  # one unit on the x axis.
  coord_fixed(ratio = 6) +
  # No padding around the data range, so the bars start on the x axis.
  scale_y_continuous(expand = c(0, 0))

p

1 Answers1

1

You could just replace "2.7%" with "" (an empty string) in your data to fix the problem of the label that is too large for its bar. To add the totals above each bar, you could add geom_text(aes(x = Gender, y = 1.05, label = as.character(Sum)), vjust = 1) to your ggplot. So it'd be something like this:

library(ggplot2)
library(scales)

# ---------------- Building the data frame -----------------------
# Row layout: 2 genders x 4 periods x 4 productivity bands = 32 rows,
# ordered by gender, then period, then band.

# The four bands cycle once per bar (8 bars in total).
Productivity <- rep(c("<1", "1-5", "6-10", ">10"), times = 8)

# Each period covers the 4 bands of one bar; the 4 periods repeat per gender.
Period <- rep(
  rep(
    c("Overall", "Starting at 1980", "Starting at 1990", "Starting at 2000"),
    each = 4
  ),
  times = 2
)

# First 16 rows are female, the remaining 16 are male.
Gender <- rep(c("Female", "Male"), each = 16)

# Raw counts, one line per bar (bands in the order <1, 1-5, 6-10, >10).
Frequency <- c(
  1316,  261,  156,  159,  # Female, Overall
   152,   17,    5,   14,  # Female, Starting at 1980
   324,   52,   24,   65,  # Female, Starting at 1990
   829,  189,  127,   80,  # Female, Starting at 2000
  7663, 2041, 1412, 2218,  # Male, Overall
   962,  161,  107,  411,  # Male, Starting at 1980
  2101,  487,  303,  925,  # Male, Starting at 1990
  4332, 1345,  973,  748   # Male, Starting at 2000
)

# Share of each band within its bar, in percent (same row layout as above).
Percentage <- c(
  69.6, 13.8,  8.2,  8.4,
  80.9,  9,    2.7,  7.4,
  69.7, 11.2,  5.2, 14,
  67.7, 15.4, 10.4,  6.5,
  57.5, 15.3, 10.6, 16.6,
  58.6,  9.8,  6.5, 25,
  55.1, 12.8,  7.9, 24.2,
  58.6, 18.2, 13.2, 10.1
)

# Bar totals, repeated on each of the 4 rows that make up a bar.
Sum <- rep(c(1892, 188, 465, 1225, 13334, 1641, 3816, 7398), each = 4)

# Text printed inside each bar segment: the percentage followed by "%".
Label <- paste0(Percentage, "%")

# The 2.7% segment (Female, Starting at 1980, 6-10) is too thin to hold its
# label, so that single label is blanked out.
too_thin <- Gender == "Female" &
  Period == "Starting at 1980" &
  Productivity == "6-10"
Label[too_thin] <- ""

d <- data.frame(Productivity, Period, Gender, Frequency, Percentage, Sum, Label)

#--------------Code to produce ggplot graph------------------------------

# Reordering labels: an explicit factor fixes the order of the productivity
# bands in the stack and in the legend.
o <- c("<1", "1-5", "6-10", ">10")
d$ReOrder <- factor(d$Productivity, levels = o)

# One row per bar (Period x Gender) holding that bar's total. ReOrder must
# NOT be among the selected columns: it differs on every row of a bar, so
# unique() would keep all 32 rows and each total would be drawn four times
# on top of itself.
dat <- unique(d[, c("Period", "Gender", "Sum")])

# Producing plot: one 100%-stacked bar per gender, one facet per period.
p <- ggplot(
  data = d,
  aes(x = Gender, y = Frequency, fill = ReOrder, label = Label)
) +
  # position_fill() rescales every bar to a total height of 1, so the y axis
  # shows proportions; reverse = TRUE flips the stacking order.
  geom_bar(
    stat = "identity",
    color = "black",
    position = position_fill(reverse = TRUE)
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  # Same position as the bars, with vjust = 0.5 so each percentage label sits
  # in the middle of its own segment.
  geom_text(size = 4, position = position_fill(vjust = 0.5, reverse = TRUE)) +
  facet_grid(~Period) +
  labs(
    title = "Research productivity",
    x = "",
    y = "Percent",
    fill = "Research longevity (years)"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(size = 25, margin = margin(t = 20, b = 20))) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  # Fixed aspect ratio: one unit on the y axis is drawn 6 times as long as
  # one unit on the x axis.
  coord_fixed(ratio = 6) +
  # No padding around the data range; percent() turns 0.25 into "25%".
  scale_y_continuous(expand = c(0, 0), labels = percent) +
  # Bar totals just above the full bar (which ends at y = 1). The layer uses
  # the de-duplicated `dat`, so each total is drawn exactly once, and
  # inherit.aes = FALSE because `dat` lacks the Frequency/ReOrder/Label
  # columns that the plot-level aesthetics refer to.
  geom_text(
    data = dat,
    aes(x = Gender, y = 1.05, label = as.character(Sum)),
    vjust = 1,
    inherit.aes = FALSE
  )

p

enter image description here

Chris
  • 1,575
  • 13
  • 20
  • This is exactly what I wanted. Thank you so much. One more thing I noticed - is there an easy way to convert the y-axis so it is (0, 25, 50, 75, 100) instead of (0, 0.25, 0.5, 0.75, 1)? Thanks again. – somesurgeon Apr 19 '18 at 21:40