23

I'm using ggplot to create sevral boxplots from the following data:

df<-(structure(list(Effect2 = c("A2", "A2", "A2", "A2", "A2", "A2", 
"A2", "A2", "A2", "A2", "A2", "A2", "A2", "A2", "A1", "A1", "A1", 
"A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1", 
"A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", 
"A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", 
"A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", 
"A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", "A3", 
"A3", "A3", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", 
"B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", 
"B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", 
"B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", 
"B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", 
"B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", 
"B1", "B1", "B1", "B1", "B1", "B1", "B1", "B1", "C3", "C3", "C3", 
"C3", "C3", "C3", "C3", "C3", "C3", "C3", "C3", "C3", "C3", "C3", 
"C3", "C3", "C3", "C3", "C3", "C3", "C3", "C3", "C2", "C2", "C2", 
"C2", "C2", "C2", "C2", "C4", "C4", "C4", "C4", "C4", "C4", "C4", 
"C4", "C4", "C4", "C4", "C1", "C1", "C1", "C1", "C1", "C1", "C1", 
"C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", 
"C1", "C1", "C1", "C1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", 
"D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", 
"D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", 
"D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", 
"D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", "D1", 
"E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", 
"E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", "E1", 
"E1", "E1", "E1", "E1", "F1", "F1", "F1", "F1", "F1", "F1", "F1", 
"F1", "F1", "F1", "F1", "F1", "F1", "F1", "F1", "F1", "F1", "F1", 
"F1", "F1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", 
"G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G1", "G2", "H1", 
"H1", "H1", "H1", "H1", "H1", "H1", "H1", "H1", "H1", "H1", "H1", 
"H1", "H1", "H1", "H1", "H1", "H1", "H1", "H1", "H1", "H1", "H1", 
"H1", "H1", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", 
"H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", 
"H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", 
"H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", "H2", 
"H2", "H2", "H2", "H2", "H3", "H3", "H3", "H3", "H3", "H3", "H3", 
"H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", 
"H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", 
"H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", 
"H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", 
"H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", 
"H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", "H3", 
"H3", "H3", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", 
"H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", 
"H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", 
"H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4", "H4"
), OddsRatioEst = c(0.07, 17.79, 3.16, 4.57, 5.34, 0.09, 0.15, 
0.1, 0.41, 2.16, 2.17, 0.2, 4.32, 5.94, 0.09, 3.28, 10.37, 8.49, 
3.15, 0.15, 0.15, 0.34, 13.78, 0.08, 0.04, 6.01, 0.08, 0.07, 
3.63, 7.92, 2.71, 11.41, 12.52, 80.85, 4.72, 3.4, 6.25, 12.05, 
8.7, 2.28, 3.63, 2.83, 2.36, 3.81, 12.73, 7.77, 3.15, 3.24, 51.21, 
6.99, 7.05, 3.39, 1.93, 4.6, 4.55, 16.3, 41.46, 1.99, 2.07, 2.27, 
9.94, 8.35, 3.27, 4.41, 5, 5.35, 11.47, 4.05, 3.06, 3.05, 8.45, 
2.68, 2.45, 4.41, 25.53, 3.74, 18.2, 2.27, 4.19, 2.69, 13.24, 
8.31, 12.96, 8.46, 11.22, 5.28, 18.88, 5.58, 5.96, 3.98, 8.46, 
2.23, 102.55, 6.48, 2.64, 3.78, 4.25, 3.64, 4.21, 5.19, 2.43, 
6.79, 2.68, 10.31, 7.44, 11.89, 5.53, 16.65, 5.99, 9.37, 19.29, 
5.12, 2.42, 2.98, 11.38, 14.45, 3.72, 4.38, 19.8, 6.29, 6.74, 
9.77, 11.78, 22.23, 3.61, 4.77, 12.05, 7.13, 35.14, 84.47, 8.99, 
10.16, 8.79, 11.21, 9.27, 130.54, 5.09, 22.14, 34.78, 11.93, 
46.06, 4.84, 8.79, 36.47, 15.92, 20.78, 0.07, 0.18, 0.17, 0.36, 
0.23, 0.57, 0.17, 0.41, 0.15, 0.2, 0.58, 0.62, 0.08, 0.53, 2.68, 
0.14, 0.37, 0.19, 0.25, 0.33, 1.68, 0.13, 7.93, 7.77, 108.84, 
6.82, 7.12, 14.64, 2016.8, 4.94, 2.86, 3, 74.58, 6.96, 11.82, 
3.43, 3.02, 17.94, 40.41, 11.23, 3.32, 0.44, 0.51, 0.43, 0.02, 
4.42, 4.74, 2.65, 1.77, 3.58, 0.34, 2.49, 1.68, 4.58, 2.62, 13.75, 
0.48, 1.59, 0.01, 0.13, 0.1, 17.42, 11.34, 17.29, 3.32, 6.82, 
7.06, 4.96, 3.04, 10.39, 0.29, 2.5, 3.39, 7.27, 19.25, 6.54, 
14.29, 101.56, 11.86, 24.13, 12.77, 6.21, 9.35, 5.09, 8.72, 9.93, 
2.77, 16.64, 6.64, 4.51, 11.98, 6.99, 2.69, 2.93, 4.54, 3.35, 
2.48, 10.31, 1.69, 160.8, 7.69, 2.73, 37.65, 220.84, 14.02, 4.18, 
158.82, 25.92, 10.85, 7.29, 24.36, 7.16, 64.93, 3.25, 2.95, 1.72, 
1.71, 3.66, 2.34, 3.49, 0.24, 3.67, 2.94, 0.11, 1.52, 2.09, 1.61, 
1.55, 1.59, 2.5, 0.19, 4.1, 2.65, 2.59, 1.29, 11.68, 4.81, 0.09, 
3.14, 2.08, 0.01, 0.11, 0.27, 8.01, 5.59, 0.46, 0.33, 4.32, 0.47, 
2.27, 0.02, 0.11, 0.23, 4.13, 1.98, 12.67, 0.24, 7.55, 5.79, 
0.01, 5.85, 0.02, 19.41, 6.51, 0.51, 0.04, 3.26, 0.12, 6.34, 
0.25, 0.07, 0.06, 13.71, 1.85, 277.25, 111.76, 548.23, 30.23, 
4.63, 3.04, 5.23, 5.37, 0.16, 4.53, 0.09, 0.13, 2.05, 2.04, 2.64, 
11.35, 2.47, 29.4, 0.26, 2.1, 1.83, 0.85, 7.33, 4.84, 0.1, 22.84, 
31.24, 18.17, 4.08, 5.32, 11.99, 6.21, 0.26, 15.2, 16.84, 2.55, 
12.22, 3.2, 14.25, 0.02, 2.62, 0.38, 4.64, 23.27, 2.47, 6.57, 
2.41, 8.64, 2.4, 7.06, 4.8, 167.14, 3.05, 27.73, 25.86, 5.84, 
4.68, 5.1, 11.55, 10.55, 44.11, 21.53, 7.95, 6.06, 9.41, 26.45, 
24.42, 6.95, 79.77, 120.19, 67.39, 5.79, 23.37, 234.51, 41.03, 
10.67, 11.29, 13.07, 56.72, 86.03, 723.44, 40.78, 238.65, 12.76, 
765.98, 42.38, 13.33, 30.93, 12.92, 12.8, 15.5, 104.96, 15.69, 
111.41, 47.93, 17.37, 94.1, 32.88, 58.79, 31.44, 7.7, 81.19, 
84.48, 411.86, 69.94, 17.27, 21.52, 35.4, 15.74, 5.52, 15.03, 
31, 24.32, 29.6, 23.08, 251.96, 8257.41, 43.17, 237.92, 9.05, 
61.38, 5.65, 15.66, 7.87, 302850763, 13.21, 81.4, 31.63, 69.81, 
10.89, 192.84, 168.78, 389.25, 7.08, 18.41, 53.07, 5.82, 128.07, 
50.1, 142.92, 26.9, 629.3, 28.91, 1006.21, 2349.3, 320.77, 136.88, 
115.99, 15, 4884.28, 9.97, 5.91, 6.08, 5.11, 7.39, 7.68, 4.77, 
5.42, 3.49, 4.16, 11.32, 0, 4.01, 4.91, 9.08, 18.33, 10.86, 12.95, 
10.64, 6.03, 2.71, 4.93, 7.64, 345.75, 24, 3.92, 4.48, 9.36, 
1.22, 4, 30.22, 31.37, 56.32, 25.68, 5.42, 66, 15.03, 9.75, 27.1, 
9.36, 74.58, 21.51)), .Names = c("Effect2", "OddsRatioEst"), class = "data.frame", row.names = c(NA, 
-512L))

There are several extreme outliers in the data which stretch the y-axis so that the graph is completely useless : enter image description here

I changed the y-limits with the following:

ggplot(df, aes(x=Effect2, y=OddsRatioEst)) + geom_boxplot(outlier.colour=NA) +
  scale_y_continuous(limits=c(0,100), breaks=seq(0,100,10), expand = c(0, 0))

which appears better, Y max set to 100

but this produces the following warning:

Warning message:
Removed 37 rows containing non-finite values (stat_boxplot)

and I realise that in fact the boxplots are rescaled to however I set my limits - e.g. the values of Q1, Q2 and Q3 are reduced. Y max set to 50, note how Q3 of last but one box to the right is now < 30 compared to the previous example where it is > 40 How can I get a true representation of boxplots without including large outliers?

user2568648
  • 3,001
  • 8
  • 35
  • 52

1 Answers1

47

Use coord_cartesian instead of scale_y_continuous:

ggplot(df, aes(x=Effect2, y=OddsRatioEst)) +
    geom_boxplot(outlier.colour=NA) + 
    coord_cartesian(ylim = c(0, 100))

From the coord_cartesian documentation:

Setting limits on the coordinate system will zoom the plot (like you're looking at it with a magnifying glass), and will not change the underlying data like setting limits on a scale will.

The output is as follows. As you can see, not deleting those outliers changes the picture somewhat, so you may want to change the y limit as well. enter image description here

Joe
  • 3,831
  • 4
  • 28
  • 44
  • Thanks. Is there a way to set breaks using this method as the arguments `breaks`, `minor_breaks` and `expand` are all unused in `coord_cartesian`? – user2568648 Jan 28 '15 at 19:52
  • 2
    I believe you can use `scale_y_continuous` alongside `coord_cartesian` to modify eg axis breaks (just don't supply it with the `ylim` argument). – Joe Jan 28 '15 at 20:00
  • 1
    you can also use log scale: scale_y_log10() + coord_trans(y="log10") + – Alec Istomin May 23 '17 at 21:07
  • 2
    This is so important @Joe ... As a matter of fact, I was used to use `scale_y_continuous` in order to do that. But in the case of boxplots, if you use the latter it will modify your boxplot as if you were filtering your data to those limits. Hence its highly important to use `coord_cartesian` instead of `scale_y_continuous` to set the limits. @user2568648 if you want to set scales, you can use `scale_y_continuous(breaks = pretty( c(0,100) , n = 5) )`, where n is the amount of ticks you want. Note that I didnt use limits inside `scale_y_continuous`. – Jorge V Aug 29 '21 at 03:56