Issue:
I have been struggling with rescaling the loadings (arrows) length in a ggplot2/ggbiplot in a PCA biplot
. I have researched extensively through StackOverflow, on the web, and I've asked the R Studio Community to resolve my issue, although, the only information that I can find is either through different biplot functions or a reference to other entirely different packages for PCA (MASS, factoextra, FactoMineR, PCAtools, and many others), none of which address the question that I would like to answer.
What am I missing? I would really like to continue using ggbiplot/ggplot2
to get a better understanding of both packages and I prefer the visual representation of the output plot in diagram 1
(see R code 1 below)
So far, my most successful attempt to rescale the loadings (arrows) was using the function geom_segment()
and geom_label()
(see R-code 2 and diagram 2
). The problem is the new arrows have overlayed themselves on top of the original arrows from diagram 1 (there are now short and long loadings in diagram 2: 16 arrows for 8 parameters), and the labels with grey backgrounds are now in the foreground and have not adjusted to the right position at the end of arrowheads.
Desired plot
Ideally, I would like the biplot to resemble diagram 3
(see below) where the loadings (arrows) are longer
and just slightly thicker
(like diagram 2) and the labels with grey backgrounds are not overlapping each other (like diagram 3) and sit neatly at the end of the loading arrowheads. I used the argument varname.adjust()
for the labels in diagram 1 but I'm not sure how to apply this to the functions geom_segment()
and geom_label() in diagram 2. See the data below
If anyone can help, I would be deeply appreciative.
Many thanks in advance
R-code 1
install.packages("remotes")
remotes::install_github("vqv/ggbiplot")
install_github("vqv/ggbiplot")
#install.packages("devtools")
library(devtools)
library(ggbiplot)
library(remotes)
#You can do a PCA to visualize the difference between the groups using the standardised box cox data
PCA=prcomp(Whistle_Parameters[2:18], center = TRUE, scale=TRUE, retx = T)
#PCA biplot
PCA_plot<-ggbiplot(PCA, ellipse=TRUE, circle=TRUE, varname.adjust = 2.5, groups=Box_Cox_Stan_Dataframe$Country, var.scale = 1) +
ggtitle("PCA of Acoustic Parameters") +
theme(plot.title = element_text(hjust = 0.5)) +
theme_minimal() +
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank()) +
theme(axis.line.x = element_line(color="black", size = 0.8),
axis.line.y = element_line(color="black", size = 0.8))
#Place the arrows in the forefront of the points
PCA_plot$layers <- c(PCA_plot$layers, PCA_plot$layers[[2]])
#The options for styling the plot within the function itself are somewhat limited, but since it produces a
#ggplot object, we can re-specify the necessary layers. The following code should work on any object
#output from ggbiplot. First we find the geom segment and geom text layers:
seg <- which(sapply(PCA_plot$layers, function(x) class(x$geom)[1] == 'GeomSegment'))
txt <- which(sapply(PCA_plot$layers, function(x) class(x$geom)[1] == 'GeomText'))
#We can change the colour and width of the segments by doing
PCA_plot$layers[[seg[1]]]$aes_params$colour <- 'black'
PCA_plot$layers[[seg[2]]]$aes_params$colour <- 'black'
#Labels
# Extract loadings of the variables
PCAloadings <- data.frame(Variables = rownames(PCA$rotation), PCA$rotation)
#To change the labels to have a gray background, we need to overwrite the geom_text layer with a geom_label layer:
PCA_plot$layers[[txt]] <- geom_label(aes(x = xvar, y = yvar, label = PCAloadings$Variables,
angle = 0.45, hjust = 0.5, fontface = "bold"),
label.size = NA,
data = PCA_plot$layers[[txt]]$data,
fill = '#dddddd80')
PCA_plot
R-code 2
#Labels
# Extract loadings of the variables
PCAloadings <- data.frame(Variables = rownames(PCA$rotation), PCA$rotation)
#PCA biplots
PCA_plot1<-ggbiplot(PCA, ellipse=TRUE, circle=TRUE, varname.adjust = 2.5, groups=Box_Cox_Stan_Dataframe$Country, var.scale = 1) +
ggtitle("PCA of Acoustic Parameters") +
theme(plot.title = element_text(hjust = 0.5)) +
theme_minimal() +
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank()) +
theme(axis.line.x = element_line(color="black", size = 0.8),
axis.line.y = element_line(color="black", size = 0.8)) +
geom_segment(data = PCAloadings, aes(x = 0, y = 0, xend = (PC1*4.6),
yend = (PC2*4.6)), arrow = arrow(length = unit(1/2, "picas")),
color = "black")
#Place the arrows in the forefront of the points
PCA_plot1$layers <- c(PCA_plot1$layers, PCA_plot1$layers[[2]])
#The options for styling the plot within the function itself are somewhat limited, but since it produces a
#ggplot object, we can re-specify the necessary layers. The following code should work on any object
#output from ggbiplot. First we find the geom segment and geom text layers:
seg <- which(sapply(PCA_plot1$layers, function(x) class(x$geom)[1] == 'GeomSegment'))
txt <- which(sapply(PCA_plot1$layers, function(x) class(x$geom)[1] == 'GeomText'))
#We can change the colour and width of the segments by doing
PCA_plot1$layers[[seg[1]]]$aes_params$colour <- 'black'
PCA_plot1$layers[[seg[2]]]$aes_params$colour <- 'black'
#To change the labels to have a gray background, we need to overwrite the geom_text layer with a geom_label layer:
PCA_plot1$layers[[txt]] <- geom_label(aes(x = xvar, y = yvar, label = PCAloadings$Variables,
angle = 0.45, hjust = 0.5, fontface = "bold"),
label.size = NA,
data = PCA_plot1$layers[[txt]]$data,
fill = '#dddddd80')
PCA_plot1
Diagram 1
Diagram 2 - These arrows overlay the original arrows in diagram 1 and are positioned incorrectly
Data
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("France", "Germany"
), class = "factor"), Low.Freq = c(1229.098358827, 759.408910773596,
627.156561121131, 857.914227798394, 816.020512657709, 726.252107003186,
603.388640229802, 1120.43591201848, 521.916239230762, 66.4277073927702,
842.827028732445, 1548.00685289626, 743.586603639966, 982.298813187027,
938.898554484786, 908.067281510105, 1650.51840217509, 435.837013213028,
905.64518713548, 621.693057238002, 408.874626715846, 763.284854056395,
1163.36397892984, 1267.90558781017, 1561.05494859439, 269.110242829792,
1628.20258277437, 1381.52801863709, 1259.64885050619, 582.429604337893
), High.Freq = c(270.037998321385, 673.100410969792, 1354.51641087434,
582.818682820139, 1949.42791374982, 533.072062804075, 1462.73353623344,
1475.85981044777, 1672.72713391206, 1360.85064740235, 1027.62671423916,
1637.72929840934, 555.708652550379, 683.537132648398, 1714.01010661954,
267.117743854174, 738.883902818488, 842.919932827166, 124.511854388999,
1940.70836004547, 991.37814311059, 1959.73951887933, 435.882938574683,
223.944759894009, 827.050231552967, 1929.9835959516, 731.983627515309,
934.515637669084, 1381.80407878684, 1735.12129509753), Start.Freq = c(209.223178720873,
1243.93824398519, 714.942866646311, 1230.88587487336, 1133.38920481274,
655.640254812419, 176.783487591076, 566.793710992312, 1259.4101411541,
135.19626803044, 1188.65745695622, 1055.06564740433, 145.269654935287,
994.102001940972, 611.97046714505, 1239.25416627405, 273.254811174704,
1187.0983873612, 532.369927415851, 730.144132713145, 306.959091815357,
761.432150933258, 833.35964575595, 633.492439842001, 1011.91529244509,
1002.6837164403, 874.223664731894, 1039.77329580107, 571.716041690428,
358.734914494325), End.Freq = c(3100.50977989246, 2865.99128764993,
3749.07057886566, 1662.75251781181, 3469.5453928947, 1223.29004528624,
1571.69393815622, 3877.90064918956, 3467.9046701139, 2812.46639335828,
3344.48917919081, 823.479192696172, 3587.72640978872, 1943.42597579601,
3726.46938122543, 904.270327650973, 2199.36865174236, 1608.75061469279,
1233.86988042306, 3573.54022639883, 3840.13648049746, 2696.92512488242,
2152.58952962537, 2225.74334558365, 2836.8576276391, 3909.86535579565,
2642.3514330105, 3830.39875611625, 2532.59589574087, 2047.68204963624
), Peak.Freq = c(615.103200058515, 781.386010343022, 1254.22250479111,
1042.32081012698, 1785.4136370848, 609.196990586287, 936.529532621528,
628.617072934145, 1138.00887772997, 965.752651960148, 331.381776986669,
831.243197072226, 1396.09323352817, 796.57855885715, 1434.02692184993,
581.91826512844, 1482.84787412806, 712.229765737013, 711.849861782499,
379.432018940052, 1495.87429192735, 1375.38825516007, 1568.51147252198,
39.6849748542959, 254.973241980045, 526.048000326837, 1599.41223732841,
1723.40465012645, 461.157566614546, 847.403323972557), Center.Freq = c(-0.00396318509300687,
0.377462792184857, 1.66350671788962, -0.152573382048654, 0.438259482923988,
1.62149800844459, 0.501892326424285, -0.166579179714419, 1.06081611813746,
1.39199162769052, 1.52164843383928, -0.389958351497529, 0.00261034688899059,
0.0726410215179534, 1.00473421813784, 1.27072495569536, 1.41569796343226,
0.737375815997266, 0.412628778604207, 0.51099123600198, 1.65512836540775,
-1.12408230668747, 0.438260531725931, -1.11347230908714, 1.09021071848368,
1.26465014876586, -0.663254496003035, 0.64384027394782, 1.29816899903361,
0.0302328674903059), Delta.Freq = c(2374.48934930825, 2535.28648042237,
930.363518659463, 2372.94461226817, 2578.50041236941, 1652.93682378145,
2412.64071270543, 1643.35808756239, 1597.6988634255, 2347.87731769764,
1545.35983248752, 417.894712991398, 676.404759114593, 2717.74464723351,
2750.52013318133, 1387.50061490775, 1088.18301844773, 208.885548316239,
982.856603814324, 1304.55461743298, 2064.83914948351, 1454.17493801179,
1975.72909682146, 1340.40119652782, 1358.81720189322, 398.974468430338,
1807.83210129773, 197.995771350184, 1458.91300578134, 2459.54002342707
), Delta.Time = c(1.52332103330495, -0.729369599299347, 0.5446606158259,
-0.0806278952890181, -1.03355982391612, 0.381391555011319, -0.710006011318096,
0.184876103317229, -0.0939796220798944, 0.878826387745255, 0.889598364118577,
0.929698941247702, 0.734996499853458, -0.43364546563554, -0.176575903721404,
0.556057576098353, -0.31543237357059, 1.31950129257089, 1.08676447814548,
-1.08756351145615, -0.163851619861579, -0.945982375537661, 0.473134073749239,
-0.231569591521918, -0.565159893817776, 1.14721196081124, -1.14555651287826,
1.60486934195338, -1.00704726744845, 1.14020903183312)), row.names = c(NA,
30L), class = "data.frame")