0

Issue

I have used the function ggbiplot() to produce a PCA biplot for multivariate data (see diagram 1 - below)

I found this Stackoverflow post for specifying the colour/transparency/position of line segments in ggbiplot.

Therefore, I am trying to use the sample code in this post to change the colour of the arrows in the biplot and the text of the whistle parameter names to black (see my ideal output in diagram 2 - below).

When I run the geom_segment() function, I keep getting this error message (see below) and I cannot quite figure out how to run the functions ggbiplot2() and geom_segment() (see below) without getting error messages to produce a biplot with the desired output.

Error in layer(data = data, mapping = mapping, stat = stat, geom = GeomSegment,  : 
  object 'arrow.color' not found

Specifically, I would like the arrows to be black, boldface and longer and the text to be boldface and black with the whistle parameters text to be inside labels with a grey background (exactly the same as diagram 2 - see below).

Unfortunately, I cannot share my data, so I have produced a Dummy Data frame, so the resulting PCA biplot will probably look very different to diagram 1 (see below).

If anyone can help, I would be deeply appreciative.

Dummy Data

#Dummy data
#Create a cluster column with dummy data (clusters = 3)
f1 <- gl(n = 2, k=167.5); f1

#Produce a data frame for the dummy level data
f2<-as.data.frame(f1)

#Rename the column f2
colnames(f2)<-"Country"

#How many rows
nrow(f2)

#Rename the levels of the dependent variable 'Country' as classifiers
#prefer the inputs to be factors
levels(f2$Country) <- c("France", "Germany")

#Create random numbers
Start.Freq<-runif(335, min=1.195110e+02, max=23306.000000)
End.Freq<-runif(335, min=3.750000e+02, max=65310.000000)
Delta.Time<-runif(335, min=2.192504e-02, max=3.155762)
Low.Freq<-runif(335, min=6.592500e+02, max=20491.803000)
High.Freq<-runif(335, min=2.051000e+03, max=36388.450000)
Peak.Freq<-runif(335, min=7.324220+02, max=35595.703000)
Center.Freq<-runif(335, min=2.190000e-02, max=3.155800)
Delta.Freq<-runif(335, min=1.171875+03, max=30761.719000)
Delta.Time<-runif(335, min=2.192504e-02, max=3.155762)

#Bind the columns together
Bind<-cbind(f2, Start.Freq, End.Freq,  Low.Freq, High.Freq, Peak.Freq,  Center.Freq, Delta.Freq, Delta.Time)

#Rename the columns 
colnames(Bind)<-c('Country', 'Low.Freq', 'High.Freq', 'Start.Freq', 'End.Freq', 'Peak.Freq', 'Center.Freq', 
                  'Delta.Freq', 'Delta.Time')

#Produce a dataframe
Whistle_Parameters<-as.data.frame(Bind)
Whistle_Parameters

Data Transformation

#Box Cox

#Create a dataframe format for the Yeo transform
Box<-as.data.frame(Whistle_Parameters)
Box

#Check the structure of the dataframe 'Box'
str(Box)

#Use the function powerTransform(), specifying family = "bcPower", to obtain an optimal Box Cox transformation
transform_Low.Freq.box=car::powerTransform(Box$Low.Freq, family= "bcPower")
transform_Low.Freq.box

transform_High.Freq.box=car::powerTransform(Box$High.Freq, family= "bcPower")
transform_High.Freq.box

transform_Start.Freq.box=car::powerTransform(Box$Start.Freq, family= "bcPower")
transform_Start.Freq.box

transform_End.Freq.box=car::powerTransform(Box$End.Freq, family= "bcPower")
transform_End.Freq.box

transform_Peak.Freq.box=car::powerTransform(Box$Peak.Freq, family= "bcPower")
transform_Peak.Freq.box

transform_Center.Freq.box=car::powerTransform(Box$Center.Freq, family= "bcPower")
transform_Center.Freq.box

transform_Delta.Freq.box=car::powerTransform(Box$Delta.Freq, family= "bcPower")
transform_Delta.Freq.box

transform_Delta.Time.box=car::powerTransform(Box$Delta.Time, family= "bcPower")
transform_Delta.Time.box

#Produce a dataframe object
Box_Cox_Transformation<-as.data.frame(stand_box)
Box_Cox_Transformation

PCA

#install.packages("devtools")
library(devtools)
install_github("vqv/ggbiplot")
library(ggbiplot)

#You can do a PCA to visualize the difference between the groups using the standardised box cox data
PCA=prcomp(Box_Cox_Transformation[2:8], center = TRUE, scale=TRUE, retx = T)
PCA

#Plot the names of the principal components
names(PCA)

#Print the attributes of the PCA
attributes(PCA)

#Summarise the importance of the components showing the standard deviation, proportion of variance, 
#and the cumulative proportion
summary(PCA)

#In the arglist in the function, add "name = expression" terms for color, line type and transparency ("alpha") for the arrows.
ggbiplot2 <- function (pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, 
                       obs.scale = 1 - scale, var.scale = scale, groups = NULL, 
                       ellipse = FALSE, ellipse.prob = 0.68, labels = NULL, labels.size = 3, 
                       alpha = 1, var.axes = TRUE, circle = FALSE, circle.prob = 0.69, 
                       varname.size = 3, varname.adjust = 1.5, varname.abbrev = FALSE, 
                       linetype = "solid",
                       alpha_arrow = c(0.2, 0.5, 1, 1),
                       arrow.color = c(muted("red"), "black", "red")) 
  
ggbiplot2


#Then search for the geom_segment part, and add arguments for color, linetype and alpha:

 g <- ggbiplot2 + geom_segment(data = df.v, aes(x = 0, y = 0, xend = xvar, yend = yvar),
                        arrow = arrow(length = unit(1/2, "picas")),
                        arrow.color = arrow.color, linetype = linetype, alpha = alpha_arrow)

Assign the edited function to a new name, e.g. ggbiplot2. Try it, where you set values other than the default for the arrows:

#Build a classification model for the PCA components
PCA_plot<-ggbiplot2(PCA, ellipse=TRUE, circle=TRUE, varname.adjust = 1.6, groups=Box_Cox_Transformation$Country,
                    arrow.color = "black", linetype = "solid", alpha_arrow = 0.5) +
                    ggtitle("PCA of Acoustic Parameters") +
                    theme(plot.title = element_text(hjust = 0.5)) +
                    theme_minimal() +
                    theme(panel.background = element_blank(), 
                    panel.grid.major = element_blank(), 
                    panel.grid.minor = element_blank(),
                    panel.border = element_blank()) + 
                    theme(axis.line.x = element_line(color="black", size = 0.8),
                    axis.line.y = element_line(color="black", size = 0.8)) 
            

PCA_plot

Diagram 1 - PCA_Plot without the function

enter image description here

Diagram 2 enter image description here

marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
Alice Hobbs
  • 1,021
  • 1
  • 15
  • 31

2 Answers2

1

Unfortunately, despite the effort you put in to creating a dummy data set, the code you made to reproduce it contains errors. However, this seems a bit tangential to what you are asking here, which is to change the colors and weights of segments and text in the image produced by ggbiplot. To do this, we can simply use the example that comes with the package:

library(ggbiplot)

data(wine)
wine.pca <- prcomp(wine, scale. = TRUE)

p <- ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, 
         groups = wine.class, ellipse = TRUE, circle = TRUE)

p

enter image description here

The options for styling the plot within the function itself are somewhat limited, but since it produces a ggplot object, we can re-specify the necessary layers. The following code should work on any object output from ggbiplot. First we find the geom segment and geom text layers:

seg <- which(sapply(p$layers, function(x) class(x$geom)[1] == 'GeomSegment'))
txt <- which(sapply(p$layers, function(x) class(x$geom)[1] == 'GeomText'))

We can change the colour and width of the segments by doing

p$layers[[seg]]$aes_params$colour <- 'black'
p$layers[[seg]]$aes_params$size <- 1

To change the labels to have a gray background, we need to overwrite the geom_text layer with a geom_label layer:

p$layers[[txt]] <- geom_label(aes(x = xvar, y = yvar, label = varname,
                                  angle = angle, hjust = hjust), 
                              label.size = NA,
                              data = p$layers[[txt]]$data, 
                              fill = '#dddddd80')

Now we can draw the plot with a clean modern theme:

p + theme_minimal()

enter image description here

Allan Cameron
  • 147,086
  • 7
  • 49
  • 87
  • Thank you so much Allan, this is exactly what I wanted to create. I used this line of code and I got this error message: PCA_plot$layers[[seg]]$aes_params$colour <- 'black' Error in PCA_plot$layers[[seg]]$aes_params$colour <- "black" : invalid type/length (environment/1) in vector allocation – Alice Hobbs Sep 20 '22 at 17:22
  • Do you know what went wrong? Everything runs absolutely fine until here. – Alice Hobbs Sep 20 '22 at 17:22
  • Can you check what `seg` is? – Allan Cameron Sep 20 '22 at 17:39
  • seg <- which(sapply(PCA_plot$layers, function(x) class(x$geom)[1] == 'GeomSegment')) txt <- which(sapply(PCA_plot$layers, function(x) class(x$geom)[1] == 'GeomText')) #We can change the colour and width of the segments by doing PCA_plot$layers[[seg]]$aes_params$colour <- 'black' PCA_plot$layers[[seg]]$aes_params$size <- 1 – Alice Hobbs Sep 20 '22 at 18:11
  • The output of seg is: [1] 2 6. The plot is called PCA_plot – Alice Hobbs Sep 20 '22 at 18:12
  • Aha. You have two geom_segment layers for some reason. Try `PCA_plot$layers[[seg[1]]]$aes_params$colour <- 'black'` and `PCA_plot$layers[[seg[2]]]$aes_params$colour <- 'black'` – Allan Cameron Sep 20 '22 at 18:40
  • `PCA_plot$layers[[txt]] <- geom_label(aes(x = xvar, y = yvar, label = varname, angle = angle, hjust = hjust), label.size = NA, data = PCA_plot$layers[[txt]]$Box_Cox_Stan_Dataframe[,5:12], fill = '#dddddd80')` PCA_plot. I ran this code and got this error. `Error in FUN(X[[i]], ...) : object 'varname' not found` – Alice Hobbs Sep 20 '22 at 18:48
  • Where you have written: `data = p$layers[[txt]]$data,` I added `Box_Cox_Stan_Dataframe[,5:12]` as this is my data frame with the vectors used for the PCA. Have I done this right? I am really grateful to you. Many thanks – Alice Hobbs Sep 20 '22 at 18:50
  • No Alice, try just with `PCA_plot$layers[[txt]]$data` – Allan Cameron Sep 20 '22 at 19:40
  • The reason why I said that is because if I run the labels code without changing it, I get this error message `Error in fortify(): ! data must be a data frame, or other object coercible by fortify(), not an S3 object with class waiver`. Run `rlang::last_error()` to see where the error occurred`. – Alice Hobbs Sep 21 '22 at 03:41
  • `Run rlang::last_error() to see where the error occurred. > rlang::last_error() Error in fortify(): ! data` must be a data frame, or other object coercible by `fortify(), not an S3 object with class waiver. --- Backtrace: 1. ggplot2::geom_label(...) 2. ggplot2::layer(...) 4. ggplot2:::fortify.default(data)` – Alice Hobbs Sep 21 '22 at 03:44
  • Hey Allan, I hope you are well. Would you have any idea how to make the loading arrows longer so the labels are not overlapping? Many thanks if you can advise. Take care – Alice Hobbs Sep 23 '22 at 14:50
0

Thank you Allan Cameron for providing this helpful answer

PCA_plot<-ggbiplot(PCA, ellipse=TRUE, circle=TRUE, varname.adjust = 2.5, groups=Box_Cox_Stan_Dataframe$Country, var.scale = 1) +
                  ggtitle("PCA of Acoustic Parameters") +
                  theme(plot.title = element_text(hjust = 0.5)) +
                  theme_minimal() +
                  theme(panel.background = element_blank(), 
                  panel.grid.major = element_blank(), 
                  panel.grid.minor = element_blank(),
                  panel.border = element_blank()) + 
                  theme(axis.line.x = element_line(color="black", size = 0.8),
                  axis.line.y = element_line(color="black", size = 0.8))

#Place the arrows in the forefront of the points
PCA_plot$layers <- c(PCA_plot$layers, PCA_plot$layers[[2]])

#The options for styling the plot within the function itself are somewhat limited, but since it produces a 
#ggplot object, we can re-specify the necessary layers. The following code should work on any object 
#output from ggbiplot. First we find the geom segment and geom text layers:

seg <- which(sapply(PCA_plot$layers, function(x) class(x$geom)[1] == 'GeomSegment'))
txt <- which(sapply(PCA_plot$layers, function(x) class(x$geom)[1] == 'GeomText'))

#We can change the colour and width of the segments by doing
PCA_plot$layers[[seg[1]]]$aes_params$colour <- 'black' 
PCA_plot$layers[[seg[2]]]$aes_params$colour <- 'black'

#Labels
# Extract loadings of the variables
PCAloadings <- data.frame(Variables = rownames(PCA$rotation), PCA$rotation)

#To change the labels to have a gray background, we need to overwrite the geom_text layer with a geom_label layer:
  PCA_plot$layers[[txt]] <- geom_label(aes(x = xvar, y = yvar, label = PCAloadings$Variables,
                                       angle = 0.45, hjust = 0.5, fontface = "bold"), 
                                       label.size = NA,
                                       data = PCA_plot$layers[[txt]]$data, 
                                       fill = '#dddddd80')
  
  PCA_plot

Output Figure

enter image description here

Alice Hobbs
  • 1,021
  • 1
  • 15
  • 31