1) For the legend you can simply use the cluster names as follows:
unqClusters = unique(clusters);
for i = 1:max(grp)
% Original legend showing "Cluster n"
%set(hPt(grp==i), 'Color', clrMap(i,:), 'DisplayName', sprintf('Cluster %d', grpID(i)))
% New legend showing the flower class name
set(hPt(grp==i), 'Color', clrMap(i,:), 'DisplayName', sprintf('%s', unqClusters{i}))
end
2) For the convex hull part, you could use the contour
s of the 2D normal distribution.
Here is a code that works on the first two raw PC scores, that you would need to adapt to the standardization performed by the biplot()
function.
The code essentially goes over each cluster, computes the mean and covariance of the first two PC values, and uses these values as the parameters of the 2D normal distribution whose PDF is computed on a meshgrid. Finally a level value of the 2D PDF is selected for plotting, which results in an ellipse covering most of the points in the cluster.
%% Convex hulls around each cluster (ellipses based on 2D normal distribution)
% Execution parameters:
% Number of points used for the meshgrid to generate the Normal PDF
resolution = 100;
% Factor of PC range by which to extend the min and max PC values
% on which the normal PDF is computed
% (adjust this value so that the ellipse around the points is drawn completely)
factor = 2;
% Variables to store the location and scale parameters of each cluster
mu = cell(1,3);
Sigma = cell(1,3);
% Do the calculations for each cluster and plot points and ellipses on a single plot
figure
hold on
for g = grpID
% Score (PC) values to analyze
scoregrp = score(grp==g,1:2);
mu{g} = mean(scoregrp);
Sigma{g} = cov(scoregrp);
% PC1 and PC2 values on which to compute the Normal PDF
pcvalues = cell(1,2);
for c = 1:length(pcvalues)
pcrange = range(scoregrp(:,c));
pcmin = min(scoregrp(:,c)) - factor*pcrange;
pcmax = max(scoregrp(:,c)) + factor*pcrange;
pcstep = pcrange / resolution;
pcvalues{c} = pcmin:pcstep:pcmax;
end
% Meshgrid generated from the above PC values
[XPoints, YPoints] = meshgrid(pcvalues{1}, pcvalues{2});
XYPoints = [XPoints(:) YPoints(:)];
npoints = size(XYPoints, 1);
% Variables used repeatedly in the computation of the normal PDF
detSigma = det(Sigma{g});
invSigma = inv(Sigma{g});
% Normal PDF on the meshgrid
% Note: the PDF is computed via a loop instead of matrix operation
% due to memory constraints
pdfnormal = nan(1, npoints);
den = sqrt(2*pi*detSigma);
for i = 1:npoints
point = XYPoints(i,:) - mu{g};
pdfnormal(i) = exp( -0.5 * point * invSigma * point' ) / den;
end
% Reshape the PDF to 2D matrix so that we can plot the contours
pdfnormal2D = reshape(pdfnormal, length(pcvalues{1}), length(pcvalues{2}));
% Value to plot as ellipse where the normal falls at about the value
% of the standard normal at two standard deviations
pdfvalue2Std = pdf('norm', 2);
% Plot!
plot(scoregrp(:,1), scoregrp(:,2), '.', 'Color', clrMap(g,:))
contour(XPoints, YPoints, pdfnormal2D, pdfvalue2Std, 'LineWidth', 2, ...
'Color', clrMap(g,:))
end
xlabel('PC1')
ylabel('PC2')
which generates the following graph:
