I am trying to find the optimal number of clusters and i get the error. Below is my code:
def mbkmeans_clusters(
X,
k,
mb,
print_silhouette_values):
km = MiniBatchKMeans(n_clusters=k, batch_size=mb).fit(X)
print(f"Silhouette coefficient: {silhouette_score(X, km.labels_, metric='euclidean'):0.2f}")
if print_silhouette_values:
sample_silhouette_values = silhouette_samples(X, km.labels_)
print(f"Silhouette values:")
silhouette_values = []
for i in range(5, k-1):
cluster_silhouette_values = sample_silhouette_values[km.labels_ == i]
silhouette_values.append(
(
i,
cluster_silhouette_values.shape[0],
cluster_silhouette_values.mean(),
cluster_silhouette_values.min(),
cluster_silhouette_values.max(),
)
)
silhouette_values = sorted(
silhouette_values, key=lambda tup: tup[2], reverse=True
)
for s in silhouette_values:
print(
f" Cluster {s[0]}: Size:{s[1]} | Avg:{s[2]:.2f} | Min:{s[3]:.2f} | Max: {s[4]:.2f}"
)
return km, km.labels_
clustering, cluster_labels = mbkmeans_clusters(X=vectorized_docs,k=50,mb=500, print_silhouette_values=True)
These links were not helpful: Link1 Link2
When I tried np.unique(km.labels_)
i get array([0]). what am i missing?