0

I am trying to find the optimal number of clusters, but I get an error. Below is my code:

def mbkmeans_clusters(
    X,
    k,
    mb,
    print_silhouette_values,
):
    """Fit MiniBatchKMeans on ``X`` and report silhouette diagnostics.

    Args:
        X: Feature vectors to cluster; passed unchanged to
            ``MiniBatchKMeans.fit``, ``silhouette_score`` and
            ``silhouette_samples``.
        k: Number of clusters requested (``n_clusters``).
        mb: Mini-batch size (``batch_size``).
        print_silhouette_values: When true, also print size, mean, min and
            max silhouette value for every cluster that received samples,
            sorted by mean silhouette (best first).

    Returns:
        A tuple ``(km, km.labels_)``: the fitted estimator and the label
        assigned to each sample.

    Raises:
        ValueError: If the fit produced fewer than 2 distinct labels or as
            many labels as samples; the silhouette is undefined in those
            cases (this typically means the rows of ``X`` are identical,
            e.g. all-zero document vectors).
    """
    km = MiniBatchKMeans(n_clusters=k, batch_size=mb).fit(X)
    labels = km.labels_

    # Clusters that actually received samples. MiniBatchKMeans may leave some
    # of the k requested clusters empty, so do not assume labels 0..k-1 exist.
    present_labels = sorted(set(labels.tolist()))
    n_samples = len(labels)

    # The silhouette needs between 2 and n_samples - 1 distinct labels. Check
    # it up front so the failure explains itself instead of surfacing as a
    # generic error from inside silhouette_score.
    if not 2 <= len(present_labels) <= n_samples - 1:
        raise ValueError(
            f"Silhouette is undefined: got {len(present_labels)} distinct "
            f"cluster label(s) for {n_samples} samples (need between 2 and "
            f"n_samples - 1). Check that X contains distinct, non-degenerate "
            f"vectors."
        )

    print(f"Silhouette coefficient: {silhouette_score(X, labels, metric='euclidean'):0.2f}")

    if print_silhouette_values:
        sample_silhouette_values = silhouette_samples(X, labels)
        print("Silhouette values:")
        silhouette_values = []
        # Iterate over the labels that occur, not a hard-coded index range:
        # this covers every cluster and never selects an empty slice (on which
        # min()/max() would raise).
        for i in present_labels:
            cluster_silhouette_values = sample_silhouette_values[labels == i]
            silhouette_values.append(
                (
                    i,
                    cluster_silhouette_values.shape[0],
                    cluster_silhouette_values.mean(),
                    cluster_silhouette_values.min(),
                    cluster_silhouette_values.max(),
                )
            )
        # Best-separated clusters (highest mean silhouette) first.
        silhouette_values = sorted(
            silhouette_values, key=lambda tup: tup[2], reverse=True
        )
        for s in silhouette_values:
            print(
                f"    Cluster {s[0]}: Size:{s[1]} | Avg:{s[2]:.2f} | Min:{s[3]:.2f} | Max: {s[4]:.2f}"
            )
    return km, labels
# Cluster the vectorized documents into 50 groups using mini-batches of 500,
# printing the per-cluster silhouette breakdown as well as the overall score.
clustering, cluster_labels = mbkmeans_clusters(
    X=vectorized_docs,
    k=50,
    mb=500,
    print_silhouette_values=True,
)

These links were not helpful: Link1 Link2

When I tried np.unique(km.labels_), I get array([0]). What am I missing?

The AG
  • 672
  • 9
  • 18

0 Answers0