I am trying to implement KMeans algorithm as a class and when i done all code i test with creating object and than train the dataset. But i get the following error. self.distances = np.zeros((self.N, self.n_clusters))
TypeError: only integer scalar arrays can be converted to a scalar index
from abc import ABC, abstractmethod
class KMeansInterface(ABC):
@abstractmethod
def fit(self, X):
pass
@abstractmethod
def predict(self, X):
pass
class Kmeans(KMeansInterface):
def __init__(self, n_clusters):
self.n_clusters = n_clusters
self.labels = None
self.distances = None
self.N = X.shape[0]
self.data_points = X
self.prev_labels = None
def choose_clusters(self, k, X):
self.n_clusters = k
size = X.shape[0]
numbers = np.random.choice(size, k, replace=False)
return X[numbers]
def calculate_sum_squared_distance(self, data_points, clusters):
self.distances = np.zeros((self.N, self.n_clusters))
for i, centroid in enumerate(clusters):
distance = np.sum(np.square(data_points-centroid), axis=1)
self.distances[:, i] = distance
return self.distances
def calculate_closest_clusters(self, distances):
self.closest_clusters = np.argmin(self.distances, axis=1)
return self.closest_clusters
def update_clusters(self, labels, data_points):
new_clusters = []
for i in range(self.n_clusters):
points_in_cluster = data_points[labels==i]
values = np.mean(points_in_cluster, axis=0)
new_clusters.append(values)
return np.array(new_clusters)
def fit(self, X):
self.n_clusters = self.choose_clusters(self.n_clusters, X)
for i in range(20000):
self.distances = self.calculate_sum_squared_distance(X, self.n_clusters)
self.labels = self.calculate_closest_clusters(self.distances)
self.new_cluster = self.update_clusters(self.labels, X)
if self.prev_labels is not None:
if np.all(self.prev_labels == self.labels):
break
self.prev_labels = self.labels
fitted = True
return self.labels
def predict(self, X, clusters):
self.distances = self.calculate_sum_squared_distance(X, clusters)
self.labels = self.calculate_closest_clusters(self.distances)
return self.labels
kmean_object = KMeans(2)
kmean_object.fit(X)