I have a set of about 200 images that I want to cluster into groups of images with similar features. I'm using Resnet50 to extract feature vectors from images and with the help of Faiss Kmeans I'm trying to cluster them into groups.
I have defined a class for Faiss KMeans as given on the link here.
class FaissKMeans:
def __init__(self, n_clusters=8, n_init=10, max_iter=300):
self.n_clusters = n_clusters
self.n_init = n_init
self.max_iter = max_iter
self.kmeans = None
self.cluster_centers_ = None
self.inertia_ = None
def fit(self, X, y):
self.kmeans = faiss.Kmeans(d=X.shape[1],
k=self.n_clusters,
niter=self.max_iter,
nredo=self.n_init)
self.kmeans.train(X.astype(np.float32))
self.cluster_centers_ = self.kmeans.centroids
self.inertia_ = self.kmeans.obj[-1]
def predict(self, X):
return self.kmeans.index.search(X.astype(np.float32), 1)[1]
I'm storing the images and their vectors in a dictionary as key-value pairs.
#function to extract image vector
def extract_features(file, model):
img = load_img(file,target_size=(224,224))
img = np.array(img)
reshaped_img = img.reshape(1,224,224,3)
imgx = preprocess_input(reshaped_img)
features = model.predict(imgx,use_multiprocessing=True)
return features
#append the images in a folder to list "products"
products = []
with os.scandir(mypath) as files:
for file in files:
if file.name.endswith('.jpg'):
products.append(file.name)
#load ResNet50 model
model = ResNet50()
model = Model(inputs = model.inputs, outputs = model.layers[-2].output)
#save image and image vector to dictionary "feature_dict" as key value pair
feature_dict = {}
p = pkl_path
for product in products:
try:
feat = extract_features(product,model)
feature_dict[product] = feat
except:
with open(p,'wb') as file:
pickle.dump(data,file)
#convert dictionary to a numpy array
filenames = np.array(list(feature_dict.keys()))
feat = np.array(list(feature_dict.values()))
feat = feat.reshape(-1,2048)
I'm using the package "kneed" to determine the number of clusters
#determine the number of clusters
length = len(filenames)
lim = 25
sse = []
list_k = list(range(1, lim))
for k in list_k:
km = KMeans(n_clusters=k,random_state=22, n_jobs=-1)
labels= km.fit_predict(feat)
sse.append(km.inertia_)
kneedle=KneeLocator(list_k,sse,curve='convex',direction='decreasing')
elbow = kneedle.elbow #number of clusters
Now I'm trying to cluster the images into different groups using faiss Kmeans but I'm getting the error of AttributeError: 'Kmeans' object has no attribute 'fit'
on kmeans.fit(feat)
kmeans = faiss.Kmeans(d=feat.shape[0] ,k=elbow, niter=200)
kmeans.fit(feat)
kmeans.train(feat)
When I try to use kmeans.train(feat)
which I found on the link, I get the error AssertionError