1

I have a set of about 200 images that I want to cluster into groups of images with similar features. I'm using ResNet50 to extract a feature vector from each image, and I'm trying to cluster those vectors into groups with Faiss k-means.

I have defined a class for Faiss KMeans as given on the link here.

class FaissKMeans:
    """Small scikit-learn-style wrapper around ``faiss.Kmeans``.

    Attributes set by ``fit``:
        kmeans: the underlying trained ``faiss.Kmeans`` object.
        cluster_centers_: centroids taken from ``kmeans.centroids``.
        inertia_: last entry of ``kmeans.obj`` (final clustering objective).
    """

    def __init__(self, n_clusters=8, n_init=10, max_iter=300):
        """Store the hyper-parameters; nothing is trained until ``fit``.

        Args:
            n_clusters: number of centroids (passed to Faiss as ``k``).
            n_init: number of restarts (passed to Faiss as ``nredo``).
            max_iter: iterations per run (passed to Faiss as ``niter``).
        """
        self.n_clusters = n_clusters
        self.n_init = n_init
        self.max_iter = max_iter
        self.kmeans = None
        self.cluster_centers_ = None
        self.inertia_ = None

    def fit(self, X, y=None):
        """Train k-means on the rows of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: ignored; accepted (and now optional) only for scikit-learn
                API compatibility.

        Returns:
            ``self``, so calls can be chained as in scikit-learn.
        """
        # Faiss needs a C-contiguous float32 matrix; ``astype`` alone keeps a
        # Fortran-ordered layout, so convert explicitly.
        X = np.ascontiguousarray(X, dtype=np.float32)
        self.kmeans = faiss.Kmeans(d=X.shape[1],
                                   k=self.n_clusters,
                                   niter=self.max_iter,
                                   nredo=self.n_init)
        self.kmeans.train(X)
        self.cluster_centers_ = self.kmeans.centroids
        self.inertia_ = self.kmeans.obj[-1]
        return self

    def predict(self, X):
        """Return the index of the nearest centroid for every row of ``X``.

        Must be called after ``fit``. The result is the index array returned
        by the Faiss search with one neighbour per row, i.e. it keeps the
        trailing axis of length 1 rather than being flattened.
        """
        X = np.ascontiguousarray(X, dtype=np.float32)
        return self.kmeans.index.search(X, 1)[1]

I'm storing the images and their vectors in a dictionary as key-value pairs.

# Function to extract an image's feature vector.
def extract_features(file, model):
    """Run one image through ``model`` and return the predicted features.

    Args:
        file: path of the image to load.
        model: object exposing ``predict`` (here a Keras model).

    Returns:
        Whatever ``model.predict`` returns for a batch holding this single
        image (presumably shape (1, 2048) for the truncated ResNet50 used in
        this file, given the later ``reshape(-1, 2048)``).
    """
    # Load at the 224x224 input size the network is fed with.
    img = load_img(file, target_size=(224, 224))
    img = np.array(img)
    # Add the leading batch axis: one image of 224x224 with 3 channels.
    reshaped_img = img.reshape(1, 224, 224, 3)
    imgx = preprocess_input(reshaped_img)
    # `use_multiprocessing` was dropped: it only applies to generator /
    # Sequence inputs in TF2 Keras and is not an accepted argument of
    # `Model.predict` in Keras 3, so passing it for an array is at best a no-op.
    features = model.predict(imgx)
    return features

# Collect the names of all .jpg files in the folder `mypath` into "products".
# NOTE: only the bare file name is stored (not the full path), so later code
# that opens these files must resolve them relative to `mypath`.
with os.scandir(mypath) as files:
    # The loop has to run inside the `with` block: the scandir iterator is
    # closed on exit (and the un-indented loop was a syntax error).
    products = [file.name for file in files if file.name.endswith('.jpg')]

# Build the feature extractor: a ResNet50 whose output is taken from its
# second-to-last layer instead of the final layer.
model = ResNet50()
penultimate_output = model.layers[-2].output
model = Model(inputs=model.inputs, outputs=penultimate_output)

# Save each image and its feature vector in "feature_dict" as a key-value pair.
feature_dict = {}
p = pkl_path

for product in products:
    try:
        feature_dict[product] = extract_features(product, model)
    except Exception:
        # Best effort: if one image fails, checkpoint everything extracted so
        # far to `p` and carry on with the next image. The handler used to
        # dump the undefined name `data`, which raised NameError instead of
        # saving anything; `except Exception` (rather than a bare `except`)
        # lets Ctrl-C still interrupt the loop.
        with open(p, 'wb') as file:
            pickle.dump(feature_dict, file)

# Turn the dictionary into two aligned NumPy arrays: the image names and a
# matrix with one 2048-value feature row per image.
filenames = np.array(list(feature_dict))
feat = np.array(list(feature_dict.values())).reshape(-1, 2048)

I'm using the "kneed" package to determine the number of clusters from the elbow of the SSE curve.

# Determine the number of clusters with the elbow method: fit k-means for a
# range of k, record the inertia (SSE) for each, and locate the elbow.
length = len(filenames)
lim = 25

# k-means cannot build more clusters than there are samples, so cap the range.
list_k = list(range(1, min(lim, length + 1)))
sse = []

for k in list_k:
    # `n_jobs` is no longer passed: it was deprecated in scikit-learn 0.23 and
    # removed in 1.0, where it raises TypeError.
    km = KMeans(n_clusters=k, random_state=22)
    km.fit(feat)  # only the inertia is needed, not the per-sample labels
    sse.append(km.inertia_)

kneedle = KneeLocator(list_k, sse, curve='convex', direction='decreasing')
# Number of clusters. NOTE(review): kneed reports None when it finds no
# elbow -- confirm and handle before using this as a cluster count.
elbow = kneedle.elbow

Now I'm trying to cluster the images into groups using Faiss k-means, but the call kmeans.fit(feat) fails with "AttributeError: 'Kmeans' object has no attribute 'fit'".

# `d` must be the feature dimensionality (number of columns), not the number
# of images (rows); passing feat.shape[0] makes `train` fail its dimension
# assertion. `elbow` is cast to a plain int because it may be a NumPy integer.
kmeans = faiss.Kmeans(d=feat.shape[1], k=int(elbow), niter=200)
# faiss.Kmeans has no `fit` method -- training is done with `train`, which
# expects a C-contiguous float32 matrix.
kmeans.train(np.ascontiguousarray(feat, dtype=np.float32))

When I instead call kmeans.train(feat), which I found at the link, I get an AssertionError.

Nimantha
  • 6,405
  • 6
  • 28
  • 69
Cressida
  • 94
  • 1
  • 9

0 Answers0