I'm working on a personal project using deep image search, and I was planning on adding he functionality to add new data to the approximate nearest neighbor index. I tried a few other things, but the approach I got closest with is by passing a dataframe, and appending the vectorized images to that dataframe in every iteration. I changed the code a little bit in the library
class Index:
def __init__(self,image_list:list, dataframe:pd.DataFrame()):
self.image_list = image_list
self.dataframe=dataframe
if 'meta-data-files' not in os.listdir():
os.makedirs("meta-data-files")
self.FE = FeatureExtractor()
def start_feature_extraction(self, dataframe:pd.DataFrame()):
image_data = pd.DataFrame()
image_data['images_paths'] = self.image_list
f_data = self.FE.get_feature(self.image_list)
image_data['features'] = f_data
image_data = image_data.dropna().reset_index(drop=True)
dataframe.append(image_data)
dataframe.to_pickle(config.image_data_with_features_pkl)
print("Image Meta Information Saved: [meta-data-files/image_data_features.pkl]")
return dataframe
def start_indexing(self,dataframe):
self.dataframe = dataframe
f = len(dataframe['features'][0]) # Length of item vector that will be indexed
t = AnnoyIndex(f, 'euclidean')
for i,v in tqdm(zip(self.dataframe.index,dataframe['features'])):
t.add_item(i, v)
t.build(100) # 100 trees
print("Saved the Indexed File:"+"[meta-data-files/image_features_vectors.ann]")
t.save(config.image_features_vectors_ann)
def Start(self, dataframe):
self.dataframe=dataframe
if len(os.listdir("meta-data-files/"))==0:
data = self.start_feature_extraction(dataframe)
self.start_indexing(data)
else:
print("Metadata and Features are allready present, Do you want Extract Again? Enter yes or no")
flag = str(input())
if flag.lower() == 'yes':
data = self.start_feature_extraction(dataframe)
self.start_indexing(data)
else:
print("Meta data allready Present, Please Apply Search!")
print(os.listdir("meta-data-files/"))
As you can see, I changed it to accept a dataframe as an argument, and then in start_feature_extraction
, elements of the passed list are embedded, and appended to the dataframe. By doing this, I imagined that I could use the start()
method over and over again with new lists, extending the dataframe, and adding new vectors.
to use this new functionality, this is the code:
from DeepImageSearch import Index, LoadData, SearchImage
import pandas as pd
folders=LoadData().from_folder([List of images])
imgfolders=[]
for i in folders:
imgfolderstemp=LoadData().from_folder([i])
imgfolders.extend(imgfolderstemp)
imglist=[]
for i in imgfolders:
imglisttemp=LoadData().from_folder([i])
imglist.extend(imglisttemp)
dataframe=pd.DataFrame
Index(imglist, dataframe).Start(dataframe)
SearchImage().plot_similar_images(image_path=imglist[1])
imglistnew=LoadData().from_folder([new list])
Index(imglistnew, dataframe).Start(dataframe)
The index method works fine, but when I call search, I get this error:
KeyError Traceback (most recent call last)
File ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py:3621, in Index.get_loc(self, key, method, tolerance)
3620 try:
-> 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
File ~\anaconda3\lib\site-packages\pandas\_libs\index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File ~\anaconda3\lib\site-packages\pandas\_libs\index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()
File pandas\_libs\hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas\_libs\hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'features'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Input In [8], in <cell line: 1>()
----> 1 SearchImage().plot_similar_images(image_path=imglist[690])
File ~\anaconda3\lib\site-packages\DeepImageSearch\DeepImageSearch.py:108, in SearchImage.__init__(self)
106 def __init__(self):
107 self.image_data = pd.read_pickle(config.image_data_with_features_pkl)
--> 108 self.f = len(self.image_data['features'][0])
File ~\anaconda3\lib\site-packages\pandas\core\frame.py:3505, in DataFrame.__getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
File ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py:3623, in Index.get_loc(self, key, method, tolerance)
3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
-> 3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
3628 self._check_indexing_error(key)
KeyError: 'features'
I'm honestly stuck, everything I do I go 1 step forward, 2 steps back. I would really appreciate any help at all.