import os
import shutil
import time

import cv2
from skimage.metrics import structural_similarity as ssim


def SSIM_compute(files, WorkingFolder, DestinationAlikeFolder, DestinationUniqueFolder, results, start_time):
    NumberAlike = 0
    loop = 1
    while True:
        # Re-list the folder on every pass: processed files have been moved out of it.
        files = os.listdir(WorkingFolder)
        if not files:
            break
        IsAlike = False
        CountAlike = 1
        print("Loop : " + str(loop) + " --- elapsed : " + str(time.time() - start_time))
        # Compare the first remaining file against every other file still in the folder.
        for i in range(1, len(files)):
            # print("\ti= " + str(i) + " : " + str(time.time() - start_time))
            img1 = cv2.imread(WorkingFolder + "/" + files[0])
            img2 = cv2.imread(WorkingFolder + "/" + files[i])
            x1, y1 = img1.shape[:2]
            x2, y2 = img2.shape[:2]
            # Resize both images to the smallest common size so SSIM gets arrays of equal shape
            # (cv2.resize expects (width, height), i.e. (columns, rows)).
            x = min(x1, x2)
            y = min(y1, y2)
            img1 = cv2.resize(img1, (y, x))
            img2 = cv2.resize(img2, (y, x))
            score = ssim(img1, img2, multichannel=True)
            if score > 0.8:
                IsAlike = True
                if os.path.exists(WorkingFolder + "/" + files[i]):
                    shutil.move(WorkingFolder + "/" + files[i],
                                DestinationAlikeFolder + "/alike" + str(NumberAlike) + "_" + str(CountAlike) + ".jpg")
                    # results.write("ALIKE : " + files[0] + " --- " + files[i] + "\n")
                    results.write("ALIKE : /alike" + str(NumberAlike) + "_0.jpg --- /alike"
                                  + str(NumberAlike) + "_" + str(CountAlike) + ".jpg -> " + str(score) + "\n")
                    CountAlike += 1
        # Move the reference file itself, depending on whether it matched anything.
        if IsAlike:
            if os.path.exists(WorkingFolder + "/" + files[0]):
                shutil.move(WorkingFolder + "/" + files[0],
                            DestinationAlikeFolder + "/alike" + str(NumberAlike) + "_0.jpg")
                NumberAlike += 1
        else:
            if os.path.exists(WorkingFolder + "/" + files[0]):
                shutil.move(WorkingFolder + "/" + files[0], DestinationUniqueFolder)
        loop += 1
I have this code that compares images to determine whether they are identical or whether some of them have been modified (compression, artifacts, etc.).
To check whether two images are strictly identical, I simply compute and compare their hashes (in another function, not shown here); to check whether they are merely similar, I compute the SSIM of the two files.
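For reference, the hash check looks roughly like this (a simplified sketch of the idea; the real function is not shown here, and hashing the raw file bytes with MD5 is just one possible choice):

import hashlib
import os

def file_hash(path):
    # Hash the raw file bytes: two strictly identical files give the same digest.
    with open(path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()

def group_identical(folder):
    # Bucket the files by digest; any bucket with more than one entry
    # contains byte-for-byte duplicates.
    groups = {}
    for name in os.listdir(folder):
        groups.setdefault(file_hash(os.path.join(folder, name)), []).append(name)
    return groups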
Here is where the trouble begins: when I test this code on a fairly small set of pictures (around 50), the execution time is decent, but on a bigger set (around 200 pictures) it becomes far too long (several hours), which is expected given the two nested loops.
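To put numbers on it (rough worst-case arithmetic, not a measurement): if every picture ends up compared against every other one, the number of SSIM calls grows with the square of the set size:

from math import comb

for n in (50, 200):
    # Worst case: one SSIM comparison per unordered pair of pictures.
    print(n, "pictures ->", comb(n, 2), "comparisons")
# 50 pictures -> 1225 comparisons
# 200 pictures -> 19900 comparisons, i.e. about 16x the work for 4x the pictures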
As I'm not very creative, does anybody have ideas to reduce the execution time on a larger dataset? Maybe a way to avoid those nested loops?
Thank you for any help :)