I'm using scipy.optimize for a biomedical imaging application. I'm trying to write an algorithm that aligns two images by finding the affine transform (shift, scale, and rotation, in this case) between them. I'm using an optimizer to minimize the difference between the two images. The parameters I'm optimizing are the x and y shifts, the rotation about the center of the image, and the scaling in the x and y dimensions. Ideally, the step sizes would all be close to whole numbers rather than on the order of 1e-6 (the current step size). Is there any way to specify minimum step sizes for the parameters in scipy.optimize?
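To illustrate the kind of control I'm after (these are scipy's documented knobs, as far as I can tell): Nelder-Mead accepts an initial_simplex option that sets the starting step per parameter, and L-BFGS-B has an eps option for the finite-difference gradient step. A sketch, where objective and x0 stand in for my real setup:

    # Hypothetical sketch: `objective` and `x0` stand in for my real setup below.
    import numpy
    from scipy import optimize

    x0 = numpy.zeros(5)                              # [x, y, r, sx, sy]
    steps = numpy.array([1.0, 1.0, 1.0, 0.1, 0.1])   # desired first step per parameter

    # Nelder-Mead (scipy >= 0.19): the initial simplex vertices set the step sizes
    simplex = numpy.vstack([x0] + [x0 + steps[i] * numpy.eye(5)[i] for i in range(5)])
    result = optimize.minimize(objective, x0, method='Nelder-Mead',
                               options={'initial_simplex': simplex})

    # L-BFGS-B: eps is the step used for the numerical gradient
    result = optimize.minimize(objective, x0, method='L-BFGS-B',
                               options={'eps': 1.0})

As far as I can tell, though, these only control the initial simplex and the gradient step, not a minimum step maintained throughout the run, which is what I actually want.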
I'm currently mapping the parameters to the proper order of magnitude, but this only works for one or two parameters at a time with the Nelder-Mead method. (I'm aiming to use L-BFGS-B eventually because of the gradient, but that's a whole other set of problems at the moment.) Adding more parameters causes the optimization to accept a misaligned image as the correct answer. Is this kind of "false positive" typical of scipy.optimize? Is there any way to force more accuracy aside from changing the tolerance?
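Concretely, the mapping is the inline scaling that's commented out in find_affine below; the same idea as a standalone wrapper (the scale factors here are illustrative, and objective stands in for my real objective):

    # Sketch of the parameter mapping idea only; scale factors are made up.
    scale = numpy.array([1e4, 1e4, 1e4, 1e3, 1e3])   # per-parameter scale factors

    def scaled_objective(p):
        # the optimizer varies p on the order of 1e-4;
        # the objective sees pixel and degree units
        return objective(p * scale)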
Working code:
import cv2
import numpy
from scipy import optimize


class toy():
    def __init__(self, img1, img2):
        self.original = img1
        self.shifted = img2

    def find_affine(self, parameters):
        # extract parameters
        x = 0
        y = 0
        r = 0.0
        sx = 1.0
        sy = 1.0
        # x = int(parameters[0]*10000)
        # y = int(parameters[1]*10000)
        # r = int(parameters[2]*10000)
        # sx = 1.0 + abs(round(parameters[3]*1000, 1))
        # sy = 1.0 + abs(round(parameters[4]*1000, 1))
        print "Updated parameters: [" + str(x) + ", " + str(y) + ", " + str(r) + ", " + str(sx) + ", " + str(sy) + "]"
        # image data: dims are [rows, cols]
        original_dims = [len(self.original), len(self.original[0])]
        shifted_dims = [len(self.shifted), len(self.shifted[0])]
        mod_orig = self.original
        # set up rotation; OpenCV takes the center as (x, y) = (col, row)
        # and the output size as (width, height)
        if r != 0.0:
            rotation = cv2.getRotationMatrix2D((original_dims[1]/2, original_dims[0]/2), r, 1)
            mod_orig = cv2.warpAffine(mod_orig, rotation, (original_dims[1], original_dims[0]))
        # set up scaling; cv2.resize also takes dsize as (width, height)
        if sx != 1.0 or sy != 1.0:
            dims = [int(round(sx*original_dims[0])), int(round(sy*original_dims[1]))]
            mod_orig = cv2.resize(mod_orig, (dims[1], dims[0]), interpolation=cv2.INTER_CUBIC)
            original_dims = [len(mod_orig), len(mod_orig[0])]
            print "New dimensions: " + str(len(mod_orig)) + " " + str(len(mod_orig[0]))
        # set up translation, rounded to whole pixels
        shift = [int(round(y*sy)), int(round(x*sx)), 0]  # does this need to be scaled?
        # set the overlap bounds: x
        if x <= 0:
            rows_orig = [abs(shift[1]), original_dims[1]]
            rows_shift = [0, shifted_dims[1]+shift[1]]
        else:
            rows_shift = [shift[1], shifted_dims[1]]
            rows_orig = [0, original_dims[1]-shift[1]]
        # set the overlap bounds: y
        if y <= 0:
            cols_orig = [abs(shift[0]), original_dims[0]]
            cols_shift = [0, shifted_dims[0]+shift[0]]
        else:
            cols_shift = [shift[0], shifted_dims[0]]
            cols_orig = [0, original_dims[0]-shift[0]]
        # get the relevant pixels
        original_overlap = mod_orig[cols_orig[0]:cols_orig[1], rows_orig[0]:rows_orig[1]]
        shifted_overlap = self.shifted[cols_shift[0]:cols_shift[1], rows_shift[0]:rows_shift[1]]
        self.show_overlap(original_overlap, shifted_overlap)
        return (original_overlap, shifted_overlap)

    def objective(self, parameters):
        stable_pts, warped_pts = self.find_affine(parameters)
        stable_pts = stable_pts.flatten()
        warped_pts = warped_pts.flatten()
        N = max(len(stable_pts), len(warped_pts))
        print "Number of points: " + str(N)
        if N == 0:
            # no overlap at all: return a huge penalty
            return 99999999999
        score = 0.0
        for s_pt, w_pt in zip(stable_pts, warped_pts):
            score = score + ((float(s_pt) - float(w_pt))**2)
        score = score/N
        print "Objective score: " + str(score) + " for x = " + str(parameters)
        return score

    def show_overlap(self, img1, img2):
        print "Dims"
        print " Img1 Dims: " + str(len(img1)) + " " + str(len(img1[0]))
        print " Img2 Dims: " + str(len(img2)) + " " + str(len(img2[0]))
        dims = [min(len(img1), len(img2)), min(len(img1[0]), len(img2[0]))]
        overlap = numpy.zeros(dims)
        for x in xrange(len(overlap)):
            for y in xrange(len(overlap[0])):
                overlap[x][y] = abs(int(img1[x][y])-int(img2[x][y]))
        overlap = numpy.uint8(overlap)
        cv2.imshow("Overlapping Image", overlap)
        cv2.waitKey(500)
        cv2.destroyAllWindows()

def _main():
    # read in images
    img1 = cv2.imread("data/smile_orig.png")
    img2 = cv2.imread("data/smiletest.png")
    # convert to grayscale
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
    test = toy(img1, img2)
    initial_conditions = [0.0]
    transform = optimize.minimize(test.objective, initial_conditions,
                                  method='nelder-mead', tol=1e-8)
    # show minimization at the end
    params = transform.x
    original_img, shifted_img = test.find_affine(params)
    print "COMPLETED MINIMIZATION"
    print transform
    # show difference between transformed image and desired image
    overlap = numpy.zeros([min(len(original_img), len(shifted_img)),
                           min(len(original_img[0]), len(shifted_img[0]))])
    print "shape of overlap: " + str(overlap.shape)
    for x in xrange(len(overlap)):
        for y in xrange(len(overlap[0])):
            overlap[x][y] = abs(int(original_img[x][y])-int(shifted_img[x][y]))
    overlap = numpy.uint8(overlap)
    cv2.imshow("Overlapping Image", overlap)
    print "Final Image Shown"
    print "Dimensions: " + str(len(overlap)) + " " + str(len(overlap[0]))
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    _main()
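Aside: I know the per-pixel loop in objective() could be a vectorized mean-squared error. A sketch of a drop-in replacement method for the toy class, assuming both overlaps come back as 2-D numpy arrays (unlike the flatten-and-zip version, this crops to the common region, so pixels from different rows are never paired when the shapes differ):

    def objective_vectorized(self, parameters):
        stable_pts, warped_pts = self.find_affine(parameters)
        # crop both overlaps to their common region before comparing
        h = min(stable_pts.shape[0], warped_pts.shape[0])
        w = min(stable_pts.shape[1], warped_pts.shape[1])
        if h == 0 or w == 0:
            return 1e12  # heavily penalize transforms with no overlap
        diff = stable_pts[:h, :w].astype(float) - warped_pts[:h, :w].astype(float)
        return numpy.mean(diff ** 2)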
Images used:
Results:
COMPLETED MINIMIZATION
status: 0
nfev: 95
success: True
fun: 11.051886006332982
x: array([ -1.29629630e-04, 8.42592593e-04, 2.77777778e-05])
message: 'Optimization terminated successfully.'
nit: 29
With the parameter mapping, the recovered transform is an x shift of -1 pixel, a y shift of 8 pixels, and a rotation of 0 degrees. The actual affine transform between the two images is an x shift of 10 pixels, a y shift of 20 pixels, and a rotation of -10 degrees.
Difference between the two images after the transform is found:
It's close and the score is low, but it needs to be closer (ideally a perfect score of 0.0, or at least nothing left of the smiley face). Any suggestions would be welcome.
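For reference, the whole-number stepping I'm picturing behaves like what scipy.optimize.brute does over a grid (sketch with made-up search ranges, assuming an objective that takes raw pixel/degree parameters):

    # Sketch: exhaustive search on integer steps; ranges are made up.
    from scipy import optimize

    ranges = (slice(-20, 21, 1),   # x shift, pixels
              slice(-20, 21, 1),   # y shift, pixels
              slice(-15, 16, 1))   # rotation, degrees
    best = optimize.brute(test.objective, ranges, finish=None)

But a full grid is slow, which is why I'd rather get the step sizes right inside the minimizer.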