I have to program basic face detection using the fast convolution method for calculating correlation. So far I have this code:
def template_matching_fast_convolution(face_data, template_data):
#convert to greyscale
face_grey = color.rgb2grey(face_data)
template_grey = color.rgb2grey(template_data)
#apply zero padding to get pictures of the same size
shape_face = face_grey.shape
shape_template = template_grey.shape
rows_face = shape_face[0]
columns_face = shape_face[1]
rows_template = shape_template[0]
columns_template = shape_template[1]
extra_pad_row_face = 0
extra_pad_row_template = 0
extra_pad_columns_face = 0
extra_pad_colummns_template = 0
if rows_face % 2 == 1:
extra_pad_row_face = 1
if rows_template % 2 == 1:
extra_pad_row_template = 1
if columns_face % 2 == 1:
extra_pad_columns_face = 1
if columns_template % 2 == 1:
extra_pad_colummns_template = 1
if rows_face < rows_template:
rows_diff_div_2 = int((rows_template - rows_face) / 2)
face_grey = np.pad(face_grey, ((rows_diff_div_2,rows_diff_div_2 + extra_pad_row_face), (0, 0)), 'constant', constant_values=(0))
elif rows_template < rows_face:
rows_diff_div_2 = int((rows_face - rows_template) / 2)
template_grey = np.pad(template_grey, ((rows_diff_div_2,rows_diff_div_2 + extra_pad_row_template), (0, 0)), 'constant', constant_values=(0))
if columns_face < columns_template:
columns_diff_div_2 = int((columns_template - columns_face) / 2)
face_grey = np.pad(face_grey, ((0, 0), (columns_diff_div_2,columns_diff_div_2 + extra_pad_columns_face)), 'constant', constant_values=(0))
elif columns_template < columns_face:
columns_diff_div_2 = int((columns_face - columns_template) / 2)
template_grey = np.pad(template_grey, ((0, 0), (columns_diff_div_2,columns_diff_div_2 + extra_pad_colummns_template)), 'constant', constant_values=(0))
#extra padding to make it square
shape_face = face_grey.shape
shape_template = template_grey.shape
rows_face = shape_face[0]
columns_face = shape_face[1]
rows_template = shape_template[0]
columns_template = shape_template[1]
max_dimension = max(rows_face, rows_template, columns_template, columns_face)
square_pad_rows_face = int((max_dimension - rows_face) / 2)
square_pad_columns_face = int((max_dimension - columns_face) / 2)
square_pad_rows_template = int((max_dimension - rows_template) / 2)
square_pad_columns_template = int((max_dimension - columns_template) / 2)
face_grey = np.pad(face_grey, ((square_pad_rows_face,square_pad_rows_face), (square_pad_columns_face, square_pad_columns_face)), 'constant', constant_values=(0))
template_grey = np.pad(template_grey, ((square_pad_rows_template,square_pad_rows_template), (square_pad_columns_template, square_pad_columns_template)), 'constant', constant_values=(0))
#apply 2D-fft
face_fft = np.fft.fft2(face_grey)
template_fft = np.fft.fft2(template_grey)
#multipy the spectres in the frequency domain
multiplied_spectrum = np.fft.fftshift(np.dot(face_fft, template_fft)).real
#calculate the inverse fft of the spectrum
ifft_spectrum = np.fft.ifft2(multiplied_spectrum).real
return ifft_spectrum
With face_data
being a photo of a single person and template_data
being a picture of an average face (precomputed).
I'm not really sure if I'm approaching this the right way. Should I also resize the template in function of the size of the face picture? Also, how do I determine the face from the returned inverse spectrum? I know the place with the biggest amplitude is the place with the highest correlation, but what does it mean? Is the center of the face?
Would be nice if someone with experience could make this all more clear to me.