
I have to program basic face detection using the fast convolution method for calculating correlation. So far I have this code:

import numpy as np
from skimage import color


def template_matching_fast_convolution(face_data, template_data):
    # convert both images to greyscale
    face_grey = color.rgb2gray(face_data)
    template_grey = color.rgb2gray(template_data)

    # apply zero padding so both pictures have the same number of rows and columns
    rows_face, columns_face = face_grey.shape
    rows_template, columns_template = template_grey.shape

    if rows_face < rows_template:
        diff = rows_template - rows_face
        face_grey = np.pad(face_grey, ((diff // 2, diff - diff // 2), (0, 0)),
                           'constant', constant_values=0)
    elif rows_template < rows_face:
        diff = rows_face - rows_template
        template_grey = np.pad(template_grey, ((diff // 2, diff - diff // 2), (0, 0)),
                               'constant', constant_values=0)

    if columns_face < columns_template:
        diff = columns_template - columns_face
        face_grey = np.pad(face_grey, ((0, 0), (diff // 2, diff - diff // 2)),
                           'constant', constant_values=0)
    elif columns_template < columns_face:
        diff = columns_face - columns_template
        template_grey = np.pad(template_grey, ((0, 0), (diff // 2, diff - diff // 2)),
                               'constant', constant_values=0)

    # extra padding to make both pictures square
    rows_face, columns_face = face_grey.shape
    rows_template, columns_template = template_grey.shape
    max_dimension = max(rows_face, columns_face, rows_template, columns_template)

    pad_rows_face = max_dimension - rows_face
    pad_columns_face = max_dimension - columns_face
    pad_rows_template = max_dimension - rows_template
    pad_columns_template = max_dimension - columns_template

    face_grey = np.pad(face_grey,
                       ((pad_rows_face // 2, pad_rows_face - pad_rows_face // 2),
                        (pad_columns_face // 2, pad_columns_face - pad_columns_face // 2)),
                       'constant', constant_values=0)
    template_grey = np.pad(template_grey,
                           ((pad_rows_template // 2, pad_rows_template - pad_rows_template // 2),
                            (pad_columns_template // 2, pad_columns_template - pad_columns_template // 2)),
                           'constant', constant_values=0)

    # apply the 2D-FFT to both images
    face_fft = np.fft.fft2(face_grey)
    template_fft = np.fft.fft2(template_grey)

    # multiply the spectra element-wise; conjugating the template spectrum gives
    # correlation rather than convolution
    multiplied_spectrum = face_fft * np.conj(template_fft)

    # inverse FFT back to the spatial domain and shift the zero lag to the centre
    correlation = np.fft.fftshift(np.fft.ifft2(multiplied_spectrum).real)

    return correlation

Here, face_data is a photo of a single person and template_data is a precomputed picture of an average face.
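
For context, this is roughly how I call the function (the filenames are just placeholders):

import matplotlib.pyplot as plt
from skimage import io

face = io.imread("person.jpg")
template = io.imread("average_face.jpg")

result = template_matching_fast_convolution(face, template)
plt.imshow(result, cmap="gray")
plt.show()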

I'm not really sure if I'm approaching this the right way. Should I also resize the template based on the size of the face picture? Also, how do I determine the face from the returned correlation image? I know the place with the biggest amplitude is the place with the highest correlation, but what does that mean? Is it the center of the face?
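
For example, given the result from the call above, I was planning to find that place like this, although I don't know what the coordinates actually correspond to (just a sketch):

peak_row, peak_col = np.unravel_index(np.argmax(result), result.shape)
print(peak_row, peak_col)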

It would be nice if someone with experience could make this all clearer to me.
