I am new to ML/AI. As a first step, I am trying to run a convolution layer on a Cortex-M4 using CMSIS-NN function calls and to compare its output with hand-calculated results for the same inputs and kernel.
I am using the CMSIS-NN function arm_convolve_HWC_q7_basic for my convolution layer test. With a single input channel the layer runs successfully, and the outputs of the CMSIS-NN call exactly match my hand-calculated outputs.
The problem appears when I run the same test with more than one input channel: the outputs of arm_convolve_HWC_q7_basic no longer match my hand-calculated outputs.
FYI: I took the same inputs and kernels as shown in the reference example, so that the outputs are easy to compare.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "CMSIS_5/CMSIS/NN/Include/arm_nn_math_types.h"
#include "CMSIS_5/CMSIS/NN/Include/arm_nnfunctions.h"
/*
 * Geometry of the convolution layer under test: a 5x5 input with 3 channels,
 * one 3x3 filter, padding 1 and stride 2, which yields a 3x3 single-channel
 * output.
 */
#define CONV_IN_DIM_M4 5
#define CONV_IN_CH_M4 3
#define CONV_KER_DIM_M4 3
#define CONV_PAD_M4 1
#define CONV_STRIDE_M4 2
#define CONV_OUT_CH_M4 1
#define CONV_OUT_DIM_M4 3
#define CONV_BIAS_LSHIFT_M4 0
#define CONV_OUT_RSHIFT_M4 0

/*
 * Filter weights, written plane by plane: the 3x3 kernel for input channel 0,
 * then channel 1, then channel 2 (planar / CHW order).
 * NOTE: arm_convolve_HWC_q7_basic reads weights channel-interleaved (HWC), so
 * with more than one input channel these values must be reordered before they
 * are handed to that function.
 */
#define CONV_WT_M4 { \
     1,  1,  1,   -1,  0,  0,    1,  1,  1, \
     0,  0, -1,   -1,  1, -1,    0,  1,  1, \
    -1,  0,  0,    0,  1,  0,   -1, -1,  0 }

/* One bias value per output channel. */
#define CONV_BIAS_M4 {1}

/*
 * Input image, written plane by plane (planar / CHW order): 5 rows of 5
 * values for channel 0, then channel 1, then channel 2.  The same HWC
 * reordering note as for the weights applies.
 */
const int8_t in_data[CONV_IN_CH_M4 * CONV_IN_DIM_M4 * CONV_IN_DIM_M4] = {
    /* channel 0 */
    1, 1, 0, 1, 1,
    1, 1, 1, 2, 1,
    1, 2, 0, 0, 0,
    0, 2, 1, 1, 2,
    1, 2, 1, 1, 0,
    /* channel 1 */
    2, 2, 1, 0, 2,
    2, 2, 0, 1, 1,
    1, 2, 2, 1, 1,
    1, 2, 2, 2, 1,
    1, 2, 0, 1, 1,
    /* channel 2 */
    0, 0, 2, 1, 0,
    1, 2, 0, 2, 1,
    1, 1, 0, 2, 2,
    1, 1, 1, 1, 1,
    1, 2, 0, 0, 0};

/* Hand-calculated reference result, one 3x3 plane per output channel. */
const int8_t expected_out_data[CONV_OUT_CH_M4 * CONV_OUT_DIM_M4 * CONV_OUT_DIM_M4] = {
     6,  4,  3,
     5,  5,  6,
     1, -3,  2};

static const q7_t conv2_wt[CONV_IN_CH_M4*CONV_KER_DIM_M4*CONV_KER_DIM_M4*CONV_OUT_CH_M4] = CONV_WT_M4;
static const q7_t conv2_bias[CONV_OUT_CH_M4] = CONV_BIAS_M4;

/*
 * Sized for the complete output tensor (the original length of
 * CONV_OUT_DIM_M4 covered only one row).  Not referenced by the code visible
 * in this file; kept because it has external linkage.
 */
q7_t output_data[CONV_OUT_CH_M4 * CONV_OUT_DIM_M4 * CONV_OUT_DIM_M4];

/* Working buffer passed to the convolution as its q15 scratch area. */
q15_t col_buffer[5000];

/* General-purpose buffer; main() uses it to receive the layer output. */
q7_t scratch_buffer[27200];
void main(void) {
q7_t* buffer1 = scratch_buffer;
arm_convolve_HWC_q7_basic((q7_t*)in_data, CONV_IN_DIM_M4, CONV_IN_CH_M4, conv2_wt, CONV_OUT_CH_M4, CONV_KER_DIM_M4, CONV_PAD_M4, CONV_STRIDE_M4, conv2_bias, CONV_BIAS_LSHIFT_M4, CONV_OUT_RSHIFT_M4, buffer1, CONV_OUT_DIM_M4, (q15_t*)col_buffer, NULL);
printf("INPUTS\n");
for (int i=0; i< CONV_IN_CH_M4; i++) {
printf("channel %d\n",i);
for (int j=0; j< CONV_IN_DIM_M4; j++) {
for (int k=0; k< CONV_IN_DIM_M4; k++) {
printf("%4d ",in_data[i * CONV_IN_DIM_M4 * CONV_IN_DIM_M4 + j * CONV_IN_DIM_M4 + k]);
}
printf("\n");
}
}
printf("WEIGHTS\n");
for (int i=0; i< CONV_OUT_CH_M4*CONV_IN_CH_M4; i++) {
printf("channel %d\n",i);
for (int j=0; j< CONV_KER_DIM_M4; j++) {
for (int k=0; k< CONV_KER_DIM_M4; k++) {
printf("%4d ",conv2_wt[i * CONV_KER_DIM_M4 * CONV_KER_DIM_M4 + j * CONV_KER_DIM_M4 + k]);
}
printf("\n");
}
}
printf("Expected OUTPUTS\n");
for (int i=0; i< CONV_OUT_CH_M4; i++) {
printf("channel %d\n",i);
for (int j=0; j< CONV_OUT_DIM_M4; j++) {
for (int k=0; k< CONV_OUT_DIM_M4; k++) {
printf("%4d ",expected_out_data[i * CONV_OUT_DIM_M4 * CONV_OUT_DIM_M4 + j * CONV_OUT_DIM_M4 + k]);
}
printf("\n");
}
}
printf("\nM4 OUTPUTS\n");
for (int i=0; i< CONV_OUT_CH_M4; i++) {
printf("channel %d\n",i);
for (int j=0; j< CONV_OUT_DIM_M4; j++) {
for (int k=0; k< CONV_OUT_DIM_M4; k++) {
printf("%4d ",buffer1[i * CONV_OUT_DIM_M4 * CONV_OUT_DIM_M4 + j * CONV_OUT_DIM_M4 + k]);
}
printf("\n");
}
}
if(!memcmp(expected_out_data,buffer1,CONV_OUT_DIM_M4*CONV_OUT_DIM_M4*CONV_OUT_CH_M4))
{
printf("Passed\n");
}
else
{
printf("Failed\n");
}
}