I am trying to add 50 to every element of a 2D array using NEON intrinsics. Here is my code. Is there a better way of doing this, or a way to optimize it?
/*
 * Adds 50 to every byte of a height x width image and writes the results
 * to output in row-major order.
 *
 * height, width  dimensions of array2D; nothing is written if height <= 0
 *                or width <= 0 (width must still be positive for the
 *                variably-modified parameter type to be valid).
 * array2D        input image, read as height*width contiguous bytes.
 * output         destination for height*width bytes; exactly that many
 *                bytes are written, never more.
 *
 * The addition wraps modulo 256 (e.g. 250 + 50 -> 44), matching vaddq_u8.
 * If clamping at 255 is wanted instead, vqaddq_u8 is the saturating form.
 */
void fun(int height,int width,unsigned char array2D[][width],unsigned char *output){
    enum { ADDEND = 50, LANES = 16 };

    if (height <= 0 || width <= 0) {
        return;
    }

    const unsigned char *src = &array2D[0][0];
    /* Widen before multiplying so large images cannot overflow int. */
    const long long total = (long long)height * width;
    long long i = 0;

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    /* Process only full 16-byte chunks with NEON; stopping at i + LANES <= total
     * keeps every vector load/store inside the buffers. */
    const uint8x16_t addend = vdupq_n_u8(ADDEND);
    for (; i + LANES <= total; i += LANES) {
        vst1q_u8(output + i, vaddq_u8(vld1q_u8(src + i), addend));
    }
#endif

    /* Scalar tail: the remaining (total % 16) bytes, or everything when
     * NEON is not available on the target. */
    for (; i < total; i++) {
        output[i] = (unsigned char)(src[i] + ADDEND);
    }
}