I'm trying to optimize this code by loop unrolling,
void naive_flip(int dim, pixel *src, pixel *dst)
{
int i, j;
for (i = 0; i < dim; i++){
for (j = 0; j < dim; j++){
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
}
}
}
However, I haven't really done it before, so when I tried it, I got this
void flip_one(int dim, pixel *src, pixel *dst)
{
//i will be attempting loop unrolling to optimize code
int i, j;
for (i=0; i<dim; i+=32)
{
for (int j=0; j<dim; j+=32)
{
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j+1, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j+2, dim)].blue = src[RIDX(i, j, dim)].blue;
}
for (int j=0; j<dim; j+=32)
{
dst[RIDX_F(i+1, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i+1, j+1, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i+1, j+2, dim)].blue = src[RIDX(i, j, dim)].blue;
}
for (int j=0; j<dim; j+=32)
{
dst[RIDX_F(i+2, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i+2, j+1, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i+2, j+2, dim)].blue = src[RIDX(i, j, dim)].blue;
}
for (int j=0; j<dim; j+=32)
{
dst[RIDX_F(i+3, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i+3, j+1, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i+3, j+2, dim)].blue = src[RIDX(i, j, dim)].blue;
}
}
}
When running the code, it doesn't work, and it give me this error:
"ERROR: Dimension=96, 9216 errors
E.g., The following two pixels should have equal value:
src[9215].{red,green,blue} = {22543,1426,53562}
dst[9120].{red,green,blue} = {0,0,0}"
Any help on what I'm doing wrong or what I should be doing is appreciated
EDIT I updated my code with this
void flip_one(int dim, pixel *src, pixel *dst)
{
//i will be attempting loop unrolling to optimize code
int i, j;
for (i=0; i<dim; i++)
{
for (int j=0; j<dim; j++)
{
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
}
}
}
I am no longer getting the error (yay!) but this doesn't actually speed it up, in fact it slows it down. Maybe I did something else wrong, but, I don't know what.
EDIT I updated the code to look like
void flip_one(int dim, pixel *src, pixel *dst)
{
//i will be attempting loop unrolling to optimize code
int i, j;
for (i=0; i<dim; i++)
{
for (int j=0; j<dim; j+=4)
{
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
dst[RIDX_F(i, j+1, dim)].red = src[RIDX(i, j+1, dim)].red;
dst[RIDX_F(i, j+1, dim)].green = src[RIDX(i, j+1, dim)].green;
dst[RIDX_F(i, j+1, dim)].blue = src[RIDX(i, j+1, dim)].blue;
dst[RIDX_F(i, j+2, dim)].red = src[RIDX(i, j+2, dim)].red;
dst[RIDX_F(i, j+2, dim)].green = src[RIDX(i, j+2, dim)].green;
dst[RIDX_F(i, j+2, dim)].blue = src[RIDX(i, j+2, dim)].blue;
dst[RIDX_F(i, j+3, dim)].red = src[RIDX(i, j+3, dim)].red;
dst[RIDX_F(i, j+3, dim)].green = src[RIDX(i, j+3, dim)].green;
dst[RIDX_F(i, j+3, dim)].blue = src[RIDX(i, j+3, dim)].blue;
}
}
}