I'm having trouble unrolling nested `for` loops. I understand the concept and I'm trying to put it into practice, but I'm getting tripped up on editing the statements inside my `for` loops to match the unrolling.
If someone could show me an efficient unroll and walk me through it, that would be a huge help.
Here is the loop section I want to unroll:
for (i=1 ; i < WIDTH-1 ; ++i)
{
for (j = 1 ; j < HEIGHT-1 ; ++j)
{
n = getNeighbors(prv, i, j); /* This is where I'm confused */
mask = (prev[i][j] << 1);
next[i][j] = !(((n >> prev[i][j]) ^ 3) ^ mask);
}
}
UPDATE: Would this be correct?
for (i=1 ; i < WIDTH-1 ; i+=4)
{
for (j = 1 ; j < HEIGHT-1 ; j+=4)
{
n = getNeighbors(prv, i, j);
mask = (prev[i][j] << 1);
next[i][j] = !(((n >> prev[i][j]) ^ 3) ^ mask);
n = getNeighbors(prv, i, j+1);
mask = (prev[i][j+1] << 1);
next[i][j+1] = !(((n >> prev[i][j+1]) ^ 3) ^ mask);
n = getNeighbors(prv, i, j+2);
mask = (prev[i][j+2] << 1);
next[i][j+2] = !(((n >> prev[i][j+2]) ^ 3) ^ mask);
n = getNeighbors(prv, i, j+3);
mask = (prev[i][j+3] << 1);
next[i][j+3] = !(((n >> prev[i][j+3]) ^ 3) ^ mask);
}
for (j = 1 ; j < HEIGHT-1 ; j+=4)
{
n = getNeighbors(prv, i+1, j);
mask = (prev[i+1][j] << 1);
next[i+1][j] = !(((n >> prev[i+1][j]) ^ 3) ^ mask);
n = getNeighbors(prv, i+1, j+1);
mask = (prev[i+!][j+1] << 1);
next[i+1][j+1] = !(((n >> prev[i+1][j+1]) ^ 3) ^ mask);
n = getNeighbors(prv, i+1, j+2);
mask = (prev[i+1][j+2] << 1);
next[i+1][j+2] = !(((n >> prev[i+1][j+2]) ^ 3) ^ mask);
n = getNeighbors(prv, i+1, j+3);
mask = (prev[i+1][j+3] << 1);
next[i+1][j+3] = !(((n >> prev[i+1][j+3]) ^ 3) ^ mask);
}
for (j = 1 ; j < HEIGHT-1 ; j+=4)
{
n = getNeighbors(prv, i+2, j);
mask = (prev[i+2][j] << 1);
next[i+2][j] = !(((n >> prev[i+2][j]) ^ 3) ^ mask);
n = getNeighbors(prv, i+2, j+1);
mask = (prev[i+2][j+1] << 1);
next[i+2][j+1] = !(((n >> prev[i+2][j+1]) ^ 3) ^ mask);
n = getNeighbors(prv, i+2, j+2);
mask = (prev[i+2][j+2] << 1);
next[i+2][j+2] = !(((n >> prev[i+2][j+2]) ^ 3) ^ mask);
n = getNeighbors(prv, i+2, j+3);
mask = (prev[i+2][j+3] << 1);
next[i+2][j+3] = !(((n >> prev[i+2][j+3]) ^ 3) ^ mask);
}
for (j = 1 ; j < HEIGHT-1 ; j+=4)
{
n = getNeighbors(prv, i+3, j);
mask = (prev[i+3][j] << 1);
next[i+3][j] = !(((n >> prev[i+3][j]) ^ 3) ^ mask);
n = getNeighbors(prv, i+3, j+1);
mask = (prev[i][j+1] << 1);
next[i+3][j+1] = !(((n >> prev[i+3][j+1]) ^ 3) ^ mask);
n = getNeighbors(prv, i+3, j+2);
mask = (prev[i][j+2] << 1);
next[i+3][j+2] = !(((n >> prev[i+3][j+2]) ^ 3) ^ mask);
n = getNeighbors(prv, i+3, j+3);
mask = (prev[i+3][j+3] << 1);
next[i+3][j+3] = !(((n >> prev[i+3][j+3]) ^ 3) ^ mask);
}
}