Any advise in optimizing the following code? The code first grayscales, inverts and then thresholds the image (code not included, because it is trivial). It then sums the elements of each row and column (all elements are either 1 or 0). It then finds the row and column index of the row and column with the highest value.
The code is supposed to find the centroid of the image and it works, but I want to make it faster
I'm developing for API 23, so a reduction kernel can not be used.
Java snippet:
private int[] sumValueY = new int[640];
private int[] sumValueX = new int[480];
rows_indices_alloc = Allocation.createSized( rs, Element.I32(rs), height, Allocation.USAGE_SCRIPT);
col_indices_alloc = Allocation.createSized( rs, Element.I32(rs), width, Allocation.USAGE_SCRIPT);
public RenderscriptProcessor(RenderScript rs, int width, int height)
{
mScript.set_gIn(mIntermAllocation);
mScript.forEach_detectX(rows_indices_alloc);
mScript.forEach_detectY(col_indices_alloc);
rows_indices_alloc.copyTo(sumValueX);
col_indices_alloc.copyTo(sumValueY);
}
Renderscript.rs snippet:
#pragma version(1)
#pragma rs java_package_name(org.gearvrf.renderscript)
#include "rs_debug.rsh"
#pragma rs_fp_relaxed
const int mImageWidth=640;
const int mImageHeight=480;
int32_t maxsX=-1;
int32_t maxIndexX;
int32_t maxsY=-1;
int32_t maxIndexY;
rs_allocation gIn;
void detectX(int32_t v_in, int32_t x, int32_t y) {
int32_t sum=0;
for ( int i = 0; i < (mImageWidth); i++) {
float4 f4 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, i, x));
sum+=(int)f4.r;
}
if((sum>maxsX)){
maxsX=sum;
maxIndexX = x;
}
}
void detectY(int32_t v_in, int32_t x, int32_t y) {
int32_t sum=0;
for ( int i = 0; i < (mImageHeight); i++) {
float4 f4 = rsUnpackColor8888(rsGetElementAt_uchar4(gIn, x, i));
sum+=(int)f4.r;
}
if((sum>maxsY)){
maxsY=sum;
maxIndexY = x;
}
}
Any help would be appreciated