I am trying to iterate over a cv::cuda::GpuMat with the following code:
__global__ void kernel(uchar* src, int rows, int cols, size_t step)
{
int rowInd = blockIdx.y * blockDim.y + threadIdx.y;
int colInd = blockIdx.x * blockDim.x + threadIdx.x;
if ((rowInd < rows) && (colInd < cols))
{
uchar * rowptr = src + (rowInd * step);
rowptr[colInd] = 255;
}
}
void invoke_kernel(cv::cuda::GpuMat _img)
{
dim3 tpb(50, 50);
dim3 bpg(((_img.cols + 49) / 50), ((_img.rows + 49)/ 50));
kernel<<<bpg, tpb>>> (_img.data, _img.rows, _img.cols, _img.step);
}
int main()
{
cv::cuda::GpuMat mat;
mat.create(cv::Size(500, 500), CV_8UC1);
std::cout << mat.rows << " " << mat.cols << std::endl;
invoke_kernel(mat);
cv::Mat img;
mat.download(img);
cv::namedWindow("test");
cv::imshow("test", img);
cv::waitKey(0);
return 0;
}
As you can see, it's just supposed to set the entire (originally black) image to white in kernels.
The image just stays black, other than first columnm, that is white. It feels like I'm doing something really stupid somewhere there, but I just can't figure it out :/
I checked that the kernel is running with correct dimnensions (10x10 blocks, 50x50 threads per block). Another problem comes when I try to use NVIDIA debugger. After some googling I've found that NVCC compiler seems to be deleting my rowInd and colInd variables during optimization and therefore the debugger does not display their values.
Thanks for your time.