-1

I am having an issue with the following code. The following code takes an input image and it should save the grayscale of it. Unfortunately, it seems to perform the expected behavior but it is processing just a part of the image and not the whole. It seems that the problems occurs in the cudamemcpy from device to host.

i believe that probably I got some issue while I am allocating memory in Cuda.

__global__ void rgb2grayCudaKernel(unsigned char *inputImage, unsigned char *grayImage, const int width, const int height) 
{


    int ty = (blockIdx.x * blockDim.x) + threadIdx.x;
    //int tx = (blockIdx.x * blockDim.x) + threadIdx.x;
    int tx = (blockIdx.y * blockDim.y) + threadIdx.y;

    if( (ty < height && tx<width) ) 
    {

            float grayPix = 0.0f;
            float r = static_cast< float >(inputImage[(ty * width) + tx]);          
            float g = static_cast< float >(inputImage[(width * height) + (ty * width) + tx]);
            float b = static_cast< float >(inputImage[(2 * width * height) + (ty * width) + tx]);

            grayPix = (0.3f * r) + (0.59f * g) + (0.11f * b); 

            grayImage[(ty * width) + tx] = static_cast< unsigned char >(grayPix);   

    }   
}

//***************************************rgb2gray function, call of kernel in here *************************************
void rgb2grayCuda(unsigned char *inputImage, unsigned char *grayImage, const int width, const int height)
{
    unsigned char *inputImage_c, *grayImage_c;
    const int sizee= (width*height);    

// **********memory allocation for pointers and cuda******************


    cudaMalloc((void **) &inputImage_c, sizee);
    checkCudaError("im not alloc!");
    cudaMalloc((void **) &grayImage_c, sizee);
    checkCudaError("gray not alloc !");

//***********copy to device*************************
    cudaMemcpy(inputImage_c, inputImage, sizee*sizeof(unsigned char), cudaMemcpyHostToDevice);
    checkCudaError("im not send !");
    cudaMemcpy(grayImage_c, grayImage, sizee*sizeof(unsigned char), cudaMemcpyHostToDevice);
    checkCudaError("gray not send !");
    dim3 thrb(32,32);
    dim3 numb (ceil(width*height/1024));
//**************Execute Kernel (Timer in here)**************************
    NSTimer kernelTime = NSTimer("kernelTime", false, false);
    kernelTime.start();

    rgb2grayCudaKernel<<<numb,1024>>> (inputImage_c, grayImage_c, width, height);
    checkCudaError("kernel!");
    kernelTime.stop();
//**************copy back to host*************************
    printf("/c");
    cudaMemcpy(grayImage, grayImage_c, sizee*sizeof(unsigned char), cudaMemcpyDeviceToHost);
    checkCudaError("Receiving data from CPU failed!");

//*********************free memory***************************
    cudaFree(inputImage_c);
    cudaFree(grayImage_c);


//**********************print time****************  
cout << fixed << setprecision(6);
cout << "rgb2gray (cpu): \t\t" << kernelTime.getElapsed() << " seconds." << endl;

}
Michalis
  • 3
  • 7

1 Answers1

1
const int sizee= (width*height); 

should be:

const int sizee= (width*height*3); 

for rgb data (1 byte per channel).

I believe in bitmap images, the colors should be interleaved as in:

rgb of pixel1, rgb of pixel 2 ... rgb of pixel width*height

Therefore your kernel should be:

__global__ void rgb2grayCudaKernel(unsigned char *inputImage, unsigned char *grayImage, const int width, const int height) 
{


    int tx = (blockIdx.y * blockDim.y) + threadIdx.y;
    int ty = (blockIdx.x * blockDim.x) + threadIdx.x;

    if( (ty < height && tx<width) ) 
    {
            unsigned int pixel = ty*width+tx;
            float grayPix = 0.0f;
            float r = static_cast< float >(inputImage[pixel*3]);          
            float g = static_cast< float >(inputImage[pixel*3+1]);
            float b = static_cast< float >(inputImage[pixel*3+2]);

            grayPix = (0.3f * r) + (0.59f * g) + (0.11f * b); 

            grayImage[pixel] = static_cast< unsigned char >(grayPix);   

    }   
}

Also, from what I saw luminosity is calculated as 0.21 R + 0.72 G + 0.07 B.

Christian Sarofeen
  • 2,202
  • 11
  • 18
  • Thank you for your reply. Unfortunately, i tried your approach but now i am ending up with segmentation fault. – Michalis May 15 '15 at 11:28
  • The error is persisted on the following line when i am trying to get the data back from the device: cudaMemcpy(grayImage, grayImage_c, sizee*sizeof(unsigned char), cudaMemcpyDeviceToHost); – Michalis May 15 '15 at 11:46
  • Your grayImage is not of sizee, it only has one channel, it is of width x height x sizeof(unsigned char) – Christian Sarofeen May 15 '15 at 12:56
  • Also, make sure memory for grayImage was allocated correctly, and sufficiently. – Christian Sarofeen May 15 '15 at 12:58
  • i found the solution. I add cudaMemset for the inputimage and the grayimage and now its working fine – Michalis May 15 '15 at 16:37