-1

I need to do a RGB2GRAY image processing algorithm. I just need some help in completing the global function or how I can access the * d_src pointer. This is my code, your help will be greatly appreciated.

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include "CImg.h"
#include <iostream>

using namespace std;
using namespace cimg_library;

__global__ void rgb2gray(unsigned char * d_src, unsigned char * d_dst, int width, int height){

    int pos_x = blockIdx.x * blockDim.x + threadIdx.x;
    int pos_y = blockIdx.y * blockDim.y + threadIdx.y;

    if (pos_x >= width || pos_y >= height)
        return;

}


int main(){
    //Load image
    CImg<unsigned char> src("lena.jpg");
    int width = src.width();
    int height = src.height();
    unsigned long sizee = src.size();

    int sze = width * height;

    cout << sze << endl;

    //create pointer to image
    unsigned char *h_src = src.data();

    CImg<unsigned char> dst(width, height, 1, 1);
    unsigned char *h_dst = dst.data();

    unsigned char *d_src;
    unsigned char *d_dst;

    cout << sizee << endl;

    cudaMalloc((void**)&d_src, sizee);
    cudaMalloc((void**)&d_dst, width*height*sizeof(int));

    cudaMemcpy(d_src, h_src, sizee, cudaMemcpyHostToDevice);

    //launch the kernel
    rgb2gray << <(width/16,height/16,1), (16, 16, 1) >> >(d_src, d_dst, width, height);

    //force the printf()s to flush
    cudaDeviceSynchronize();
    // copy back the result array to the CPU
    cudaMemcpy(h_dst, d_dst, width*height, cudaMemcpyDeviceToHost);

    cudaFree(d_src);
    cudaFree(d_dst);


    CImgDisplay main_disp(dst, "After Processing");
    while (!main_disp.is_closed())
        main_disp.wait();


    return 0;
}
talonmies
  • 70,661
  • 34
  • 192
  • 269
JA7
  • 11

1 Answers1

1

Firstly, since your dst object consists of unsigned char, allocate d_dst as follows;

cudaMalloc((void**)&d_dst, width*height*sizeof(unsigned char));

Next, grid must cover every pixels, considering cases when width or height are not a multiple of 16. Launch kernel with following kernel configuration.

dim3 blkDim (16, 16, 1);
dim3 grdDim ((width + 15)/16, (height + 15)/16, 1);
rgb2gray<<<grdDim, blkDim>>>(d_src, d_dst, width, height);

Lastly, your kernel should look like this. Note that RGB channels are split in d_src.

int pos_x = blockIdx.x * blockDim.x + threadIdx.x;
int pos_y = blockIdx.y * blockDim.y + threadIdx.y;

if (pos_x >= width || pos_y >= height)
    return;

unsigned char r = d_src[pos_y * width + pos_x];
unsigned char g = d_src[(height + pos_y) * width + pos_x];
unsigned char b = d_src[(height * 2 + pos_y) * width + pos_x];

unsigned int _gray = (unsigned int)((float)(r + g + b) / 3.0f + 0.5);
unsigned char gray = _gray > 255 ? 255 : _gray;

d_dst[pos_y * width + pos_x] = gray;

You can see the full code here.

nglee
  • 1,913
  • 9
  • 32
  • Thank you very much for the explanation I wasn't sure how the channels were split. – JA7 Apr 20 '17 at 19:59
  • @JA7 You can look at [this](http://cimg.eu/reference/group__cimg__storage.html) for more information on "How pixel data are stored with CImg". – nglee Apr 20 '17 at 23:47