0

Why am I getting 0 as result?

// Host code

#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>
#include "CL\cl.h"

void runCL(double * a, double * b, double * c, const int & n) {

    cl_int err;
    cl_uint numEntries;
    cl_uint numPlatforms;

    err = clGetPlatformIDs(0, nullptr, &numPlatforms);
    //check err
    std::vector<cl_platform_id> platform(numPlatforms);
    err = clGetPlatformIDs(numPlatforms, &platform[0], nullptr);

    //Let's print the platforms
    size_t strLen;
    for (auto i = 0; i < numPlatforms; ++i) {
        err = clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, 0, nullptr, &strLen);
        std::cout << "strLen = " << strLen << std::endl;
        //Check err
        std::vector<char> platformName(strLen);
        err = clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, strLen, &platformName[0], nullptr);
        std::cout << "Platform[" << i << "] = " << std::string(platformName.data()) << std::endl;
    }

    //We now know what the platforms are let's pick a specific device

    cl_uint numDevices;
    cl_device_id device;

    err = clGetDeviceIDs(platform[0],CL_DEVICE_TYPE_GPU,0,nullptr,&numDevices);
    //check err
    std::vector<cl_device_id> deviceId(numDevices);
    err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, numDevices, &deviceId[0],nullptr);

    for (auto i = 0; i < numDevices; ++i) {

        err = clGetDeviceInfo(deviceId[0], CL_DEVICE_NAME, 0, nullptr, &strLen);
        //check err
        std::vector<char> deviceName(strLen);
        err = clGetDeviceInfo(deviceId[0], CL_DEVICE_NAME, strLen, &deviceName[0], nullptr);
        std::cout << "device[" << i << "] = " << std::string(deviceName.data()) << std::endl;

    }

    //Now I know the device, I can create context and commant queuq

    cl_context context;
    cl_command_queue cmd_queue;

    context = clCreateContext(0, 1, &deviceId[0], nullptr, nullptr, nullptr);
    cmd_queue = clCreateCommandQueue(context, deviceId[0], 0, nullptr);

    //Let's allocate memory
    size_t bufferSize = sizeof(double)*n;
    cl_mem a_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, n, nullptr, nullptr);
    err = clEnqueueWriteBuffer(cmd_queue, a_mem, CL_TRUE, 0, bufferSize, (void*)a, 0, nullptr, nullptr);

    cl_mem b_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, n, nullptr, nullptr);
    err |= clEnqueueWriteBuffer(cmd_queue, b_mem, CL_TRUE, 0, bufferSize, (void*)b, 0, nullptr, nullptr);

    cl_mem c_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, bufferSize, nullptr, nullptr);

    if (CL_SUCCESS != err) {
        std::cout << "Error in clEnqueueWriteBuffer" << std::endl;
    }

    clFinish(cmd_queue);

    //Now let's create the program (compiling kernels)
    cl_program program[1];
    cl_kernel kernel[1];

    const char * filename = "device.cl";

    program[0] = clCreateProgramWithSource(context, 1, (const char**)&filename, nullptr, &err);
    err = clBuildProgram(program[0], 0, nullptr, nullptr, nullptr, nullptr);
    if (CL_SUCCESS != err) {
        std::cout << "Error in clBuildProgram" << std::endl;
    }

    kernel[0] = clCreateKernel(program[0], "vecAdd", &err);

    //Setting kernel args
    err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), &a_mem);
    err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), &b_mem);
    err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), &c_mem);
    if (CL_SUCCESS != err) {
        std::cout << "Error in clSetKernelArg" << std::endl;
    }

    //Actual execution
    size_t globalWorkSize = n;
    err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1,nullptr, &globalWorkSize, nullptr,0,nullptr,nullptr);

    clFinish(cmd_queue);

    err = clEnqueueReadBuffer(cmd_queue, c_mem, CL_TRUE, 0, bufferSize, c, 0, nullptr, nullptr);
    clFinish(cmd_queue);

    //Teardown

    clReleaseCommandQueue(cmd_queue);
    clReleaseContext(context);
}

// Entry point: fills two n-element input vectors, asks runCL to add them
// element-wise on the OpenCL device, and returns 0.
// With the inputs below every element of the result should equal n
// ((i + 1) + (n - i - 1)).
int main(int argc, char **argv) {
    //stuff before running openCL

    const int n = 32;

    std::vector<double> a(n);
    std::vector<double> b(n);
    // A vector instead of `new double[n]`: the storage is zero-initialised and
    // released automatically, so nothing leaks and nothing is read uninitialised
    // if runCL bails out early.
    std::vector<double> c(n);

    for (int i = 0; i < n; ++i) {
        a[i] = static_cast<double>(i + 1);
        b[i] = static_cast<double>(n - i - 1);
    }

    runCL(a.data(), b.data(), c.data(), n);
    //stuff after running openCL
    /*for (auto i = 0; i < n; ++i)
        std::cout << "res[" << i << "] = " << c[i] << std::endl;*/
    return 0;
}

And kernel code

// Add your device OpenCL code
/*
 * Element-wise addition of two arrays of doubles.
 *
 * Each work-item handles exactly one element: the work-item whose global id
 * along dimension 0 is i stores inputA[i] + inputB[i] into outputC[i].
 * There is no bounds check, so all three buffers must hold at least as many
 * elements as the global work size used to launch the kernel.
 */
__kernel void vecAdd(
    __global double * inputA, 
    __global double * inputB, 
    __global double * outputC) { 
    /* Position of this work-item in the 1-D index space. */
    const size_t gid = get_global_id(0);

    const double lhs = inputA[gid];
    const double rhs = inputB[gid];
    outputC[gid] = lhs + rhs;
}

This is my first OpenCL program. I tried to follow a tutorial from YouTube (not copied and pasted, but inspired by it).

Can you tell me what I'm missing? (Hopefully it is something silly, though I do get a bit confused by the arguments of the various OpenCL functions.)

Update: added some error checking and I get the output:

strLen = 12
Platform[0] = NVIDIA CUDA
strLen = 16
Platform[1] = Intel(R) OpenCL
strLen = 42
Platform[2] = Experimental OpenCL 2.1 CPU Only Platform
device[0] = GeForce GTX 960M
Error in clEnqueueWriteBuffer
Error in clBuildProgram
Error in clSetKernelArg

So there is an error in the way I set up the buffers. Can you help me with that?

w-m
  • 10,772
  • 1
  • 42
  • 49
user8469759
  • 2,522
  • 6
  • 26
  • 50

1 Answers1

0

You should read the exact error code that it's throwing, so you know what you're dealing with, for example like this: https://stackoverflow.com/a/24336429/463796

Wrap that in a helper (maybe a macro) so you have it for every call that fails. You can then look up the error in the documentation for the command that threw the error. Usually there is a good explanation in the doc already.

Another indispensable thing in OpenCL development is to get the error string if the program build fails (as it does in your example). Have it print that as well, like this: https://stackoverflow.com/a/29492261/463796

w-m
  • 10,772
  • 1
  • 42
  • 49
  • Thanks for the answer, the actual error is `CL_INVALID_VALUE`, right after `clEnqueueWriteBuffer`, what could it be? – user8469759 Jun 02 '18 at 18:09
  • OpenCL doc says: `CL_INVALID_VALUE if the region being written specified by (offset, cb) is out of bounds or if ptr is a NULL value.` Hint: in this case the region being written is out of bounds. Create the buffers with the byte size (`bufferSize`), not with the element size (`n`). – w-m Jun 02 '18 at 18:31
  • I wrote that code at 1:00 am, it must be something silly. (Let me fix the size for starters...) – user8469759 Jun 02 '18 at 18:34
  • Can you please point out which documentation you are talking about? (By the way, that error is fixed, but now I get `CL_BUILD_PROGRAM_FAILURE`.) – user8469759 Jun 02 '18 at 18:40
  • The official Khronos OpenCL specification: https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clEnqueueWriteBuffer.html (you can change the version number after sdk/ to get the one you're using). – w-m Jun 02 '18 at 18:42