1

I would like to make as automatic as possible the compilation and linking of my code projects using OpenCL on OS X, I know how to do it for C++ but I am experiencing problems for OpenCL. This is the code that I am using as an example:

main.cpp:

#include <stdio.h>
#include <stdlib.h>

#ifdef __APPLE__ //Mac OSX has a different name for the header file
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

#define MEM_SIZE (128)//suppose we have a vector with 128 elements
#define MAX_SOURCE_SIZE (0x100000)

int main()
{
    //In general Intel CPU and NV/AMD's GPU are in different platforms
    //But in Mac OSX, all the OpenCL devices are in the platform "Apple"
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_context context = NULL;
    cl_command_queue command_queue = NULL; //"stream" in CUDA
    cl_mem memobj = NULL;//device memory
    cl_program program = NULL; //cl_prgram is a program executable created from the source or binary
    cl_kernel kernel = NULL; //kernel function
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret; //accepts return values for APIs

    float mem[MEM_SIZE]; //alloc memory on host(CPU) ram

    //OpenCL source can be placed in the source code as text strings or read from another file.
    FILE *fp;
    const char fileName[] = "./kernel.cl";
    size_t source_size;
    char *source_str;
    cl_int i;

    // read the kernel file into ram
    fp = fopen(fileName, "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = (char *)malloc(MAX_SOURCE_SIZE);
    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp );
    fclose( fp );

    //initialize the mem with 1,2,3...,n
    for( i = 0; i < MEM_SIZE; i++ ) {
        mem[i] = i;
    }

    //get the device info
    ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);

    //create context on the specified device
    context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);

    //create the command_queue (stream)
    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    //alloc mem on the device with the read/write flag
    memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(float), NULL, &ret);

    //copy the memory from host to device, CL_TRUE means blocking write/read
    ret = clEnqueueWriteBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(float), mem, 0, NULL, NULL);

    //create a program object for a context
    //load the source code specified by the text strings into the program object
    program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);

    //build (compiles and links) a program executable from the program source or binary
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

    //create a kernel object with specified name
    kernel = clCreateKernel(program, "vecAdd", &ret);

    //set the argument value for a specific argument of a kernel
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);

    //define the global size and local size (grid size and block size in CUDA)
    size_t global_work_size[3] = {MEM_SIZE, 0, 0};
    size_t local_work_size[3]  = {MEM_SIZE, 0, 0};

    //Enqueue a command to execute a kernel on a device ("1" indicates 1-dim work)
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);

    //copy memory from device to host
    ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(float), mem, 0, NULL, NULL);

    //print out the result
    for(i=0; i<MEM_SIZE; i++) {
        printf("mem[%d] : %.2f\n", i, mem[i]);
    }

    //clFlush only guarantees that all queued commands to command_queue get issued to the appropriate device
    //There is no guarantee that they will be complete after clFlush returns
    ret = clFlush(command_queue);
    //clFinish blocks until all previously queued OpenCL commands in command_queue are issued to the associated device and have completed.
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseMemObject(memobj);//free memory on device
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);

    free(source_str);//free memory on host

    return 0;
}

kernel.cl:

__kernel void vecAdd(__global float* a)
{
    int gid = get_global_id(0);// in CUDA = blockIdx.x * blockDim.x + threadIdx.x

    a[gid] += a[gid];
}

and this is my CMakelists.txt so far:

#Minimal OpenCL CMakeLists.txt by StreamHPC
cmake_minimum_required (VERSION 3.1)
project(GreatProject)
# Handle OpenCL
find_package(OpenCL REQUIRED)
include_directories(${OpenCL_INCLUDE_DIRS})
link_directories(${OpenCL_LIBRARY})
add_executable (main main.cpp)
target_include_directories (main PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries (main ${OpenCL_LIBRARY})

Apparently it compiles, but when I run the executable I get the error:

Failed to load kernel.

I compiled successfully the code by hand following this answer, but my project is willing to have various kernels and various C++ files and headers, therefore I would like to use CMake in order to automatize the compilation of the project.

How should I modify my CMakeLists.txt script?

NOTE:

I guess that the file kernel.cl is not being compiled, I don't what is a proper way to guarantee having a CMakeLists.txt that always compiles all the *.cl files in the project directory in addition to all the *.cpp. Would be even better if it is posible to linking agains MKL.

  • The problem seems to be opening the file for reading. Just make sure the file exists in the directory you give. As for the note, compilation of .cl files will not happen during compilation of your C++ code. OpenCL will compile the .cl code during run-time which means you don't add .cl files to the CMakeList. Unless you use an offline compiler. Your source code as of now, will compile the .cl file as long as it can find the file. – parallel highway Jul 20 '17 at 14:32

1 Answers1

2

For mac opencl is used as a framework you need to do the following to link libraries from the framework.

cmake_minimum_required (VERSION 2.6)
project (montecarlo_cl)
find_package(OpenCL REQUIRED)
include_directories( ${OPENCL_INCLUDE_DIR})     
set (montecarlo_cl_src montecarlo_ocl.c)
add_executable (montecarlo_cl ${montecarlo_cl_src})
target_link_libraries(montecarlo_cl "-framework OpenCL" )
psnbaba
  • 91
  • 3