0

I know it looks like I only visit when I need something but I have another question.

Building on my previous question, where I successfully ran a basic OpenGL program. I'm now adding OpenCL interop with a trivial CL script that shrinks the triangle a small amount each time it renders.

All I'm getting is a blank screen. Commenting out the section that acquires and releases the GL objects allows the rendering to work as before. Just fails to integrate the OpenCL compnent.

opencl.h is just a header with helper functions that I use to register all CL objects and free them with a single function call in the end.

EDIT: 12th June 2015 Uncommented the /*fprintf(stderr, "ERROR: " x " failed %d\n", cl_stat);*/ line which reveals more info:

ERROR: Set cl kernel arg failed -38
ERROR: Acquiring GL objects failed -5

The OpenCL error code -38 means it's an invalid memory object according to this list followed by an out of resources error when trying to re-acquire GL objects.

Here's main.c

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <glib.h>

#include <gdk/gdkx.h>
#include <epoxy/glx.h>
#include <epoxy/gl.h>
#include <gtk/gtk.h>

#include <CL/opencl.h>
#include "cl_utils.h"

#define IGNORE_VAR(type, identifier) \
{ \
  type IGNORED_VARIABLE_abcd = identifier; \
  identifier = IGNORED_VARIABLE_abcd; \
}

#define CL_ASSERT(x) \
  /*if(print_info) \
    printf(x "...\n"); */\
  if(cl_stat != CL_SUCCESS) \
  { \
    /*fprintf(stderr, "ERROR: " x " failed %d\n", cl_stat);*/ \
    goto exception; \
  }

const GLchar *vert_src = "\n" \
"#version 330                                  \n" \
"#extension GL_ARB_explicit_attrib_location: enable  \n" \
"                                              \n" \
"layout(location = 0) in vec2 in_position;     \n" \
"                                              \n" \
"void main()                                   \n" \
"{                                             \n" \
"  gl_Position = vec4(in_position, 0.0, 1.0);  \n" \
"}                                             \n";

const GLchar *frag_src = "\n" \
"void main (void)                              \n" \
"{                                             \n" \
"  gl_FragColor = vec4(1.0, 1.0, 1.0, 1.0);    \n" \
"}                                             \n";

const char *cl_src = "\n" \
"typedef struct Point{                         \n" \
"  float x;                                    \n" \
"  float y;                                    \n" \
"} Point;                                      \n" \
"                                              \n" \
"__kernel void cl_func(__global Point* point)  \n" \
"{                                             \n" \
"  const int i = get_global_id(0);             \n" \
"  const float d = 0.99;                       \n" \
"                                              \n" \
"  if(i>=3)                                    \n" \
"    return;                                   \n" \
"                                              \n" \
"  point[i].x = point[i].x * d;                \n" \
"  point[i].y = point[i].y * d;                \n" \
"}                                             \n";

struct cl
{
  clu_object_stack* stack;

  cl_platform_id* platform;
  cl_uint num_platforms;

  cl_device_id* device;
  cl_uint num_devices;

  cl_context context;
  cl_context_properties properties[7];

  cl_command_queue queue;

  cl_program program;
  cl_kernel kernel;

  cl_mem buffer;
} cl;

struct cl cl;

GLuint gl_vao, gl_buffer, gl_program;

cl_int init_cl(GtkGLArea *area)
{
  cl_int cl_stat;

  cl.stack = cluCreateObjectStack(44);

  cl_stat = clGetPlatformIDs(0, NULL, &cl.num_platforms);
  CL_ASSERT("Got number of platforms");

  cl.platform = malloc((size_t)cl.num_platforms * sizeof(cl_platform_id));
  cl_stat = clGetPlatformIDs(cl.num_platforms, cl.platform, NULL);
  CL_ASSERT("Got platforms");

  cl_stat = clGetDeviceIDs(cl.platform[0], CL_DEVICE_TYPE_GPU, 0, NULL, &cl.num_devices);
  CL_ASSERT("Got number of devices");
  printf("Number of GPU devices: %d\n", cl.num_devices);

  if(cl.num_devices == 0)
  {
    fprintf(stderr, "Num devices cannot be 0\n");
    goto exception;
  }

  cl.device = malloc((size_t)cl.num_devices * sizeof(cl_device_id));
  cl_stat = clGetDeviceIDs(cl.platform[0], CL_DEVICE_TYPE_GPU, cl.num_devices, cl.device, NULL);
  CL_ASSERT("Got devices");

  if(cl.device == NULL)
  {
    fprintf(stderr, "Devices list is NULL\n");
    goto exception;
  }

  gtk_gl_area_make_current (area);
  cl.properties[0] = CL_GL_CONTEXT_KHR;
  cl.properties[1] = (cl_context_properties) glXGetCurrentContext();
  cl.properties[2] = CL_GLX_DISPLAY_KHR;
  cl.properties[3] = (cl_context_properties) glXGetCurrentDisplay();
  cl.properties[4] = CL_CONTEXT_PLATFORM;
  cl.properties[5] = (cl_context_properties) cl.platform[0];
  cl.properties[6] = 0;

  cl.context = cluCreateContext(cl.stack, cl.properties, cl.num_devices, cl.device, NULL, NULL, &cl_stat);
  CL_ASSERT("Created cl context");

  cl.queue = cluCreateCommandQueue(cl.stack, cl.context, cl.device[0], 0, &cl_stat);
  CL_ASSERT("Created command queue");

  cl.buffer = cluCreateFromGLBuffer(cl.stack, cl.context, CL_MEM_WRITE_ONLY, gl_buffer, NULL);
  CL_ASSERT("Created cl memory object from gl buffer");

  cl.program = cluCreateProgramWithSource(cl.stack, cl.context, 1, (const char **)&cl_src, NULL, &cl_stat);
  CL_ASSERT("Created cl program object");

  cl_stat = clBuildProgram(cl.program, cl.num_devices, cl.device, NULL, NULL, NULL);
  if(cl_stat != CL_SUCCESS)
  {
    size_t ret_size;
    clGetProgramBuildInfo(cl.program, cl.device[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_size);
    char e_str[ret_size];
    clGetProgramBuildInfo(cl.program, cl.device[0], CL_PROGRAM_BUILD_LOG, ret_size, e_str, &ret_size);
    printf("%s\n", e_str);
  }
  CL_ASSERT("Built cl program object");

  cl.kernel = cluCreateKernel(cl.stack, cl.program, "cl_func", &cl_stat);
  CL_ASSERT("Created cl kernel object");

  return 0;
exception:
  return 1;
}

static gboolean realise(GtkGLArea *area, GdkGLContext *context);
static gboolean render(GtkGLArea *area, GdkGLContext *context);

int main(int argc, char** argv)
{
  gtk_init(&argc, &argv);

  GtkWidget *window  = gtk_window_new(GTK_WINDOW_TOPLEVEL),
            *gl_area = gtk_gl_area_new();

  g_signal_connect(window,  "delete-event", G_CALLBACK(gtk_main_quit), NULL);
  g_signal_connect(gl_area, "realize",      G_CALLBACK(realise),       NULL);
  g_signal_connect(gl_area, "render",       G_CALLBACK(render),        NULL);

  gtk_container_add(GTK_CONTAINER(window), gl_area);

  gtk_widget_show_all(window);

  gtk_main();

  cluFreeObjectStack(cl.stack);
  free(cl.platform);
  free(cl.device);

  return 0;
}

static gboolean realise(GtkGLArea *area, GdkGLContext *context)
{
  IGNORE_VAR(GdkGLContext*, context);

  gtk_gl_area_make_current(GTK_GL_AREA(area));
  if (gtk_gl_area_get_error (GTK_GL_AREA(area)) != NULL)
  {
    printf("Failed to initialiize buffers\n");
    return FALSE;
  }

  GLfloat verts[] = 
  {
    +0.0f, +1.0f,
    -1.0f, -1.0f,
    +1.0f, -1.0f,
  };

  GLuint frag_shader, vert_shader;
  frag_shader = glCreateShader(GL_FRAGMENT_SHADER);
  vert_shader = glCreateShader(GL_VERTEX_SHADER);

  glShaderSource(frag_shader, 1, &frag_src, NULL);
  glShaderSource(vert_shader, 1, &vert_src, NULL);

  glCompileShader(frag_shader);
  glCompileShader(vert_shader);

  gl_program = glCreateProgram();
  glAttachShader(gl_program, frag_shader);
  glAttachShader(gl_program, vert_shader);
  glLinkProgram(gl_program);

  glGenBuffers(1, &gl_buffer);
  glBindBuffer(GL_ARRAY_BUFFER, gl_buffer);
  glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_DYNAMIC_DRAW);

  glGenVertexArrays(1, &gl_vao);
  glBindVertexArray(gl_vao);

  glEnableVertexAttribArray(0);
  glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, (void*)0);
  glBindVertexArray(0);

  //glDeleteBuffers(1, &gl_buffer);
  if(init_cl(area))
    return FALSE;

  return TRUE;
}

static gboolean render(GtkGLArea *area, GdkGLContext *context)
{
  IGNORE_VAR(GdkGLContext*, context);
  IGNORE_VAR(GtkGLArea*, area);
  cl_int cl_stat;

  glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
  glClearColor(0.0, 0.0, 0.0, 1.0);

  glUseProgram(gl_program);
  glBindVertexArray(gl_vao);
  glDrawArrays(GL_TRIANGLES, 0, 3);

  glBindVertexArray (0);
  glUseProgram (0);

  glFinish();

  cl_stat = clEnqueueAcquireGLObjects(cl.queue, 1, &cl.buffer, 0, NULL, NULL);
  CL_ASSERT("Acquiring GL objects");

  cl_stat = clSetKernelArg(cl.kernel, 0, sizeof(cl_mem), (const void*)cl.buffer);
  CL_ASSERT("Set cl kernel arg");

  //size_t g_sz[1] = { 32 };
  //cl_stat = clEnqueueNDRangeKernel(cl.queue, cl.kernel, 1, NULL, g_sz, NULL, 0, NULL, NULL);
  cl_stat = clEnqueueTask(cl.queue, cl.kernel, 0, NULL, NULL);
  CL_ASSERT("Executing cl kernel");
  cl_stat = clEnqueueReleaseGLObjects(cl.queue, 1, &cl.buffer, 0, NULL, NULL);
  CL_ASSERT("Releasing GL buffer");
  clFinish(cl.queue);

  return TRUE;

exception:
  return FALSE;
}

The SConstruct

import os

CFLAGS = '--std=c11 -g -Wall'
ENV = {'PATH':os.environ['PATH']}

env = Environment(CFLAGS=CFLAGS, ENV=ENV)

if os.name is 'posix':
  env['ENV']['TERM'] = os.environ['TERM']
  env.ParseConfig('pkg-config --cflags --libs gtk+-3.0')
  env.Append(LIBS = ['epoxy', 'GL', 'OpenCL'])
  env.Program(target='gl', source=['main.c', 'cl_utils.c'])

# vim: set filetype=python:

cl_utils.c

#include <stdlib.h>
#include <stdio.h>

#include "cl_utils.h"

#define FUNC __func__

#define CL_ASSERT(object, label) \
  if(!object) \
  { \
    fprintf(stderr, "CL_ERROR: failed to create CL %s object: %d\n", #object, *cl_stat); \
    goto label; \
  } \
  else \
  { \
    printf("CL_INFO: created cl_%s object\n", #object); \
  }

#define PLATFORM_INFO(_) \
    _(PROFILE) \
    _(VERSION) \
    _(NAME) \
    _(VENDOR) \
    _(EXTENSIONS)

const char* space = " ";

#define TO_STRING(TYPE, ENUM, ...) [ENUM] = #TYPE,
const char* CLU_TYPE_STRING[] = { CLU_TABLE(TO_STRING) };
#undef TO_STRING

#define TO_STRING(x) [CL_PLATFORM_##x] = #x,

const char* platform_info_string[] = { PLATFORM_INFO(TO_STRING) };
#undef TO_STRING

const char* clGetPlatformInfoString(cl_platform_info platform_info)
{
    return platform_info_string[platform_info];
}

cl_int infoPlatforms(cl_platform_id* platforms, cl_int num_platforms, cl_platform_info* params, cl_int num_params)
{
    cl_int cl_stat;
    size_t buffer_size = 10, buffer_ret_size = 0;
    char* buffer = (char*)malloc(buffer_size);

    for(cl_int i = 0; i < num_platforms; i++)
    {
        for(cl_int j = 0; j < num_params; j++)
        {
            cl_stat = clGetPlatformInfo(platforms[i], params[j], 0, NULL, &buffer_ret_size);

            if(cl_stat != CL_SUCCESS)
            {
                fprintf(stderr, "ERROR: clGetPlatformInfo failed\n");
                goto end;
            }

            if(buffer_ret_size > buffer_size)
            {
                void* tmp = NULL;
                buffer_size = buffer_ret_size;
                tmp = realloc(buffer, buffer_size);
                if(tmp == NULL)
                {
                    fprintf(stderr, "ERROR: Could not realloc memory\n");
                    perror("ERROR: ");
                    goto end;
                }
                else
                    buffer = (char*)tmp;
            }

            cl_stat = clGetPlatformInfo(platforms[i], params[j], buffer_size, buffer, &buffer_ret_size);

            if(cl_stat != CL_SUCCESS)
            {
                fprintf(stderr, "ERROR: clGetPlatformInfo failed\n");
                goto end;
            }

            printf("%s: %s\n", clGetPlatformInfoString(params[j]), buffer);
        }
        printf("\n");
    }

end:
    if(cl_stat != CL_SUCCESS)
    {
        printf("SENT TO ERROR HANDLER!\n");
        printf("CL_ERROR: %d\n", cl_stat);
    }

    free(buffer);

    if(cl_stat != CL_SUCCESS)
        return 1;

        return 0;
}

clu_object_stack* cluCreateObjectStack(int init_len)
{
  clu_object_stack* stack = (clu_object_stack*)malloc(sizeof(clu_object_stack));
  stack->length = init_len;
  stack->num_objects = 0;
  stack->list = (clu_object*)malloc(sizeof(clu_object) * init_len);

  return stack;
}

void cluFreeObjectStack(clu_object_stack* stack)
{
  cl_int ret = CL_SUCCESS;

  printf("Freeing stack: %p\n", stack);
  int i;
  for(i=stack->num_objects-1; i>=0; i--)
  {
    switch(stack->list[i].type)
    {
      #define TO_FREE(cl_type, ENUM, element, cluRelease) \
      case ENUM: \
        ret = cluRelease(stack->list[i].element); \
        /*printf("Releasing " #cl_type "\n");*/ \
        break;

      CLU_TABLE(TO_FREE)
      #undef TO_FREE

      default:
        printf("ERROR: Invalid or unsupported object type\n");
        break;
    }

    if(ret != CL_SUCCESS)
    {
      printf("Failed to release %s\n", CLU_TYPE_STRING[stack->list[i].type]);
    }
  }
  free(stack->list);
  free(stack);
}

int cluAssignToObjectGroup(struct clu_object_stack* stack, void* obj, cl_type type)
{
  if(stack->num_objects >= stack->length)
  {
    printf("Stack Error\n");
    return -1;
  }

  stack->list[stack->num_objects].type = type;

  switch(type)
  {
    #define TO_ASSIGN(cl_type, ENUM, element, ...) \
      case ENUM: \
        stack->list[stack->num_objects].element=*(cl_type*)obj; \
        /*printf("Assigning " #cl_type "\n");*/ \
        break;

    CLU_TABLE(TO_ASSIGN)
    #undef TO_ASSIGN

    default:
      printf("cluAssignToObjectGroup Failed\n");
      break;
  }

  stack->num_objects++;

  return 0;
}

cl_context cluCreateContext(
    clu_object_stack *stack,
    cl_context_properties *properties,
    cl_uint num_devices,
    const cl_device_id *devices,
    pfn_notify func,
    void *user_data,
    cl_int* cl_stat)
{

  cl_context context= clCreateContext(properties, num_devices, devices, func, user_data, cl_stat);

  if(context == 0)
    printf("Created ZERO value cl context\n");
  CL_ASSERT(context, error_ret);
  cluAssignToObjectGroup(stack, &context, CLU_CONTEXT);

error_ret:
  if(devices == NULL)
    fprintf(stderr, "%2sdevices cannot be NULL\n", space);
  if(num_devices == 0)
    fprintf(stderr, "%2snum_devices cannot be zero\n", space);
  if((func == NULL) && (user_data != NULL))
    fprintf(stderr, "%2spfn_notify cannot be NULL when user_data is not NULL\n", space);

  return context;
}

cl_command_queue cluCreateCommandQueue(
    clu_object_stack *stack,
    cl_context context,
    cl_device_id device,
    cl_command_queue_properties properties,
    cl_int *cl_stat)
{
  cl_command_queue queue = clCreateCommandQueue(context, device, properties, cl_stat);

  CL_ASSERT(queue, error_ret);
  cluAssignToObjectGroup(stack, &queue, CLU_COMMAND_QUEUE);

error_ret:
  return queue;
}

cl_mem cluCreateBuffer(
    clu_object_stack* stack,
    cl_context context,
    cl_mem_flags flags,
    size_t size,
    void *host_ptr,
    cl_int *cl_stat)
{
  cl_mem buffer = clCreateBuffer(context, flags, size, host_ptr, cl_stat);

  CL_ASSERT(buffer, error_ret);
  cluAssignToObjectGroup(stack, &buffer, CLU_MEM_OBJECT);

error_ret:
  return buffer;
}

cl_program cluCreateProgramWithSource(
    clu_object_stack* stack, 
    cl_context context,
    cl_uint count,
    const char **strings,
    const size_t *lengths,
    cl_int *cl_stat)
{
  cl_program program = clCreateProgramWithSource(context, count, strings, lengths, cl_stat);

  CL_ASSERT(program, error_ret);
  cluAssignToObjectGroup(stack, &program, CLU_PROGRAM);

error_ret:
  return program;
}

cl_kernel cluCreateKernel(
    clu_object_stack* stack,
    cl_program  program,
    const char *kernel_name,
    cl_int *cl_stat)
{
  cl_kernel kernel = clCreateKernel(program, kernel_name, cl_stat);

  CL_ASSERT(kernel, error_ret);
  cluAssignToObjectGroup(stack, &kernel, CLU_KERNEL);

error_ret:
  return kernel;
}

cl_mem cluCreateFromGLBuffer(
    clu_object_stack* stack,
    cl_context context,
    cl_mem_flags flags,
    GLuint bufobj,
    cl_int* cl_stat)
{
  cl_mem gl_buffer = clCreateFromGLBuffer(context, flags, bufobj, cl_stat);

  CL_ASSERT(gl_buffer, error_ret);
  cluAssignToObjectGroup(stack, &gl_buffer, CLU_MEM_OBJECT);

error_ret:
  return gl_buffer;
}

cl_utils.h

#ifndef __CL_UTILS_H__
#define __CL_UTILS_H__

#ifdef __cplusplus
extern "C" {
#endif /* C++ */

#include <stdarg.h>

#if defined(__APPLE__) || defined(MACOSX)
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif

#include <epoxy/gl.h>

#define CLU_TABLE(_) \
  _(cl_context, CLU_CONTEXT, context, clReleaseContext) \
  _(cl_command_queue, CLU_COMMAND_QUEUE, queue, clReleaseCommandQueue) \
  _(cl_mem,CLU_MEM_OBJECT, mem_object, clReleaseMemObject) \
  _(cl_program, CLU_PROGRAM, program, clReleaseProgram) \
  _(cl_kernel, CLU_KERNEL, kernel, clReleaseKernel)

#define TO_ENUM(cl_type, ENUM, ...) ENUM,
typedef enum cl_type{
  CLU_TABLE(TO_ENUM)
} cl_type;
#undef TO_ENUM

typedef void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data);

//extern const char* CLU_TYPE_STRING[];

typedef struct clu_object clu_object;

struct clu_object{
  cl_type type;
  union{
    cl_context context;
    cl_command_queue queue;
    cl_mem mem_object;
    cl_program program;
    cl_kernel kernel;
  };
};

typedef struct clu_object_stack{
  struct clu_object* list;
  int num_objects;
  int length;
} clu_object_stack;

const char* clGetPlatformInfoString(cl_platform_info platform_info);
int infoPlatforms(cl_platform_id* platforms, cl_int num_platforms, cl_platform_info* params, cl_int num_params);

clu_object_stack* cluCreateObjectStack(int init_len);
void cluFreeObjectStack(clu_object_stack* stack);

int cluAssignToObjectGroup(struct clu_object_stack* stack, void* obj, cl_type type);

cl_context cluCreateContext(
    clu_object_stack *stack,
    cl_context_properties *properties,
    cl_uint num_devices,
    const cl_device_id *devices,
    pfn_notify func,
    void *user_data,
    cl_int* cl_stat);

cl_command_queue cluCreateCommandQueue(
    clu_object_stack* stack,
    cl_context context,
    cl_device_id device,
    cl_command_queue_properties properties,
    cl_int *cl_stat);

cl_mem cluCreateBuffer(
    clu_object_stack* stack,
    cl_context context,
    cl_mem_flags flags,
    size_t size,
    void *host_ptr,
    cl_int * cl_stat);

cl_program cluCreateProgramWithSource(
    clu_object_stack* stack, 
    cl_context context,
    cl_uint count,
    const char **strings,
    const size_t *lengths,
    cl_int *cl_stat);

cl_kernel cluCreateKernel(
    clu_object_stack* stack,
    cl_program  program,
    const char *kernel_name,
    cl_int *cl_stat);

cl_mem cluCreateFromGLBuffer(
    clu_object_stack* stack,
    cl_context context,
    cl_mem_flags flags,
    GLuint bufobj,
    cl_int* cl_stat);

#ifdef __cplusplus
}
#endif /* C++ */

#endif /* __CL_UTILS_H__ */
Community
  • 1
  • 1
candronikos
  • 159
  • 1
  • 13

2 Answers2

1

To enable CL-GL interoperability, things have to be set up in a specific order:

 1. Create OpenGL context
 2. Create OpenCL context
 3. Create OpenGL buffers
 4. Start OpenGL rendering

One possible issue is that you create the OpenGL buffers before the OpenCL context (before the call to init_cl).

pAIgn10
  • 131
  • 1
  • 4
  • Had to read that again for it to sink in. What's wrong with doing all OpenGL initialisation before the OpenCL init stuff? I'll try it when I get home in any case – candronikos Jun 06 '15 at 23:36
  • I've done as you suggested but it made no difference. One thing I did forget was to uncomment: `/*fprintf(stderr, "ERROR: " x " failed %d\n", cl_stat);*/` Which shows some more info giving me the more errors. There is more info in the updated post. – candronikos Jun 12 '15 at 13:43
0

Found the problem. The last argument of clSetKernelArg() requires a pointer to the mem object and I forgot to prepend the & operator.

So this:

cl_stat = clSetKernelArg(cl.kernel, 0, sizeof(cl_mem), (const void*)cl.buffer);

becomes this:

cl_stat = clSetKernelArg(cl.kernel, 0, sizeof(cl_mem), (const void*)&cl.buffer);

Very simple.

candronikos
  • 159
  • 1
  • 13