0

This question follows from this question.

I have a kernel which calls a regular function. When I build and run my code, I get the following output:

Number of devices: 2
building program failed
-----COULD NOT CREATE KERNEL!!---

The problematic portion of my .cl file is as follows:

    /*
     * Integer limits: IDCT_INT_MAX is 2^31 - 1 and IDCT_INT_MIN is one below
     * its negation. MIN is spelled as (-MAX - 1) rather than as a literal;
     * macros are expanded at the point of use, so referring to IDCT_INT_MAX
     * before its own #define line is fine.
     * NOTE(review): where these limits are used is not visible in this excerpt.
     */
    #define  IDCT_INT_MIN   (- IDCT_INT_MAX - 1)
    #define  IDCT_INT_MAX   2147483647
      ....
      ....
      ....
      ....
      ....
      ....

    /* Shift x left by n bits. Both arguments are parenthesized in the expansion. */
    #define SCALE(x,n)      ((x) << (n))

    /*
     * Butterfly step: stores SUB(a,b) into x and ADD(a,b) into y.
     *
     * Wrapped in do { ... } while (0) so that `but(...);` behaves as a single
     * statement everywhere, including as the body of an if/else without braces.
     * Note that a and b are each evaluated twice, so they must not have side
     * effects, and x/y must be assignable lvalues.
     */
    #define but(a,b,x,y)    do { (x) = SUB((a),(b)); (y) = ADD((a),(b)); } while (0)

    /*
     * Scale x down by n bits with rounding: half of 2^n is added before the
     * right shift, and 1 is subtracted first when x is negative.
     * n - 1 is used as a shift count, so n is expected to be at least 1.
     */
    static int DESCALE(int x, int n)
    {
        int half = 1 << (n - 1);
        int neg_adjust = (x < 0);   /* 1 when x is negative, otherwise 0 */

        return (x + half - neg_adjust) >> n;
    }

    /* Returns the sum of x and y. */
    static int ADD(int x, int y)
    {
        return x + y;
    }

    /* Returns the difference x - y. */
    static int SUB(int x, int y)
    {
        return x - y;
    }

    /*
     * Multiplies x by the coefficient c and scales the product back down by
     * C_BITS bits, adding half of 2^C_BITS first so the result is rounded.
     * C_BITS is defined outside this excerpt.
     */
    static int CMUL(int c, int x)
    {
        int rounding = 1 << (C_BITS - 1);

        return (c * x + rounding) >> C_BITS;
    }

    /*
     * Combines x and y with two coefficients from a local table and writes
     * the pair of results through rx and ry:
     *
     *     *rx = a*x - b*y
     *     *ry = b*x + a*y
     *
     * where a = table[f][k], b = table[f][8 - k], and every product goes
     * through CMUL.
     *
     * f selects the table row (0: the c*_1 constants, 1: the c*_s2 constants).
     * Each row has 8 entries and column 8 - k is read, so k must be in 1..7;
     * k == 0 would index one past the end of the row.
     */
    static void rot(int f, int k, int x, int y, int *rx, int *ry)
    {
        int table[2][8] = {
            {c0_1, c1_1, c2_1, c3_1, c4_1, c5_1, c6_1, c7_1},
            {c0_s2, c1_s2, c2_s2, c3_s2, c4_s2, c5_s2, c6_s2, c7_s2}
        };
        int coef_k = table[f][k];
        int coef_mirror = table[f][8 - k];

        *rx = SUB(CMUL(coef_k, x), CMUL(coef_mirror, y));
        *ry = ADD(CMUL(coef_mirror, x), CMUL(coef_k, y));
    }

void idct_1D(__private int *Y);

/*
 * Kernel that processes 64 input values as an 8x8 block in two passes.
 *
 * Pass 1: for each k, the 8 consecutive inputs input[8*k .. 8*k+7] are
 *         scaled up by S_BITS into Y(k, 0..7) and the 1-D transform is
 *         applied to that group.
 * Pass 2: for each l, the values Y(0..7, l) are copied into Yc, transformed,
 *         then scaled down by S_BITS + 3, offset by 128, clamped to 0..255
 *         and stored through X(k, l).
 *
 * Y(i, j) and X(i, j) are macros defined outside this excerpt; presumably
 * they index the local Y[] array and the output buffer respectively --
 * TODO confirm against the macro definitions.
 *
 * NOTE(review): both transform calls below are spelled idct_1d, while the
 * function declared and defined in this file is idct_1D. Identifiers are
 * case-sensitive, so the calls do not refer to the declared function.
 */
__kernel void IDCT(__global int *input, __global uchar *output) 
{
    int Y[64];
    int k, l;
    int Yc[8];

    /* Pass 1: scale each group of 8 inputs and transform it. */
    for (k = 0; k < 8; k++)
    {
        for (l = 0; l < 8; l++) Y(k, l) = SCALE(input[(k << 3) + l], S_BITS);
        idct_1d(&Y(k, 0));
    }

    /* Pass 2: transform across the other dimension, one l at a time. */
    for (l = 0; l < 8; l++)
    {

        /* Gather the 8 values that share index l into a contiguous buffer. */
        for (k = 0; k < 8; k++)
    {
            Yc[k] = Y(k, l);
    }

        idct_1d(Yc);

        for (k = 0; k < 8; k++)
        {
            /* Undo the scaling (S_BITS + 3 bits) and add the 128 offset. */
            int r = 128 + DESCALE(Yc[k], S_BITS + 3);
            /* Clamp to the range 0..255. */
            r = r > 0 ? (r < 255 ? r : 255) : 0;
            X(k, l) = r;
        }

    }
}

/*
 * In-place 8-point transform of Y[0..7] (a 1-D inverse DCT, going by the
 * function name). The work is done in three stages built from butterflies
 * (but), rotations (rot) and multiplications by the sqrt2 constant (CMUL),
 * followed by a final set of butterflies that writes the results back
 * into Y.
 */
void idct_1D(__private int *Y)
{
    int stage1[8], stage2[8], stage3[8];
    int i;

    /* Stage 1: butterflies on (Y[0],Y[4]) and (Y[1],Y[7]), a rotation of
     * (Y[2],Y[6]), and sqrt2 scaling of Y[3] and Y[5]. */
    but(Y[0], Y[4], stage1[1], stage1[0]);
    rot(1, 6, Y[2], Y[6], &stage1[2], &stage1[3]);
    but(Y[1], Y[7], stage1[4], stage1[7]);
    stage1[5] = CMUL(sqrt2, Y[3]);
    stage1[6] = CMUL(sqrt2, Y[5]);

    /* Stage 2: four butterflies over the stage-1 values. */
    but(stage1[0], stage1[3], stage2[3], stage2[0]);
    but(stage1[1], stage1[2], stage2[2], stage2[1]);
    but(stage1[4], stage1[6], stage2[6], stage2[4]);
    but(stage1[7], stage1[5], stage2[5], stage2[7]);

    /* Stage 3: entries 0..3 pass through unchanged, entries 4..7 are rotated. */
    for (i = 0; i < 4; i++) {
        stage3[i] = stage2[i];
    }
    rot(0, 3, stage2[4], stage2[7], &stage3[4], &stage3[7]);
    rot(0, 1, stage2[5], stage2[6], &stage3[5], &stage3[6]);

    /* Output: butterflies pairing entry i with entry 7 - i, written into Y. */
    but(stage3[0], stage3[7], Y[7], Y[0]);
    but(stage3[1], stage3[6], Y[6], Y[1]);
    but(stage3[2], stage3[5], Y[5], Y[2]);
    but(stage3[3], stage3[4], Y[4], Y[3]);
}

The error results from the function idct_1D, which I call from the kernel IDCT.

How can I resolve the error with my kernel function?

Is there any way to call a function from a kernel?

EDIT:

After following pmdj's answer, I wrote my build statements in the following way:

/* Build the program for one device; on failure, print the compiler's build log. */
ret= clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

if (ret != CL_SUCCESS) 
{
 printf("building program failed\n");

  size_t log_size;
  char buffer[2048];
/* First call passes no buffer and only asks for the size of the log. */
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
/* Second call copies the log into the fixed 2048-byte buffer and overwrites log_size.
 * NOTE(review): log_size from the first call is never used to size the buffer, and
 * neither return value is checked. If the log is longer than 2048 bytes the call is
 * expected to fail (CL_INVALID_VALUE per the OpenCL reference -- confirm), leaving
 * buffer uninitialized for the printf below. Consider allocating log_size bytes. */
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &log_size);
printf("%s\n", buffer);

 }

I got the following response after building and running my code:

Number of devices: 2
building program failed
ptxas application ptx input, line 71; error   : Call has wrong number of parameters
ptxas application ptx input, line 112; error   : Call has wrong number of parameters
ptxas application ptx input, line 153; error   : Call has wrong number of parameters
ptxas application ptx input, line 194; error   : Call has wrong number of parameters
ptxas application ptx input, line 235; error   : Call has wrong number of parameters
ptxas application ptx input, line 276; error   : Call has wrong number of parameters
ptxas application ptx input, line 317; error   : Call has wrong number of parameters
ptxas application ptx input, line 358; error   : Call has wrong number of parameters
ptxas application ptx input, line 392; error   : Call has wrong number of parameters
ptxas application ptx input, line 520; error   : Call has wrong number of parameters
ptxas application ptx input, line 648; error   : Call has wrong number of parameters
ptxas application ptx input, line 776; error   : Call has wrong number of parameters
ptxas application ptx input, line 904; error   : Call has wrong number of parameters
ptxas application ptx input, line 1032; error   : Call has wrong number of parameters
ptxas application ptx input, line 1160; error   : Call has wrong number of parameters
ptxas application ptx input, line 1288; error   : Call has wrong number of parameters
ptxas fatal   : Ptx assembly aborted due to errors

-----COULD NOT CREATE KERNEL!!---

I checked this github link and removed all the comments and printf statements from my .cl file. Despite that, I keep getting the same errors after building and running the code.

EDIT:

The code reflects the change I tried to implement in the code after following pmdj's suggestion. Now I am only getting syntax errors.

The output I am now getting is as follows:

Number of devices: 2
building program failed
<kernel>:98:70: error: invalid address space for pointee of pointer argument to __kernel function
__kernel void IDCT(__global int *input, __global uchar *output, int *Yc, int *Yin) 
                                                                     ^
<kernel>:98:79: error: invalid address space for pointee of pointer argument to __kernel function
__kernel void IDCT(__global int *input, __global uchar *output, int *Yc, int *Yin) 
                                                                              ^
<kernel>:105:30: error: called object type '__attribute__((address_space(16776963))) int *' is not a function or function pointer
                for (l = 0; l < 8; l++) Yin(k, l) = SCALE(input[(k << 3) + l], S_BITS);
                                        ~~~^
<kernel>:106:3: warning: implicit declaration of function 'idct_1d' is invalid in C99
                idct_1d(&Yin(k, 0));
                ^
<kernel>:106:15: error: called object type '__attribute__((address_space(16776963))) int *' is not a function or function pointer
                idct_1d(&Yin(k, 0));
                         ~~~^
<kernel>:114:15: error: called object type '__attribute__((address_space(16776963))) int *' is not a function or function pointer
                        Yc[k] = Yin(k, l);
                                ~~~^

-----COULD NOT CREATE KERNEL!!---

EDIT:

I put __private before the function parameters and I am again getting ptx errors.

EDIT:

The code now works. It turns out I was calling the function with the wrong name (idct_1d instead of idct_1D).

The headings of my functions are now as follows:

void idct_1D(int *Y);


__kernel void IDCT(__global int *input, __global uchar *output);
a_sid
  • 577
  • 10
  • 28
  • Utterly incomprehensible. – Weather Vane Jul 10 '17 at 20:28
  • @WeatherVane What is not clear about my question? How can I improve it? – a_sid Jul 10 '17 at 20:29
  • By asking a specific question. Not throwing a wall of incomplete code. – Weather Vane Jul 10 '17 at 20:30
  • @WeatherVane I have removed my host code in order to direct the focus to the problematic kernel code. – a_sid Jul 10 '17 at 20:35
  • How on earth do you expect anyone to answer `void idct_1D(int *Y) {int z1[8], z2[8], z3[8]; but(Y[0], Y[4], z1[1], z1[0]); rot(1, 6, Y[2], Y[6], &z1[2], &z1[3]); but(Y[1], Y[7], z1[4], z1[7]); z1[5] = CMUL(sqrt2, Y[3]); z1[6] = CMUL(sqrt2, Y[5]); but(z1[0], z1[3], z2[3], z2[0]); but(z1[1], z1[2], z2[2], z2[1]); but(z1[4], z1[6], z2[6], z2[4]); but(z1[7], z1[5], z2[5], z2[7]); z3[0] = z2[0]; z3[1] = z2[1]; z3[2] = z2[2]; z3[3] = z2[3]; rot(0, 3, z2[4], z2[7], &z3[4], &z3[7]); ... etc... }`? You probably won't even know what it means **yourself** in 6 months' time. – Weather Vane Jul 10 '17 at 20:35
  • If you are asking a question then how do you know what parts are relevant? Please post the [Minimal, Complete, and Verifiable example](http://stackoverflow.com/help/mcve) that shows the problem. – Weather Vane Jul 10 '17 at 20:40
  • 2
    Is `int Y[64]; ... Y(k, l) = ...` valid C code? Maybe the C tag should be removed? – chux - Reinstate Monica Jul 10 '17 at 20:43
  • @WeatherVane This is a code I got from a github project. I am only trying to run it on a GPU. The code you have posted in your comment is proper and builds properly without giving any errors. All I want to do is call that code from a kernel function which I named `IDCT` in the snippet I have posted. – a_sid Jul 10 '17 at 20:52

1 Answer

2

You can see the detailed compiler output with the clGetProgramBuildInfo() function after clBuildProgram() fails. Something like this:

/* Copy the build log for device_id into buffer; len receives the size reported
 * by clGetProgramBuildInfo. The 2048-byte buffer is a fixed example size. */
size_t len;
char buffer[2048];
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);

https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clGetProgramBuildInfo.html

Incidentally, the problem most likely is that your idct_1D function takes a pointer to __global memory (the default), and you're trying to pass it an array in __private memory. In OpenCL, always tag your pointers with the correct memory space.

pmdj
  • 22,018
  • 3
  • 52
  • 103
  • After using the command `clGetProgramBuildInfo()` and running my code, I got errors of the kind `ptxas fatal : Ptx assembly aborted due to errors`. – a_sid Jul 11 '17 at 05:32
  • Do you have any suggestions for trouble-shooting these errors? I removed the comments from my .cl file but I keep getting the same output. – a_sid Jul 11 '17 at 05:33
  • Build your kernels up one line at a time. Then you know which line introduces the problem, and you have a better idea of what's going on. If you get stuck on a specific line and can't work out what's wrong, post that reduced code and we'll try to help you. – pmdj Jul 11 '17 at 11:46
  • Also note that the code you've posted is not enough for us to help you - the SCALE and DESCALE functions aren't available. And I don't think you can have a function (which you haven't posted) *and* a variable both named Y in C or OpenCL and expect one not to shadow the other, as others have pointed out. Are you sure you're not trying to translate C++ code or some other language? (Rename the function or the variable!) – pmdj Jul 11 '17 at 11:52
  • _Build your kernels up one line at a time_. What do you mean by this statement? My kernel code has only one kernel and isn't using the command `clBuildProgram()` the only way to build kernel code? – a_sid Jul 11 '17 at 15:01
  • I have included the functional descriptions of SCALE and DESCALE. The code I am trying to change is C code and can be found in [this](https://github.com/claf/MJPEG/tree/master/tima_seq_version) link. – a_sid Jul 11 '17 at 15:03
  • Ah, I see, Y() is a macro - that's OK then. By line by line I mean get an empty kernel working, then add ~one line at a time and see when it falls over. For functions, start with an empty function which just returns a constant value, and so on. – pmdj Jul 11 '17 at 15:11
  • I have already done that test. The kernel builds properly when I remove the function calls for `idct_1D` from it. The problem is with declaration of the function inside the kernel. – a_sid Jul 11 '17 at 15:14
  • Have you fixed the pointer memory space issue I pointed out in my answer yet? In the version of the code currently in the question you're still passing pointers to `__private` array elements (`Y`, `Yc`) to a function which takes pointer to `__global` memory. (`void idct_1D(int *Y);`) And after that it fails even if `idct_1D` is an empty function (`{}`)? – pmdj Jul 11 '17 at 15:34
  • I put `__private` before the function argument in `idct_1d` and I got `ptx` errors upon building the code. I discovered that these errors result from invalid binaries. – a_sid Jul 12 '17 at 01:08
  • I discovered that `__private` is the default address space, as removing it gives me the same response after the code is run. However, I then set the argument of `idct_1d` as global (`void idct_1D(__global int *Y);`) and changed the arguments of IDCT as well in the following way: `__kernel void IDCT (__global int *input, __global uchar *output, __global int* Yc, __global int* Yin)`. Upon doing this, I got syntax errors which I appended to my original question. – a_sid Jul 12 '17 at 01:25
  • I get the `ptx` errors even after removing the contents of idct_1D. – a_sid Jul 12 '17 at 05:13
  • Why are you now trying to call a function called "idct_1d()"? It's declared and defined as "idct_1D". Also, your change from private array to kernel argument for Y and Yc makes no sense, aside from the fact that you're missing the macros. – pmdj Jul 12 '17 at 07:35
  • Oh yes....I did not notice that I was calling an entirely different function! I changed the name of the function and the code began working right away. I did not have to change any address spaces to accomplish this. – a_sid Jul 12 '17 at 16:38
  • Let us [continue this discussion in chat](http://chat.stackoverflow.com/rooms/149029/discussion-between-a-sid-and-pmdj). – a_sid Jul 12 '17 at 17:01