0

https://i.stack.imgur.com/TA9v6.png

I have been trying to get to compile a kernel that assigns certain indices to an std::vector using OpenCL through clEnqueueReadBuffer function but it does not seem to work correctly since the first result is the only assigned in the std::vector

the source code for the host in c++ is the following:

cl_mem originalPixelsBuffer = clCreateBuffer(p1.context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(Color) * imageObj->SourceLength(), source, &p1.status);
        CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to Create buffer 0");


        cl_mem targetBuffer = clCreateBuffer(p1.context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, sizeof(Color) * imageObj->OutputLength(), target, &p1.status);
        CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to Create buffer 1");


//write buffers
p1.status = clEnqueueWriteBuffer(p1.commandQueue, originalPixelsBuffer, CL_FALSE, 0, sizeof(Color) * imageObj->SourceLength(), source, 0, NULL, NULL);
        CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to write buffer 0");
        p1.status = clEnqueueWriteBuffer(p1.commandQueue, targetBuffer, CL_TRUE, 0, sizeof(Color) * imageObj->OutputLength(), target, 0, NULL, NULL);
        CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to write buffer 1");

        size_t  globalWorkSize[2] = { imageObj->originalWidth * 4, imageObj->originalHeight * 4 };
        size_t localWorkSize[2]{ 64,64 };
        SetLocalWorkSize(IsDivisibleBy64(localWorkSize[0]), localWorkSize);

//execute kernel
        p1.status = clEnqueueNDRangeKernel(p1.commandQueue, Kernel, 1, NULL, globalWorkSize, IsDisibibleByLocalWorkSize(globalWorkSize, localWorkSize) ? localWorkSize : NULL, 0, NULL, NULL);
        CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to clEnqueueDRangeKernel");

//read buffer

        p1.status = clEnqueueReadBuffer(p1.commandQueue, targetBuffer, CL_TRUE, 0, sizeof(Color) * imageObj->OutputLength(), target, 0, NULL, NULL);
        CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to write buffer 1");

the kernel code:


      __kernel void interp(__global struct Color* source,__global struct Color* target,uint64 width,uint64 height,uint64 ratio,uint64 limit, uint64 originalHeight)
        {
            __private fp32 wIndex = (int64)get_global_id(0);
            __private fp32 hIndex = (int64)get_global_id(1);

            if(((int64)wIndex)%ratio==MATCH && ((int64)hIndex)%ratio ==MATCH)
            {
                __private int64  Index = (wIndex/ratio) * (originalHeight/ratio) + (hIndex/ratio);

                if(Index < limit)
                {
                
                        __private int64 tIndex  = wIndex * height + hIndex;
                        
                        target[tIndex].R = source[Index].R;
                        target[tIndex].G = source[Index].G;
                        target[tIndex].B = source[Index].B;
                        target[tIndex].A = source[Index].A;
                }
            }
        
        }```


 
PontiacGTX
  • 185
  • 2
  • 15
  • 1
    I don't see any use of a `std::vector` in this code. Generally, there seems to be a lot of code *not* shown which is quite critical to understanding what this code is intended to do. For your best chance of a useful answer, post [minimal, reproducible code](https://stackoverflow.com/help/minimal-reproducible-example). If we can copy-paste your question into our editor and compile and run it, debugging becomes **much** easier, which means we're more likely to help. – pmdj Jun 23 '20 at 11:47
  • @pmdj I can't paste all the project because probably it is too long but I uploaded it here https://drive.google.com/file/d/14vywjFxZc2E0t-k-ax_bIvn8w1lgE4dC/view?usp=sharing – PontiacGTX Jun 23 '20 at 12:14

0 Answers0