2

I'm working in Julia and I need to call some custom C functions that use the ArrayFire library. When I use code like:

  void copy(const af::array &A, af::array &B,size_t length) {
      // 2. Obtain the device, context, and queue used by ArrayFire
      //    (not shown in this snippet; `af_queue` comes from that step)

      // 3. Fetch the cl_mem references behind both af::array objects
      cl_mem * const srcMem = A.device<cl_mem>();
      cl_mem * const dstMem = B.device<cl_mem>();

      // 4. Load, build, and use your kernels
      //    (not shown in this snippet; `kernel` is the function built here)

      // Bind the two buffers as kernel arguments 0 and 1, then enqueue a
      // one-dimensional launch whose global size is `length`
      clSetKernelArg(kernel, 0, sizeof(cl_mem), srcMem);
      clSetKernelArg(kernel, 1, sizeof(cl_mem), dstMem);
      const size_t globalSize = length;
      clEnqueueNDRangeKernel(af_queue, kernel, 1, NULL, &globalSize, NULL, 0, NULL, NULL);

      // 5. Hand control of the af::array memory back to ArrayFire
      A.unlock();
      B.unlock();
  }

I used as a reference the example provided in: Interoperability with OpenCL

I call this function in Julia as follows:

# ccall takes the return type and the tuple of argument types as two separate
# arguments. The C function's third parameter (size_t length) is declared as
# Csize_t; length(Af) is assumed to be the intended element count.
ccall((:copy,"path/to/dll"),Cvoid,(Ref{af_array},Ref{af_array},Csize_t),Af.arr,Bf.arr,length(Af))

Af and Bf are ArrayFire arrays, and the call works as expected. The problem arises when I directly use B=A, only as a test, i.e.

  // Test-only variant of copy(): no kernel is launched, the body only assigns
  // A to B. The `length` parameter is not used here.
  void copy(const af::array &A, af::array &B,size_t length) {        
     B=A;//only to test
  }

the call stops working in Julia. This made me doubt whether I'm writing and calling these functions the correct way.

Some of the ArrayFire functions wrapped in Julia that I looked at call functions that take af_array arguments, which are different from af::array arguments. So I wanted to change the arguments, and I did this:

// Variant of copy() that receives af_array handles instead of af::array
// objects and wraps them so that the af::array member functions can be used.
void copy(const af_array &dA, af_array &dB,size_t length) {

//this to be able to use A.device and B.device
// NOTE(review): af::array(af_array) presumably adopts the handle without
// retaining it, in which case A and B would release the caller's arrays when
// they go out of scope at the end of this function — confirm against the
// ArrayFire documentation.
array A=array(dA);
array B=array(dB);

//steps 2 to 5 in the original code 

}

It doesn't work in C or in Julia. The question is: if I want to use af_array as arguments, how do I get the device pointer? Or, what is the correct way to write these functions to avoid problems when I call them from Julia?

thanks in advance.

UPD


I changed B=A; inside the function:

  // Variant of copy() that replaces the plain assignment with a call to
  // af::copy over an explicit index range. The `length` parameter is not used;
  // the range is derived from A itself.
  void copy(const af::array &A, af::array &B,size_t length) {        
        // Extent of A along its first dimension
        size_t len = A.dims(0);
        // Sequence 0, 1, ..., len - 1 (step 1)
        // NOTE(review): len - 1 wraps around when len is 0 — confirm that
        // empty arrays are never passed in.
        seq idx(0, len - 1, 1);
        // Presumably writes A into the elements of B selected by idx — confirm
        // against the af::copy documentation.
        af::copy(B, A, idx);
  }

And it works! However, I still doubt that this is the correct way, since this code is very simple. I will be working with more complex code that may stop working in a similar way.

4lrdyD
  • 433
  • 2
  • 10
  • 1
    I am not well versed with Julia but I think it would be good idea to check out https://github.com/JuliaComputing/ArrayFire.jl on how they call c/c++ functions of arrayfire using FFI. – pradeep Mar 12 '19 at 07:07
  • I have ArrayFire.jl, I saw the code of some functions, they call C functions using af_array as arguments,for example, the definition of the addition function in C: `AFAPI af_err af_add (af_array *out, const af_array lhs, const af_array rhs, const bool batch);` – 4lrdyD Mar 12 '19 at 20:17
  • In Julia this function is called with: `ccall((:af_add,af_lib),af_err,(Ptr{af_array},af_array,af_array,Bool),out,lhs.arr,rhs.arr,batch)` but I can't use af_array as arguments if I want to use custom kernels, since I don't know how to get the device pointer. – 4lrdyD Mar 12 '19 at 20:25
  • I changed `B=A;` for: `size_t len = A.dims(0); seq idx(0, len - 1, 1); af::copy(B, A, idx);` and works. why? – 4lrdyD Mar 13 '19 at 05:09

1 Answers1

0

This is not a definitive answer, but I think it significantly improves functionality. The af_get_device_ptr function is a way to get the device pointer from an af_array object, and the correct way to write functions so that they can be called from Julia seems to be with af_array arguments (see: calling custom C ArrayFire functions in Julia #229), since the functions integrated in ArrayFire.jl do it this way. Here is a simple and complete example of how to write the function and call it from Julia:

in C


// Element-wise sum of two ArrayFire arrays computed by a custom OpenCL kernel.
//
// Parameters:
//   out - receives a newly created af_array holding dA + dB. Ownership of the
//         handle passes to the caller. It is left untouched when any step
//         fails (wrong type, size mismatch, ArrayFire or OpenCL error).
//   dA  - first operand; must be of type f32.
//   dB  - second operand; must be of type f32 and have at least as many
//         elements as dA.
//
// The kernel is launched over every element of dA. It is enqueued on
// ArrayFire's own command queue, so it is ordered with the ArrayFire
// operations issued before and after this call.
void AFire::sumaaf(af_array* out , af_array dA, af_array dB) {
    if (out == nullptr) return;

    // The kernel below is written for float buffers only, and it reads
    // gB[id] for every element of dA.
    af_dtype type_a = f32;
    af_dtype type_b = f32;
    dim_t elems_a = 0;
    dim_t elems_b = 0;
    if (af_get_type(&type_a, dA) != AF_SUCCESS ||
        af_get_type(&type_b, dB) != AF_SUCCESS ||
        af_get_elements(&elems_a, dA) != AF_SUCCESS ||
        af_get_elements(&elems_b, dB) != AF_SUCCESS) {
        return;
    }
    if (type_a != f32 || type_b != f32 || elems_b < elems_a) return;

    // Allocate the result by copying dA: this yields an array of the right
    // type and shape whose contents the kernel then overwrites.
    af_array dC = nullptr;
    if (af_copy_array(&dC, dA) != AF_SUCCESS) return;

    const size_t count = static_cast<size_t>(elems_a);
    if (count == 0) {
        // Nothing to compute; a zero-sized launch is not valid.
        *out = dC;
        return;
    }

    // 2. Obtain the device, context, and queue used by ArrayFire.
    //    Queried on every call so that a change of the active ArrayFire
    //    device is picked up.
    cl_context af_context = afcl::getContext();
    cl_device_id af_device_id = afcl::getDeviceId();
    cl_command_queue af_queue = afcl::getQueue();

    // 3. Obtain cl_mem references to the af_array objects.
    //    af_get_device_ptr stores the handle in the pointer-sized slot it is
    //    given and locks the array until af_unlock_array is called, so plain
    //    local variables are all that is needed; no buffer has to be created.
    void *raw_a = nullptr;
    void *raw_b = nullptr;
    void *raw_c = nullptr;
    const bool locked_a = af_get_device_ptr(&raw_a, dA) == AF_SUCCESS;
    const bool locked_b = af_get_device_ptr(&raw_b, dB) == AF_SUCCESS;
    const bool locked_c = af_get_device_ptr(&raw_c, dC) == AF_SUCCESS;
    cl_mem d_A = static_cast<cl_mem>(raw_a);
    cl_mem d_B = static_cast<cl_mem>(raw_b);
    cl_mem d_C = static_cast<cl_mem>(raw_c);

    // 4. Load, build, and use your kernels.
    // A simple sum kernel, uses C++11 syntax for multi-line strings.
    const char * kernel_name = "sum_kernel";
    const char * source = R"(
            void __kernel
            sum_kernel(__global float * gC, __global float * gA, __global float * gB)
            {
                int id = get_global_id(0);
                gC[id] = gA[id]+gB[id];
            }
        )";

    bool ok = locked_a && locked_b && locked_c;
    cl_int status = CL_SUCCESS;
    cl_program program = nullptr;
    cl_kernel sumkernel = nullptr;

    // Create the program, build the executable, and extract the entry point
    // for the kernel.
    if (ok) {
        program = clCreateProgramWithSource(af_context, 1, &source, NULL, &status);
        ok = (status == CL_SUCCESS);
    }
    if (ok) {
        ok = clBuildProgram(program, 1, &af_device_id, NULL, NULL, NULL) == CL_SUCCESS;
    }
    if (ok) {
        sumkernel = clCreateKernel(program, kernel_name, &status);
        ok = (status == CL_SUCCESS);
    }

    // Set arguments and launch the kernel. clSetKernelArg expects the address
    // of the cl_mem handle, not the handle itself.
    if (ok) {
        ok = clSetKernelArg(sumkernel, 0, sizeof(cl_mem), &d_C) == CL_SUCCESS &&
             clSetKernelArg(sumkernel, 1, sizeof(cl_mem), &d_A) == CL_SUCCESS &&
             clSetKernelArg(sumkernel, 2, sizeof(cl_mem), &d_B) == CL_SUCCESS;
    }
    if (ok) {
        ok = clEnqueueNDRangeKernel(af_queue, sumkernel, 1, NULL, &count,
                                    NULL, 0, NULL, NULL) == CL_SUCCESS;
    }

    // Drop our references to the kernel and program so they are not leaked on
    // every call; OpenCL keeps them alive until the enqueued command is done.
    if (sumkernel != nullptr) clReleaseKernel(sumkernel);
    if (program != nullptr) clReleaseProgram(program);

    // 5. Return control of the array memory to ArrayFire.
    //    The cl_mem handles belong to ArrayFire, so they must not be released
    //    with clReleaseMemObject().
    if (locked_a) af_unlock_array(dA);
    if (locked_b) af_unlock_array(dB);
    if (locked_c) af_unlock_array(dC);

    if (ok) {
        // Hand the result array itself to the caller instead of copying it
        // again and leaking the original.
        *out = dC;
    } else {
        af_release_array(dC);
    }
}

in Julia the call would be:


# Add two one-dimensional Float32 ArrayFire arrays by calling the custom
# `sumaaf` function exported by the shared library at "path/to/dll".
# `out` is a reference cell that the C function fills with the handle of the
# result; that handle is then wrapped in a new AFArray and returned.
function sumaaf(A::AFArray{Float32,1},B::AFArray{Float32,1})
    out = ArrayFire.RefValue{af_array}(0);
    # Pass the handles of the function's own arguments A and B.
    ccall((:sumaaf,"path/to/dll")
            ,Cvoid,(Ptr{af_array},af_array,af_array),out,A.arr,B.arr);
    AFArray{Float32,1}(out[])
end
4lrdyD
  • 433
  • 2
  • 10