-2

I am trying to replace some thrust calls to arrayfire to check the performance.

I am not sure if I am using properly arrayfire because the results I am taking do not match at all.

So , the thrust code for example I am using is:

cudaMalloc( (void**) &devRow, N * sizeof(float) );
...//devRow is filled

thrust::device_ptr<float> SlBegin( devRow );
for ( int i = 0; i < N; i++, SlBegin += PerSlElmts )
{
    thrust::inclusive_scan( SlBegin, SlBegin + PerSlElmts, SlBegin );
}

cudaMemcpy( theRow, devRow, N * sizeof(float), cudaMemcpyDeviceToHost );
//use theRow...

Arrayfire:

af::array SlBegin( N , devRow );
for ( int i = 0;i < N; i++,SlBegin += PerSlElmts )
{
    accum( SlBegin );
}

cudaMemcpy( theRow, devRow, N * sizeof(float), cudaMemcpyDeviceToHost );
//use theRow..

I am not sure how arrayfire handles the copy : af::array SlBegin( N , devRow ); .In thrust we have the device pointer which points from devRow to SlBegin , but in arrayfire..?

Also , I wanted to ask about using gfor . In arrayfire webpage , it states that

Do not use this function directly; see GFOR: Parallel For-Loops.

And then for GFOR :

GFOR is disabled in the current version of ArrayFire

So , we can't use gfor?

---------UPDATE---------------------------

I have a small running example which shows the different results:

#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include <curand_kernel.h>

#include "arrayfire.h"

#include <thrust/scan.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>

__global__ void Kernel( const int N ,float * const devRow )
{

   int i = threadIdx.x;
   if ( i < N )
        devRow[ i ] = i;

 }

int main(){

    int N = 6;
    int Slices = 2;
    int PerSlElmts = 3;

    float * theRow = (float*) malloc ( N * sizeof( float ));

    for ( int i = 0; i < N; i ++ )
        theRow[ i ] = 0;

    // raw pointer to device memory
    float * devRow;
    cudaMalloc( (void **) &devRow, N * sizeof( float ) );

    Kernel<<< 1,N >>>( N , devRow );
    cudaDeviceSynchronize();

    // wrap raw pointer with a device_ptr
    thrust::device_ptr<float> SlBegin( devRow );

    for ( int i = 0; i < Slices; i++ , SlBegin += PerSlElmts )
        thrust::inclusive_scan( SlBegin, SlBegin + PerSlElmts , SlBegin );

    cudaMemcpy( theRow, devRow, N * sizeof(float), cudaMemcpyDeviceToHost );

    for ( int i = 0; i < N; i++ )
        printf("\n Thrust accum : %f",theRow[ i ] );


    //--------------------------------------------------------------------//
    Kernel<<< 1,N >>>( N , devRow );
    cudaDeviceSynchronize();

    af::array SlBeginFire( N, devRow );

    for ( int i = 0; i < Slices; i++ , SlBeginFire += PerSlElmts )
        af::accum( SlBeginFire );

    SlBeginFire.host( theRow );

    for ( int i = 0; i < N; i++ )
            printf("\n Arrayfire accum : %f",theRow[ i ] );

    cudaFree( devRow );
    free( theRow );


    return 0;

}
George
  • 5,808
  • 15
  • 83
  • 160

1 Answers1

2

It looks like you are trying to run a column-wise (0th-dim in ArrayFire) scan on a 2D array. Here is some code that you could use:

af::array SlBegin(N, devRow);
af::array result = accum(SlBegin, 0);

Here is a sample output

A [5 3 1 1]
0.7402     0.4464     0.7762 
0.9210     0.6673     0.2948 
0.0390     0.1099     0.7140 
0.9690     0.4702     0.3585 
0.9251     0.5132     0.6814 

accum(A, 0) [5 3 1 1]
0.7402     0.4464     0.7762 
1.6612     1.1137     1.0709 
1.7002     1.2236     1.7850 
2.6692     1.6938     2.1435 
3.5943     2.2070     2.8249 

This runs and inclusive scan on each column independently.

As for gfor, it has been added to the Open Source version of ArrayFire. As this code base is still a beta, improvements and fixes are taking place very rapidly. So keep a watch on our github page.

shehzan
  • 331
  • 1
  • 5
  • :Hello and thanks for helping.Can you please write the analogous piece of code , as I have it?Is it necessary to use another array ( result ) fro the accum or not?After the loop do I have to do SlBegin.host( theRow );?Can you please provide me with this example?I am using a 1D ,not 2D array. – George Mar 03 '15 at 14:26
  • 2
    The code you have shown suggests the wrong use of ArrayFire. I would suggest you to post on the ArrayFire mailing list https://groups.google.com/forum/#!forum/arrayfire-users if you want to discuss your code. – shehzan Mar 03 '15 at 18:14