1d complex to complex transform with cuda cufft library

Question

i'm writing a simple code for complex to complex 1d FFT with cuda library "cufft". I've changed "cudaMalloc" with "malloc" because if i use cudaMalloc exe file not work and crashing! But this is not the primary problem. When i execute the program, cufftExecC2C return a value != CUFFT_SUCCESS so FFT not work!! why? this is my code

  #include <stdio.h>
  #include <cufft.h>
  #include <cuda.h>
  #include <math.h>


  #define NX 64
  #define BATCH 1
  #define PI 3.14159265
  #define FREQ 10


  int main(){


//dichiarazione delle variabili
cufftHandle plan; 
cufftComplex *out;
cufftComplex *in;
char premi_invio;
int i;


//assegnazione memoria agli array in e out
in=(cufftComplex*)malloc(sizeof(cufftComplex)*NX*BATCH);
/*
if (cudaGetLastError() != cudaSuccess){
printf("Cuda error: allocazione fallita\n");
return;
};
*/

out=(cufftComplex*)malloc(sizeof(cufftComplex)*NX*BATCH);
/*
if (cudaGetLastError() != cudaSuccess){
printf("Cuda error: allocazione fallita\n");
return;
};
*/

//creazione del piano per la trasformata e controllo della corretta creazione del piano
if (cufftPlan1d(&plan, NX, CUFFT_C2C, BATCH) == CUFFT_SUCCESS){
    printf("piano creato correttamente!\npremi invio per continuare...\n");
    scanf("%c", &premi_invio);
};

//assegnazione campioni all'array di input

for (i = 0; i < NX; i++){
    in[i].x = cos(2 * PI * FREQ * i / NX);
    in[i].y = 0;
};
printf("Vuoi visualizzare i campioni? (y/n)\n");
scanf("%c", &premi_invio);
if (premi_invio == 'y'){
    for (i = 0; i < NX; i++){
        printf("in[%d].x = %f \tin[%d].y = %f \n", i, in[i].x, i, in[i].y);
    }
}

//esecuzione trasformata e controllo della corretta esecuzione
if (cufftExecC2C(plan,in,out,CUFFT_FORWARD) == CUFFT_SUCCESS){
    printf("trasformata eseguita correttamente!\npremi invio per vedere i risultati...\n");
    scanf("%c", &premi_invio);}
else {
    printf("trasformata non eseguita\nPremi invio per tornare...\n");
    scanf("%c", &premi_invio);
    return;
}
//visualizzazione risultati (vettore out)
for (i = 0; i < NX; i++){
    printf("out[%d].x = %f \tout[%d].y = %f\n", i, cuCrealf(out[i]),i, cuCimagf(out[i]) );

}

cufftDestroy(plan);
//cudaFree(in);
//cudaFree(out);
return 0;
}

score 2 · Answer 1 · answered May 15 '15 at 15:59

The documentation explains that the input and output data must be on the GPU, so you need to use cudaMalloc() instead of malloc().

SO the real question is why you had a problem when using cudaMalloc(); probably the simplest explanation is that you were allocating GPU memory and then trying to write to it directly in the CPU code:

for (i = 0; i < NX; i++){
  in[i].x = cos(2 * PI * FREQ * i / NX);
  in[i].y = 0;
};

Instead you should malloc() the same size region on the CPU, initialise this region on the CPU and then use cudaMemcpy() to copy it to the GPU. Likewise to copy the results back to read them. Ideally, of course, you would leave the data on the GPU for further processing!

1d complex to complex transform with cuda cufft library

1 Answers1