I am trying to offload an existing C code to GPU using OpenACC. In the original CPU code, many times, it is required to select a data array based on the value of some parameter. A sample CPU code is given below:
#include <stdio.h>
#include <stdlib.h>
void selectArray (int **F, int a);
#define NN 1000
int *C, *D, *E;
int main(void)
{
int *F, a = 10; // a is the parameter used to select the array
C = (int *)malloc(NN * sizeof(int));
D = (int *)malloc(NN * sizeof(int));
E = (int *)malloc(NN * sizeof(int));
for (int i = 0; i < NN; i++)
{
C[i] = 10;
D[i] = 20;
}
selectArray(&F, a);
for (int i = 0; i < NN; i++)
{
E[i] = 2 * F[i];
}
for (int i = 0; i < 200; i++)
printf("%d %d \n", i, E[i]);
return 0;
}
void selectArray(int **F, int a)
{
if (a <= 15)
{
(*F) = C;
}
else
{
(*F) = D;
}
}
For OpenACC version of code, arrays C and D are already present on GPU and further calculations need to be done on the array selected on the basis of parameter a.
#include <stdio.h>
#include <stdlib.h>
void selectArray(int **F, int a);
#define NN 1000
int *C, *D, *E;
int main(void)
{
int *F, a = 10; // a is the parameter used to select the array
C = (int *)malloc(NN * sizeof(int));
D = (int *)malloc(NN * sizeof(int));
E = (int *)malloc(NN * sizeof(int));
#pragma acc enter data create(C[:NN], D[:NN])
#pragma acc parallel loop present(C[:NN], D[:NN])
for (int i = 0; i < NN; i++)
{
C[i] = 10;
D[i] = 20;
}
selectArray(&F, a);
#pragma acc enter data copyin(F[:1]) create(E[:NN])
// Here, I cannot figure out how to point F to a selected array (C or D) on the device
#pragma acc parallel loop
for (int i = 0; i < NN; i++)
{
E[i] = 2 * F[i]; //further calculations on selected array on GPU
}
}
#pragma acc exit data delete (C[:NN], D[:NN], F)copyout(E[:200])
for (int i = 0; i < 200; i++)
printf("%d %d \n", i, E[i]);
return 0;
}
void selectArray(int **F, int a)
{
if (a <= 15)
{
(*F) = C;
}
else
{
(*F) = D;
}
}
In actual code, arrays C and D are calculated in different functions and not in the main function. I have tried searching the internet to solve this issue but I could not find any related example. I am using PGI 19.10 compiler on Windows 10. Help in this regard is requested. Thanks in advance