I've been working with a heat transfer code. This code, basically, stablishes the initial conditions for a cube and all of its faces. The six faces start at different temperatures, and then the code will be calculating how the temperature changes in all of the faces due to the heat transfer between them. Now, I've been trying offloading to an NVIDIA GPU using OpenMP directives. This code initializes the faces conditions using a triple pointer, which is sort of an array of arrays. Reading a little bit about this matter, I've come to know that 3D architectures are not easily offloaded to the GPUs. So my question is if it is possible to offload this triple pointer arrays to the GPU or if I have to use a more flat array form.
Here I leave the code, which is still working on CPU. Parallel version of the code.
#include <omp.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 25 //This defines the number of points per dimension (Cube = N*N*N)
#define NUM_STEPS 6000 //This is the number of simulations time steps
/*writeFile: this function writes simulation results into a file.
* A file is created for each iteration that's passed to the function
* as a parameter. It also takes the triple pointer to the simulation
* data*/
void writeFile(int iteration, double*** data){
char filename[50];
char itr[12];
sprintf(itr, "%d", iteration);
strcpy(filename, "heat_");
strcat(filename, itr);
strcat(filename, ".txt");
//printf("Filename is %s\n", filename);
FILE *fp;
fp = fopen(filename, "w");
fprintf(fp, "x,y,z,T\n");
for(int i=0; i<N; i++){
for(int j=0;j<N; j++){
for(int k=0; k<N; k++){
fprintf(fp,"%d,%d,%d,%f\n", i,j,k,data[i][j][k]);
}
}
}
fclose(fp);
}
void compute_heat_transfer(double ***arrayOld, double ***arrayNew){
int i,j,k;
/*Compute steady-state solution*/
for(int nsteps=0; nsteps < NUM_STEPS; nsteps++){
/*if(nsteps % 100 == 0){
writeFile(nsteps, arrayOld);
}*/
#pragma omp parallel shared(arrayNew, arrayOld) private(i,j,k)
{
#pragma omp for
for(i=1; i<N-1; i++){
for(j=1; j<N-1; j++){
for(k=1;k<N-1;k++){
//This is the 6-neighbor stencil computation
arrayNew[i][j][k] = (arrayOld[i-1][j][k] + arrayOld[i+1][j][k] + arrayOld[i][j-1][k] + arrayOld[i][j+1][k] +
arrayOld[i][j][k-1] + arrayOld[i][j][k+1])/6.0;
}
}
}
#pragma omp for
for(i=1; i<N-1; i++){
for(j=1; j<N-1; j++){
for(k=1; k<N-1; k++){
arrayOld[i][j][k] = arrayNew[i][j][k];
}
}
}
}
}
}
int main (int argc, char *argv[]) {
int i,j,k,nsteps;
double mean;
double ***arrayOld; //Variable that will hold the data of the past iteration
double ***arrayNew; //Variable where newly computed data will be stored
arrayOld = (double***)malloc(N*sizeof(double**));
arrayNew = (double***)malloc(N*sizeof(double**));
if(arrayOld== NULL){
fprintf(stderr, "Out of memory");
exit(0);
}
for(i=0; i<N;i++){
arrayOld[i] = (double**)malloc(N*sizeof(double*));
arrayNew[i] = (double**)malloc(N*sizeof(double*));
if(arrayOld[i]==NULL){
fprintf(stderr, "Out of memory");
exit(0);
}
for(int j=0;j<N;j++){
arrayOld[i][j] = (double*)malloc(N*sizeof(double));
arrayNew[i][j] = (double*)malloc(N*sizeof(double));
if(arrayOld[i][j]==NULL){
fprintf(stderr,"Out of memory");
exit(0);
}
}
}
/*Set boundary values and compute mean boundary values*/
mean = 0.0;
for(i=0; i<N; i++){
for(j=0;j<N;j++){
arrayOld[i][j][0] = 100.0;
mean += arrayOld[i][j][0];
}
}
for(i=0; i<N; i++){
for(j=0;j<N;j++){
arrayOld[i][j][N-1] = 100.0;
mean += arrayOld[i][j][N-1];
}
}
for(j=0; j<N; j++){
for(k=0;k<N;k++){
arrayOld[0][j][k] = 100.0;
mean += arrayOld[0][j][k];
}
}
for(j=0; j<N; j++){
for(k=0;k<N;k++){
arrayOld[N-1][j][k] = 100.0;
mean += arrayOld[N-1][j][k];
}
}
for(i=0; i<N; i++){
for(k=0;k<N;k++){
arrayOld[i][0][k] = 100.0;
mean += arrayOld[i][0][k];
}
}
for(i=0; i<N; i++){
for(k=0;k<N;k++){
arrayOld[i][N-1][k] = 0.0;
mean += arrayOld[i][N-1][k];
}
}
mean /= (6.0 * (N*N));
/*Initialize interior values*/
for(i=1; i<N-1; i++){
for(j=1; j<N-1; j++){
for(k=1; k<N-1;k++){
arrayOld[i][j][k] = mean;
}
}
}
double tdata = omp_get_wtime();
compute_heat_transfer(arrayOld, arrayNew);
tdata = omp_get_wtime()-tdata;
printf("Execution time was %f secs\n", tdata);
for(i=0; i<N;i++){
for(int j=0;j<N;j++){
free(arrayOld[i][j]);
free(arrayNew[i][j]);
}
free(arrayOld[i]);
free(arrayNew[i]);
}
free(arrayOld);
free(arrayNew);
return 0;
}