Why doesn't this compile? The following MRE is built from a number of examples trying to get OpenMP and OpenACC to work together. The compile command produces the following error:
$ mpic++ -mcmodel=medium -fopenmp -acc -ta=tesla:managed -Minfo=accel mp_acc.c -o mp_acc
"mp_acc.c", line 49: error: invalid text in pragma
#pragma omp loop for
^
1 error detected in the compilation of "mp_acc.c".
$
I am running:
$ mpic++ --version
nvc++ 22.1-0 64-bit target on x86-64 Linux -tp skylake-avx512
NVIDIA Compilers and Tools
Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
The C++ array allocation is a bit new to me, but it does appear to work better than the array-of-array-of-pointers approach. The MPI compiler doesn't tell me what is "invalid" about a standard, pragma command.
Here is the MRE:
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <string>
#include <mpi.h>
#include <omp.h>
#include <openacc.h>
#include <bits/stdc++.h>
#include <sys/stat.h>
using namespace std;
void* allocMatrix (int nRow, int nCol) {
void* restrict m = malloc (sizeof(int[nRow][nCol]));
return(m);
}
#pragma acc routine gang
void* func(void* a, int nrows, int ncols) {
return(a);
}
int main(int argc, char *argv[]) {
int nrows = 5;
int ncols = 3;
int (*a)[ncols] = (int (*)[ncols])allocMatrix(nrows, ncols);
int* restrict ta = (int*)malloc(nrows * sizeof(int));
for ( int i=0; i<nrows; i++ ) {
for ( int j=0; j<ncols; j++ ) {
a[i][j] = 1;
}
}
for ( int i=0; i<nrows; i++ ) {
for ( int j=0; j<ncols; j++ ) {
cout << a[i][j] << " ";
}
cout << endl;
}
#pragma omp parallel num_threads()
{
size_t tid = omp_get_thread_num();
#pragma omp loop
for (int i = 0; i < nrows; ++i) {
#pragma acc parallel deviceptr(a,nrows,ncols) async(tid)
{
a = (int (*)[ncols]) func(a, nrows, ncols);
}
}
#pragma acc wait
}
for ( int i=0; i<nrows; i++ ) {
for ( int j=0; j<ncols; j++ ) {
cout << a[i][j] << " ";
}
cout << endl;
}
memset( a, 0, nrows*ncols*sizeof(int) );
for ( int i=0; i<nrows; i++ ) {
for ( int j=0; j<ncols; j++ ) {
cout << a[i][j] << " ";
}
cout << endl;
}
free(a);
}