I'm trying to implement the following code to see how OpenMP threads are managed over a nested loop where the inner and outer loops are implemented separately, one in a function and the other in its caller.
Each loop is annotated with the directive
#pragma omp parallel for
and I'm assuming the pragma for the inner loop is ignored.
To see this, I printed the thread number in each loop.
What I see is the following: the thread id printed in the inner loop is always zero rather than the thread number of the calling (outer-loop) thread. Why does this happen?
Calling 0 from 0
Calling 2 from 1
Calling 6 from 4
Calling 8 from 6
Calling 4 from 2
Calling 7 from 5
Calling 5 from 3
Calling 0 from 0 // Expecting 3
Calling 1 from 0
Calling 2 from 0
Calling 3 from 0
Calling 0 from 0
Calling 1 from 0
Calling 2 from 0
Calling 3 from 0
Calling 0 from 0
Calling 0 from 0
Calling 0 from 0
Calling 1 from 0
Calling 2 from 0
Calling 3 from 0
Calling 9 from 7
Calling 1 from 0 // Expecting 7
Calling 2 from 0
Calling 3 from 0
Calling 0 from 0
Calling 3 from 1
Calling 0 from 0 // Expecting 1
Calling 1 from 0
Calling 2 from 0
Calling 1 from 0
Calling 2 from 0
Calling 3 from 0
Calling 3 from 0
Calling 1 from 0
Calling 2 from 0
Calling 3 from 0
Calling 0 from 0
Calling 1 from 0
Calling 2 from 0
Calling 3 from 0
Calling 0 from 0
Calling 1 from 0
Calling 2 from 0
Calling 3 from 0
Calling 1 from 0
Calling 0 from 0
Calling 1 from 0
Calling 2 from 0
Calling 3 from 0
#include <vector>
#include <omp.h>
#include <iostream>
#include <cstdio>
#include <limits>
#include <cstdint>
#include <cinttypes>
using namespace std;
// Number of integers stored in every Mat.
constexpr std::size_t kM = 4;

// Fixed-size array of kM integers with an element-wise "add into" operation
// whose loop is annotated for OpenMP.
struct Mat
{
    // Zero-initialised on construction. Copying is element-wise through the
    // implicitly generated copy operations, so no hand-written constructors
    // are needed.
    int elem[kM] = {};

    // Adds m element-wise into this object and prints, for every index, the
    // thread number reported inside the loop.
    //
    // omp_get_thread_num() reports the caller's number within its *innermost*
    // team. When this method is reached from inside another parallel region
    // and nested parallelism is not active (the usual default), the region
    // below runs with a team of one thread, so the printed number is 0 no
    // matter which outer thread made the call. The outer thread's number
    // should be obtainable with
    // omp_get_ancestor_thread_num(omp_get_level() - 1) -- verify against the
    // OpenMP runtime in use.
    //
    // m is only read, so it is taken by const reference.
    void do_mat(const Mat& m)
    {
        // Keep the loop counter and its bound the same signed type; the
        // counter is printed with %d and kM is unsigned.
        const int count = static_cast<int>(kM);

        #pragma omp parallel for
        for (int i = 0; i < count; ++i)
        {
            // Without OpenMP enabled the pragma above is ignored and the
            // runtime library is not linked, so fall back to thread 0.
#ifdef _OPENMP
            const int tid = omp_get_thread_num();
#else
            const int tid = 0;
#endif
            printf(" \tCalling %d from %d\n", i, tid);
            elem[i] += m.elem[i];
        }
    }
};
int main ()
{
const int kN = 10;
vector<Mat> matrices(kN);
Mat m;
#pragma omp parallel for
for (int i = 0; i < kN; i++)
{
int tid = omp_get_thread_num();
printf("Calling %d from %d\n", i, tid);
matrices[i].do_mat(m);
}
return 0;
}