I am trying to call an OpenBLAS function from inside an OpenMP thread while the number of OpenBLAS threads is set to a value other than one. I am using OpenBLAS 0.3.9. After downloading the source, I untarred it and ran
make USE_OPENMP=1
make PREFIX=/someFolder/ install
However, I always get the following warning from my executable:
OpenBLAS Warning : Detect OpenMP Loop and this application may hang. Please rebuild the library with USE_OPENMP=1 option.
Does anyone know why this is the case and how I can fix it? Here is a minimal example of my code:
#include <complex>
#include <vector>
#include <random>
#include <iostream>
#include <algorithm>
#include <omp.h>
#include <cblas.h>
#include <lapacke.h>
// Minimal reproduction: each OpenMP thread issues its own DGEMM call while
// OpenBLAS is configured to use more than one thread.
int main(int, char**) {
    int const blas_threads = 2;
    int const omp_threads = 2;
    int const matrix_size = 100;
    int const element_count = matrix_size * matrix_size;

    // Set the OpenBLAS thread count and allow up to two active levels of
    // nested OpenMP parallelism.
    openblas_set_num_threads(blas_threads);
    omp_set_max_active_levels(2);

    // With alpha = 1 and beta = 0 the call below computes C = A * B.
    double alpha = 1.;
    double beta = 0.;

    // One square matrix (stored as a flat buffer) per OpenMP thread for each
    // of the operands A, B and the result C.
    using Matrix = std::vector<double>;
    std::vector<Matrix> as(omp_threads, Matrix(element_count));
    std::vector<Matrix> bs(omp_threads, Matrix(element_count));
    std::vector<Matrix> cs(omp_threads, Matrix(element_count));

    // Fill the inputs sequentially from a single generator; the generator is
    // never touched inside the parallel region.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<double> dis;
    auto const fill_random = [&dis, &gen](Matrix& m) {
        for (double& value : m) {
            value = dis(gen);
        }
    };
    for (int t = 0; t < omp_threads; ++t) {
        fill_random(as[t]);
        fill_random(bs[t]);
    }

    // Debug aid: print the first pair of input matrices.
    // for (int k = 0; k < element_count; ++k) {
    //     std::cout << as[0][k] << " " << bs[0][k] << std::endl;
    // }

    // schedule(static, 1) hands out the iterations round-robin, one at a
    // time, so with omp_threads threads each thread gets one multiplication.
    #pragma omp parallel for num_threads(omp_threads), schedule(static, 1)
    for (int t = 0; t < omp_threads; ++t) {
        double const* const a = as[t].data();
        double const* const b = bs[t].data();
        double* const c = cs[t].data();
        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
                    matrix_size, matrix_size, matrix_size,
                    alpha, a, matrix_size,
                    b, matrix_size,
                    beta, c, matrix_size);
    }

    // Debug aid: print the first result matrix.
    // for (int k = 0; k < element_count; ++k) {
    //     std::cout << cs[0][k] << std::endl;
    // }

    return 0;
}