I have a function within a DLL that uses OpenMP to parallelize a few loops. The functions have been exported and tested by calling them from an app built in C++, and everything works nicely. I then exported the functions to another platform (MetaTrader Terminal 4, which uses the MQL4
programming language), where the code also works but is substantially slower (please see the piece of code below where OpenMP is being used). My best guess is that the parallelization is not working when the functions are called from the platform. I am using Visual Studio 2015 for my project.
// Excerpt from the body of a larger routine (its signature is outside this
// snippet). Applies plane rotations to row pairs (i, i + l) of U_t and V_t in
// two OpenMP-parallel sweeps (index lists I1[1..r1] and I2[1..r2]), folds the
// per-thread maxima stored in C[] into `converge`, and writes diagnostics plus
// the elapsed wall-clock time to C:\output.txt.
// NOTE(review): the sweeps are race-free only if no row index belongs to two
// pairs of the same index list -- confirm how I1/I2 are built.
double dtime;
// Append to the log file. The mode must be an ios openmode flag; a character
// such as 'a' (fopen-style) is not an append request. One stream is enough:
// the second stream that used to be opened on the same file was never used.
ofstream fout("C:\\output.txt", ios::app);
dtime = omp_get_wtime(); // start the clock only once the log file is open
#pragma omp parallel for num_threads(num)
for (int p = 1; p <= r1; p++) {
    int tid = omp_get_thread_num(); // index of this thread's slot in C[]
    int i = I1[p], j = i + l;
    double alpha = 0, beta = 0, gamma = 0;
    double zeta, t, c, s;
    // alpha = |row i|^2, beta = |row j|^2, gamma = <row i, row j> of U_t
    for (int k = 0; k < N; k++) {
        alpha = alpha + (U_t[i][k] * U_t[i][k]);
        beta = beta + (U_t[j][k] * U_t[j][k]);
        gamma = gamma + (U_t[i][k] * U_t[j][k]);
    }
    // Per-thread running maximum of the convergence measure, so threads never
    // write to the same element of C[].
    C[tid] = max(C[tid], abs(gamma) / sqrt(alpha*beta));
    zeta = (beta - alpha) / (2.0 * gamma);
    t = sgn(zeta) / (abs(zeta) + sqrt(1.0 + (zeta*zeta))); // tangent of the rotation angle
    c = 1.0 / (sqrt(1.0 + (t*t))); // cosine
    s = c*t; // sine
    // Rotate rows i and j of both U_t and V_t. Each `!(x < 0 || x > 0)` test
    // is true only for +/-0 and NaN, and stores a plain 0 in that case.
    for (int k = 0; k < N; k++) {
        t = U_t[i][k];
        U_t[i][k] = c*t - s*U_t[j][k];
        if (!(U_t[i][k] < 0 || U_t[i][k] > 0)) {
            U_t[i][k] = 0;
        }
        U_t[j][k] = s*t + c*U_t[j][k];
        if (!(U_t[j][k] < 0 || U_t[j][k] > 0)) {
            U_t[j][k] = 0;
        }
        t = V_t[i][k];
        V_t[i][k] = c*t - s*V_t[j][k];
        if (!(V_t[i][k] < 0 || V_t[i][k] > 0)) {
            V_t[i][k] = 0;
        }
        V_t[j][k] = s*t + c*V_t[j][k];
        if (!(V_t[j][k] < 0 || V_t[j][k] > 0)) {
            V_t[j][k] = 0;
        }
    }
}
// '\n' instead of endl: same bytes in the file, but no forced flush inside the
// timed region (the stream is flushed by close() below).
fout << '\n';
// Second sweep: identical rotation, driven by the index list I2[1..r2].
#pragma omp parallel for num_threads(num)
for (int p = 1; p <= r2; p++) {
    int tid = omp_get_thread_num(); // index of this thread's slot in C[]
    int i = I2[p], j = i + l;
    double alpha = 0, beta = 0, gamma = 0;
    double zeta, t, c, s;
    for (int k = 0; k < N; k++) {
        alpha = alpha + (U_t[i][k] * U_t[i][k]);
        beta = beta + (U_t[j][k] * U_t[j][k]);
        gamma = gamma + (U_t[i][k] * U_t[j][k]);
    }
    C[tid] = max(C[tid], abs(gamma) / sqrt(alpha*beta));
    zeta = (beta - alpha) / (2.0 * gamma);
    t = sgn(zeta) / (abs(zeta) + sqrt(1.0 + (zeta*zeta))); // tangent of the rotation angle
    c = 1.0 / (sqrt(1.0 + (t*t))); // cosine
    s = c*t; // sine
    for (int k = 0; k < N; k++) {
        t = U_t[i][k];
        U_t[i][k] = c*t - s*U_t[j][k];
        if (!(U_t[i][k] < 0 || U_t[i][k] > 0)) {
            U_t[i][k] = 0;
        }
        U_t[j][k] = s*t + c*U_t[j][k];
        if (!(U_t[j][k] < 0 || U_t[j][k] > 0)) {
            U_t[j][k] = 0;
        }
        t = V_t[i][k];
        V_t[i][k] = c*t - s*V_t[j][k];
        if (!(V_t[i][k] < 0 || V_t[i][k] > 0)) {
            V_t[i][k] = 0;
        }
        V_t[j][k] = s*t + c*V_t[j][k];
        if (!(V_t[j][k] < 0 || V_t[j][k] > 0)) {
            V_t[j][k] = 0;
        }
    }
}
fout << '\n';
// Serial reduction of the per-thread maxima into the overall measure.
for (int k = 0; k < num; k++)
    converge = max(converge, C[k]);
fout << '\n';
if (l == M) {
    fout << converge << '\t';
    fout << '\n';
    dtime = omp_get_wtime() - dtime; // elapsed seconds since the clock was started
    fout << "\n" << "dtime: " << dtime << " ";
    dtime = omp_get_wtime(); // restart the clock
    fout << '\n';
}
fout << '\n';
fout.close();
These are outputs for convergence and time taken:
C++ app:
0.999996 dtime: 7.91817e-05
0.954982 dtime: 8.01593e-05
0.964351 dtime: 0.000116817
0.934475 dtime: 7.86929e-05
0.781737 dtime: 7.77154e-05
0.812496 dtime: 7.96705e-05
0.500925 dtime: 7.77154e-05
0.174739 dtime: 7.77154e-05
0.0407444 dtime: 7.86929e-05
0.0137945 dtime: 8.01593e-05
0.0039458 dtime: 0.000136857
0.000550945 dtime: 7.86929e-05
0.000149865 dtime: 7.96705e-05
3.76775e-05 dtime: 7.96705e-05
6.86001e-06 dtime: 8.0648e-05
2.04005e-06 dtime: 7.82042e-05
5.6817e-07 dtime: 8.84685e-05
2.70614e-07 dtime: 7.96705e-05
5.78656e-08 dtime: 7.86929e-05
1.90527e-08 dtime: 8.01593e-05
1.00316e-09 dtime: 7.96705e-05
From the platform:
0.999997 dtime: 0.222026
0.917038 dtime: 0.219041
0.982879 dtime: 0.215614
0.723091 dtime: 0.219034
0.295653 dtime: 0.215915
0.097825 dtime: 0.21803
0.0350881 dtime: 0.21804
0.00654856 dtime: 0.219009
0.00188476 dtime: 0.217366
0.000435981 dtime: 0.223172
9.50818e-05 dtime: 0.21804
2.27348e-05 dtime: 0.260625
1.39124e-05 dtime: 0.219027
1.72161e-06 dtime: 0.218035
3.18178e-07 dtime: 0.218927
1.77708e-07 dtime: 0.218026
3.81575e-08 dtime: 0.204294
9.53867e-09 dtime: 0.221036
MQL4
function declarations:
#property copyright "Adrijus"
#property version "1.00"
#property strict
#import "LMBRDLL.dll"
double getWeights( double &data[],
int &topology[],
int topSize,
double &TV[],
double validationDifference,
int vSize,
int timeSteps,
int nVabs,
double &weights[]
);
double testWeights( double &weights[],
double &currentData[],
int &topology[],
int topSize,
int timeSteps,
int nVabs
);
#import
#include <stdlib.mqh>
MQL4
function calls:
getWeights( data,
topology,
topSize,
TV,
validationDifference,
vSize,
timeSteps,
nVabs,
weights
);
double output = testWeights( weights, // DLL function returns predicted percentage change
currentData,
topology,
topSize,
timeSteps,
nVabs
);
Definitions in the DLL
:
#include "stdafx.h"
double *getWeights( double const *idata,
int const *aTopology,
int topSize,
double const *aTV,
double validationDifference,
int vSize,
int tSteps,
int nVabs,
double *T
) {
vector<unsigned> topology(topSize);
for (i = 0; i < topSize; i++) {
topology[i] = aTopology[i];
}
vector<double> TV(2);
TV[0] = aTV[0];
TV[1] = aTV[1];
Matrix inputVals = buildInputs(idata, vSize, tSteps, nVabs);
inputVals = sortInputs(inputVals, TV);
sortTargets(TV);
Matrix targetVals = getTargets();
Net myNet(topology, inputVals);
double currentValidationError = 1000000000777;
double previousValidationError = 1000000000000000;
double difference = 1;
while (currentValidationError < previousValidationError) {
difference = previousValidationError - currentValidationError;
if (difference < validationDifference)
break;
FeedForward(&topology, &myNet);
Backpropagation(&topology, &myNet, &targetVals);
BuildJacobian(&topology, &myNet);
LevenberMarquardtBeyesianRegularization( &topology,
&myNet,
&targetVals
);
previousValidationError = currentValidationError;
currentValidationError = Validation( &myNet.allLMweights,
topology
);
//UpdateSynapses(topology, myNet);
}
vector<double> rowWeights = buildRWeights(myNet.allSynapses);
for (i = 0; i < rowWeights.size(); i++) {
T[i] = rowWeights[i];
}
return T;
}
double testWeights( double const *rowWeights,
double const *testData,
int const *aTopology,
int topSize,
int timeSteps,
int nVariables
) {
vector<unsigned> topology(topSize);
for (i = 0; i < topSize; i++) {
topology[i] = aTopology[i];
}
vector<Matrix> testWeights = buildWeightMatrices( rowWeights,
topology
);
vector<double> input = buildTestInputs( testData,
timeSteps,
nVariables
);
double output = getOutput(testWeights, input);
return output;
}
LMBRDLL.def
for exporting to MetaTrader Terminal platform:
LIBRARY "LMBRDLL"
EXPORTS
getWeights
testWeights
HeaderDLL.h
for exporting to C++ app:
#include "stdafx.h"
using namespace std;
// DLL export declaration of getWeights, as consumed by the C++ test app.
// Takes const input arrays (idata, aTopology, aTV), scalar settings, and a
// writable output buffer T; returns a double pointer.
// NOTE(review): the MQL4 import of the same function shown in this file
// declares a plain `double` return value -- confirm which one is intended.
__declspec(dllexport) double *getWeights( double const *idata,
int const *aTopology,
int topSize,
double const *aTV,
double validationDifference,
int vSize,
int tSteps,
int nVabs,
double *T
);
// DLL export declaration of testWeights: takes const weight, test-data and
// topology arrays plus scalar sizes, and returns a single double.
__declspec(dllexport) double testWeights( double const *rowWeights,
double const *testData,
int const *aTopology,
int topSize,
int timeSteps,
int nVariables
);