I am trying to multiple the values of a matrix.
#include <stdio.h>
#include <omp.h>
#include <time.h>
#include <stdlib.h>
#include <omp.h>
#define N 2048
#define FactorIntToDouble 1.1;
#define THREAD_NUM 4
double firstMatrix [N] [N] = {0.0};
double secondMatrix [N] [N] = {0.0};
double matrixMultiResult [N] [N] = {0.0};
// Sync
void matrixMulti() {
for(int row = 0 ; row < N ; row++){
for(int col = 0; col < N ; col++){
double resultValue = 0;
for(int transNumber = 0 ; transNumber < N ; transNumber++) {
resultValue += firstMatrix [row] [transNumber] * secondMatrix [transNumber] [col] ;
}
matrixMultiResult [row] [col] = resultValue;
}
}
}
void matrixInit() {
for(int row = 0 ; row < N ; row++ ) {
for(int col = 0 ; col < N ;col++){
srand(row+col);
firstMatrix [row] [col] = ( rand() % 10 ) * FactorIntToDouble;
secondMatrix [row] [col] = ( rand() % 10 ) * FactorIntToDouble;
}
}
}
// Parallel
void matrixMulti2(int start, int end) {
printf("Op: %d - %d\n", start, end);
for(int row = start ; row < end ; row++){
for(int col = 0; col < N ; col++){
double resultValue = 0;
for(int transNumber = 0 ; transNumber < N ; transNumber++) {
resultValue += firstMatrix [row] [transNumber] * secondMatrix [transNumber] [col] ;
}
matrixMultiResult [row] [col] = resultValue;
}
}
}
void process1(){
clock_t t1 = clock();
#pragma omp parallel
{
int thread = omp_get_thread_num();
int thread_multi = N / 4;
int start = (thread) * thread_multi;
int end = 0;
if(thread == (THREAD_NUM - 1)){
end = (start + thread_multi);
}else{
end = (start + thread_multi) - 1;
}
matrixMulti2(start, end);
}
clock_t t2 = clock();
printf("time 2: %ld\n", t2-t1);
}
int main(){
matrixInit();
clock_t t1 = clock();
matrixMulti();
clock_t t2 = clock();
printf("time: %ld", t2-t1);
process1();
return 0;
}
I have both a parallel and sync version. But the parallel version is longer than the sync version.
Current the sync takes around 90 seconds and the parallel over 100. Which makes no sense to me.
My logic was to split the matrix into 4 parts from the first 4 statement. Which I believe is logical.
After I finish this part. I would like to figure out how to speed up this process for the parallel even more. Possibly using Strassen's Matrix Multiplication. I just don't know where to start or how to get to this point.
I've already spent around 5 hours trying to figure this out.