I wrote paralell program based on a Strassen multiplication algorithm using pointers. this program return the result of multiplication of two matrices that are the same size. when the size is 256 , program fill about 1 GB of ram, and when it is 512 ram total\y become full and my windows doesn't work then I must restart.
I replace whole pointers with vectors then incredibly Ram usage decreased!.for 1024 size , just 80 MB of ram used.
I know a little about vector that is bound statically at first then if we need more space during runtime its bound dynamically.
Why pointers needed more space than vectors ?
this is my first code :
#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>
#include <Windows.h>
#include <Psapi.h>
#include<vector>
using namespace std;
int ** matrix_1;
int ** matrix_2;
#define number_thread:4;
void show(string name, int n, int **show)
{
cout << " matrix " << name << " :" << endl;
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
cout << show[i][j] << " ";
cout << endl;
}
}
int ** strassen(int n, int **matrix_a, int ** matrix_b)
{
int ** A11;
int ** A12;
int ** A21;
int ** A22;
int ** B11;
int ** B12;
int ** B21;
int ** B22;
int ** result;
int **m1, **m2, **m3, ** m4, ** m5, ** m6, ** m7, ** m8;
A11 = new int*[n / 2];
A12 = new int*[n / 2];
A21 = new int*[n / 2];
A22 = new int*[n / 2];
B11 = new int*[n / 2];
B12 = new int*[n / 2];
B21 = new int*[n / 2];
B22 = new int*[n / 2];
result = new int *[n];
m1 = new int*[n / 2];
m2 = new int*[n / 2];
m3 = new int*[n / 2];
m4 = new int*[n / 2];
m5 = new int*[n / 2];
m6 = new int*[n / 2];
m7 = new int*[n / 2];
m8 = new int*[n / 2];
cilk_for(int i = 0; i < n / 2; i++)
{
//cout << " value i : " << i << endl;
A11[i] = new int[n / 2];
A12[i] = new int[n / 2];
A21[i] = new int[n / 2];
A22[i] = new int[n / 2];
B11[i] = new int[n / 2];
B12[i] = new int[n / 2];
B21[i] = new int[n / 2];
B22[i] = new int[n / 2];
m1[i] = new int[n / 2];
m2[i] = new int[n / 2];
m3[i] = new int[n / 2];
m4[i] = new int[n / 2];
m5[i] = new int[n / 2];
m6[i] = new int[n / 2];
m7[i] = new int[n / 2];
m8[i] = new int[n / 2];
}
cilk_for(int i = 0; i < n; i++) // matrix result
result[i] = new int[n];
if (n == 2)
{
result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];
return result;
}
// for (int i = 0; i < n;i++)
cilk_for(int i = 0; i < (n / 2); i++)
{
for (int j = 0; j < (n / 2); j++)
{
A11[i][j] = matrix_a[i][j];
B11[i][j] = matrix_b[i][j];
A12[i][j] = matrix_a[i][j + n / 2];
B12[i][j] = matrix_b[i][j + n / 2];
A21[i][j] = matrix_a[i + n / 2][j];
B21[i][j] = matrix_b[i + n / 2][j];
A22[i][j] = matrix_a[i + n / 2][j + n / 2];
B22[i][j] = matrix_b[i + n / 2][j + n / 2];
}
}
/*
show("A11", n / 2, A11);
show("A12", n / 2, A12);
show("A21", n / 2, A21);
show("A22", n / 2, A22);
show("B11", n / 2, B11);
show("B12", n / 2, B12);
show("B21", n / 2, B21);
show("B22", n / 2, B22);*/
// Run By eight_thread
m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22
cilk_sync;
/*
cout << "****************************\n";
cout << "*********** before add :\n";
show("m1", n / 2, m1);
show("m2", n / 2, m2);
show("m3", n / 2, m3);
show("m4", n / 2, m4);
show("m5", n / 2, m5);
show("m6", n / 2, m6);
show("m7", n / 2, m7);
show("m8", n / 2, m8);*/
cilk_for(int i = 0; i < n / 2; i++)
for (int j = 0; j < n / 2; j++)
{
m1[i][j] = m1[i][j] + m2[i][j];
m3[i][j] = m3[i][j] + m4[i][j];
m5[i][j] = m5[i][j] + m6[i][j];
m7[i][j] = m7[i][j] + m8[i][j];
}
/*cout << "after adding hello \n";
show("m1", n / 2, m1);
show("m3", n / 2, m3);
show("m5", n / 2, m5);
show("m7", n / 2, m7);*/
cilk_for(int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
if (i < n / 2 && j < n / 2)
{
result[i][j] = m1[i][j];
}
else if (i < n / 2 && j >= n / 2)
{
result[i][j] = m3[i][j - n / 2];
}
else if (i >= n / 2 && j < n / 2)
{
result[i][j] = m5[i - n / 2][j];
}
else if (i >= n / 2 && j >= n / 2)
{
result[i][j] = m7[i - n / 2][j - n / 2];
}
}
}
/*
cilk_for(int i = 0; i < n / 2; i++)
{
for (int j = 0; j < n / 2; j++)
{
delete A11[i][j];
delete A12[i][j];
delete A21[i][j];
delete A22[i][j];
delete B11[i][j];
delete B12[i][j];
delete B21[i][j];
delete B22[i][j];
delete m1[i][j];
delete m2[i][j];
delete m3[i][j];
delete m4[i][j];
delete m5[i][j];
delete m6[i][j];
delete m7[i][j];
delete m8[i][j];*/
/* }
delete[] A11[i];
delete[] A12[i];
delete[] A21[i];
delete[] A22[i];
delete[] B11[i];
delete[] B12[i];
delete[] B21[i];
delete[] B22[i];
delete[] m1[i];
delete[] m2[i];
delete[] m3[i];
delete[] m4[i];
delete[] m5[i];
delete[] m6[i];
delete[] m7[i];
delete[] m8[i];
}*/
delete[] A11;
delete[] A12;
delete[] A21;
delete[] A22;
delete[] B11;
delete[] B12;
delete[] B21;
delete[] B22;
delete[] m1;
delete[] m2;
delete[] m3;
delete[] m4;
delete[] m5;
delete[] m6;
delete[] m7;
delete[] m8;
return result;
}
int main()
{
int size;
freopen("in.txt", "r", stdin);
freopen("out.txt", "w", stdout);
__cilkrts_set_param("nworkers", "4");
//cout << " please Enter the size OF ur matrix /n";
cin >> size;
matrix_1 = new int*[size];
matrix_2 = new int*[size];
if (size % 2 == 0)
{
//instialize matrix1
//cout << "matrix_1 :" << endl;
for (int i = 0; i < size; i++)
{
matrix_1[i] = new int[size];
for (int j = 0; j < size; j++)
{
matrix_1[i][j] = rand() % 3;
//cin >> matrix_1[i][j];
//cout << matrix_1[i][j] << " ";
}
//cout << endl;
}
//instialize matrix2
//cout << "matrix2_is :\n";
for (int i = 0; i < size; i++)
{
matrix_2[i] = new int[size];
for (int j = 0; j < size; j++)
{
matrix_2[i][j] = rand() % 3;
//cout << matrix_2[i][j]<<" ";
//cin >> matrix_2[i][j];
}
// cout << endl;
}
clock_t begin = clock();
matrix_2 = strassen(size, matrix_1, matrix_2);
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
cout << "*******\ntime is : " << elapsed_secs << endl;
//answer:
/* for (int i = 0; i < size; i++)
{
for (int j = 0; j < size; j++)
{
cout<< matrix_2[i][j]<<" ";
}
cout << endl;
}*/
}
else
cout << " we couldnt use strasen ";
cout << "\nTotal Virtual Memory:" << endl;
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
printf("%u", totalVirtualMem);
cout << "\nVirtual Memory currently used:" << endl;
// MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
printf("%u", virtualMemUsed);
cout << "\nVirtual Memory currently used by current process:" << endl;
PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
printf("%u", virtualMemUsedByMe);
cout << "\nPhysical Memory currently used: " << endl;
//MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;
printf("%u", physMemUsed);
cout << endl;
cout << "\nPhysical Memory currently used by current process : " << endl;
// PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
printf("%u", physMemUsedByMe);
//cout << "memory usage :"<<double(totalVirtualMem) << endl;
//_getch();
return 0;
}
I replace whole pointers array with vectors :
#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>
#include <Windows.h>
#include <Psapi.h>
#include<vector>
using namespace std;
vector<vector<int> > matrix_1, matrix_2;
//int matrix_1;
//int ** matrix_2;
#define number_thread:4;
void show(string name ,int n, int **show)
{
cout << " matrix " << name<<" :" << endl;
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
cout << show[i][j] << " ";
cout << endl;
}
}
vector<vector<int>> strassen(int n, vector<vector<int>> matrix_a, vector<vector<int>> matrix_b)
{
vector<vector<int>> A11;
vector<vector<int>> A12;
vector<vector<int>> A21;
vector<vector<int>> A22;
vector<vector<int>> B11;
vector<vector<int>> B12;
vector<vector<int>> B21;
vector<vector<int>> B22;
vector<vector<int>> result;
vector<int> help;
vector<vector<int>> m1, m2, m3, m4, m5, m6, m7, m8;
help.clear();
for (int j = 0; j < n / 2; j++)
{
help.push_back(2);
}
for(int i = 0; i < n / 2; i++)
{
A11.push_back(help);
A12.push_back(help);
A21.push_back(help);
A22.push_back(help);
B11.push_back(help);
B12.push_back(help);
B21.push_back(help);
B22.push_back(help);
m1.push_back(help);
m2.push_back(help);
m3.push_back(help);
m4.push_back(help);
m5.push_back(help);
m6.push_back(help);
m7.push_back(help);
m8.push_back(help);
}
for (int j = 0; j < n / 2; j++)
help.push_back(2);
for(int i = 0; i < n; i++)
{
result.push_back(help);
}
if (n == 2)
{
result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];
return result;
}
// for (int i = 0; i < n;i++)
for(int i = 0; i < (n / 2); i++)
{
for(int j = 0; j <( n / 2); j++)
{
A11[i][j] = matrix_a[i][j];
B11[i][j] = matrix_b[i][j];
A12[i][j] = matrix_a[i][j + n / 2];
B12[i][j] = matrix_b[i][j + n / 2];
A21[i][j] = matrix_a[i + n / 2][j];
B21[i][j] = matrix_b[i + n / 2][j];
A22[i][j] = matrix_a[i + n / 2][j + n / 2];
B22[i][j] = matrix_b[i + n / 2][j + n / 2];
}
}
/*
show("A11", n / 2, A11);
show("A12", n / 2, A12);
show("A21", n / 2, A21);
show("A22", n / 2, A22);
show("B11", n / 2, B11);
show("B12", n / 2, B12);
show("B21", n / 2, B21);
show("B22", n / 2, B22);*/
// Run By eight_thread
m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22
cilk_sync;
/*
cout << "****************************\n";
cout << "*********** before add :\n";
show("m1", n / 2, m1);
show("m2", n / 2, m2);
show
("m3", n / 2, m3);
show("m4", n / 2, m4);
show("m5", n / 2, m5);
show("m6", n / 2, m6);
show("m7", n / 2, m7);
show("m8", n / 2, m8);*/
for(int i = 0; i < n / 2; i++)
for (int j = 0; j < n / 2; j++)
{
m1[i][j] = m1[i][j] + m2[i][j];
m3[i][j] = m3[i][j] + m4[i][j];
m5[i][j] = m5[i][j] + m6[i][j];
m7[i][j] = m7[i][j] + m8[i][j];
}
/*cout << "after adding hello \n";
show("m1", n / 2, m1);
show("m3", n / 2, m3);
show("m5", n / 2, m5);
show("m7", n / 2, m7);*/
for(int i = 0; i < n ; i++)
{
for(int j = 0; j < n ; j++)
{
if (i < n / 2 && j < n / 2)
{
result[i][j] = m1[i][j];
}
else if (i < n / 2 && j >= n / 2)
{
result[i][j] = m3[i][j - n / 2];
}
else if (i >= n / 2 && j < n / 2)
{
result[i][j] = m5[i - n / 2][j];
}
else if (i >= n / 2 && j >= n / 2)
{
result[i][j] = m7[i - n / 2][j - n / 2];
}
}
}
/*
cilk_for(int i = 0; i < n / 2; i++)
{
for (int j = 0; j < n / 2; j++)
{
delete A11[i][j];
delete A12[i][j];
delete A21[i][j];
delete A22[i][j];
delete B11[i][j];
delete B12[i][j];
delete B21[i][j];
delete B22[i][j];
delete m1[i][j];
delete m2[i][j];
delete m3[i][j];
delete m4[i][j];
delete m5[i][j];
delete m6[i][j];
delete m7[i][j];
delete m8[i][j];*/
/* }
delete[] A11[i];
delete[] A12[i];
delete[] A21[i];
delete[] A22[i];
delete[] B11[i];
delete[] B12[i];
delete[] B21[i];
delete[] B22[i];
delete[] m1[i];
delete[] m2[i];
delete[] m3[i];
delete[] m4[i];
delete[] m5[i];
delete[] m6[i];
delete[] m7[i];
delete[] m8[i];
}*/
/* for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
cout << result[i][j] << " ";
}
cout << endl;
}*/
return result;
}
int main()
{
int size;
freopen("in.txt","r",stdin);
freopen("out.txt", "w", stdout);
__cilkrts_set_param("nworkers", "1");
//cout << " please Enter the size OF ur matrix /n";
cin >> size;
vector<int> inner;
if (size % 2 == 0)
{
//instialize matrix1
cout << "matrix_1 :" << endl;
for (int i = 0; i < size; i++)
{
inner.clear();
for (int j = 0; j < size; j++)
{
inner.push_back(rand()%3);
//cin >> matrix_1[i][j];
cout << inner[j]<<" ";
}
cout << endl;
matrix_1.push_back(inner);
}
//instialize matrix2
cout << "matrix2_is :\n";
inner.clear();
for (int i = 0; i < size; i++)
{
inner.clear();
//matrix_2[i] = new int[size];
for (int j = 0; j < size; j++)
{
inner.push_back(rand()%3);
cout << inner[j]<<" ";
//cin >> matrix_2[i][j];
}
cout << endl;
matrix_2.push_back(inner);
}
clock_t begin = clock();
matrix_2 = strassen(size, matrix_1, matrix_2);
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
cout << "*******\ntime is : " << elapsed_secs << endl;
//answer:
cout << "answerrr :" << endl;
for (int i = 0; i < size; i++)
{
for (int j = 0; j < size; j++)
{
cout<< matrix_2[i][j]<<" ";
}
cout << endl;
}
}
else
cout << " we couldnt use strasen ";
cout << "\nTotal Virtual Memory:" << endl;
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
printf("%u", totalVirtualMem);
cout << "\nVirtual Memory currently used:" << endl;
// MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
printf("%u", virtualMemUsed);
cout << "\nVirtual Memory currently used by current process:" << endl;
PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
printf("%u", virtualMemUsedByMe);
cout << "\nPhysical Memory currently used: " << endl;
//MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;
printf("%u", physMemUsed);
cout << endl;
cout << "\nPhysical Memory currently used by current process : " << endl;
// PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
printf("%u", physMemUsedByMe);
//cout << "memory usage :"<<double(totalVirtualMem) << endl;
//_getch();
return 0;
}