I am studying how memory access affects performance in C++, and there is one thing I do not understand.
I am trying two different methods of summing arrays element-wise. The first accesses one index per iteration and increments i by 1.
The second accesses 10 indices of the arrays per iteration and increments i by 10 (manual loop unrolling).
by 5. With 40 million elements, at first, I thought the one with loop unrolling might work better by reducing execution time. But both of the results are the same. I would like to know why?
#include <bits/stdc++.h>
using namespace std;
using namespace chrono;
void printVector(vector<int>& vect);
/**
 * Benchmark driver: computes the element-wise sum of three int vectors twice,
 * once with a plain loop and once with a manually unrolled (x10) loop, and
 * prints the elapsed wall-clock time of each in milliseconds.
 *
 * Only the summation loops are timed. Both output vectors are allocated and
 * zero-filled before the clock starts, so allocation cost is not attributed
 * to either loop.
 *
 * NOTE(review): each element needs three loads and one store for only two
 * additions, so both loops are presumably limited by memory bandwidth rather
 * than by loop overhead, and an optimizing compiler may unroll or vectorize
 * the plain loop on its own. Confirm with a profiler / the generated assembly.
 *
 * @return 0 on success, 1 if the two loops produced different results.
 */
int main(int /*argc*/, char const * /*argv*/[])
{
    using Clock = std::chrono::steady_clock;
    using Millis = std::chrono::milliseconds;

    constexpr int kElementCount = 40000000;
    constexpr int kUnrollFactor = 10;

    std::vector<int> a(kElementCount);
    std::vector<int> b(kElementCount);
    std::vector<int> c(kElementCount);

    // Fill the inputs with pseudo-random values in [1, 100].
    std::srand(static_cast<unsigned>(std::time(nullptr)));
    for (int i = 0; i < kElementCount; ++i)
    {
        a[i] = (std::rand() % 100) + 1;
        b[i] = (std::rand() % 100) + 1;
        c[i] = (std::rand() % 100) + 1;
    }
    // printVector(a);
    // printVector(b);
    // printVector(c);

    // Allocate (and zero-fill) both outputs up front so neither timed region
    // pays for allocation or first-touch of the output memory.
    std::vector<int> vect1(kElementCount);
    std::vector<int> vect2(kElementCount);

    // --- Plain loop: one element per iteration. ---
    auto start = Clock::now();
    for (int i = 0; i < kElementCount; ++i)
    {
        vect1[i] = a[i] + b[i] + c[i];
    }
    auto end = Clock::now();
    // printVector(vect1);
    std::cout << std::chrono::duration_cast<Millis>(end - start).count() << " milliseconds" << '\n';

    // --- Manually unrolled loop: kUnrollFactor elements per iteration. ---
    // The unrolled body only runs over the largest multiple of kUnrollFactor;
    // the tail loop below handles any leftover elements, so the code stays
    // in bounds even if kElementCount is changed to a non-multiple of 10.
    const int unrolledEnd = kElementCount - (kElementCount % kUnrollFactor);
    start = Clock::now();
    int i = 0;
    for (; i < unrolledEnd; i += kUnrollFactor)
    {
        vect2[i] = a[i] + b[i] + c[i];
        vect2[i + 1] = a[i + 1] + b[i + 1] + c[i + 1];
        vect2[i + 2] = a[i + 2] + b[i + 2] + c[i + 2];
        vect2[i + 3] = a[i + 3] + b[i + 3] + c[i + 3];
        vect2[i + 4] = a[i + 4] + b[i + 4] + c[i + 4];
        vect2[i + 5] = a[i + 5] + b[i + 5] + c[i + 5];
        vect2[i + 6] = a[i + 6] + b[i + 6] + c[i + 6];
        vect2[i + 7] = a[i + 7] + b[i + 7] + c[i + 7];
        vect2[i + 8] = a[i + 8] + b[i + 8] + c[i + 8];
        vect2[i + 9] = a[i + 9] + b[i + 9] + c[i + 9];
    }
    for (; i < kElementCount; ++i)
    {
        vect2[i] = a[i] + b[i] + c[i];
    }
    end = Clock::now();
    // printVector(vect2);
    std::cout << std::chrono::duration_cast<Millis>(end - start).count() << " milliseconds" << '\n';

    // Cross-check the two results. This validates the unrolled loop and also
    // consumes both outputs so the computations cannot be discarded as unused.
    if (vect1 != vect2)
    {
        std::cerr << "Result mismatch between plain and unrolled loops" << '\n';
        return 1;
    }
    return 0;
}
/**
 * Prints a "Vector elements: " header line to standard output, followed by
 * every element of `vect` on a single line (each element followed by one
 * space) and a terminating newline.
 *
 * @param vect Vector to print; it is only read. The non-const reference is
 *             kept so the definition matches the declaration earlier in the
 *             file.
 */
void printVector(std::vector<int>& vect) {
    std::cout << "Vector elements: " << '\n';
    // Range-for avoids comparing a signed index against the unsigned size().
    for (const int value : vect) {
        std::cout << value << " ";
    }
    std::cout << '\n';
}