The code is written in C++11. Each process gets two sparse matrices as input. The test data can be downloaded from the link in the original post (the link target is missing here).
The test data contains two files: a0 (sparse matrix 0) and a1 (sparse matrix 1). Each line of a file has the form "i j v", meaning that the sparse matrix has the value v at row i, column j. i, j and v are all integers.
I use the C++11 unordered_map as the data structure for the sparse matrices:
unordered_map<int, unordered_map<int, double> > matrix1 ;
matrix1[i][j] = v ; //means at row i column j of matrix1 is value v;
The following code takes about 2 minutes to run. The compile command is g++ -O2 -std=c++11 ./matmult.cpp.
The g++ version is 4.8.1 on openSUSE 13.1. My computer's specification: Intel(R) Core(TM) i5-4200U CPU @ 1.60GHz, 4 GB of memory.
#include <iostream>
#include <fstream>
#include <unordered_map>
#include <vector>
#include <thread>
using namespace std;
// Reads a sparse matrix from the text file `fn` and stores it in `m`.
//
// The file is parsed as a sequence of whitespace-separated triples "i j v";
// each triple is stored as m[i][j] = v. Entries already present in `m` are
// kept, and a repeated (i, j) pair is overwritten by the later triple.
// Parsing stops at end of file or at the first triple that fails to parse.
//
// If the file cannot be opened, a diagnostic is written to std::cerr and `m`
// is left untouched, so the caller does not silently continue as if the file
// had been empty.
void load(const std::string &fn, std::unordered_map<int, std::unordered_map<int, double> > &m) {
    std::ifstream input(fn);
    if (!input) {
        std::cerr << "load: cannot open '" << fn << "'" << std::endl;
        return;
    }
    int i;
    int j;
    double v;
    while (input >> i >> j >> v) {
        m[i][j] = v;
    }
}
// Sparse matrix keyed first by row index, then by column index; the innermost
// value is the matrix entry at (row, column).
using SparseMatrix = std::unordered_map<int, std::unordered_map<int, double> >;

// First operand; main() fills it from "./a0".
SparseMatrix m1;
// Second operand; main() fills it from "./a1".
SparseMatrix m2;
//vector<vector<int> > keys(BLK_SIZE);
// Loads the two sparse matrices and, for every pair (row of m1, row of m2),
// accumulates the dot product over the columns the two rows share.
int main() {
    load("./a0", m1);
    load("./a1", m2);
    // Every loop binds its element by const reference. Iterating with plain
    // `auto` deep-copies a whole unordered_map row on each step, and the inner
    // loop runs once per (row of m1, row of m2) pair, so those copies are pure
    // overhead repeated rows(m1) * rows(m2) times.
    for (const auto &r1 : m1) {
        for (const auto &r2 : m2) {
            double sim = 0.0;
            for (const auto &c1 : r1.second) {
                const auto f = r2.second.find(c1.first);
                if (f != r2.second.end()) {
                    sim += (f->second) * (c1.second);
                }
            }
            // The similarity is not stored or printed anywhere yet; the cast
            // only documents that discarding it is intentional for now.
            (void)sim;
        }
    }
    return 0;
}
The code above is too slow. How can I make it run faster? I tried using multiple threads.
The new code follows; the compile command is g++ -O2 -std=c++11 -pthread ./test.cpp.
It takes about 1 minute, and I want it to be faster still.
How can I make the task faster? Thank you!
#include <iostream>
#include <fstream>
#include <unordered_map>
#include <vector>
#include <thread>
#define BLK_SIZE 8
using namespace std;
// Reads a sparse matrix from the text file `fn` and stores it in `m`.
//
// The file is parsed as a sequence of whitespace-separated triples "i j v";
// each triple is stored as m[i][j] = v. Entries already present in `m` are
// kept, and a repeated (i, j) pair is overwritten by the later triple.
// Parsing stops at end of file or at the first triple that fails to parse.
//
// If the file cannot be opened, a diagnostic is written to std::cerr and `m`
// is left untouched, so the caller does not silently continue as if the file
// had been empty.
void load(const std::string &fn, std::unordered_map<int, std::unordered_map<int, double> > &m) {
    std::ifstream input(fn);
    if (!input) {
        std::cerr << "load: cannot open '" << fn << "'" << std::endl;
        return;
    }
    int i;
    int j;
    double v;
    while (input >> i >> j >> v) {
        m[i][j] = v;
    }
}
// Sparse matrix keyed first by row index, then by column index; the innermost
// value is the matrix entry at (row, column).
using SparseMatrix = std::unordered_map<int, std::unordered_map<int, double> >;

// First operand; main() fills it from "./a0".
SparseMatrix m1;
// Second operand; main() fills it from "./a1".
SparseMatrix m2;
// One list of m1 row ids per worker block; keys[b] is the work list that
// thread_sim(b) walks. Created with BLK_SIZE empty lists.
std::vector<std::vector<int> > keys(BLK_SIZE);
void thread_sim(int blk_id) {
for (auto row1_id : keys[blk_id]) {
auto r1 = m1[row1_id];
for (auto r2p : m2) {
double sim = 0.0;
for (auto col1 : r1) {
auto f = r2p.second.find(col1.first);
if (f != r2p.second.end()) {
sim += (f->second) * col1.second ;
}
}
}
}
}
// Loads both matrices, splits the rows of m1 into BLK_SIZE work lists, and
// runs one thread_sim worker per list, waiting for all of them to finish.
int main() {
    load("./a0", m1);
    load("./a1", m2);
    // Deal the row ids of m1 out round-robin so that the BLK_SIZE work lists
    // differ in length by at most one row and no worker is left idle while
    // others still have rows. Binding by const reference avoids copying a
    // whole row just to read its key.
    int next_blk = 0;
    for (const auto &row : m1) {
        keys[next_blk].push_back(row.first);
        next_blk = (next_blk + 1) % BLK_SIZE;
    }
    std::cout << "ok" << std::endl;
    // Start every worker before joining any, so the blocks run concurrently.
    std::thread t[BLK_SIZE];
    for (int i = 0; i < BLK_SIZE; ++i) {
        t[i] = std::thread(thread_sim, i);
    }
    for (int i = 0; i < BLK_SIZE; ++i) {
        t[i].join();
    }
    return 0;
}