I'm trying to implement divide-and-conquer matrix multiplication so that I can parallelize it, but the result is half random garbage numbers and half zeros, e.g. for a 2x2 matrix I get "[[15909360,0][15909360,0]]". This is what I have so far, based on the algorithm on Wikipedia, but I don't really know where to go from here. I'm not yet using pointers for the partitioning, or threads. This is homework, by the way.
// Copies the four (n/2) x (n/2) quadrants of `m` into separate matrices.
//   m   - source matrix; only rows/columns [0, 2*(n/2)) are read
//   m11 - receives the top-left quadrant
//   m12 - receives the top-right quadrant
//   m21 - receives the bottom-left quadrant
//   m22 - receives the bottom-right quadrant
//   n   - dimension of `m`
// The destinations are indexed directly and never resized, so each must
// already hold at least (n/2) x (n/2) elements.
void partition(const std::vector<std::vector<IntElement> >& m, std::vector<std::vector<IntElement> >& m11, std::vector<std::vector<IntElement> >& m12,
               std::vector<std::vector<IntElement> >& m21, std::vector<std::vector<IntElement> >& m22, int n){
    const int half = n / 2;
    for (int row = 0; row < half; ++row) {
        const std::vector<IntElement>& upper = m[row];
        const std::vector<IntElement>& lower = m[row + half];
        for (int col = 0; col < half; ++col) {
            m11[row][col] = upper[col];         // top left
            m12[row][col] = upper[col + half];  // top right
            m21[row][col] = lower[col];         // bottom left
            m22[row][col] = lower[col + half];  // bottom right
        }
    }
}
// Adds T to C element-wise, in place: C[i][j] += T[i][j] for 0 <= i, j < n.
//   C - accumulator matrix, modified in place
//   T - matrix to add; it is only read
//   n - number of rows/columns to process; nothing happens when n <= 0
//
// The previous recursive version had two defects: its base case computed
// C += C + T (doubling C), and for n > 1 it recursed on *copies* of the
// quadrants that were never written back, so the caller's C was left
// untouched. Working directly on C avoids both problems and the temporary
// allocations. Every element is updated independently of the others, so the
// row range can still be split between threads later if desired.
void add(std::vector<std::vector<IntElement> >& C, std::vector<std::vector<IntElement> >& T, int n){
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            C[i][j] += T[i][j];
        }
    }
}
void multiply(std::vector<std::vector<IntElement> >& C, const std::vector<std::vector<IntElement> >& A,
const std::vector<std::vector<IntElement> >& B, int n){
if(n==1)
C[0][0] += A[0][0] * B[0][0];
else{
std::vector<std::vector<IntElement> > T(n, std::vector<IntElement>(n));
std::vector<std::vector<IntElement> > a11(n/2, std::vector<IntElement>(n/2)), a12(n/2, std::vector<IntElement>(n/2)),
a21(n/2, std::vector<IntElement>(n/2)), a22(n/2, std::vector<IntElement>(n/2));
std::vector<std::vector<IntElement> > b11(n/2, std::vector<IntElement>(n/2)), b12(n/2, std::vector<IntElement>(n/2)),
b21(n/2, std::vector<IntElement>(n/2)), b22(n/2, std::vector<IntElement>(n/2));
std::vector<std::vector<IntElement> > c11(n/2, std::vector<IntElement>(n/2)), c12(n/2, std::vector<IntElement>(n/2)),
c21(n/2, std::vector<IntElement>(n/2)), c22(n/2, std::vector<IntElement>(n/2));
std::vector<std::vector<IntElement> > t11(n/2, std::vector<IntElement>(n/2)), t12(n/2, std::vector<IntElement>(n/2)),
t21(n/2, std::vector<IntElement>(n/2)), t22(n/2, std::vector<IntElement>(n/2));
partition(A, a11, a12, a21, a22, n);
partition(B, b11, b12, b21, b22, n);
partition(C, c11, c12, c21, c22, n);
partition(T, t11, t12, t21, t22, n);
multiply(c11, a11, b11, n/2);
multiply(c12, a11, b12, n/2);
multiply(c21, a21, b11, n/2);
multiply(c22, a21, b12, n/2);
multiply(t11, a12, b21, n/2);
multiply(t12, a12, b22, n/2);
multiply(t21, a22, b21, n/2);
multiply(t22, a22, b22, n/2);
add(C, T, n);
}
return;
};
// Replaces this matrix with the product (*this) * m and returns *this.
//   m - right-hand operand; its `elements` are indexed with this object's n,
//       so it is presumably expected to have the same dimension -- verify
//       against the callers.
// The product is built in a separate buffer because multiply() reads
// `elements` while writing its output; this also keeps `a *= a` well defined.
// multiply() accumulates into its output, so the buffer relies on a
// value-initialised IntElement being zero.
SquareMatrix& SquareMatrix::operator*=(const SquareMatrix& m){
    std::vector<std::vector<IntElement> > product(n, std::vector<IntElement>(n));
    multiply(product, elements, m.elements, n);
    // Swap instead of copy-assigning: the result buffer is adopted in constant
    // time and the old contents are released when `product` goes out of scope.
    elements.swap(product);
    return *this;
}
// Returns the matrix product a * b without modifying either operand.
// Implemented via the compound assignment: a copy of `a` is multiplied in
// place by `b` and then returned by value.
SquareMatrix operator*(const SquareMatrix& a, const SquareMatrix& b){
    SquareMatrix product = a;
    product *= b;
    return product;
}
EDIT: I changed `C[0][0] += C[0][0] + T[0][0];` in add() to `C[0][0] += T[0][0];`. I also wrote an unpartition function that does the reverse of partition: it puts the partitions back into C and T after multiplying and adding:
// Inverse of a quadrant split: copies four (n/2) x (n/2) matrices back into
// the corresponding quadrants of `m`.
//   m   - destination matrix; rows/columns [0, 2*(n/2)) are overwritten
//   m11 - source for the top-left quadrant
//   m12 - source for the top-right quadrant
//   m21 - source for the bottom-left quadrant
//   m22 - source for the bottom-right quadrant
//   n   - dimension of `m`
// `m` is indexed directly and never resized, so it must already hold at
// least 2*(n/2) rows and columns.
void unpartition(std::vector<std::vector<IntElement> >& m,std::vector<std::vector<IntElement> >& m11, std::vector<std::vector<IntElement> >& m12,
                 std::vector<std::vector<IntElement> >& m21, std::vector<std::vector<IntElement> >& m22, int n){
    const int half = n / 2;
    for (int row = 0; row < half; ++row) {
        std::vector<IntElement>& upper = m[row];
        std::vector<IntElement>& lower = m[row + half];
        for (int col = 0; col < half; ++col) {
            upper[col]        = m11[row][col];  // top left
            upper[col + half] = m12[row][col];  // top right
            lower[col]        = m21[row][col];  // bottom left
            lower[col + half] = m22[row][col];  // bottom right
        }
    }
}
My vectors get initialized correctly after I fixed the default constructor for my IntElement class.