The problem is that the speed is very slow, terrible slow, even under a small n, for example: when n=1024, there must be something wrong, anyone?
I didn't create new matrix C whenever function call, I add the new result to the previous result which is stored in original matrix C when base case occurs.
int **matA,**matB,**matC;
void matmul_div_rec(int Arow,int Acol,int Brow,int Bcol,int n) {
if(n==1)
{
matC[Arow][Bcol]+=matA[Arow][Acol]*matB[Brow][Bcol];
}
else
{
matmul_div_rec(Arow+0,Acol+0,Brow+0,Bcol+0,n/2);
matmul_div_rec(Arow+0,Acol+n/2,Brow+n/2,Bcol+0,n/2);
matmul_div_rec(Arow+0,Acol+0,Brow+0,Bcol+n/2,n/2);
matmul_div_rec(Arow+0,Acol+n/2,Brow+n/2,Bcol+n/2,n/2);
matmul_div_rec(Arow+n/2,Acol+0,Brow+0,Bcol+0,n/2);
matmul_div_rec(Arow+n/2,Acol+n/2,Brow+n/2,Bcol+0,n/2);
matmul_div_rec(Arow+n/2,Acol+0,Brow+0,Bcol+n/2,n/2);
matmul_div_rec(Arow+n/2,Acol+n/2,Brow+n/2,Bcol+n/2,n/2);
}
return; }
int main()
{
matmul_div_rec(0,0,0,0,n); //n must be the power of 2
}