I'm currently writing a program in C using MPI to perform matrix multiplication in parallel. I'm very new to C and MPI, so the code is pretty rough. I can't seem to get it to work, so could someone read through it and help me understand what I need to do to fix it?
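The idea is: rank 0 reads the two matrices from the files "matrix1" and "matrix2", broadcasts B to every rank, and sends each worker a block of rows_A / size rows of A. Each rank multiplies its block of rows by B, then rank 0 collects the partial results back into AB and handles any leftover rows itself.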
Here's the code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
// code adapted from source codes from
// http://www.programiz.com/c-programming/c-multi-dimensional-arrays
// http://www.cs.hofstra.edu/~cscccl/csc145/imul.c
// GENERAL VARIABLES
int **A, **B, **AB;
int i,j,k;
int rows_A, cols_A, rows_B, cols_B;
int dimensions[3];
// MATRIX MULTIPLICATION
// Multiply rows [start, start+interval) of A by B, accumulating into AB
// (uses the global matrices and the global index variables i, j, k)
void matrixMult(int start, int interval){
    for (i = start; i < start+interval; ++i){
        for (j = 0; j < cols_B; ++j){
            for (k = 0; k < cols_A; ++k)
                AB[i][j] += (A[i][k] * B[k][j]);}}}
int main(int argc, char *argv[]){
    // MPI VARIABLES, INITIALIZE MPI
    int rank, size, interval, remainder;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (rank == 0){
        // READ AND WRITE MATRICES ------------------------------------
        FILE *matrix1, *matrix2;
        matrix1 = fopen("matrix1", "r");
        fscanf(matrix1, "%d", &rows_A);
        fscanf(matrix1, "%d", &cols_A);
        matrix2 = fopen("matrix2", "r");
        fscanf(matrix2, "%d", &rows_B);
        fscanf(matrix2, "%d", &cols_B);
        int dimensions[3] = {rows_A, cols_A, cols_B};
        /*printf("\n\nRows A = %d",rows_A);
        printf("\nCols A = %d",cols_A);
        printf("\n\nRows B = %d",rows_B);
        printf("\nCols B = %d",cols_B);*/
        // Allocate memory for matrices
        int **A = malloc(rows_A * sizeof(int*));
        // The cast to size_t prevents integer overflow with big matrices
        A[0] = malloc((size_t)rows_A * (size_t)cols_A * sizeof(int));
        for(i = 1; i < rows_A; i++)
            A[i] = A[0] + i*cols_A;
        int **B = malloc(rows_B * sizeof(int*));
        // The cast to size_t prevents integer overflow with big matrices
        B[0] = malloc((size_t)rows_B * (size_t)cols_B * sizeof(int));
        for(i = 1; i < rows_A; i++)
            B[i] = B[0] + i*cols_B;
        int **AB = malloc(rows_A * sizeof(int*));
        // The cast to size_t prevents integer overflow with big matrices
        AB[0] = malloc((size_t)rows_A * (size_t)cols_B * sizeof(int));
        for(i = 1; i < rows_A; i++)
            AB[i] = AB[0] + i*cols_B;
        /*int **A = (int **)malloc(rows_A * sizeof(int*));
        for(i = 0; i < rows_A; i++)
            A[i] = (int *)malloc(cols_A * sizeof(int));
        int **B = (int **)malloc(rows_B * sizeof(int*));
        for(i = 0; i < rows_B; i++)
            B[i] = (int *)malloc(cols_B * sizeof(int));
        int **AB = (int **)malloc(rows_A * sizeof(int*));
        for(i = 0; i < rows_B; i++)
            AB[i] = (int *)malloc(cols_B * sizeof(int));*/
        // Write matrices
        while(!feof(matrix1)){
            for(i=0;i<rows_A;i++){
                for(j=0;j<cols_A;j++)
                    fscanf(matrix1,"%d",&A[i][j]);}}
        while(!feof(matrix2)){
            for(i=0;i<rows_B;i++){
                for(j=0;j<cols_B;j++)
                    fscanf(matrix2,"%d",&B[i][j]);}}
        /*
        // Print Matrices
        printf("\n\n");
        //print matrix 1
        printf("Matrix A:\n");
        for(i=0;i<rows_A;i++){
            for(j=0;j<cols_A;j++)
                printf("%d\t",A[i][j]);
            printf("\n");}
        printf("\n");
        //print matrix 2
        printf("Matrix B:\n");
        for(i=0;i<rows_B;i++){
            for(j=0;j<cols_B;j++)
                printf("%d\t",B[i][j]);
            printf("\n");} */
        // ------------------------------------------------------------------
        // MULTIPLICATION (Parallelize here)
        printf("begin rank 0\n");
        interval = rows_A / size; // work per processor
        remainder = rows_A % size;
        // SEND B BROADCAST to all
        MPI_Bcast(B, rows_B * cols_B, MPI_INT, 0, MPI_COMM_WORLD);
        printf("1\n");
        // SEND A, ROWS, COLS, interval to each rank
        for(i=1;i<size;i++)
            MPI_Send(dimensions,3,MPI_INT,i,123,MPI_COMM_WORLD);
        printf("2\n");
        for(i=1;i<size;i++)
            MPI_Send(A[i*interval],interval*rows_A,MPI_INT,i,123,MPI_COMM_WORLD);
        printf("3\n");
        // ROOT MM
        matrixMult(0, interval);
        printf("3.5\n");
        matrixMult(size * interval, remainder);
        printf("4\n");
        // receive AB from workers, add to current AB
        for(i=1;i<size;i++)
            MPI_Recv(AB[i*interval],interval*rows_A,MPI_INT,i,123,MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("5\n");
        // PRINT MATRIX PRODUCT
        printf("\nSum Of Matrix:\n");
        for(i = 0; i < rows_A; ++i){
            for(j = 0; j < cols_B; ++j){
                printf("%d\t",AB[i][j]);
                if(j == cols_B - 1)/* To display matrix sum in order. */
                    printf("\n");}}
        // CLOSE FILES
        fclose(matrix1);
        fclose(matrix2);
    }
    else{ // WORKER NODES
        printf("bring workers\n");
        // RECEIVE B BROADCAST
        MPI_Bcast(B, rows_B * cols_B, MPI_INT, 0, MPI_COMM_WORLD);
        printf("a\n");
        // RECEIVE A, INTERVAL
        MPI_Recv(dimensions,3,MPI_INT,0,123, MPI_COMM_WORLD,MPI_STATUS_IGNORE);
        printf("b\n");
        rows_A = dimensions[0];
        cols_A = dimensions[1];
        cols_B = dimensions[2];
        printf("c\n");
        MPI_Recv(A[rank*interval],interval*rows_A,MPI_INT,0,123, MPI_COMM_WORLD,MPI_STATUS_IGNORE);
        printf("d\n");
        // WORKER MM
        matrixMult(rank*interval, interval);
        printf("e\n");
        // send AB to root
        MPI_Send(AB[rank*interval],interval*rows_A,MPI_INT,0,123,MPI_COMM_WORLD);
        printf("f\n");
    }
    // FINALIZE MPI
    MPI_Finalize(); /* EXIT MPI */
}
I stuck in some printf calls to try to pin down where the code was failing, and it looks like it reaches the actual matrix multiplication in both the workers and the rank 0 root. Does that mean the problem is in my receives? The input is a 2x3 matrix (rows 1 2 3 and 4 5 6) and a 3x2 matrix (rows 7 8, 9 10, and 11 12).
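If my arithmetic is right, the product should come out as the 2x2 matrix
58   64
139  154
but I never get that far. Here's what the output looks like instead: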
hjiang1@cook:~/cs287/PMatrixMultiply$ make
mpicc parallelMatrixMult.c -std=c99 -lm -o parallelMatrix.out
hjiang1@cook:~/cs287/PMatrixMultiply$ mpirun --hostfile QuaCS parallelMatrix.out
No protocol specified
No protocol specified
bring workers
a
bring workers
a
bring workers
a
begin rank 0
1
2
b
c
b
c
b
c
3
d
e
d
3.5
[cook:06730] *** Process received signal ***
[cook:06730] Signal: Segmentation fault (11)
[cook:06730] Signal code: Address not mapped (1)
[cook:06730] Failing at address: 0xffffffffbbc4d600
[cook:06728] *** Process received signal ***
[cook:06728] Signal: Segmentation fault (11)
[cook:06728] Signal code: Address not mapped (1)
[cook:06728] Failing at address: 0x5d99f200
[cook:06727] *** Process received signal ***
[cook:06730] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0xfcb0)[0x7fdaa80eccb0]
[cook:06730] [ 1] [cook:06728] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x147b55)[0x7fdaa7e65b55]
[cook:06730] [ 2] /usr/local/lib/openmpi/mca_btl_vader.so(+0x23f9)[0x7fda9e70f3f9]
[cook:06730] [ 3] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_send_request_start_rndv+0x1d3)[0x7fda9e0df393]
[cook:06730] [ 4] /usr/local/lib/openmpi/mca_pml_ob1.so(mca_pml_ob1_send+0x754)[0x7fda9e0d5404]
[cook:06730] [ 5] /lib/x86_64-linux-gnu/libpthread.so.0(+0xfcb0)[0x7f910bef2cb0]
[cook:06728] [ 1] parallelMatrix.out[0x400bad]
[cook:06728] [ 2] parallelMatrix.out[0x401448]
[cook:06728] [ 3] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed)[0x7f910bb4576d]
[cook:06728] [ 4] parallelMatrix.out[0x400a79]
[cook:06728] *** End of error message ***
/usr/local/lib/libmpi.so.1(PMPI_Send+0xf2)[0x7fdaa8368332]
[cook:06730] [ 6] parallelMatrix.out[0x401492]
[cook:06730] [ 7] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed)[0x7fdaa7d3f76d]
[cook:06730] [ 8] parallelMatrix.out[0x400a79]
[cook:06730] *** End of error message ***
[cook:06727] Signal: Segmentation fault (11)
[cook:06727] Signal code: Address not mapped (1)
[cook:06727] Failing at address: (nil)
[cook:06727] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0xfcb0)[0x7f73e0d09cb0]
[cook:06727] [ 1] parallelMatrix.out[0x400bad]
[cook:06727] [ 2] [cook:6729] *** An error occurred in MPI_Recv
[cook:6729] *** reported by process [1864040449,2]
[cook:6729] *** on communicator MPI_COMM_WORLD
[cook:6729] *** MPI_ERR_COUNT: invalid count argument
[cook:6729] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[cook:6729] *** and potentially your MPI job)
If anyone can help, that'd be greatly appreciated. Again, I'm new to C and MPI, so please bear with how terrible my code is.