
The program is supposed to perform matrix multiplication using two methods, a serial one and an MPI-parallel one, and compare their results, speedup, and efficiency. Both methods take the same two input matrices and return the resulting matrix product.
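
(For reference, speedup and efficiency are computed from the measured times as shown below, where size is the number of MPI processes.)

speedup    = serialTime / parallelTime
efficiency = speedup / size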

This is the problem I have been trying to debug: the program crashes in the multiplyParallel method when I try to gather the values of sendbuf from each process and store them in recvbuf using MPI_Allgather.

// Function to multiply matrices using parallel algorithm
int** multiplyParallel(int** matrixA, int** matrixB, int rowsA, int colsA, int colsB, int rank, int size) {
    int** result = new int*[rowsA];
    for (int i = 0; i < rowsA; i++) {
        result[i] = new int[colsB];
        for (int j = 0; j < colsB; j++) {
            result[i][j] = 0;
        }
    }

    int* sendbuf = new int[colsA];
    int* recvbuf = new int[colsA];
    MPI_Status status;

    for (int k = 0; k < colsA; k++) {
        for (int i = 0; i < rowsA; i++) {
            sendbuf[i] = matrixA[i][k];
        }

        MPI_Allgather(sendbuf, rowsA, MPI_INT, recvbuf, rowsA, MPI_INT, MPI_COMM_WORLD);

        for (int j = rank; j < colsB; j += size) {
            for (int i = 0; i < rowsA; i++) {
                result[i][j] = 0;
                for (int k = 0; k < colsA; k++) {
                    result[i][j] += recvbuf[k] * matrixB[k][j];
                }
            }
        }
    }

    delete[] sendbuf;
    delete[] recvbuf;

    return result;
}
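
For reference, my understanding is that MPI_Allgather has each process contribute sendcount elements and requires the receive buffer on every rank to hold sendcount * comm_size elements. Below is a minimal standalone sketch of the call in isolation (COUNT and the buffer sizes are purely illustrative, not taken from the program above):

// Minimal standalone check of MPI_Allgather buffer sizes.
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank = 0, size = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    const int COUNT = 4;                     // elements contributed by each process (illustrative)
    std::vector<int> sendbuf(COUNT, rank);   // each rank sends its own rank number COUNT times
    std::vector<int> recvbuf(COUNT * size);  // receive buffer must hold COUNT * size ints

    MPI_Allgather(sendbuf.data(), COUNT, MPI_INT,
                  recvbuf.data(), COUNT, MPI_INT, MPI_COMM_WORLD);

    if (rank == 0) {
        for (int v : recvbuf) printf("%d ", v);
        printf("\n");
    }

    MPI_Finalize();
    return 0;
}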

And here is the whole code. Could you please help me figure out what is wrong with it?

#include <iostream>
#include <cstdlib>
#include <ctime>
#include <mpi.h>

using namespace std;

// Function to generate random matrices
void generateRandomMatrix(int** matrix, int rows, int columns) {
    srand(time(NULL));
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < columns; j++) {
            matrix[i][j] = rand() % 10;
        }
    }
}

// Function to print a matrix
void printMatrix(int** matrix, int rows, int columns) {
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < columns; j++) {
            cout << matrix[i][j] << " ";
        }
        cout << endl;
    }
}

// Function to multiply matrices using serial algorithm
int** multiplySerial(int** matrixA, int** matrixB, int rowsA, int colsA, int colsB) {
    int** result = new int*[rowsA];
    for (int i = 0; i < rowsA; i++) {
        result[i] = new int[colsB];
        for (int j = 0; j < colsB; j++) {
            result[i][j] = 0;
            for (int k = 0; k < colsA; k++) {
                result[i][j] += matrixA[i][k] * matrixB[k][j];
            }
        }
    }
    return result;
}

// Function to multiply matrices using parallel algorithm
int** multiplyParallel(int** matrixA, int** matrixB, int rowsA, int colsA, int colsB, int rank, int size) {
    int** result = new int*[rowsA];
    for (int i = 0; i < rowsA; i++) {
        result[i] = new int[colsB];
        for (int j = 0; j < colsB; j++) {
            result[i][j] = 0;
        }
    }

    int* sendbuf = new int[colsA];
    int* recvbuf = new int[colsA];
    MPI_Status status;

    for (int k = 0; k < colsA; k++) {
        for (int i = 0; i < rowsA; i++) {
            sendbuf[i] = matrixA[i][k];
        }

        MPI_Allgather(sendbuf, rowsA, MPI_INT, recvbuf, rowsA, MPI_INT, MPI_COMM_WORLD);

        for (int j = rank; j < colsB; j += size) {
            for (int i = 0; i < rowsA; i++) {
                result[i][j] = 0;
                for (int k = 0; k < colsA; k++) {
                    result[i][j] += recvbuf[k] * matrixB[k][j];
                }
            }
        }
    }

    delete[] sendbuf;
    delete[] recvbuf;

    return result;
}

// Function to deallocate memory for a matrix
void deallocateMatrix(int** matrix, int rows) {
    for (int i = 0; i < rows; i++) {
        delete[] matrix[i];
    }
    delete[] matrix;
}

int main(int argc, char* argv[]) {
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int rowsA = 10; // Number of rows in matrix A
    int colsA = 10; // Number of columns in matrix A
    int colsB = 10; // Number of columns in matrix B

    // Generate random matrices A and B
    int** matrixA = nullptr;
    int** matrixB = nullptr;

    if (rank == 0) {
        matrixA = new int*[rowsA];
        matrixB = new int*[colsA];

        for (int i = 0; i < rowsA; i++) {
            matrixA[i] = new int[colsA];
        }

        for (int i = 0; i < colsA; i++) {
            matrixB[i] = new int[colsB];
        }

        generateRandomMatrix(matrixA, rowsA, colsA);
        generateRandomMatrix(matrixB, colsA, colsB);
    }

    // Broadcast matrix B to all processes
    MPI_Bcast(&matrixB[0][0], colsA * colsB, MPI_INT, 0, MPI_COMM_WORLD);

    // Calculate the product using serial algorithm
    double startTime = MPI_Wtime();
    int** serialResult = multiplySerial(matrixA, matrixB, rowsA, colsA, colsB);
    double endTime = MPI_Wtime();
    double serialTime = endTime - startTime;

    // Calculate the product using parallel algorithm
    startTime = MPI_Wtime();
    int** parallelResult = multiplyParallel(matrixA, matrixB, rowsA, colsA, colsB, rank, size);
    endTime = MPI_Wtime();
    double parallelTime = endTime - startTime;

    // Compare the results of serial and parallel algorithms
    bool isEqual = true;
    if (rank == 0) {
        for (int i = 0; i < rowsA; i++) {
            for (int j = 0; j < colsB; j++) {
                if (serialResult[i][j] != parallelResult[i][j]) {
                    isEqual = false;
                    break;
                }
            }
            if (!isEqual) {
                break;
            }
        }
    }

    // Print results
    if (rank == 0) {
        cout << endl << "Matrix A:" << endl;
        printMatrix(matrixA, rowsA, colsA);

        cout << endl << "Matrix B:" << endl;
        printMatrix(matrixB, colsA, colsB);
        
        cout << endl <<"Serial algorithm result:" << endl;
        printMatrix(serialResult, rowsA, colsB);

        cout << endl << "Parallel algorithm result:" << endl;
        printMatrix(parallelResult, rowsA, colsB);
        if (isEqual) {
            cout << endl << "Results are equal." << endl;
        } else {
            cout << endl << "Results are not equal." << endl;
        }

        double speedup = serialTime / parallelTime;
        double efficiency = speedup / size;

        cout << "Serial Time: " << serialTime << " seconds" << endl;
        cout << "Parallel Time: " << parallelTime << " seconds" << endl;
        cout << "Speedup: " << speedup << endl;
        cout << "Efficiency: " << efficiency << endl;

        deallocateMatrix(matrixA, rowsA);
        deallocateMatrix(matrixB, colsA);
    }

    deallocateMatrix(serialResult, rowsA);
    deallocateMatrix(parallelResult, rowsA);

    MPI_Finalize();

    return 0;
}
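
A side note on the broadcast step: MPI_Bcast expects the buffer passed to it to be a single contiguous block of memory that exists on every rank. Below is a minimal sketch of one way to allocate a matrix contiguously (the helper name allocateContiguous is illustrative, not part of the program above):

// Sketch: allocate a rows x cols matrix as one contiguous block,
// so that &matrix[0][0] really spans rows * cols ints.
int** allocateContiguous(int rows, int cols) {
    int* data = new int[rows * cols];   // single contiguous block of values
    int** matrix = new int*[rows];      // row pointers into that block
    for (int i = 0; i < rows; i++) {
        matrix[i] = data + i * cols;
    }
    return matrix;
}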

  • `int** result = new int*[rowsA];` -- Are you sure you want to time how fast the allocator works? This is included in your timing code. Shouldn't the raw multiplication be the only thing that is being timed? – PaulMcKenzie Jun 28 '23 at 17:07
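
A minimal sketch of the change that comment suggests, timing only the multiplication itself; it is meant as a replacement for the serial timing section of main and uses a hypothetical allocateMatrix helper so that allocation happens outside the timed region:

// Sketch: hoist allocation out of the timed region so MPI_Wtime()
// brackets only the actual multiplication work.
int** result = allocateMatrix(rowsA, colsB);   // hypothetical helper, allocation not timed

double startTime = MPI_Wtime();
for (int i = 0; i < rowsA; i++) {
    for (int j = 0; j < colsB; j++) {
        int sum = 0;
        for (int k = 0; k < colsA; k++) {
            sum += matrixA[i][k] * matrixB[k][j];
        }
        result[i][j] = sum;
    }
}
double endTime = MPI_Wtime();
double serialTime = endTime - startTime;       // now excludes allocation cost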
