1

I wrote a program that multiplies two matrices sequentially and records the time, then multiplies them again using a number of pthreads given on the command line (numberOfThreads). But regardless of how many threads I enter, it still gives me the same time on every run. I'm currently on an i7 MacBook, so I'm not sure whether that is why adding more threads doesn't speed up the calculation, or whether my program is simply incorrect.

Here's the code:

/*Program to generate two square 2D arrays of random doubles and
   time their multiplication.
   Program utilizes pthreads to perform the matrix multiplication in parallel
   Compile by: gcc -o mmult -O3 mmultHW6.c -lpthread
   Run by:  ./mmult  1000 2
 */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <pthread.h>

// Minimal boolean support: BOOL is an int that holds TRUE (1) or FALSE (0).
#define TRUE 1
#define FALSE 0
#define BOOL int

// Describes the slice of the product matrix handed to one worker thread.
// A pointer to one BLOCK is passed as the argument of threadMMult.
typedef struct {
  // Index of the worker; main sets it from its loop counter.
  int threadId;
  // First row of the slice (inclusive).
  int start_row;
  // Last row of the slice (inclusive; the worker loops while i <= end_row).
  int end_row;
  // NOTE(review): start_col is never assigned or read in the code shown here.
  int start_col;
  // NOTE(review): end_col is never assigned or read in the code shown here.
  int end_col;
} BLOCK;


// function prototypes
// Matrices are represented as arrays of row pointers (double **).
double ** allocate2DArray(int rows, int columns);
void print2DArray(int rows, int columns, double ** array2D);
void generateRandom2DArray(int rows, int columns,
               double min, double max, double ** random2DArray);
BOOL equal2DArrays(int rows, int columns, double ** array1, double ** array2,
           double tolerance);
// NOTE(review): matrixMultiplication is declared here, but no definition of it
// appears in the code shown; only matrixMultiplicationAlt is defined.
void matrixMultiplication(int rows1, int columns1, double ** array1,
              int rows2, int columns2, double ** array2,
              double ** product);
void matrixMultiplicationAlt(int rows1, int columns1, double ** array1,
                 int rows2, int columns2, double ** array2,
                 double ** product);
// pthread start routine; the argument is used as a BLOCK * by the definition.
void * threadMMult(void * rank);

// Number of worker threads; read from the command line in main.
int numberOfThreads;
// Input matrices; main fills them with random values.
double ** A;
double ** B;
// Product written by the worker threads (threadMMult).
double ** C;
// Product of the sequential multiplication; main compares C against it.
double ** C_alt;
// Matrix dimensions; main sets columns = rows, so all matrices are square.
int rows, columns;

int main(int argc, char ** argv) {


  long i, startTime, endTime,seqTime, paralellTime;
  BLOCK * blocksOfWork;
  int errorCode;
  double tolerence;
  pthread_t * threadHandles;
  if (argc !=3) {
    printf("Usage: %s <# of rows><# of Threads>\n", argv[0]);
    exit(-1);
  } // end if

  sscanf(argv[1], "%d", &rows);
  sscanf(argv[1], "%d", &numberOfThreads);
  columns = rows;

  // seed the random number generator
  srand( time(NULL) );

  A = allocate2DArray(rows, columns);
  B = allocate2DArray(rows, columns);
  C = allocate2DArray(rows, columns);
  C_alt = allocate2DArray(rows, columns);
  generateRandom2DArray(rows, columns, -1.0, +1.0, A);
  generateRandom2DArray(rows, columns, -1.0, +1.0, B);

  printf("after initializing matrices\n");

  time(&startTime);

  matrixMultiplicationAlt(rows, columns, A, rows, columns, B, C_alt);

  time(&endTime);

  seqTime = endTime-startTime;
  printf("Matrix Multiplication Alt. time = %ld\n",seqTime);


  time(&startTime);

  threadHandles = (pthread_t *) malloc(numberOfThreads*sizeof(pthread_t));
  blocksOfWork = (BLOCK *) malloc(numberOfThreads*sizeof(BLOCK));

  for(i=0; i < numberOfThreads; i++){
    blocksOfWork[i].threadId = i;
    blocksOfWork[i].start_row = i * rows/numberOfThreads;
    if (i == numberOfThreads -1){
      blocksOfWork[i].end_row = rows - 1;
    }
    else{
      blocksOfWork[i].end_row = (i+1)*rows/numberOfThreads -1;
    }
  }
  for (i=0; i < numberOfThreads; i++) {
    if (errorCode = pthread_create(&threadHandles[i], NULL, threadMMult,
                   &blocksOfWork[i]) != 0) {
      printf("pthread %d failed to be created with error code %d\n", i, errorCode);
    } // end if
  } // end for

  for (i=0; i < numberOfThreads; i++) {
    if (errorCode = pthread_join(threadHandles[i], (void **) NULL) != 0) {
      printf("pthread %d failed to be joined with error code %d\n", i, errorCode);
    } // end if
  } // end for

  time(&endTime);
  paralellTime = endTime-startTime;
  printf("Parallel Matrix Multiplication time = %ld\n",paralellTime);


  if (equal2DArrays(rows, columns, C, C_alt, 0.000001)) {
    printf("Arrays match with tolerance of %.000001f\n", 0.000001);
  } else {
    printf("Arrays DON'T match with tolerance of %.000001f\n", 0.000001);
  } // end if

  return 0;

} // end main


void * threadMMult(void * arg){
  BLOCK * block = (BLOCK *) arg;
  int threadId = block->threadId;
  int startRow = block->start_row;
  int endRow = block->end_row;
  int i, j, k, sum;

  for(i=startRow; i<=endRow;i++){
    for(j = 0; j<rows;j++){
      C[i][j] = 0;
      for(k=0; k<rows ; k++){
        C[i][j] += A[i][k]*B[k][j];
        //printf("%lu - C[%d][%d] += A[%d][%d] * B[%d][%d]\n",
        //pthread_self(), i,j,i,k,k,j);
      }
    }
    return 0;
  }
}
//C[i][j] += A[i][k] * B_transpose[j][k];
/*******************************************************************
 * Function matrixMultiplicationAlt is passed two matrices and stores
 * their product in the caller-supplied matrix product, which must
 * have rows1 rows and columns2 columns.
 *
 * array2 is first copied into a transposed temporary so that the
 * inner loop walks both operands along a row.  The temporary is freed
 * before returning.
 *
 * Prints a message and exits with -1 when columns1 != rows2 or when
 * the temporary cannot be allocated.
 ********************************************************************/
void matrixMultiplicationAlt(int rows1, int columns1, double ** array1,
                 int rows2, int columns2, double ** array2,
                 double ** product) {
  int i, j, k;
  double ** array2_transpose;

  if (columns1 != rows2) {
    printf("Matrices cannot be multiplied -- incompatible dimensions!\n");
    exit(-1);
  } // end if

  // Transposes array2
  array2_transpose = allocate2DArray(columns2, rows2);
  if (array2_transpose == NULL) {
    printf("Out of memory while transposing the second matrix!\n");
    exit(-1);
  } // end if
  for (i=0; i < rows2; i++) {
    for (j=0; j < columns2; j++) {
      array2_transpose[j][i] = array2[i][j];
    } /* end for (j */
  } /* end for (i */

  // Matrix Multiplication uses array1 and array2_transpose.
  // The result goes to the product parameter, not to a global.
  for (i=0; i < rows1; i++) {
    for (j=0; j < columns2; j++) {
      double sum = 0.0;
      for (k=0; k < columns1; k++) {
        sum += array1[i][k]*array2_transpose[j][k];
      } /* end for (k */
      product[i][j] = sum;
    } /* end for (j */
  } /* end for (i */

  // Release the transposed copy: one block per row plus the row table.
  for (j=0; j < columns2; j++) {
    free(array2_transpose[j]);
  } /* end for (j */
  free(array2_transpose);

} // end matrixMultiplicationAlt



/*******************************************************************
 * Function allocate2DArray dynamically allocates a 2D array of
 * size rows x columns, and returns it.
 *
 * The array is a table of rows pointers, each pointing at its own
 * block of columns doubles.  The contents are NOT initialized.
 *
 * Returns NULL when a dimension is negative or when memory runs out;
 * in that case everything allocated so far has been released.  The
 * caller owns the result and frees each row and then the table.
 ********************************************************************/
double ** allocate2DArray(int rows, int columns) {
  double ** local2DArray;
  int r;

  // A negative count would turn into a huge size_t below.
  if (rows < 0 || columns < 0) {
    return NULL;
  } // end if

  local2DArray = malloc(sizeof *local2DArray * (size_t) rows);
  if (local2DArray == NULL) {
    return NULL;
  } // end if

  for (r=0; r < rows; r++) {
    local2DArray[r] = malloc(sizeof **local2DArray * (size_t) columns);
    // malloc(0) may legitimately return NULL, so only a failed request
    // for a non-empty row counts as an error.
    if (local2DArray[r] == NULL && columns > 0) {
      while (r > 0) {
        r--;
        free(local2DArray[r]);
      } // end while
      free(local2DArray);
      return NULL;
    } // end if
  } // end for

  return local2DArray;
} // end allocate2DArray


/*******************************************************************
 * Function generateRandom2DArray fills every element of the
 * rows x columns matrix random2DArray with a pseudo-random double
 * obtained from rand() and scaled into the interval [min, max].
 * The matrix must already be allocated by the caller.
 ********************************************************************/
void generateRandom2DArray(int rows, int columns,
               double min, double max, double ** random2DArray) {
  int row = 0;

  while (row < rows) {
    int col = 0;

    while (col < columns) {
      // rand() lies in [0, RAND_MAX]; dividing by RAND_MAX / width maps
      // it onto [0, width], which is then shifted up by min.
      double width = max - min;
      double scale = RAND_MAX / width;

      random2DArray[row][col] = min + (rand() / scale);
      ++col;
    }
    ++row;
  }
} // end generateRandom2DArray


/*******************************************************************
 * Function print2DArray writes the rows x columns matrix array2D to
 * standard output, one matrix row per line.  Every element is printed
 * in a field 10 characters wide with 5 digits after the decimal point.
 ********************************************************************/
void print2DArray(int rows, int columns, double ** array2D) {
  int row = 0;

  while (row < rows) {
    int col = 0;

    while (col < columns) {
      printf("%10.5lf", array2D[row][col]);
      ++col;
    }
    printf("\n");   // terminate the current matrix row
    ++row;
  }

} // end print2DArray



/*******************************************************************
 * Function equal2DArrays compares two rows x columns matrices element
 * by element.  It returns FALSE as soon as it finds a pair of
 * corresponding elements whose absolute difference is greater than
 * tolerance, and TRUE when no such pair exists.
 ********************************************************************/
BOOL equal2DArrays(int rows, int columns, double ** array1, double ** array2,
           double tolerance) {

  int row = 0;

  while (row < rows) {
    int col = 0;

    while (col < columns) {
      double gap = fabs(array1[row][col] - array2[row][col]);

      if (gap > tolerance) {
        return FALSE;   // first mismatch decides the result
      }
      ++col;
    }
    ++row;
  }
  return TRUE;

} // end equal2DArray
Reed Sager
  • 25
  • 3
  • this is way more code than most people here will be willing to sift through. Try to whittle it down to an [MCVE](https://stackoverflow.com/help/mcve) – yano Nov 08 '18 at 23:04
  • I like how well organized, indented and commented the code is, not something you see often from new users on stackoverflow. Why do you use `time()` to measure time? I usually see `gettimeofday()` when people measure runtime time or `clock_gettime(CLOCK_REALTIME, ...)`. What is the input? What is the expected output? What are measurements example? Grab a read - [stack overflow question checklist](https://meta.stackoverflow.com/questions/260648/stack-overflow-question-checklist). – KamilCuk Nov 08 '18 at 23:09

1 Answers1

0

This is a bit suspicious:

  sscanf(argv[1], "%d", &rows);
  sscanf(argv[1], "%d", &numberOfThreads);

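Both calls read argv[1], so numberOfThreads always ends up equal to the number of rows and the thread count you type on the command line is ignored; the second call should read argv[2]. I am not sure how far along you are in learning C, but “man 3 getopt” shows a much better way to pass runtime parameters into your program than indexing argv by hand and accidentally re-using argv[1].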

The second problem that you have created is using two different mechanisms for performing matrix multiplication; one for the sequential version, and a separate one for the parallel one. Logically, you should be able to have one, fully parameterized function to perform the operation, then the fact that M parallel threads are invoking it with different data is transparent to the function itself. Since you didn’t, you left a question mark: is the lack of scale because one of your matrix multiply functions doesn’t work correctly?

Looking at your code, I have little faith that it does; you have utilized a barrage of mechanisms to implement this. The core is:

void Mult(double *A, int Ar, int Ac, double *B, int Br, int Bc, double *C, int Cr, int Cc) { /* multiply C = A * B */

}

and whether it is N threads converging on a solution, or one thread trudging through the solution, they should be able to execute the same code.

mevets
  • 10,070
  • 1
  • 21
  • 33