I am testing pthread parallel code on Linux with gcc (GCC) 4.8.3 20140911, on a CentOS 7 Server.
The single thread version is simple, it is used to init a 10000 * 10000 matrix :
int main(int argc)
{
int size = 10000;
int * r = (int*)malloc(size * size * sizeof(int));
for (int i=0; i<size; i++) {
for (int j=0; j<size; j++) {
r[i * size + j] = rand();
}
}
free(r);
}
Then I wanted to see if parallel code can improve the performance:
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
int size = 10000;
void *SetOdd(void *param)
{
printf("Enter odd\n");
int * r = (int*)param;
for (int i=0; i<size; i+=2) {
for (int j=0; j<size; j++) {
r[i * size + j] = rand();
}
}
printf("Exit Odd\n");
pthread_exit(NULL);
return 0;
}
void *SetEven(void *param)
{
printf("Enter Even\n");
int * r = (int*)param;
for (int i=1; i<size; i+=2) {
for (int j=0; j<size; j++) {
r[i * size + j] = rand();
}
}
printf("Exit Even\n");
pthread_exit(NULL);
return 0;
}
int main(int argc)
{
printf("running in thread\n");
pthread_t threads[2];
int * r = (int*)malloc(size * size * sizeof(int));
int rc0 = pthread_create(&threads[0], NULL, SetOdd, (void *)r);
int rc1 = pthread_create(&threads[1], NULL, SetEven, (void *)r);
for(int t=0; t<2; t++) {
void* status;
int rc = pthread_join(threads[t], &status);
if (rc) {
printf("ERROR; return code from pthread_join() is %d\n", rc);
exit(-1);
}
printf("Completed join with thread %d status= %ld\n",t, (long)status);
}
free(r);
return 0;
}
The simple code runs for about 0.8 second, while the multiple threaded version runs for about 10 seconds!!!!!!!
I am running on a 4 core server. But why the multiple threaded version is so slow ?