0

I'm trying to run this code on an 8-core cluster machine. It has 2 sockets, each with 4 cores. I am trying to create 8 threads and set their affinity using the pthread_attr_setaffinity_np function. But when I look at the performance in VTune, it shows that roughly 3969 threads are being created. I don't understand why or how! Above all, my performance is exactly the same as it was when no affinity was set (i.e. with the OS scheduling the threads). Can someone please help me debug this problem? My code runs perfectly fine, but I have no control over the threads! Thanks in advance.

--------------------------------------CODE-------------------------------------------

// Number of worker threads. Used as the size of the per-thread arrays, as the
// upper bound on the CPU index each thread is pinned to, and as the divisor
// when the volume is split into per-thread ranges in backProject.
const int num_thrd=8;
/**
 * Runs backProject on num_thrd worker threads, one pinned to each of CPUs
 * 0 .. num_thrd-1, and waits for all of them to finish.
 *
 * For every worker i the fields L, O_L, R_L, A_n, I_n, f_L, S_x and S_y are
 * copied from r into threadCopy[i], and threadCopy[i].slice is set to i so the
 * worker knows which part of the volume it owns.
 *
 * @param r  global data for the current projection; only read here.
 * @return   true if every worker thread was created and joined,
 *           false if creating any thread failed (threads that were already
 *           started are still joined before returning).
 */
bool RCTAlgorithmBackprojection(RabbitCtGlobalData* r)
{
    cpu_set_t cpu[num_thrd];
    pthread_t thread[num_thrd];
    pthread_attr_t attr[num_thrd];

    int created = 0;   // number of threads successfully started so far
    bool ok = true;

    for (int i = 0; i < num_thrd; i++)
    {
        threadCopy[i].L = r->L;
        threadCopy[i].O_L = r->O_L;
        threadCopy[i].R_L = r->R_L;
        threadCopy[i].A_n = r->A_n;
        threadCopy[i].I_n = r->I_n;
        threadCopy[i].f_L = r->f_L;
        threadCopy[i].slice = i;
        threadCopy[i].S_x = r->S_x;
        threadCopy[i].S_y = r->S_y;

        pthread_attr_init(&attr[i]);
        CPU_ZERO(&cpu[i]);
        CPU_SET(i, &cpu[i]);

        // The second argument is the size of the mask in BYTES, i.e.
        // sizeof(cpu_set_t). CPU_SETSIZE is the number of CPUs the mask can
        // describe, and passing it makes the call read far past cpu[i].
        if (pthread_attr_setaffinity_np(&attr[i], sizeof(cpu_set_t), &cpu[i]) != 0)
        {
            // Best effort: the thread still runs, just without pinning.
            cout<<"Can't set thread affinity\n"<<endl;
        }

        int rc = pthread_create(&thread[i], &attr[i], backProject, (void*)&threadCopy[i]);

        // The attribute object is no longer needed once pthread_create returns.
        pthread_attr_destroy(&attr[i]);

        if (rc != 0)
        {
            cout<<"Can't create thread\n"<<endl;
            // Was `return -1`, which converts to true in a bool function.
            ok = false;
            break;
        }
        created++;
    }

    // Join only the threads that were actually started, including on the
    // failure path, so no worker is left writing to f_L after we return.
    for (int i = 0; i < created; i++)
    {
        pthread_join(thread[i], NULL);
    }

    return ok;
}


/**
 * Thread entry point: accumulates one projection's contribution into the
 * part of the L x L x L volume f_L owned by this worker.
 *
 * The outermost (k) index range [from, to) is derived from the worker's slice
 * number and num_thrd, so different workers write disjoint parts of f_L.
 *
 * @param parm  pointer to this worker's copyStruct (volume size L, origin O_L,
 *              voxel spacing R_L, matrix entries A_n[0..11], output volume f_L,
 *              and slice number).
 * @return      always NULL.
 */
void* backProject (void* parm)
{
    copyStruct* s = (copyStruct*)parm;   // retrieve the slice info
    unsigned int L   = s->L;
    float        O_L = s->O_L;
    float        R_L = s->R_L;
    double*      A_n = s->A_n;
    float*       I_n = s->I_n;
    float*       f_L = s->f_L;
    int slice1 = s->slice;

    // NOTE(review): I_n is not referenced directly below; it is kept in case
    // p_hat_n is a macro that expands to use it - confirm and drop otherwise.
    (void)I_n;

    // This slicing works even if L is not divisible by num_thrd. The bounds
    // are unsigned so they match the type of the loop counter below.
    unsigned int from = (slice1 * L) / num_thrd;
    unsigned int to   = ((slice1 + 1) * L) / num_thrd;

    for (unsigned int k = from; k < to; k++)
    {
        double z = O_L + (double)k * R_L;
        for (unsigned int j = 0; j < L; j++)
        {
            double y = O_L + (double)j * R_L;

            // Start of row (k, j) in the flat volume, computed in size_t so
            // the product does not wrap in 32-bit arithmetic for large L.
            const size_t row = ((size_t)k * L + j) * L;

            for (unsigned int i = 0; i < L; i++)
            {
                double x = O_L + (double)i * R_L;

                // (x, y, z, 1) multiplied by A_n; w_n is the divisor used to
                // obtain the coordinates (u_n, v_n) passed to p_hat_n.
                double w_n =  A_n[2] * x + A_n[5] * y + A_n[8] * z + A_n[11];
                double u_n = (A_n[0] * x + A_n[3] * y + A_n[6] * z + A_n[9] ) / w_n;
                double v_n = (A_n[1] * x + A_n[4] * y + A_n[7] * z + A_n[10]) / w_n;

                f_L[row + i] += (float)(1.0 / (w_n * w_n) * p_hat_n(u_n, v_n));
            }
        }
    }
    return NULL;
}
quantumshiv
  • 97
  • 10

1 Answers1

1

Alright, so I found out the cause: it was the CPU_SETSIZE that I was passing as an argument to pthread_attr_setaffinity_np. I replaced it with num_thrd. Apparently CPU_SETSIZE, which is only declared when __USE_GNU is defined, was not available in my file! (Note: that argument is the size of the CPU mask in bytes, so the documented value to pass is sizeof(cpu_set_t).) Sorry if I bothered any of you who were trying to debug this, and thanks again!

quantumshiv
  • 97
  • 10