I'm trying to implement this code on a 8 core cluster. It has 2 sockets each with 4 cores. I am trying to create 8 threads and set affinity using pthread_attr_setaffinity_np
function. But when I look at my performance in VTunes , it shows me that 3969 odd threads are being created. I don't understand why and how! Above all, my performance is exactly the same as it was when no affinity was set (OS thread scheduling). Can someone please help me debug this problem? My code is running perfectly fine but I have no control over the threads! Thanks in advance.
--------------------------------------CODE-------------------------------------------
const int num_thrd=8;
bool RCTAlgorithmBackprojection(RabbitCtGlobalData* r)
{
float O_L = r->O_L;
float R_L = r->R_L;
double* A_n = r->A_n;
float* I_n = r->I_n;
float* f_L = r->f_L;*/
cpu_set_t cpu[num_thrd];
pthread_t thread[num_thrd];
pthread_attr_t attr[num_thrd];
for(int i =0; i< num_thrd; i++)
{
threadCopy[i].L = r->L;
threadCopy[i].O_L = r->O_L;
threadCopy[i].R_L = r->R_L;
threadCopy[i].A_n = r->A_n;
threadCopy[i].I_n = r->I_n;
threadCopy[i].f_L = r->f_L;
threadCopy[i].slice= i;
threadCopy[i].S_x = r->S_x;
threadCopy[i].S_y = r->S_y;
pthread_attr_init(&attr[i]);
CPU_ZERO(&cpu[i]);
CPU_SET(i, &cpu[i]);
pthread_attr_setaffinity_np(&attr[i], CPU_SETSIZE, &cpu[i]);
int rc=pthread_create(&thread[i], &attr[i], backProject, (void*)&threadCopy[i]);
if (rc!=0)
{
cout<<"Can't create thread\n"<<endl;
return -1;
}
// sleep(1);
}
for (int i = 0; i < num_thrd; i++) {
pthread_join(thread[i], NULL);
}
//s_rcgd = r;
return true;
}
void* backProject (void* parm)
{
copyStruct* s = (copyStruct*)parm; // retrive the slice info
unsigned int L = s->L;
float O_L = s->O_L;
float R_L = s->R_L;
double* A_n = s->A_n;
float* I_n = s->I_n;
float* f_L = s->f_L;
int slice1 = s->slice;
//cout<<"The size of volume is L= "<<L<<endl;
int from = (slice1 * L) / num_thrd; // note that this 'slicing' works fine
int to = ((slice1+1) * L) / num_thrd; // even if SIZE is not divisible by num_thrd
//cout<<"computing slice " << slice1<< " from row " << from<< " to " << to-1<<endl;
for (unsigned int k=from; k<to; k++)
{
double z = O_L + (double)k * R_L;
for (unsigned int j=0; j<L; j++)
{
double y = O_L + (double)j * R_L;
for (unsigned int i=0; i<L; i++)
{
double x = O_L + (double)i * R_L;
double w_n = A_n[2] * x + A_n[5] * y + A_n[8] * z + A_n[11];
double u_n = (A_n[0] * x + A_n[3] * y + A_n[6] * z + A_n[9] ) / w_n;
double v_n = (A_n[1] * x + A_n[4] * y + A_n[7] * z + A_n[10]) / w_n;
f_L[k * L * L + j * L + i] += (float)(1.0 / (w_n * w_n) * p_hat_n(u_n, v_n));
}
}
}
//cout<<" finished slice "<<slice1<<endl;
return NULL;
}