I have a 64-bit NUMA machine with 2 banks. I know there is a performance penalty when a CPU on one bank accesses the memory of the other bank, and I want to measure it with some example code. I tried writing the program below (CPU1 and CPU7 are on different banks):
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
/*
 * Buffer sizes in bytes: 500 MiB for each copy buffer and 10 MiB for the
 * scratch buffer. The expansions are fully parenthesized so the macros behave
 * as single values inside larger expressions, and the arithmetic is done in
 * size_t so larger sizes cannot overflow int.
 */
#define BUFSZ ((size_t)1048576 * 500)
#define SCRATCHSZ ((size_t)1048576 * 10)
/*
 * Restrict the calling thread to the single CPU `cpu`.
 * Returns 0 on success, or -1 with errno set by sched_setaffinity().
 */
static int pin_to_cpu(int cpu)
{
    cpu_set_t affinity_mask;

    CPU_ZERO(&affinity_mask);
    CPU_SET(cpu, &affinity_mask);
    return sched_setaffinity(0, sizeof(affinity_mask), &affinity_mask);
}

/*
 * Allocate `size` bytes and zero them so every page is touched (faulted in)
 * by the CPU this thread is currently pinned to.
 * Returns NULL if the allocation fails; the caller owns the buffer.
 */
static char *alloc_touched(size_t size)
{
    char *p = malloc(size);

    if (p != NULL) {
        memset(p, 0, size);
    }
    return p;
}

/*
 * Copy `size` bytes from `src` to `dst` and store the elapsed time, in
 * microseconds, in `*usec`. CLOCK_MONOTONIC is used so wall-clock adjustments
 * during the copy cannot distort the interval.
 * Returns 0 on success, or -1 with errno set if the clock cannot be read.
 */
static int timed_copy(char *dst, const char *src, size_t size, double *usec)
{
    struct timespec start;
    struct timespec end;

    if (clock_gettime(CLOCK_MONOTONIC, &start) != 0) {
        return -1;
    }
    memcpy(dst, src, size);
    if (clock_gettime(CLOCK_MONOTONIC, &end) != 0) {
        return -1;
    }

    *usec = (double)(end.tv_sec - start.tv_sec) * 1000000.0 +
            (double)(end.tv_nsec - start.tv_nsec) / 1000.0;
    return 0;
}

/*
 * Time a BUFSZ-byte memcpy twice and print both durations.
 *
 * Phase 1: pinned to CPU 1; source, destination and scratch buffers are all
 *          allocated and zeroed while on CPU 1.
 * Phase 2: pinned to CPU 7; the source buffer from phase 1 is kept, while the
 *          destination and scratch buffers are freed and re-allocated/zeroed
 *          on CPU 7 before the copy is timed again.
 *
 * NOTE(review): the experiment presumably relies on the kernel placing each
 * page on the node of the CPU that first touches it, and on the re-allocated
 * destination really getting fresh pages -- confirm on the target machine
 * (e.g. with numastat) before interpreting the numbers.
 *
 * Returns EXIT_SUCCESS when both timings were printed, EXIT_FAILURE (after a
 * diagnostic on stderr) if pinning, allocation or clock access fails.
 */
int main(void)
{
    char *scratch = NULL;
    char *buf = NULL;
    char *buf2 = NULL;
    double usec = 0.0;
    int rc = EXIT_FAILURE;

    /* ---- Phase 1: everything on CPU 1 ---- */
    if (pin_to_cpu(1) != 0) {
        perror("sched_setaffinity(CPU 1)");
        goto out;
    }

    /* Let page faults happen now, outside the timed region. */
    buf = alloc_touched(BUFSZ);
    buf2 = alloc_touched(BUFSZ);
    /* Touched last: writing the scratch buffer is meant to evict buf/buf2
     * from the CPU cache before timing starts. */
    scratch = alloc_touched(SCRATCHSZ);
    if (buf == NULL || buf2 == NULL || scratch == NULL) {
        perror("malloc");
        goto out;
    }

    if (timed_copy(buf2, buf, BUFSZ, &usec) != 0) {
        perror("clock_gettime");
        goto out;
    }
    printf("Total time 1 = %f useconds\n", usec);

    /* Drop the destination and scratch buffers; the source (buf) is kept so
     * phase 2 reads the memory that was touched on CPU 1. */
    free(buf2);
    buf2 = NULL;
    free(scratch);
    scratch = NULL;

    /* ---- Phase 2: run on CPU 7, reading the buffer touched on CPU 1 ---- */
    if (pin_to_cpu(7) != 0) {
        perror("sched_setaffinity(CPU 7)");
        goto out;
    }

    buf2 = alloc_touched(BUFSZ);
    scratch = alloc_touched(SCRATCHSZ);
    if (buf2 == NULL || scratch == NULL) {
        perror("malloc");
        goto out;
    }

    if (timed_copy(buf2, buf, BUFSZ, &usec) != 0) {
        perror("clock_gettime");
        goto out;
    }
    printf("Total time 2 = %f useconds\n", usec);

    rc = EXIT_SUCCESS;

out:
    /* free(NULL) is a no-op, so one cleanup path serves every exit. */
    free(buf);
    free(buf2);
    free(scratch);
    return rc;
}
On executing this:
prmpt:> ./a.out Total time 1 = 169583.000000 useconds Total time 2 = 129527.000000 useconds
It's not what I was expecting. Time 2 should be greater than Time 1, because in the second case CPU7 is accessing memory on a different bank. Any ideas?