1

I have a 64-bit NUMA machine with 2 banks (nodes). I know there is a performance penalty when a CPU on one bank accesses memory that belongs to the other bank, and I want to measure it with some example code. I wrote the program below (CPU1 and CPU7 are on different banks):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>

/* Size of the source and destination buffers: 500 MiB each. */
#define BUFSZ ((size_t)1048576 * 500)
/* Size of the scratch area written before each timed copy: 10 MiB. */
#define SCRATCHSZ ((size_t)1048576 * 10)

/*
 * CPUs used for the two runs. They are expected to sit on different NUMA
 * nodes on the machine under test; adjust to match its topology
 * (see /sys/devices/system/node/nodeN/cpulist).
 */
enum { FIRST_CPU = 1, SECOND_CPU = 7 };

/*
 * Restrict the calling thread to a single CPU.
 * Returns 0 on success, -1 on failure (errno set by sched_setaffinity).
 */
static int pin_to_cpu(int cpu)
{
  cpu_set_t affinity_mask;

  CPU_ZERO(&affinity_mask);
  CPU_SET(cpu, &affinity_mask);
  return sched_setaffinity(0, sizeof(cpu_set_t), &affinity_mask);
}

/* Microseconds elapsed between two gettimeofday() samples. */
static double elapsed_usec(const struct timeval *start,
                           const struct timeval *end)
{
  return (double) (end->tv_usec - start->tv_usec) +
         (double) (end->tv_sec - start->tv_sec) * 1000000;
}

/*
 * Run one measurement on whatever CPU the caller is currently pinned to:
 * allocate a destination and a scratch buffer, touch both, then time a
 * BUFSZ-byte memcpy from src and print the result labelled with `run`.
 *
 * Returns 0 on success, -1 if an allocation fails. Both buffers are
 * released before returning in either case.
 *
 * NOTE(review): the destination is allocated and first written here, i.e.
 * after the caller has switched CPUs. Under a first-touch placement policy
 * it would presumably end up on the current CPU's node, so only the read of
 * src would be remote in the second run -- confirm with numastat/numactl.
 */
static int timed_copy(int run, const char *src)
{
  struct timeval tv1, tv2;
  int rc = -1;
  char *dst = malloc(BUFSZ);
  char *scratch = malloc(SCRATCHSZ);

  if (dst == NULL || scratch == NULL) {
    perror("malloc");
    goto out;
  }

  /* Let page fault happen before the timed region. */
  memset(dst, 0, BUFSZ);
  /* Write a scratch area, intended to push the buffers out of the CPU cache. */
  memset(scratch, 0, SCRATCHSZ);

  gettimeofday(&tv1, NULL);
  memcpy(dst, src, BUFSZ);
  gettimeofday(&tv2, NULL);

  printf("Total time %d = %f useconds\n", run, elapsed_usec(&tv1, &tv2));
  rc = 0;

out:
  /* free(NULL) is a no-op, so this is safe on the failure path too. */
  free(scratch);
  free(dst);
  return rc;
}

/*
 * Time the same large memcpy twice: once pinned to FIRST_CPU (where the
 * source buffer is allocated and first touched) and once pinned to
 * SECOND_CPU, printing the elapsed microseconds of each copy.
 *
 * Returns 0 on success and -1 if pinning or any allocation fails.
 */
int main(void)
{
  int rc = -1;
  char *buf = NULL;

  if (pin_to_cpu(FIRST_CPU)) {
    perror("sched_setaffinity");
    goto out;
  }

  buf = malloc(BUFSZ);
  if (buf == NULL) {
    perror("malloc");
    goto out;
  }
  /* Fault the source pages in while still pinned to FIRST_CPU. */
  memset(buf, 0, BUFSZ);

  if (timed_copy(1, buf)) {
    goto out;
  }

  if (pin_to_cpu(SECOND_CPU)) {
    perror("sched_setaffinity");
    goto out;
  }

  if (timed_copy(2, buf)) {
    goto out;
  }

  rc = 0;

out:
  free(buf);
  return rc;
}

Executing this gives:

prmpt:> ./a.out
Total time 1 = 169583.000000 useconds
Total time 2 = 129527.000000 useconds

It's not what I was expecting. Time 2 should be greater than time 1, because in the second run CPU7 is accessing memory that belongs to a different bank. Any ideas?

ashish
  • 813
  • 3
  • 10
  • 18
  • Adding the cpulist for the 2 nodes; we can see that CPU1 and CPU7 are on different nodes. `cat /sys/devices/system/node/node1/cpulist` gives 0-5,12-17 and `cat /sys/devices/system/node/node0/cpulist` gives 6-11,18-23. – ashish Jun 21 '16 at 08:46

0 Answers0