-7

memcpy increases CPU usage to 100% when copying every 10000 elements from the buffer. Is there any way to optimize memcpy so that it reduces the CPU usage?

Sam
  • 813
  • 2
  • 16
  • 30
  • You'll want the time in milliseconds. http://stackoverflow.com/questions/3756323/getting-the-current-time-in-milliseconds – kfsone Jun 19 '13 at 04:39
  • Your program doesn't perform any I/O. If it consumes only 20% of your CPU time, the other 80% will either be wasted or will be consumed by other processes. Why is that a goal? – Keith Thompson Jun 19 '13 at 04:52
  • I am told that the CPU usage should be minimum 20% in my program. However, when I run the program and use htop, it shows a CPU rate of 60%. – Sam Jun 19 '13 at 05:12
  • @user1596226: If your CPU usage should be "minimum 20%" (*why?*), then 60% satisfies your requirement. Higher CPU usage means you're using your CPU more efficiently. On the other hand, if you write a loop that copies a byte and sleeps for a second on each iteration, you can probably get your CPU usage below 1%. – Keith Thompson Jun 19 '13 at 14:37

1 Answer

3

(The question has been completely rewritten since this answer).

Your code can be altered to run on Linux as follows:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Number of unsigned short elements held by each buffer (2 * 2^20). */
const size_t NUM_ELEMENTS = (size_t)2 << 20;
/* Number of times the whole source buffer is copied inside the timed loop. */
const size_t ITERATIONS = 10 * 1000;

/*
 * Benchmark memcpy() throughput.
 *
 * Fills a source buffer of NUM_ELEMENTS unsigned shorts with rand() values,
 * copies it into a second buffer ITERATIONS times, and prints the elapsed
 * CLOCK_MONOTONIC time together with the resulting copy rate in MB/sec.
 *
 * argc and argv are accepted but not used.
 *
 * Returns 0 on success, or EXIT_FAILURE if an allocation fails, the clock
 * cannot be read, or the destination does not match the source afterwards.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    const size_t buf_bytes = NUM_ELEMENTS * sizeof(unsigned short);
    struct timespec start, stop;
    int rc = EXIT_FAILURE;

    unsigned short *src = malloc(buf_bytes);
    unsigned short *dest = malloc(buf_bytes);
    if (src == NULL || dest == NULL) {
        fprintf(stderr, "Failed to allocate two buffers of %zu bytes\n", buf_bytes);
        goto out;
    }

    /* Fill the source with non-constant data; rand() is narrowed to 16 bits. */
    for (size_t i = 0; i < NUM_ELEMENTS; i++) {
        src[i] = (unsigned short)rand();
    }

    if (clock_gettime(CLOCK_MONOTONIC, &start) != 0) {
        perror("clock_gettime");
        goto out;
    }

    for (size_t iter = 0; iter < ITERATIONS; iter++) {
        memcpy(dest, src, buf_bytes);
    }

    if (clock_gettime(CLOCK_MONOTONIC, &stop) != 0) {
        perror("clock_gettime");
        goto out;
    }

    /*
     * Sanity-check the copy outside the timed region. Reading dest back also
     * gives the copies an observable effect; without any read of dest an
     * optimizing compiler would be permitted to discard them.
     */
    if (memcmp(dest, src, buf_bytes) != 0) {
        fprintf(stderr, "Copy verification failed\n");
        goto out;
    }

    double duration_d = (double)(stop.tv_sec - start.tv_sec)
                      + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;

    /* Computed in floating point so buffer sizes that are not a whole
     * number of MiB are not truncated. */
    double mb_per_sec = ((double)ITERATIONS * (double)buf_bytes / (1024.0 * 1024.0))
                      / duration_d;

    /* ITERATIONS is a size_t, so it must be printed with %zu. */
    printf("Duration: %.5lfs for %zu iterations, %.3lfMB/sec\n", duration_d, ITERATIONS, mb_per_sec);

    rc = 0;

out:
    /* free(NULL) is a no-op, so this is safe on every path. */
    free(src);
    free(dest);

    return rc;
}

On older glibc versions (before 2.17) you may need to link with -lrt to get the clock_gettime() function.

caf
  • 233,326
  • 40
  • 323
  • 462