I was playing around with high precision timers and one of my first tests was to use rdtsc to measure printf. Below is my test program followed by its output. The thing I noticed is that the first time printf runs, it consistently takes about 25 times longer on the first print than it does on subsequent prints. Why is that?
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
// Serializing TSC read (adapted from the Wikipedia example).
// Returns the CPU's 64-bit time-stamp counter. `static` is required here:
// a bare `__inline__` under GNU89 inline semantics may emit no out-of-line
// definition, causing an undefined-reference link error when the compiler
// chooses not to inline.
static __inline__ uint64_t rdtsc(void)
{
    uint32_t lo, hi;
    /* CPUID (leaf 0, via the xor) is a serializing instruction: it forces
       all prior instructions to retire before RDTSC samples the counter,
       so the measurement does not include leaked earlier work. */
    __asm__ __volatile__ (
        "xorl %%eax,%%eax \n cpuid"
        ::: "%rax", "%rbx", "%rcx", "%rdx");
    /* RDTSC places the counter in EDX:EAX; reassemble into 64 bits. */
    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
    return (uint64_t)hi << 32 | lo;
}
int main(int argc, const char *argv[])
{
    (void)argc;  /* unused */
    (void)argv;  /* unused */

    unsigned int i;
    uint64_t counter[10];
    uint64_t sum = 0;

    /* Time ten consecutive printf calls with the TSC; the deltas are
       stored so printing them does not perturb the measurements. */
    for (i = 0; i < 10; i++)
    {
        counter[i] = rdtsc();
        printf("Hello, world\n");
        counter[i] = rdtsc() - counter[i];
    }

    /* Report per-call cycle counts and the mean. PRIu64 is the portable
       format for uint64_t; "%lld" (signed long long) is undefined behavior
       when paired with a uint64_t argument. "%u" matches unsigned int. */
    for (i = 0; i < 10; i++)
    {
        printf("counter[%u] = %" PRIu64 "\n", i, counter[i]);
        sum += counter[i];
    }
    printf("avg = %" PRIu64 "\n", sum / 10);
    return 0;
}
And the output:
Hello, world
Hello, world
Hello, world
Hello, world
Hello, world
Hello, world
Hello, world
Hello, world
Hello, world
Hello, world
counter[0] = 108165
counter[1] = 6375
counter[2] = 4388
counter[3] = 4388
counter[4] = 4380
counter[5] = 4545
counter[6] = 4215
counter[7] = 4290
counter[8] = 4237
counter[9] = 4320
avg = 14930
(For reference, this was compiled with gcc on OSX)