I'm comparing two different methods of buffer checking.
The first method is to check on every iteration if the end of the buffer has been reached, and the second method is to use a guard page to detect the end.
In theory the guard page method should be faster, because it removes the per-iteration bounds check from the loop, but in practice it turns out to be slower.
The disparity between the two is even worse for stores, where the guard page method takes 5x longer than the buffer check method.
What's causing this to happen?
Benchmarks on my machine (elapsed QueryPerformanceCounter ticks, averaged over 10 trials; lower is better):
branch + load:
58947659.3
branch + store:
15234306.6
seh + load:
84706608.6
seh + store:
84822314.3
My code:
#include <Windows.h>
#include <stdio.h>
#define BUFFER_SIZE 16ull * 1024ull * 1024ull * 1024ull
//remove this to do stores
#define LOAD
//remove this to use seh
#define USE_BRANCH
int main()
{
HANDLE consoleHandle = GetStdHandle(STD_OUTPUT_HANDLE);
char* memory = VirtualAlloc(NULL, BUFFER_SIZE, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
if (memory == NULL)
return 0;
unsigned long long total = 0;
char* memoryStart = memory;
#ifdef USE_BRANCH
LARGE_INTEGER perfcountBefore;
QueryPerformanceCounter(&perfcountBefore);
while (memory < memoryStart + BUFFER_SIZE)
{
#ifdef LOAD
total += *memory;
#else
(*memory)++;
#endif
memory++;
}
LARGE_INTEGER perfcountAfter;
QueryPerformanceCounter(&perfcountAfter);
char buffer[30];
int stringlength = _snprintf_s(buffer, 30, _TRUNCATE, "operation took %i\n", perfcountAfter.QuadPart - perfcountBefore.QuadPart);
WriteConsoleA(consoleHandle, buffer, stringlength, NULL, NULL);
#else
SYSTEM_INFO si;
GetSystemInfo(&si);
DWORD garbage;
VirtualProtect(memory + BUFFER_SIZE - si.dwPageSize, si.dwPageSize, PAGE_READWRITE | PAGE_GUARD, &garbage);
LARGE_INTEGER perfcountBefore;
QueryPerformanceCounter(&perfcountBefore);
__try
{
while (1)
{
#ifdef LOAD
total += *memory;
#else
(*memory)++;
#endif
memory++;
}
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
while (memory < memoryStart + BUFFER_SIZE)
{
#ifdef LOAD
total += *memory;
#else
(*memory)++;
#endif
memory++;
}
LARGE_INTEGER perfcountAfter;
QueryPerformanceCounter(&perfcountAfter);
char buffer[30];
int stringlength = _snprintf_s(buffer, 30, _TRUNCATE, "operation took %i\n", perfcountAfter.QuadPart - perfcountBefore.QuadPart);
WriteConsoleA(consoleHandle, buffer, stringlength, NULL, NULL);
}
#endif
return total;
}