Comparing reading files using the following three techniques:
1. C `<stdio.h>` `FILE*`
2. Win32 `CreateFile()`/`ReadFile()`
3. Win32 memory mapping
I noted that #1 is faster than #2, and #3 is the fastest.
For example, sorted from the fastest to the slowest, I got these results when processing a 900 MB test file:
Win32 memory mapping: 821.308 ms
C file (FILE*): 1779.83 ms
Win32 file (CreateFile): 3649.67 ms
Why is the C `<stdio.h>` technique faster than Win32 `ReadFile()` access? I'd expect the raw Win32 APIs to have less overhead than the CRT. What am I missing here?
Compilable test C++ source code follows.
EDIT
I repeated the tests with 4KB read buffers and using three different files (with the same content) to avoid caching effects that could distort performance measurements, and now the results are as expected.
For example, for a file of circa 400 MB the results are:
Win32 memory mapping: 305.908 ms
Win32 file (CreateFile): 451.402 ms
C file (FILE*): 460.579 ms
////////////////////////////////////////////////////////////////////////////////
// Test file reading using C FILE*, Win32 CreateFile and Win32 memory mapping.
////////////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <vector>
#include <Windows.h>
//------------------------------------------------------------------------
// Performance (speed) measurement
//------------------------------------------------------------------------
long long counter()
{
LARGE_INTEGER li;
QueryPerformanceCounter(&li);
return li.QuadPart;
}
long long frequency()
{
LARGE_INTEGER li;
QueryPerformanceFrequency(&li);
return li.QuadPart;
}
// Prints "<s>: <elapsed> ms" to std::cout.
//
// start, finish: performance-counter readings as returned by counter().
// s:             label printed in front of the elapsed time.
void print_time(const long long start, const long long finish,
                const char * const s)
{
    const long long elapsed_ticks = finish - start;
    // Ticks -> milliseconds: scale by 1000 and divide by ticks-per-second.
    const double elapsed_ms = elapsed_ticks * 1000.0 / frequency();
    std::cout << s << ": " << elapsed_ms << " ms\n";
}
//------------------------------------------------------------------------
// RAII handle wrappers
//------------------------------------------------------------------------
// Traits describing a C stdio stream (FILE*) for the generic handle wrapper.
struct c_file_traits
{
    using type = FILE*;

    // Value that stands for "no stream": a null FILE pointer.
    static type invalid_value()
    {
        return nullptr;
    }

    // Closes the stream with fclose(); the result of fclose() is discarded.
    static void close(type stream)
    {
        fclose(stream);
    }
};
struct win32_file_traits
{
typedef HANDLE type;
static HANDLE invalid_value()
{
return INVALID_HANDLE_VALUE;
}
static void close(HANDLE h)
{
CloseHandle(h);
}
};
struct win32_handle_traits
{
typedef HANDLE type;
static HANDLE invalid_value()
{
return nullptr;
}
static void close(HANDLE h)
{
CloseHandle(h);
}
};
// Owning, non-copyable wrapper around a raw handle.
//
// Traits must provide:
//   - type            : the raw handle type,
//   - invalid_value() : the sentinel meaning "no handle",
//   - close(type)     : the function that releases a handle.
//
// The wrapped handle is released by close(), by reset() with a different
// handle, and by the destructor.
template <typename Traits>
class handle
{
public:
    using type = typename Traits::type;

    // Creates an empty wrapper that holds the invalid sentinel.
    handle() : _h(Traits::invalid_value()) {}

    // Takes ownership of h (which may be the invalid sentinel).
    explicit handle(type h) : _h(h) {}

    // Releases the wrapped handle, if any.
    ~handle() { close(); }

    // Returns the raw handle without giving up ownership.
    type get() const { return _h; }

    // True when the wrapper holds something other than the invalid sentinel.
    bool valid() const
    {
        return (_h != Traits::invalid_value());
    }

    // Releases the wrapped handle (if valid) and leaves the wrapper empty.
    void close()
    {
        if (valid())
        {
            Traits::close(_h);
        }
        _h = Traits::invalid_value();
    }

    // Replaces the wrapped handle with h, releasing the previous one first.
    // Resetting to the handle already held is a no-op, so it is not closed.
    void reset(type h)
    {
        if (h != _h)
        {
            close();
            _h = h;
        }
    }

private:
    // Copying is banned: declared but intentionally never defined.
    handle(const handle&);
    handle& operator=(const handle&);

    type _h; // wrapped raw handle
};
// Concrete handle wrappers used by the tests below.
using c_file_handle = handle<c_file_traits>;          // FILE*, invalid == nullptr
using win32_file_handle = handle<win32_file_traits>;  // HANDLE, invalid == INVALID_HANDLE_VALUE
using win32_handle = handle<win32_handle_traits>;     // HANDLE, invalid == nullptr
//------------------------------------------------------------------------
// File reading tests using various techniques
//------------------------------------------------------------------------
// Counts the bytes equal to `ch` in the file `filename`, reading it through
// the C stdio API (fopen/fread) in 4 KB chunks.
//
// Throws std::runtime_error if the file cannot be opened or if a read fails.
unsigned long long count_char_using_c_file(const std::string& filename, const char ch)
{
    unsigned long long char_count = 0;
#pragma warning(push)
#pragma warning(disable: 4996) // fopen use is OK
    c_file_handle file(fopen(filename.c_str(), "rb"));
#pragma warning(pop)
    if (!file.valid())
        throw std::runtime_error("Can't open file.");

    std::vector<char> read_buffer(4*1024); // 4 KB
    bool has_more_data = true;
    while (has_more_data)
    {
        const size_t read_count = fread(read_buffer.data(), 1, read_buffer.size(), file.get());

        // The counting loop is deliberately kept as a plain per-byte compare,
        // identical in all three tests, so only the I/O technique differs.
        for (size_t i = 0; i < read_count; i++)
        {
            if (read_buffer[i] == ch)
                char_count++;
        }

        if (read_count < read_buffer.size())
        {
            // A short read means either end of file or an I/O error; only
            // ferror() can tell them apart. Report errors instead of
            // silently returning a count for a truncated read.
            if (ferror(file.get()))
                throw std::runtime_error("File read error using fread().");
            has_more_data = false;
        }
    }
    return char_count;
}
unsigned long long count_char_using_win32_file(const std::string& filename, const char ch)
{
unsigned long long char_count = 0;
win32_file_handle file(::CreateFileA(
filename.c_str(),
GENERIC_READ,
FILE_SHARE_READ,
nullptr,
OPEN_EXISTING,
FILE_FLAG_SEQUENTIAL_SCAN,
nullptr
)
);
if (!file.valid())
throw std::runtime_error("Can't open file.");
std::vector<char> read_buffer(4*1024); // 4 KB
bool has_more_data = true;
while (has_more_data)
{
DWORD read_count = 0;
if (!ReadFile(file.get(), read_buffer.data(), read_buffer.size(), &read_count, nullptr))
throw std::runtime_error("File read error using ReadFile().");
for (size_t i = 0; i < read_count; i++)
{
if (read_buffer[i] == ch)
char_count++;
}
if (read_count < sizeof(read_buffer))
has_more_data = false;
}
return char_count;
}
// Read-only memory mapping of a whole file.
//
// The constructor opens `filename`, creates a read-only file mapping and maps
// the entire file into the address space. Failures are not reported by
// exception: call valid() before using begin()/end(). A zero-length file is
// also left unmapped (valid() returns false), because the constructor returns
// before creating the mapping.
class file_map
{
public:
    explicit file_map(const std::string& filename)
        : _view(nullptr), _length(0)
    {
        _file.reset(::CreateFileA(
            filename.c_str(),
            GENERIC_READ,
            FILE_SHARE_READ,
            nullptr,
            OPEN_EXISTING,
            FILE_ATTRIBUTE_NORMAL,
            nullptr));
        if (!_file.valid())
            return;

        LARGE_INTEGER file_size;
        if (!GetFileSizeEx(_file.get(), &file_size))
            return;
        if (file_size.QuadPart == 0)
            return;

        // Maximum size 0/0: the mapping covers the current size of the file.
        _mapping.reset(::CreateFileMapping(
            _file.get(), nullptr,
            PAGE_READONLY,
            0,
            0,
            nullptr)
        );
        if (!_mapping.valid())
            return;

        // Offset 0 and length 0: map from the start to the end of the mapping.
        // MapViewOfFile() returns void*, so static_cast is sufficient; the
        // view is read-only, hence pointer-to-const.
        _view = static_cast<const char*>
            (::MapViewOfFile(_mapping.get(), FILE_MAP_READ, 0, 0, 0));
        if (!_view)
            return;

        _length = static_cast<unsigned long long>(file_size.QuadPart);
    }

    // Unmaps the view; the mapping and file handles are then released by the
    // destructors of their handle wrappers.
    ~file_map()
    {
        if (_view)
            UnmapViewOfFile(_view);
    }

    // True when the file was mapped successfully.
    bool valid() const
    {
        return (_view != nullptr);
    }

    // Pointer to the first mapped byte (nullptr when !valid()).
    const char * begin() const
    {
        return _view;
    }

    // Pointer one past the last mapped byte.
    const char * end() const
    {
        return begin() + length();
    }

    // Size of the mapped file in bytes (0 when !valid()).
    unsigned long long length() const
    {
        return _length;
    }

private: // ban copy
    file_map(const file_map&);
    file_map& operator=(const file_map&);

private:
    win32_file_handle _file;
    win32_handle _mapping;
    const char* _view;          // start of the read-only view, or nullptr
    unsigned long long _length; // in bytes
};
unsigned long long count_char_using_memory_mapping(const std::string& filename, const char ch)
{
unsigned long long char_count = 0;
file_map view(filename);
if (!view.valid())
throw std::runtime_error("Can't create memory-mapping of file.");
for (auto it = view.begin(); it != view.end(); ++it)
{
if (*it == ch)
{
char_count++;
}
}
return char_count;
}
// Runs one counting test, times it, and prints the elapsed time and the count.
//
// message:  label printed in front of the elapsed time.
// test:     callable invoked as test(filename, ch); its result is stored as
//           the unsigned long long count.
// filename: file passed through to the test.
// ch:       character passed through to the test and echoed in the output.
template <typename TestFunc>
void run_test(const char * message, TestFunc test, const std::string& filename, const char ch)
{
    // Only the call to `test` sits between the two counter readings.
    const long long t0 = counter();
    const unsigned long long occurrences = test(filename, ch);
    const long long t1 = counter();

    print_time(t0, t1, message);
    std::cout << "Count of \'" << ch << "\' : " << occurrences << "\n\n";
}
// Entry point. Usage: <program> <char> <filename>
//
// Runs the three counting tests on <filename>1, <filename>2 and <filename>3.
// Returns 0 on success and 1 on a usage error or when a test throws.
int main(int argc, char* argv[])
{
    static const int kExitOk = 0;
    static const int kExitError = 1;

    if (argc != 3)
    {
        std::cerr << argv[0] << " <char> <filename>.\n"
                  << "Counts occurrences of ASCII character <char>\n"
                  << "in the <filename> file.\n\n";
        return kExitError;
    }

    // Only the first character of the first argument is used.
    const char ch = argv[1][0];
    const std::string filename(argv[2]);

    int exit_code = kExitOk;
    try
    {
        // Execute tests on THREE different files with the same content,
        // to avoid caching effects.
        // (file names have incremental number suffix).
        run_test("C <stdio.h> file (FILE*)", count_char_using_c_file, filename + "1", ch);
        run_test("Win32 file (CreateFile)", count_char_using_win32_file, filename + "2", ch);
        run_test("Win32 memory mapping", count_char_using_memory_mapping, filename + "3", ch);
    }
    catch (const std::exception& e)
    {
        std::cerr << "\n*** ERROR: " << e.what() << '\n';
        exit_code = kExitError;
    }
    return exit_code;
}
////////////////////////////////////////////////////////////////////////////////