I have a simple implementation of a thread-safe allocator of same-sized buffers. Inside, the implementation is a very simple interlocked singly-linked list, that utilizes the unused space in unallocated buffers to maintain a singly-linked list.
Also wrote some tests, that test the code in single-threaded mode - everything seems to be ok. Managed to isolate the problem to the Free function, but I just can't seem to find it.
I have to mention, that I ran some tests with the exact same code using Microsoft's Interlocked Singly Linked Lists, and obviously it works, but I still want to find out what is the problem with my implementation. Even tried disassembling the code and applying similar intrinsics, but it did not help (also to note, that I don't need to keep track of number of list entries, so that is why I don't need interlock function to exchange double register-size elements like InterlockedCompareExchange128 for x64)
Here is the allocator's code:
#ifndef _POOLNOLOCK_HPP_
#define _POOLNOLOCK_HPP_
#include <windows.h>
template<size_t TSizeOfElem>
class PoolNoLock {
public:
PoolNoLock(size_t N) :
n(N),
arr(new ELEMENT[n])
{
for (size_t i = 0; (n - 1) > i; ++i)
{
arr[i].next = &arr[i + 1];
}
arr[n - 1].next = nullptr;
for (size_t i = 0; n > i; ++i)
{
arr[i].allocated = false;
}
}
~PoolNoLock() { delete[] arr; }
void *Alloc()
{
ELEMENT *allocBuff;
do
{
allocBuff = ptrFree;
if (!allocBuff)
{
return nullptr;
}
} while (allocBuff != InterlockedCompareExchangePointer(
reinterpret_cast<void *volatile *>(&ptrFree),
allocBuff->next,
allocBuff
));
if (allocBuff->allocated)
{
__debugbreak(); //will break here
}
allocBuff->allocated = true;
return &allocBuff->buff;
}
void Free(void *Address)
{
ELEMENT *const freeBuff = reinterpret_cast<ELEMENT *>(Address);
if (!freeBuff->allocated)
{
__debugbreak();
}
freeBuff->allocated = false;
ELEMENT *cmpFree = ptrFree;
do
{
freeBuff->next = cmpFree;
ELEMENT *const xchgFree =
reinterpret_cast<ELEMENT *>(InterlockedCompareExchangePointer(
reinterpret_cast<void *volatile *>(&ptrFree),
freeBuff,
cmpFree
));
if (xchgFree == cmpFree)
{
break;
}
cmpFree = xchgFree;
} while (true);
}
private:
typedef struct _ELEMENT {
union {
_ELEMENT *next;
unsigned char buff[TSizeOfElem];
};
bool allocated; //debug info
}ELEMENT;
const size_t n;
ELEMENT *const arr; //array of list elements
ELEMENT *volatile ptrFree = &arr[0]; //head of "singly" linked list
};
#endif // _POOLNOLOCK_HPP_
And this is the code I use to stress-test the class:
- 64 is the maximum number of objects, that WaitForMultipleObjects can wait for
- the wait in the thread is needed to help achieve a scenario, where as much thread as possible is accessing the resource
- the number of thread spawned is exactly equal to the number of elements in the allocator, this is the reason why the alloc-only test works
#include "PoolNoLock.hpp"
#include <vector>
#include <map>
#include <iostream>
static constexpr size_t N_THREAD = 64;
static constexpr size_t N_TEST_RUN = 4;
static constexpr size_t N_ALLOC_FREE = 1024;
struct ThreadParam {
PoolNoLock<sizeof(size_t)> *allocator;
const HANDLE &hStartEvent;
void *addressAlloc = nullptr;
ThreadParam(PoolNoLock<sizeof(size_t)> *Allocator, const HANDLE &StartEvent) :
allocator(Allocator),
hStartEvent(StartEvent)
{};
};
template<bool TAllocOnly>
class Test {
public:
~Test()
{
CloseHandle(hStartEvent);
}
bool RunSingle(PoolNoLock<sizeof(size_t)> *Allocator)
{
std::vector<ThreadParam> params(N_THREAD, ThreadParam(Allocator, hStartEvent));
if (TRUE != ResetEvent(hStartEvent))
{
return false;
}
for (size_t i = 0; N_THREAD != i; ++i)
{
handles[i] = CreateThread(nullptr,
0,
reinterpret_cast<PTHREAD_START_ROUTINE>(threadProc),
¶ms[i],
CREATE_SUSPENDED,
&tids[i]);
if (!handles[i])
{
return false;
}
}
for (HANDLE handle : handles)
{
if (1 != ResumeThread(handle))
{
return false;
}
}
if (TRUE != SetEvent(hStartEvent))
{
return false;
}
if ((WAIT_OBJECT_0 + N_THREAD - 1) < WaitForMultipleObjects(N_THREAD, handles, TRUE, INFINITE))
{
return false;
}
for (size_t i = 0; N_THREAD != i; ++i)
{
if (WAIT_OBJECT_0 != WaitForSingleObject(handles[i], 0))
{
return false;
}
DWORD exitCode;
if (TRUE != GetExitCodeThread(handles[i], &exitCode))
{
return false;
}
if (0 != exitCode)
{
return false;
}
if (TRUE != CloseHandle(handles[i]))
{
return false;
}
}
if (TAllocOnly)
{
std::map<void *, DWORD> threadAllocations;
for (size_t i = 0; N_THREAD != i; ++i)
{
if (!params[i].addressAlloc)
{
return false;
}
if (threadAllocations.end() == threadAllocations.find(params[i].addressAlloc))
{
return false;
}
std::pair<std::map<void *, DWORD>::iterator, bool> res =
threadAllocations.insert(std::make_pair(params[i].addressAlloc, tids[i]));
if (!res.second)
{
return false;
}
Allocator->Free(params[i].addressAlloc);
}
if (N_THREAD != threadAllocations.size())
{
return false;
}
}
return false;
}
bool RunMultiple()
{
for (size_t i = 0; N_TEST_RUN != i; ++i)
{
PoolNoLock<sizeof(size_t)> allocator(N_THREAD);
RunSingle(&allocator);
}
return true;
}
private:
const HANDLE hStartEvent = CreateEventW(nullptr, TRUE, FALSE, nullptr);
HANDLE handles[N_THREAD] = { nullptr };
DWORD tids[N_THREAD] = { 0 };
static DWORD WINAPI ThreadProcAllocOnly(_In_ ThreadParam *Param)
{
if (WAIT_OBJECT_0 != WaitForSingleObject(Param->hStartEvent, INFINITE))
{
return 2;
}
Param->addressAlloc = Param->allocator->Alloc();
if (!Param->addressAlloc)
{
return 3;
}
return 0;
}
static DWORD WINAPI ThreadProcAllocFree(_In_ ThreadParam *Param)
{
if (WAIT_OBJECT_0 != WaitForSingleObject(Param->hStartEvent, INFINITE))
{
return 2;
}
for (size_t i = 0; N_ALLOC_FREE != i; ++i)
{
void *ptrTest = Param->allocator->Alloc();
if (!ptrTest)
{
return 3;
}
Param->allocator->Free(ptrTest);
}
return 0;
}
const LPTHREAD_START_ROUTINE threadProc =
TAllocOnly
? reinterpret_cast<LPTHREAD_START_ROUTINE>(ThreadProcAllocOnly)
: reinterpret_cast<LPTHREAD_START_ROUTINE>(ThreadProcAllocFree);
};
int main()
{
Test<true> testAllocOnly0;
Test<false> TestAllocFree0;
if (!testAllocOnly0.RunMultiple()) //this test will succeed
{
std::cout << "Test failed" << std::endl;
return 1;
}
std::cout << "Alloc-ONLY tests succeeded" << std::endl;
if (!TestAllocFree0.RunMultiple()) //this test will fail
{
std::cout << "Test failed" << std::endl;
return 1;
}
std::cout << "Alloc/free tests succeeded" << std::endl;
std::cout << "All tests succeeded" << std::endl;
return 0;
}