I'm working on a lowest-possible latency MIDI synthetizer software. I'm aware of ASIO and other alternatives, but as they have apparently made significant improvements to the WASAPI stack (in shared mode, at least), I'm curious to try it out. I first wrote a simple event-driven version of program, but as that's not the recommended way to do low-latency audio on Windows 10 (according to the docs), I'm trying to migrate to the Real-Time Work Queue API.
The documentation on Low Latency Audio states that it is recommended to use the Real-Time Work Queue API or MFCreateMFByteStreamOnStreamEx
with WASAPI in order for the OS to manage work items in a way that will avoid interference from non-audio subsystems. This seems like a good idea, but the latter option seems to require some managed code (demonstrated in this WindowsAudioSession example), which I know nothing about and would preferably avoid (also the header Robytestream.h
which has defs for the IRandomAccessStream
isn't found on my system either).
The RTWQ example included in the docs is incomplete (doesn't compile as such), and I have made the necessary additions to make it compilable:
class my_rtqueue : IRtwqAsyncCallback {
public:
IRtwqAsyncResult* pAsyncResult;
RTWQWORKITEM_KEY workItemKey;
DWORD WorkQueueId;
STDMETHODIMP GetParameters(DWORD* pdwFlags, DWORD* pdwQueue)
{
HRESULT hr = S_OK;
*pdwFlags = 0;
*pdwQueue = WorkQueueId;
return hr;
}
//-------------------------------------------------------
STDMETHODIMP Invoke(IRtwqAsyncResult* pAsyncResult)
{
HRESULT hr = S_OK;
IUnknown* pState = NULL;
WCHAR className[20];
DWORD bufferLength = 20;
DWORD taskID = 0;
LONG priority = 0;
BYTE* pData;
hr = render_info.renderclient->GetBuffer(render_info.buffer_framecount, &pData);
ERROR_EXIT(hr);
update_buffer((unsigned short*)pData, render_info.framesize_bytes / (2*sizeof(unsigned short))); // 2 channels, sizeof(unsigned short) == 2
hr = render_info.renderclient->ReleaseBuffer(render_info.buffer_framecount, 0);
ERROR_EXIT(hr);
return S_OK;
}
STDMETHODIMP QueryInterface(const IID &riid, void **ppvObject) {
return 0;
}
ULONG AddRef() {
return 0;
}
ULONG Release() {
return 0;
}
HRESULT queue(HANDLE event) {
HRESULT hr;
hr = RtwqPutWaitingWorkItem(event, 1, this->pAsyncResult, &this->workItemKey);
return hr;
}
my_rtqueue() : workItemKey(0) {
HRESULT hr = S_OK;
IRtwqAsyncCallback* callback = NULL;
DWORD taskId = 0;
WorkQueueId = RTWQ_MULTITHREADED_WORKQUEUE;
//WorkQueueId = RTWQ_STANDARD_WORKQUEUE;
hr = RtwqLockSharedWorkQueue(L"Pro Audio", 0, &taskId, &WorkQueueId);
ERROR_THROW(hr);
hr = RtwqCreateAsyncResult(NULL, reinterpret_cast<IRtwqAsyncCallback*>(this), NULL, &pAsyncResult);
ERROR_THROW(hr);
}
int stop() {
HRESULT hr;
if (pAsyncResult)
pAsyncResult->Release();
if (0xFFFFFFFF != this->WorkQueueId) {
hr = RtwqUnlockWorkQueue(this->WorkQueueId);
if (FAILED(hr)) {
printf("Failed with RtwqUnlockWorkQueue 0x%x\n", hr);
return 0;
}
}
return 1;
}
};
And so, the actual WASAPI code (HRESULT
error checking is omitted for clarity):
void thread_main(LPVOID param) {
HRESULT hr;
REFERENCE_TIME hnsRequestedDuration = 0;
IMMDeviceEnumerator* pEnumerator = NULL;
IMMDevice* pDevice = NULL;
IAudioClient3* pAudioClient = NULL;
IAudioRenderClient* pRenderClient = NULL;
WAVEFORMATEX* pwfx = NULL;
HANDLE hEvent = NULL;
HANDLE hTask = NULL;
UINT32 bufferFrameCount;
BYTE* pData;
DWORD flags = 0;
hr = RtwqStartup();
// also, hr is checked for errors every step of the way
hr = CoInitialize(NULL);
hr = CoCreateInstance(
CLSID_MMDeviceEnumerator, NULL,
CLSCTX_ALL, IID_IMMDeviceEnumerator,
(void**)&pEnumerator);
hr = pEnumerator->GetDefaultAudioEndpoint(
eRender, eConsole, &pDevice);
hr = pDevice->Activate(
IID_IAudioClient, CLSCTX_ALL,
NULL, (void**)&pAudioClient);
WAVEFORMATEX wave_format = {};
wave_format.wFormatTag = WAVE_FORMAT_PCM;
wave_format.nChannels = 2;
wave_format.nSamplesPerSec = 48000;
wave_format.nAvgBytesPerSec = 48000 * 2 * 16 / 8;
wave_format.nBlockAlign = 2 * 16 / 8;
wave_format.wBitsPerSample = 16;
UINT32 DP, FP, MINP, MAXP;
hr = pAudioClient->GetSharedModeEnginePeriod(&wave_format, &DP, &FP, &MINP, &MAXP);
printf("DefaultPeriod: %u, Fundamental period: %u, min_period: %u, max_period: %u\n", DP, FP, MINP, MAXP);
hr = pAudioClient->InitializeSharedAudioStream(AUDCLNT_STREAMFLAGS_EVENTCALLBACK, MINP, &wave_format, 0);
my_rtqueue* workqueue = NULL;
try {
workqueue = new my_rtqueue();
}
catch (...) {
hr = E_ABORT;
ERROR_EXIT(hr);
}
hr = pAudioClient->GetBufferSize(&bufferFrameCount);
PWAVEFORMATEX wf = &wave_format;
UINT32 current_period;
pAudioClient->GetCurrentSharedModeEnginePeriod(&wf, ¤t_period);
INT32 FrameSize_bytes = bufferFrameCount * wave_format.nChannels * wave_format.wBitsPerSample / 8;
printf("bufferFrameCount: %u, FrameSize_bytes: %d, current_period: %u\n", bufferFrameCount, FrameSize_bytes, current_period);
hr = pAudioClient->GetService(
IID_IAudioRenderClient,
(void**)&pRenderClient);
render_info.framesize_bytes = FrameSize_bytes;
render_info.buffer_framecount = bufferFrameCount;
render_info.renderclient = pRenderClient;
hEvent = CreateEvent(nullptr, false, false, nullptr);
if (hEvent == INVALID_HANDLE_VALUE) { ERROR_EXIT(0); }
hr = pAudioClient->SetEventHandle(hEvent);
const size_t num_samples = FrameSize_bytes / sizeof(unsigned short);
DWORD taskIndex = 0;
hTask = AvSetMmThreadCharacteristics(TEXT("Pro Audio"), &taskIndex);
if (hTask == NULL) {
hr = E_FAIL;
}
hr = pAudioClient->Start(); // Start playing.
running = 1;
while (running) {
workqueue->queue(hEvent);
}
workqueue->stop();
hr = RtwqShutdown();
delete workqueue;
running = 0;
return 1;
}
This seems to kind of work (ie. audio is being output), but on every other invocation of my_rtqueue::Invoke()
, IAudioRenderClient::GetBuffer()
returns a HRESULT
of 0x88890006 (-> AUDCLNT_E_BUFFER_TOO_LARGE)
, and the actual audio output is certainly not what I intend it to be.
What issues are there with my code? Is this the right way to use RTWQ with WASAPI?