I am trying to build an application that visualizes the waveform of generated audio samples. The application continuously generates audio samples, fills a buffer, and supplies said buffer to WASAPI for playback. The user can change the frequency of the generated audio samples by pressing various keyboard keys, like playing keys on a piano (in the range of C4 to B4, or 261.6256 Hz to 493.8833 Hz). This is a minimalistic version of the code for generating those samples.
void Update()
{
static float prev_freq = 0;
//process input & events
{
if (main_window->IsKeyPressed(KeyCode::C))
{
pressed = PIANO::C;
frequency = C4_FREQUENCY;
}
else if (main_window->IsKeyPressed(KeyCode::F))
{
pressed = PIANO::C_SHARP;
frequency = C4_SHARP_FREQUENCY;
}
else
{
pressed = PIANO::NONE;
frequency = 0.0f;
}
}
if (prev_freq != frequency)
{
prev_freq = frequency;
time_held = 0;
}
//grab all available audio frames
static float time = 0;
HRESULT hr;
UINT32 num_frames, padding_frames;
hr = pAudioClient->GetBufferSize(&num_frames);
hr = pAudioClient->GetCurrentPadding(&padding_frames);
available_frames = num_frames - padding_frames;
//write data to audio frames
hr = pAudioRenderClient->GetBuffer(available_frames, (BYTE**)&buffer);
for (unsigned int i = 0; i < available_frames; i++)
{
float amp = max_amplitude * sin(2 * PI * frequency * time);
buffer[i] = (int16_t)amp;
time += 1.f / wave_fmt->nSamplesPerSec;
}
hr = pAudioRenderClient->ReleaseBuffer(available_frames, NULL);
//time one key has been held in milliseconds
time_held += (frequency == 0 ? (-time_held) : dt);
}
The audio generation works well enough so far, but now I'm trying to achieve a real-time visualization of the generated audio waveform, i.e., the application should display a wave that reflects what the speakers are currently playing. I had a couple ideas for achieving this but none worked out.
- Directly retrieve a pointer to the audio buffer that will be played next from WASAPI. As far as I can tell there is no way to do this, but I'm curious if there's a workaround.
- Approximate the waveform by using the most recently submitted buffer's data. Given that the buffer is only long enough to hold 0.01 seconds of audio, I figure the visual delay won't be very noticeable. The only problem with this method is that it produces overlapping sine waves at certain frequencies, instead of the single sine wave I'm expecting. I suspect this issue has to do with timing/synchronization between the audio generation loop and the render loop. This is the implementation of this solution.
void Render()
{
    // Visualize the most recently submitted audio buffer: one dot per sample,
    // with the sample index mapped to x and the normalized amplitude mapped to
    // a vertical deflection around the middle of the window.
    for (int sample = 0; sample < available_frames; sample++)
    {
        const float normalized = (float)buffer[sample] / (float)max_amplitude;
        ball_y = main_window->GetHeight() / 2;
        ball_y += normalized * DEFAULT_AMPLITUDE_DEFLECTION;
        ball_x = main_window->GetWidth() * ((float)sample / (float)available_frames);
        main_window->DrawCircle(ball_x, ball_y, 2);
    }
}
- Approximate the waveform based on how long a key is held. This method produces a slightly jittering effect and the overlapping sine wave problem persists, so the solution is worse than #2.
// Abbreviated Update(): input handling and WASAPI buffer fill elided ("...").
void Update()
{
...
...
...
//time one key has been held in milliseconds
// NOTE: when no key is held (frequency == 0) the ternary adds -time_held,
// resetting the accumulator to 0; otherwise the frame delta dt is added.
time_held += (frequency == 0 ? (-time_held) : dt);
}
void Render()
{
    // Approximate the audible waveform from how long the current key has been
    // held, instead of from a synchronized read cursor.
    if (available_frames == 0)
        return;                         // also guards the % available_frames below (UB on 0)

    float time_held_seconds = time_held / 1000.f;
    //get the number of samples generated since the key was pressed
    // Loop-invariant: computed once instead of once per drawn sample.
    // NOTE(review): at 48 kHz this unsigned int overflows after roughly a day
    // of continuous holding — acceptable here, but worth knowing.
    const unsigned int generated_samples =
        (unsigned int)(wave_fmt->nSamplesPerSec * time_held_seconds);
    //modulo number of samples by buffer size to get current index in buffer
    const unsigned int buffer_position = generated_samples % available_frames;

    for (unsigned int i = 0; i < available_frames; i++)
    {
        unsigned int buffer_index = (buffer_position + i) % available_frames;
        //convert to screen space coordinates
        ball_y = main_window->GetHeight() / 2;
        float deflection = ((float)buffer[buffer_index] / (float)max_amplitude);
        deflection *= DEFAULT_AMPLITUDE_DEFLECTION;
        ball_y += deflection;
        ball_x = ((float)i / (float)available_frames);
        ball_x *= main_window->GetWidth();
        main_window->DrawCircle(ball_x, ball_y, 2);
    }
}
- While debugging, I noticed that if I add a
std::cout
statement inside the render loop (`for (int i = 0; i < available_frames; i++)`),
the overlapping sine waves virtually disappear. The drawback is that the application slows to a crawl. This is what led me to believe the issue lies with timing or synchronization. With this pseudo-fix in mind, I then tried implementing a controlled delay inside the render loop, i.e., locking the framerate. The drawback of this method is that, depending on TARGET_FPS, the application either has a pronounced "flickering" effect with little to no sine wave overlap (when TARGET_FPS is in the 1–30 range) or little to no flickering but with pronounced overlap of sine waves (when TARGET_FPS is 300+). With TARGET_FPS between 30 and 300, both the flickering and the sine wave overlap are prominent.
void OnRender()
{
QueryPerformanceFrequency(&freq);
QueryPerformanceCounter(&end_t);
LONGLONG dt = end_t.QuadPart - start_t.QuadPart;
float cycle_time = dt / (float)freq.QuadPart;
if (cycle_time < (1.f / TARGET_FPS))
return;
start_t = end_t;
for (int i = 0; i < available_frames; i++)
{
//get the number of samples generated since the key was pressed
unsigned int generated_samples = wave_fmt->nSamplesPerSec * (time_held_seconds);
//modulo number of samples by buffer size to get current index in buffer
unsigned int buffer_position = generated_samples % available_frames;
unsigned int buffer_index = (buffer_position + i) % available_frames;
//convert to screen space coordinates
ball_y = main_window->GetHeight() / 2;
float deflection = ((float)buffer[buffer_index] / (float)max_amplitude);
//std::cout << deflection << '\n'; <-- uncommenting this indirectly fixes the overlapping sine waves issue but slows the audio and the program to a crawl
deflection *= DEFAULT_AMPLITUDE_DEFLECTION;
ball_y += deflection;
ball_x = ((float)i / (float)available_frames);
ball_x *= main_window->GetWidth();
main_window->DrawCircle(ball_x, ball_y, 2);
}
}
- I thought the flickering might have something to do with the background being cleared before every call to Render(), so I disabled background clearing to test this. The sine waves are very clearly being drawn at phase-offset positions.
This is as far as I could get. Any help is greatly appreciated. I apologize for the length and if the format of this question is incorrect, this is my first post. Thanks.
Edit: This is how the application runs the Update and Render methods.
void Application::Run()
{
    // Main loop: advance application/audio state, then present a frame.
    // Exits as soon as an event handler clears `running`.
    for (;;)
    {
        if (!running)
            break;
        Update();
        Render();
    }
}
Edit #2: Updated to use a ring buffer and an event callback.
//constructor creates worker thread
SoundSnythApp::SoundSynthApp()
{
//initialize buffer with 0 amplitude
UINT32 num_frames, padding_frames;
hr = pAudioClient->GetBufferSize(&num_frames);
hr = pAudioClient->GetCurrentPadding(&padding_frames);
available_frames = num_frames - padding_frames;
ring_buffer_size = (size_t)num_frames;
ring_buffer = new int16_t[ring_buffer_size];
hr = pAudioRenderClient->GetBuffer(available_frames, (BYTE**)&buffer);
for (unsigned int i = 0; i < available_frames; i++)
{
int16_t* pWrite = &ring_buffer[write_pos];
*pWrite = (int16_t)0;
buffer[i] = *pWrite;
write_pos = (write_pos + 1) % ring_buffer_size;
if (write_pos == read_pos)
read_pos = (read_pos + 1) % ring_buffer_size;
}
hr = pAudioRenderClient->ReleaseBuffer(available_frames, NULL);
//start audio stream
pAudioClient->Start();
//start buffer-fill thread
callback_thread = std::thread(&SoundSynthApp::AudioCallback, this);
}
//Audio Callback thread
void SoundSynthApp::AudioCallback()
{
while (main_window->IsAlive())
{
DWORD callback_signal = WaitForSingleObject(callback_event_handle, INFINITE);
if(callback_signal == WAIT_OBJECT_0)
{
UINT32 num_frames = 0, padding_frames = 0;
HRESULT hr;
static float time = 0;
hr = pAudioClient->GetBufferSize(&num_frames);
hr = pAudioClient->GetCurrentPadding(&padding_frames);
available_frames = num_frames - padding_frames;
//write data to audio frames
hr = pAudioRenderClient->GetBuffer(available_frames, (BYTE**)&buffer);
for (unsigned int i = 0; i < available_frames; i++)
{
//generate audio
float amp = max_amplitude * sin(2 * PI * frequency * time);
int16_t* pWrite = &ring_buffer[write_pos];
*pWrite = (int16_t)amp;
buffer[i] = ring_buffer[write_pos];
write_pos = (write_pos + 1) % ring_buffer_size;
if (write_pos == read_pos)
read_pos = (read_pos + 1) % ring_buffer_size;
time += 1.f / wave_fmt->nSamplesPerSec;
}
hr = pAudioRenderClient->ReleaseBuffer(available_frames, NULL);
}
}
}
//Render loop
void SoundSnythApp::OnRender()
{
for (unsigned int i = 0; i < available_frames; i++)
{
ball_y = (float)main_window->GetHeight() / 2;
float deflection = (float)ring_buffer[read_pos]/max_amplitude;
read_pos = (read_pos + 1) % ring_buffer_size;
deflection *= DEFAULT_AMPLITUDE_DEFLECTION;
ball_y += deflection;
ball_x = ((float)i / (float)available_frames);
ball_x *= main_window->GetWidth();
main_window->DrawCircle(ball_x, ball_y, 2);
}
}
Edit #3: Zero-crossing pinned to left-hand side of window
void Render()
{
    const unsigned int samples_to_render = 400;
    // Scan backwards through the buffer for a rising zero crossing (a
    // negative-or-zero sample followed by a positive-or-zero one) so every
    // frame starts drawing at the same phase of the wave. Falls back to the
    // last sample when no crossing exists (e.g. silence).
    // NOTE(review): this assumes the newest samples sit at the end of the
    // array; with a ring buffer the most recently written sample is actually
    // at write_pos - 1 — confirm which is intended.
    unsigned int start = (unsigned int)ring_buffer_size - 1;
    for (unsigned int i = (unsigned int)ring_buffer_size - 1; i > 0; i--)
    {
        if (ring_buffer[i] >= 0 && ring_buffer[i - 1] <= 0)
        {
            start = i - 1;
            break;
        }
    }
    //pin zero-crossing to left side of screen
    for (unsigned int i = 0; i < samples_to_render; i++)
    {
        const unsigned int idx = (start + i) % ring_buffer_size;
        float deflection = (float)ring_buffer[idx]
                         / (float)max_amplitude
                         * DEFAULT_AMPLITUDE_DEFLECTION;
        ball_x = ((float)i / (float)samples_to_render) * (float)main_window->GetWidth();
        ball_y = (float)main_window->GetHeight() / 2 + deflection;
        main_window->DrawCircle(ball_x, ball_y, 1);
    }
}