I am trying to create a program that takes an input sound/voice from the microphone and based on this input, it plots the mfcc images in rolling window. The thing is, when i run my code, no input is being processed. The mfcc_img variable is not properly calculated, it has only 0s. The code does not generate any error, but it does generate the rela-time mfcc-plots
import pyaudio
import numpy as np
import matplotlib.pyplot as plt
from threading import Thread
import librosa
p = pyaudio.PyAudio()
mic_device_index = 1
WINDOW_SIZE = 2048
CHANNELS = 1
RATE = 44100
N_MFCC = 20
FFT_FRAMES_IN_SPEC = 20
global_blocks = np.zeros( ( FFT_FRAMES_IN_SPEC, WINDOW_SIZE ) )
fft_frame = np.array( WINDOW_SIZE//2 )
win = np.hamming(WINDOW_SIZE)
mfcc_img = np.zeros( ( N_MFCC, FFT_FRAMES_IN_SPEC ) )
user_terminated = False
def callback( in_data, frame_count, time_info, status):
global global_blocks, fft_frame, win, mfcc_img
numpy_block_from_bytes = np.frombuffer( in_data , dtype=np.float32)
block_for_speakers = np.zeros( (numpy_block_from_bytes.size , CHANNELS) , dtype='int16' )
block_for_speakers[:,0] = numpy_block_from_bytes
if len(win) == len(numpy_block_from_bytes):
mfcc = librosa.feature.mfcc(y=numpy_block_from_bytes, sr=RATE, n_fft=WINDOW_SIZE,hop_length=WINDOW_SIZE//2, n_mfcc=N_MFCC)
mfcc_normalised = (mfcc-np.min(mfcc))/(np.max(mfcc)-np.min(mfcc))
mfcc_profile = np.mean(mfcc_normalised , axis=1 )
mfcc_img = np.roll(mfcc_img, -1, axis=1)
mfcc_img[:, -1] = mfcc_profile[::-1, 0]
global_blocks = np.roll( global_blocks, -1, axis=0 )
global_blocks[-1,:] = numpy_block_from_bytes
return (block_for_speakers, pyaudio.paContinue)
def user_input_function():
k = input('press "s" to terminate (then press "Enter"): ')
print('pressed: ', k)
if k == 's' or k == 'S':
global user_terminated
user_terminated = True
print('user_terminated 1: ', user_terminated )
input=p.open(format=pyaudio.paInt16,channels=CHANNELS,rate=RATE,output=False,input=True,input_device_index=mic_device_index,frames_per_buffer=WINDOW_SIZE,stream_callback=callback,start=False)
input.start_stream()
threaded_input = Thread( target=user_input_function )
threaded_input.start()
while input.is_active() and not user_terminated:
plt.imshow(mfcc_img , aspect='auto')
plt.show()
plt.pause(0.01)
print('stopping audio')
input.stop_stream()