0

I am trying to create a program that takes an input sound/voice from the microphone and based on this input, it plots the mfcc images in rolling window. The thing is, when i run my code, no input is being processed. The mfcc_img variable is not properly calculated, it has only 0s. The code does not generate any error, but it does generate the rela-time mfcc-plots

import pyaudio
import numpy as np
import matplotlib.pyplot as plt
from threading import Thread
import librosa
p = pyaudio.PyAudio()
mic_device_index = 1
WINDOW_SIZE = 2048
CHANNELS = 1
RATE = 44100
N_MFCC = 20
FFT_FRAMES_IN_SPEC = 20
global_blocks = np.zeros( ( FFT_FRAMES_IN_SPEC, WINDOW_SIZE ) )
fft_frame = np.array( WINDOW_SIZE//2 )
win = np.hamming(WINDOW_SIZE)
mfcc_img = np.zeros( ( N_MFCC, FFT_FRAMES_IN_SPEC ) )
user_terminated = False

def callback( in_data, frame_count, time_info, status):
    global global_blocks, fft_frame, win, mfcc_img
    numpy_block_from_bytes = np.frombuffer( in_data , dtype=np.float32)
    block_for_speakers = np.zeros( (numpy_block_from_bytes.size , CHANNELS) , dtype='int16' )
    block_for_speakers[:,0] = numpy_block_from_bytes
    if len(win) == len(numpy_block_from_bytes):
        mfcc = librosa.feature.mfcc(y=numpy_block_from_bytes, sr=RATE,    n_fft=WINDOW_SIZE,hop_length=WINDOW_SIZE//2, n_mfcc=N_MFCC)
        mfcc_normalised = (mfcc-np.min(mfcc))/(np.max(mfcc)-np.min(mfcc))
        mfcc_profile = np.mean(mfcc_normalised , axis=1 )
        mfcc_img = np.roll(mfcc_img, -1, axis=1)
        mfcc_img[:, -1] = mfcc_profile[::-1, 0]
        global_blocks = np.roll( global_blocks, -1, axis=0 )
        global_blocks[-1,:] = numpy_block_from_bytes
    return (block_for_speakers, pyaudio.paContinue)

def user_input_function():
    k = input('press "s" to terminate (then press "Enter"): ')
    print('pressed: ', k)
    if k == 's' or k == 'S':
        global user_terminated
        user_terminated = True
        print('user_terminated 1: ', user_terminated )


input=p.open(format=pyaudio.paInt16,channels=CHANNELS,rate=RATE,output=False,input=True,input_device_index=mic_device_index,frames_per_buffer=WINDOW_SIZE,stream_callback=callback,start=False)

input.start_stream()

threaded_input = Thread( target=user_input_function )
threaded_input.start()

while input.is_active() and not user_terminated:
    plt.imshow(mfcc_img , aspect='auto')
    plt.show()
    plt.pause(0.01)

print('stopping audio')
input.stop_stream()

0 Answers0