I tested DeepSpeech with WAV files and it works fine. My problem comes when I try to use an audio stream: it doesn't recognize a single word. The stream is PCM, 48 kHz, stereo, signed 16-bit little-endian. I've tried converting the stream to other formats, sample rates and channel counts, with no success at all. I'm using DeepSpeech with Node.js.
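For context, the model and the helpers used in the snippet below are set up roughly like in the standard DeepSpeech Node.js examples (I'm on the 0.9.x bindings; the model and scorer paths are placeholders, and stream is the incoming 48 kHz stereo PCM stream, which I haven't shown here):

const DeepSpeech = require('deepspeech');
const Sox = require('sox-stream');
const MemoryStream = require('memory-stream');
const { Duplex } = require('stream');

// Placeholder paths to the pre-trained English model and scorer
const englishModel = new DeepSpeech.Model('./deepspeech-0.9.3-models.pbmm');
englishModel.enableExternalScorer('./deepspeech-0.9.3-models.scorer');

// The model expects mono, 16-bit PCM at this rate (16 kHz for the pre-trained model)
const desiredSampleRate = englishModel.sampleRate();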
const modelStream = englishModel.createStream(); // streaming context; not actually used below, stt() is called on the full buffer instead

let chunks = [];
stream.on('data', chunk => {
  chunks.push(chunk);
}).on('close', () => {
  // Wrap the collected raw PCM chunks in a readable stream so they can be piped into sox
  const buffer = Buffer.concat(chunks);
  const bufferStream = new Duplex();
  bufferStream.push(buffer);
  bufferStream.push(null);

  // Convert to 16-bit, mono, signed little-endian raw PCM at the model's sample rate
  const audioStream = new MemoryStream();
  bufferStream.pipe(Sox({
    global: {
      'no-dither': true,
    },
    output: {
      bits: 16,
      rate: desiredSampleRate,
      channels: 1,
      encoding: 'signed-integer',
      endian: 'little',
      compression: 0.0,
      type: 'raw'
    }
  })).pipe(audioStream);

  audioStream.on('finish', () => {
    const audioBuffer = audioStream.toBuffer();

    // Duration in seconds: 2 bytes per 16-bit sample
    const audioLength = (audioBuffer.length / 2) * (1 / desiredSampleRate);
    console.log('audio length', audioLength);

    const result = englishModel.stt(audioBuffer);
    console.log('result:', result);
  });
});
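For reference, this is how I understand the modelStream created at the top is meant to be used with the 0.9.x streaming API (it would replace the englishModel.stt(audioBuffer) call inside the finish handler above):

// Streaming API sketch (DeepSpeech 0.9.x): feed converted 16 kHz, mono, 16-bit PCM,
// then finish the stream to get the transcript
modelStream.feedAudioContent(audioBuffer);
const streamingResult = modelStream.finishStream();
console.log('streaming result:', streamingResult);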