I am following the deepspeech example for nodejs_wav and I keep getting the following result:
audio length 0
result:
The audio files are present as well. Here is the additional console output I get when I run the code with node index.js:
TensorFlow: v2.3.0-6-g23ad988fcd
DeepSpeech: v0.9.3-0-gf2e9c858
2022-11-29 11:01:35.452488: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Code from the sample:
const DeepSpeech = require("deepspeech");
const Fs = require("fs");
const Sox = require("sox-stream");
const MemoryStream = require("memory-stream");
const Duplex = require("stream").Duplex;
const Wav = require("node-wav");

// Load the acoustic model and the external scorer.
let modelPath = "./models/deepspeech-0.9.3-models.pbmm";
let model = new DeepSpeech.Model(modelPath);
let desiredSampleRate = model.sampleRate();
let scorerPath = "./models/deepspeech-0.9.3-models.scorer";
model.enableExternalScorer(scorerPath);

let audioFile = process.argv[2] || "./audio/2830-3980-0043.wav";
if (!Fs.existsSync(audioFile)) {
  console.log("file missing:", audioFile);
  process.exit();
}

const buffer = Fs.readFileSync(audioFile);
const result = Wav.decode(buffer);
if (result.sampleRate < desiredSampleRate) {
  console.error(
    "Warning: original sample rate (" +
      result.sampleRate +
      ") is lower than " +
      desiredSampleRate +
      "Hz. Up-sampling might produce erratic speech recognition."
  );
}

// Wrap the file buffer in a readable stream so it can be piped through SoX.
function bufferToStream(buffer) {
  let stream = new Duplex();
  stream.push(buffer);
  stream.push(null);
  return stream;
}

// Convert the WAV data to raw 16-bit, mono PCM at the model's sample rate.
let audioStream = new MemoryStream();
bufferToStream(buffer)
  .pipe(
    Sox({
      global: {
        "no-dither": true,
      },
      output: {
        bits: 16,
        rate: desiredSampleRate,
        channels: 1,
        encoding: "signed-integer",
        endian: "little",
        compression: 0.0,
        type: "raw",
      },
    })
  )
  .pipe(audioStream);

// Once the conversion finishes, run inference on the converted buffer.
audioStream.on("finish", () => {
  let audioBuffer = audioStream.toBuffer();
  const audioLength = (audioBuffer.length / 2) * (1 / desiredSampleRate);
  console.log("audio length", audioLength);
  let result = model.stt(audioBuffer);
  console.log("result:", result);
});
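Since the converted buffer apparently ends up empty, I have been thinking about attaching error listeners to the SoX transform and the memory stream to check whether the conversion is failing silently. This is only a rough sketch of what I have in mind (untested; it assumes sox-stream surfaces failures as standard stream "error" events):

// Sketch: surface SoX conversion errors instead of letting them pass silently.
const soxTransform = Sox({
  global: {
    "no-dither": true,
  },
  output: {
    bits: 16,
    rate: desiredSampleRate,
    channels: 1,
    encoding: "signed-integer",
    endian: "little",
    compression: 0.0,
    type: "raw",
  },
});

soxTransform.on("error", (err) => {
  // If sox is missing or the conversion fails, this should show why.
  console.error("SoX conversion error:", err);
});
audioStream.on("error", (err) => {
  console.error("memory stream error:", err);
});

bufferToStream(buffer).pipe(soxTransform).pipe(audioStream);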
Any ideas? I am looking into the TensorFlow binary, but I am not 100% sure that is what is causing this issue.