I am following the deepspeech example for nodejs_wav and I keep getting the following result:
audio length 0
result:
The audio files are present as well. Here is the additional console output I get when I run the code with node index.js:
TensorFlow: v2.3.0-6-g23ad988fcd
DeepSpeech: v0.9.3-0-gf2e9c858
2022-11-29 11:01:35.452488: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Code from the sample:
const DeepSpeech = require("deepspeech");
const Fs = require("fs");
const Sox = require("sox-stream");
const MemoryStream = require("memory-stream");
const Duplex = require("stream").Duplex;
const Wav = require("node-wav");

// Load the acoustic model and the external scorer.
let modelPath = "./models/deepspeech-0.9.3-models.pbmm";
let model = new DeepSpeech.Model(modelPath);
let desiredSampleRate = model.sampleRate();
let scorerPath = "./models/deepspeech-0.9.3-models.scorer";
model.enableExternalScorer(scorerPath);

let audioFile = process.argv[2] || "./audio/2830-3980-0043.wav";
if (!Fs.existsSync(audioFile)) {
  console.log("file missing:", audioFile);
  process.exit();
}

const buffer = Fs.readFileSync(audioFile);
const result = Wav.decode(buffer);
if (result.sampleRate < desiredSampleRate) {
  console.error(
    "Warning: original sample rate (" +
      result.sampleRate +
      ") is lower than " +
      desiredSampleRate +
      "Hz. Up-sampling might produce erratic speech recognition."
  );
}

// Wrap the file buffer in a readable stream so it can be piped through SoX.
function bufferToStream(buffer) {
  let stream = new Duplex();
  stream.push(buffer);
  stream.push(null);
  return stream;
}

// Convert the WAV data to raw 16-bit, mono PCM at the model's sample rate.
let audioStream = new MemoryStream();
bufferToStream(buffer)
  .pipe(
    Sox({
      global: {
        "no-dither": true,
      },
      output: {
        bits: 16,
        rate: desiredSampleRate,
        channels: 1,
        encoding: "signed-integer",
        endian: "little",
        compression: 0.0,
        type: "raw",
      },
    })
  )
  .pipe(audioStream);

// Once the conversion finishes, run inference on the converted buffer.
audioStream.on("finish", () => {
  let audioBuffer = audioStream.toBuffer();
  const audioLength = (audioBuffer.length / 2) * (1 / desiredSampleRate);
  console.log("audio length", audioLength);
  let result = model.stt(audioBuffer);
  console.log("result:", result);
});
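Since the converted buffer apparently ends up empty, I have been thinking about attaching error listeners to the SoX transform and the memory stream to check whether the conversion is failing silently. This is only a rough sketch of what I have in mind (untested; it assumes sox-stream surfaces failures as standard stream "error" events):

// Sketch: surface SoX conversion errors instead of letting them pass silently.
const soxTransform = Sox({
  global: {
    "no-dither": true,
  },
  output: {
    bits: 16,
    rate: desiredSampleRate,
    channels: 1,
    encoding: "signed-integer",
    endian: "little",
    compression: 0.0,
    type: "raw",
  },
});

soxTransform.on("error", (err) => {
  // If sox is missing or the conversion fails, this should show why.
  console.error("SoX conversion error:", err);
});
audioStream.on("error", (err) => {
  console.error("memory stream error:", err);
});

bufferToStream(buffer).pipe(soxTransform).pipe(audioStream);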
Any ideas? I am looking into the TensorFlow binary, but I am not 100% sure that is what is causing this issue.