1

I am trying to implement speech-to-text using audio captured with getUserMedia. I write the stream to a file on a Node.js server, and the file is created successfully, but when I try to convert the speech to text with Azure's fromStreamInput, I get an undefined result.


// Azure Speech resource credentials. The "--" values are placeholders; load
// real values from configuration instead of committing them to source control.
const subscriptionKey = "--";
const serviceRegion = "--"; // e.g., "westus"

// Speech service configuration built from the key/region pair above.
const s = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
  /**
   * Reads a file synchronously and returns its contents, minus the first 44
   * bytes, as a standalone ArrayBuffer.
   *
   * NOTE(review): 44 bytes is presumably the size of a canonical RIFF/WAV
   * header — confirm the input really is a WAV file with that header layout.
   *
   * @param {string} filename - Path of the file to read.
   * @returns {ArrayBuffer} Copy of the file's bytes from offset 44 onward
   *   (empty when the file is 44 bytes or shorter).
   */
  function LoadArrayFromFile (filename) {
    const headerLength = 44;
    const rawBytes = fs.readFileSync(filename);
    // Copying into a fresh Uint8Array gives the result its own exact-size
    // backing buffer rather than a view onto the Buffer returned by fs.
    const payload = new Uint8Array(rawBytes.subarray(headerLength));
    return payload.buffer;
  }
  // Audio payload streamed to the recognizer (file contents minus its header).
  // NOTE(review): the answer on this page states stream input must be
  // uncompressed PCM/WAV; an .mp3 payload is presumably why recognition yields
  // no text — confirm and decode the audio before streaming it.
  const fileBuffer = LoadArrayFromFile('output.mp3');

  // Number of bytes of fileBuffer already handed to the SDK.
  let bytesSent = 0;
  // Declared before assignment so the read callback can reference the stream.
  let p;

  p = sdk.AudioInputStream.createPullStream(
    {
      close: () => { return; },
      /**
       * Fills `buffer` with the next unread bytes of fileBuffer.
       *
       * @param {ArrayBuffer} buffer - Destination buffer supplied by the SDK.
       * @returns {number} Number of bytes written; 0 once fileBuffer is
       *   exhausted.
       */
      read: (buffer) => {
        const remaining = fileBuffer.byteLength - bytesSent;
        const count = Math.min(buffer.byteLength, remaining);

        // Copy exactly `count` bytes. The previous code computed an inclusive
        // end index but passed it to slice() (exclusive end), so the last byte
        // of every chunk was reported as sent without ever being copied.
        new Uint8Array(buffer).set(new Uint8Array(fileBuffer, bytesSent, count));
        bytesSent += count;

        // Kept from the original: when the total sent so far is smaller than
        // one SDK buffer, close the stream a second later.
        // NOTE(review): returning 0 presumably already signals end-of-stream —
        // confirm whether this timer is still needed.
        if (bytesSent < buffer.byteLength) {
          setTimeout(() => p.close(), 1000);
        }

        return count;
      },
    });

  // Use the pull stream as the recognizer's audio source.
  const config = sdk.AudioConfig.fromStreamInput(p);

  // Recognizer combining the speech configuration with the stream audio input.
  const r = new sdk.SpeechRecognizer(s, config);

  // Log the cancellation event itself. The previous handler logged an
  // undeclared identifier (`res`), so it always threw a ReferenceError and the
  // actual cancellation details were never printed.
  r.canceled = (o, e) => {
    console.log("canceled", e);
  };

  // Run a single-shot recognition over the audio stream. The recognizer is
  // closed on both the success and the error path so its resources are
  // released once the one result (or failure) has been reported.
  r.recognizeOnceAsync(
    (result) => {
      console.log(result);
      r.close();
    },
    (error) => {
      console.log(error);
      r.close();
    });
  });

1 Answer

0

Your code reads the contents of an .mp3 file and passes them to AudioConfig.fromStreamInput(). But the Azure documentation says quite clearly that only uncompressed audio (they call it pulse-code modulated, or .WAV) works here.

You'll need to figure out how to decompress your audio before you pass it to the speech recognizer. Or Azure will have to figure out how to handle compressed audio.

O. Jones
  • 103,626
  • 17
  • 118
  • 172