2

I am trying to send the transcript generated by the following Google Speech-to-Text example code to a Node.js server and display it to the user.

var http = require('http');
const recorder = require('node-record-lpcm16');
const speech = require('@google-cloud/speech');
const client = new speech.SpeechClient();

// Audio format and language used for the recognition request.
const encoding = 'LINEAR16';
const sampleRateHertz = 16000;
const languageCode = 'en-US';

// Recognition settings, kept separate so the request object stays compact.
const recognitionConfig = { encoding, sampleRateHertz, languageCode };

// Request passed to client.streamingRecognize().
const request = {
  config: recognitionConfig,
  // Flip to true to also receive interim results.
  interimResults: false,
};

// Create a recognize stream. Every 'data' event is echoed to stdout; when it
// carries a transcript, that transcript is also handed to writeToServer().
const recognizeStream = client
  .streamingRecognize(request)
  .on('error', console.error)
  .on('data', data => {
    // The handler needs a block body: an expression-bodied arrow function
    // cannot contain two statements.
    const result = data.results[0];
    const alternative = result && result.alternatives[0];

    if (!alternative) {
      // No transcript in this event, so there is nothing to forward.
      process.stdout.write(
        '\n\nReached transcription time limit, press Ctrl+C\n'
      );
      return;
    }

    process.stdout.write(`Transcription: ${alternative.transcript}\n`);
    writeToServer(alternative.transcript);
  });

// Most recent transcript handed to writeToServer(); served on every request.
let latestTranscript = '';
// The single HTTP server instance, created on the first writeToServer() call.
let transcriptServer = null;

/**
 * Publishes a transcript over HTTP on port 8080.
 *
 * The server is created only once. Creating a new server per call (as before)
 * makes the second `listen(8080)` fail with EADDRINUSE, and each server would
 * only ever serve the value captured when it was created.
 *
 * @param {string} data - Transcript text to serve as the plain-text response.
 */
function writeToServer(data) {
  latestTranscript = data;

  if (transcriptServer !== null) {
    return;
  }

  transcriptServer = http
    .createServer(function (req, res) {
      res.writeHead(200, {'Content-Type': 'text/plain'});
      // Read the variable at request time so clients always get the newest text.
      res.end(latestTranscript);
    })
    // Log listen/socket failures instead of crashing on an unhandled 'error'.
    .on('error', console.error);

  transcriptServer.listen(8080);
}

// Capture microphone audio and stream it into the Speech API recognizer.
// SoX must be installed, see https://www.npmjs.com/package/node-record-lpcm16#dependencies
const microphoneOptions = {
  sampleRateHertz: sampleRateHertz,
  threshold: 0,
  // More options: https://www.npmjs.com/package/node-record-lpcm16#options
  verbose: false,
  recordProgram: 'rec', // "arecord" or "sox" are alternatives
  silence: '1.0',
};

const microphoneStream = recorder.record(microphoneOptions).stream();
microphoneStream.on('error', console.error);
microphoneStream.pipe(recognizeStream);

console.log('listening, press Ctrl+C to stop.');

I am having a hard time accessing the transcript from within the Google Speech client. References on how to pass microphone input from a web app instead of the local microphone would also be appreciated, as the goal is to take microphone input from the user via the browser and pass it to the Google Speech-to-Text API.

babsndeep
  • 59
  • 1
  • 2
  • 10

1 Answer

0

I recommend the following workflow:

Record the user's audio -> send it to your web app -> transcribe it with Google Speech-to-Text -> send the response to the client

For rendering your page I recommend Express.js, a minimal and flexible Node.js web application framework that provides a robust set of features for web and mobile applications.

Once you have set up your web app, you can use Recorder.js to record the user's microphone and then send the recording to your web app.

Client:

// File name (without extension) under which the recording is sent to the server.
const filename = new Date().toISOString();
// Route that receives the recording; must match the server's POST handler.
const uploadUrl = '/transcriptaudio';

// Upload link: clicking it POSTs the recorded audio to the server.
const upload = document.createElement('a');
upload.href = uploadUrl; // plain ASCII quotes; typographic quotes are a syntax error
upload.textContent = 'Upload';
upload.addEventListener('click', function (event) {
  // Keep the anchor from navigating to its href; the upload happens via XHR.
  event.preventDefault();

  const xhr = new XMLHttpRequest();
  xhr.onload = function (e) {
    if (this.readyState === 4) {
      console.log('Server returned: ', e.target.responseText);
    }
  };

  const fd = new FormData();
  // `blob` is not defined in this snippet; it is assumed to be the recorded
  // audio provided by the surrounding recorder callback.
  fd.append('audio_data', blob, filename);
  xhr.open('POST', uploadUrl, true);
  xhr.send(fd);
});
// `li` is likewise assumed to be the list item created by the surrounding code.
li.appendChild(document.createTextNode(' ')); // add a space in between
li.appendChild(upload); // add the upload link to li

Server:

// POST /transcriptaudio: transcribe the uploaded audio and reply with the text.
app.post('/transcriptaudio', function (req, res) {
  // NOTE(review): assumes a body-parsing middleware has populated
  // req.body.audio_data with the raw audio bytes. For multipart FormData
  // uploads the file may be exposed elsewhere (e.g. req.file) - confirm.
  const audio = req.body && req.body.audio_data;
  if (!audio) {
    res.status(400).send('Missing audio_data');
    return;
  }

  // create a recognize stream
  // NOTE(review): streamingRecognize() takes the recognition request/config,
  // not the audio itself; `request` is assumed to be the config object defined
  // next to `client`, and its encoding/sample rate must match the uploaded
  // audio - confirm.
  const recognizeStream = client
    .streamingRecognize(request)
    .on('error', err => {
      console.error(err);
      if (!res.headersSent) {
        res.status(500).send('Transcription failed');
      }
    })
    .on('data', data => {
      const result = data.results[0];
      const alternative = result && result.alternatives[0];

      if (!alternative) {
        process.stdout.write(
          '\n\nReached transcription time limit, press Ctrl+C\n'
        );
        if (!res.headersSent) {
          res.status(500).send('No transcription available');
        }
        return;
      }

      process.stdout.write(`Transcription: ${alternative.transcript}\n`);
      // Only one response may be sent per request; later results are just logged.
      if (!res.headersSent) {
        res.send(alternative.transcript);
      }
    });

  // Feed the uploaded audio into the stream and signal the end of input.
  recognizeStream.end(audio);
});