I have a raw audio file that I would like to transcribe using the Watson Speech to Text service. I am using the default example provided in the SDK repository.
It works if I use the HTTP interface.
// working code
package main
import (
"os"
"github.com/IBM/go-sdk-core/v5/core"
"github.com/watson-developer-cloud/go-sdk/v2/speechtotextv1"
)
// main sends a raw audio file to the Watson Speech to Text service through
// the synchronous Recognize call and pretty-prints the returned result.
// Any failure (service construction, opening the file, the request itself)
// aborts the program with a panic.
func main() {
	// Instantiate the Watson Speech To Text service.
	authenticator := &core.IamAuthenticator{
		ApiKey: "YOUR API KEY",
	}
	service, serviceErr := speechtotextv1.NewSpeechToTextV1(&speechtotextv1.SpeechToTextV1Options{
		URL:           "YOUR SERVICE URL",
		Authenticator: authenticator,
	})
	if serviceErr != nil {
		panic(serviceErr)
	}

	// Open the raw audio file to recognize.
	audio, audioErr := os.Open("/opt/audioRaw.raw")
	if audioErr != nil {
		panic(audioErr)
	}
	// Release the file handle on every exit path, including the panic below.
	defer audio.Close()

	// The content type declares mu-law audio at 8000 Hz with a single
	// channel; the narrowband model is selected to go with it.
	recognizeOptions := service.
		NewRecognizeOptions(audio).
		SetContentType("audio/mulaw;rate=8000;channels=1").
		SetModel("en-US_NarrowbandModel")

	// Call the Speech to Text Recognize method; the middle return value
	// is intentionally unused here.
	recognizeResult, _, responseErr := service.Recognize(recognizeOptions)
	if responseErr != nil {
		panic(responseErr)
	}

	// Print only when a result was actually returned.
	if recognizeResult != nil {
		core.PrettyPrint(recognizeResult, "Recognize")
	}
}
But if I use the WebSocket interface for the same raw audio file, it does not work and panics with the error below when debugging:
"error": "unable to transcode data stream application/octet-stream -> audio/l16 "
// Does not work
package main
import (
"encoding/json"
"fmt"
"os"
"github.com/IBM/go-sdk-core/v5/core"
"github.com/watson-developer-cloud/go-sdk/v2/speechtotextv1"
)
// main streams a raw audio file to the Watson Speech to Text service through
// RecognizeUsingWebsocket, handing every event to a myCallBack value.
// Failure to construct the service or open the file aborts with a panic.
func main() {
	// Instantiate the Watson Speech To Text service.
	authenticator := &core.IamAuthenticator{
		ApiKey: "YOUR API KEY",
	}
	service, serviceErr := speechtotextv1.NewSpeechToTextV1(&speechtotextv1.SpeechToTextV1Options{
		URL:           "YOUR SERVICE URL",
		Authenticator: authenticator,
	})
	if serviceErr != nil {
		panic(serviceErr)
	}

	// Open the raw audio file to recognize.
	audio, audioErr := os.Open("/opt/audioRaw.raw")
	if audioErr != nil {
		panic(audioErr)
	}
	// Release the file handle when main returns.
	defer audio.Close()

	// The callback supplies the OnOpen, OnData, OnClose and OnError handlers.
	callback := myCallBack{}

	// The content type declares mu-law audio at 8000 Hz with a single
	// channel; the narrowband model is selected to go with it.
	recognizeUsingWebsocketOptions := service.
		NewRecognizeUsingWebsocketOptions(audio, "audio/mulaw;rate=8000;channels=1")
	recognizeUsingWebsocketOptions.
		SetModel("en-US_NarrowbandModel").
		SetWordConfidence(true).
		SetSpeakerLabels(true).
		SetTimestamps(true)

	service.RecognizeUsingWebsocket(recognizeUsingWebsocketOptions, callback)
}
// myCallBack is a field-less type whose methods (OnOpen, OnClose, OnData and
// OnError) serve as the handlers passed to RecognizeUsingWebsocket.
type myCallBack struct{}
// OnOpen announces on standard output that the handshake succeeded.
func (myCallBack) OnOpen() {
	const greeting = "Handshake successful"
	fmt.Println(greeting)
}
// OnClose announces on standard output that the connection is closing.
func (myCallBack) OnClose() {
	const farewell = "Closing connection"
	fmt.Println(farewell)
}
// OnData decodes the result carried by resp as JSON into a
// SpeechRecognitionResults value and pretty-prints it.
//
// A result that is not a []byte, or that cannot be decoded, is reported on
// standard output and skipped instead of panicking on the type assertion or
// printing a value built from a failed decode.
func (myCallBack) OnData(resp *core.DetailedResponse) {
	payload := resp.GetResult()
	raw, ok := payload.([]byte)
	if !ok {
		fmt.Printf("unexpected result type %T\n", payload)
		return
	}

	var speechResults speechtotextv1.SpeechRecognitionResults
	if err := json.Unmarshal(raw, &speechResults); err != nil {
		fmt.Printf("decoding recognition result: %v\n", err)
		return
	}
	core.PrettyPrint(speechResults, "Recognized audio: ")
}
// OnError aborts the program by panicking with the error it receives.
func (myCallBack) OnError(err error) {
	// No recovery is attempted; the error becomes the panic value as-is.
	panic(err)
}
Could someone please help me figure out why raw audio does not work through the WebSocket interface?
Any pointers are much appreciated.
Ryan