I am new to Python, and working on a project with Google Speech to text. Finally figured out how to import results of Google STT (JSON) and format data in csv. BUT....
Google gives you alternative words which is good and bad. The attached code will only read the first alternative and stop, so I can only select one alternative.
I would love to import the other alternatives and show in their own column it Main, alt1, alt2. Sometimes time stamps the same as Main, sometimes differ.
Advice appreciated. - feeling I am getting the hang of it slowly.
{
"@type": "type.googleapis.com/google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse",
"timestamp": "2018-12-28 14:13:18",
"results": [
{
"alternatives": [
{
"confidence": 0.9319887,
"words": [
{
"confidence": 0.9572171,
"endTime": "2s",
"startTime": "1s",
"word": "Bla1a"
},
{
"confidence": 0.9572171,
"endTime": "3s",
"startTime": "2s",
"word": "Bla1b"
}
]
}
],
"languageCode": "th-th"
},
{
"alternatives": [
{
"confidence": 0.95174015,
"words": [
{
"confidence": 0.9572171,
"endTime": "2s",
"startTime": "1s",
"word": "Bla2a"
},
{
"confidence": 0.9572171,
"endTime": "3s",
"startTime": "2s",
"word": "Bla2b"
}
]
}
],
"languageCode": "th-th"
},
{
"alternatives": [
{
"confidence": 0.95298487,
"words": [
{
"confidence": 0.9572171,
"endTime": "2s",
"startTime": "1s",
"word": "bla3b"
},
{
"confidence": 0.9572171,
"endTime": "3s",
"startTime": "2s",
"word": "Bla3b"
}
]
}
],
"languageCode": "th-th"
},
{
"alternatives": [
{
"confidence": 0.8774771,
"words": [
{
"confidence": 0.7337543,
"endTime": "3s",
"startTime": "2s",
"word": "Bla4a"
},
{
"confidence": 0.9363319,
"endTime": "4s",
"startTime": "3s",
"word": "bla4b"
}
]
}
],
"languageCode": "th-th"
},
{
"alternatives": [
{
"confidence": 0.9491383,
"words": [
{
"confidence": 0.8349256,
"endTime": "4s",
"startTime": "3s",
"word": "Bla5a"
},
{
"confidence": 0.9572171,
"endTime": "5s",
"speakerTag": 1,
"startTime": "4s",
"word": "Bla5b"
}
]
}
],
"languageCode": "th-th"
}
]
}
#!/usr/bin/python
# note = can only show one alternatives list
import json
import pandas as pd
from pandas import ExcelWriter
import numpy as np
with open('Thai_Unicode(bk).json') as f: # this ensures opening and closing file
a = json.loads(f.read())
data = a["results"][0]["alternatives"][0]["words"]
df = pd.DataFrame(data)
#print(df)
df.to_excel('pandas4.xls')