Here are four versions of input texts to google cloud text to speech:
Version 1 (This one works fine)
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.w3.org/2001/10/synthesis http://www.w3.org/TR/speech-synthesis11/synthesis.xsd" xml:lang="en-GB">
The rain in Spain stays mainly in the plain.
How kind of you to let me come.
</speak>
Version 2: Same as Version 1 except trying to insert external audio. It results in an error
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.w3.org/2001/10/synthesis http://www.w3.org/TR/speech-synthesis11/synthesis.xsd" xml:lang="en-GB">
The rain in Spain stays mainly in the plain.
<audio src="uhm_male.mp3" />
How kind of you to let me come.
</speak>
Version 3: Same as version 2 but with simpler form of
<speak>
The rain in Spain stays mainly in the plain.
<audio src="uhm_male.mp3" />
How kind of you to let me come.
</speak>
Results in same error as version 3
And, finally, same as version 3 but excluding audio. This again, works fine
Here is the python code:
import os
from google.cloud import texttospeech_v1
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] =\
'not_my_real_credentials.json'
def getText(infile_name):
with open(infile_name, 'r') as fobj:
intext = fobj.read()
return intext
def setVoice(language_code, name, ssml_gender):
theVoice = tts.VoiceSelectionParams(
language_code=language_code,
name=name,
ssml_gender=ssml_gender
)
return theVoice
def doAudioConfig(speaking_rate, pitch, volume_gain_db):
audioConfig = tts.AudioConfig(
audio_encoding = tts.AudioEncoding.MP3,
speaking_rate = speaking_rate,
pitch = pitch,
volume_gain_db = volume_gain_db
)
return audioConfig
#We try each of these in turn:
#This one works fine
infile_name = './texts/test1.txt'
#This one gives an error message
#infile_name = './texts/test2.txt'
#This one gives same error message as prev
#infile_name = './texts/test3.txt'
#This one works fine
infile_name = './texts/test4.txt'
outfile_name = './audio/audioOutput.mp3'
tts = texttospeech_v1
client = tts.TextToSpeechClient()
language_code = "en-GB"
name = "en-GB-Wavenet-F"
ssml_gender = "FEMALE"
pitch = -8.0
speaking_rate = 0.9
volume_gain_db = 0
intext = getText(infile_name)
print(f'\n{intext}\n')
theVoice = setVoice(language_code, name, ssml_gender)
audioConfig = doAudioConfig(speaking_rate, pitch, volume_gain_db)
synthesis_input = tts.SynthesisInput(ssml=intext)
response = client.synthesize_speech(
input=synthesis_input, voice=theVoice, audio_config=audioConfig)
with open(outfile_name, 'wb') as output1:
output1.write(response.audio_content)
And here is the error thrown by version 2 of the text:
Traceback (most recent call last):
File "D:\py\_new\envo\lib\site-packages\google\api_core\grpc_helpers.py", line 66, in error_remapped_callable
return callable_(*args, **kwargs)
File "D:\py\_new\envo\lib\site-packages\grpc\_channel.py", line 946, in __call__
return _end_unary_response_blocking(state, call, False, None)
File "D:\py\_new\envo\lib\site-packages\grpc\_channel.py", line 849, in _end_unary_response_blocking
raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.INTERNAL
details = "Internal error encountered."
debug_error_string = "{"created":"@1681519238.596000000","description":"Error received from peer ipv4:142.250.70.170:443","file":"src/core/lib/surface/call.cc","file_line":1075,"grpc_message":"Internal error encountered.","grpc_status":13}"
>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "D:\py\_new\ttsgcp2\insertAudio.py", line 62, in <module>
response = client.synthesize_speech(
File "D:\py\_new\envo\lib\site-packages\google\cloud\texttospeech_v1\services\text_to_speech\client.py", line 497, in synthesize_speech
response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,)
File "D:\py\_new\envo\lib\site-packages\google\api_core\gapic_v1\method.py", line 154, in __call__
return wrapped_func(*args, **kwargs)
File "D:\py\_new\envo\lib\site-packages\google\api_core\grpc_helpers.py", line 68, in error_remapped_callable
raise exceptions.from_grpc_error(exc) from exc
google.api_core.exceptions.InternalServerError: 500 Internal error encountered.
Using ".wav" and ".ogg" files results in the same error.
Also tried importing "texttospeech" instead of "texttospeech_v1" Same outcomes.
The external audio I'm trying to import sounds fine on all audio players I've tried. It's just an "uhm" sound. Very short. Duration 0.302 seconds
Can anyone help?