We are using amazon transcribe to make speach to text but we need to capture sound from both microphone and speakers. Do you think that this can be done with sounddevice or should we use something else?
amazon-transcribe-streaming-sdk
- Mic Function
async def mic_stream():
# This function wraps the raw input stream from the microphone forwarding
# the blocks to an asyncio.Queue.
loop = asyncio.get_event_loop()
input_queue = asyncio.Queue()
def callback(indata, outdata, frame_count, time_info, status):
'''
if status:
print(status)
'''
#indata[:] = outdata
loop.call_soon_threadsafe(input_queue.put_nowait, (bytes(indata), status))
# Be sure to use the correct parameters for the audio stream that matches
# the audio formats described for the source language you'll be using:
# https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
stream = sounddevice.RawStream(
#device=3,
channels=1,
samplerate=16000,
callback=callback,
blocksize=1024 * 2,
dtype="int16",
)
# Initiate the audio stream and asynchronously yield the audio chunks
# as they become available.
with stream:
while True:
indata, status = await input_queue.get()
yield indata, status