As the title states, I am trying to do a continual listening AI named Sapphire using the speech_recognition library. For about one minute after starting the code fresh it works fine, however, after it has been running for more than a minute, the speech_to_text() takes forever to run.
Any help would be appreciated, I am looking for some form of solution to this issue. Perhaps I am not understanding the functions well enough, or there may be a way to stop the speech_to_text() function after a certain time.
I am running a texting/email version of the bot as well in addition to the voice version using threading, but I was having this problem with speech_to_text() before threading was involved.
Thank you for your help!
Here is the output:
Me --> Sapphire what time is it
speech_to_text() Time = 5.611827599990647
Sapphire --> 16:46.
Listening...
Me --> ERROR
speech_to_text() Time = 3.4650153999973554
Listening...
Me --> ERROR
speech_to_text() Time = 6.241592899998068
Listening...
Me --> ERROR
speech_to_text() Time = 12.198483600004693
Listening...
Me --> ERROR
speech_to_text() Time = 3.7981161000061547
Listening...
Me --> shoe stamps
speech_to_text() Time = 51.52946890000021
Listening...
Me --> ERROR
speech_to_text() Time = 6.57019980000041
Listening...
Me --> ERROR
speech_to_text() Time = 46.647391800011974
Listening...
Here is my code to run the Sapphire AI:
class ChatBot():
def __init__(self, name):
print("----- Starting up", name, "-----")
self.name = name
def speech_to_text(self):
recognizer = sr.Recognizer()
# with sr.Microphone(device_index=3) as mic:
with sr.Microphone() as mic:
recognizer.adjust_for_ambient_noise(mic)
print("Listening...")
audio = recognizer.listen(mic)
self.text="ERROR"
try:
self.text = recognizer.recognize_google(audio)
print("Me --> ", self.text)
except:
print("Me --> ERROR")
@staticmethod
def text_to_speech(text):
if text == "":
print("ERROR")
else:
print((ai.name+" --> "), text)
speaker = gTTS(text=text, lang="en", slow=False)
speaker.save("res.mp3")
vlc_instance = vlc.Instance("--no-video")
player = vlc_instance.media_player_new()
media = vlc_instance.media_new("res.mp3")
player.set_media(media)
player.play()
def wake_up(self, text):
return True if (self.name).lower() in text.lower() else False
def parse_input(txt):
## action time
if "time" in txt and "is" in txt and "it" in txt:
res = action_time()
elif ai.name.lower() in txt:
res = np.random.choice(
["That's me!, Sapphire!", "Hello I am Sapphire the AI", "Yes I am Sapphire!", "My name is Sapphire, okay?!", "I am Sapphire and I am alive!",
"It's-a Me!, Sapphire!"])
## respond politely
elif any(i in txt for i in ["thank", "thanks"]):
res = np.random.choice(
["you're welcome!", "anytime!", "no problem!", "cool!", "I'm here if you need me!",
"mention not."])
elif any(i in txt for i in ["exit", "close"]):
res = np.random.choice(
["Tata!", "Have a good day!", "Bye!", "Goodbye!", "Hope to meet soon!", "peace out!"])
ex = False
## conversation
else:
if txt == "ERROR":
# res="Sorry, come again?"
res = ""
else:
starttime1 = timeit.default_timer()
chat = nlp(transformers.Conversation(txt), pad_token_id=50256)
endtime1 = timeit.default_timer()
print("Transformer Time = ", (endtime1 - starttime1))
res = str(chat)
res = res[res.find("bot >> ") + 6:].strip()
return res
def sapphire_audio():
ex = True
start = 0
while ex:
starttime1 = timeit.default_timer()
ai.speech_to_text()
endtime1 = timeit.default_timer()
print("speech_to_text() Time = ", (endtime1 - starttime1))
## wake up
if ai.wake_up(ai.text) is True:
#remove Sapphire from phrase
ai.text = ai.text.lower().replace(ai.name.lower(), "", 1)
if start == 0:
res = "Hello I am Sapphire the AI, what can I do for you?"
start = 1
else:
res = parse_input(ai.text)
ai.text_to_speech(res)
if __name__ == "__main__":
os.environ["TOKENIZERS_PARALLELISM"] = "true"
# sapphire_email()
threading.Thread(target=sapphire_email).start()
threading.Thread(target=sapphire_audio).start()