I am working on a project in which I need to extract the audio from a video and then transcribe that audio using the Google speech recognizer. The problem is that, when extracting the audio from the video, instead of extracting only the segment I want, it extracts the entire audio track. As a result I cannot transcribe it, because the audio is 8 minutes long and the maximum that Google allows is 3 minutes. I have attached the code below in case it is helpful.
import numpy
from numpy import clip
import speech_recognition as sr
import moviepy.editor as mp
from moviepy.editor import *
# Extract one segment of a video's audio track, transcribe it with the Google
# recognizer, and append the transcript to a text file.

# Bounds (in seconds) of the segment of the video to transcribe.
SEGMENT_START_S = 0
SEGMENT_END_S = 100

# Read the video and keep only the requested segment.
clip = mp.VideoFileClip("video.mp4")
try:
    clip_final = clip.subclip(SEGMENT_START_S, SEGMENT_END_S)
    # Extract the audio of that segment to a WAV file.
    clip_final.audio.write_audiofile("video.wav")
finally:
    # Release the underlying readers even if the export fails.
    clip.close()

# Start the speech recognizer.
r = sr.Recognizer()
# Open the extracted audio file.
audio = sr.AudioFile("video.wav")
print("audio extraido")

# Read the audio into memory.
with audio as source:
    # NOTE(review): per the SpeechRecognition docs this calibration consumes
    # audio from the start of the stream (1 second by default), so that
    # portion is not included in the recording below - confirm that is
    # acceptable.
    r.adjust_for_ambient_noise(source)
    # Cap how much audio is read so the request stays bounded even if the
    # WAV file turns out to be longer than the requested segment.
    clean_file = r.record(source, duration=SEGMENT_END_S - SEGMENT_START_S)
print("lectura terminada")

# Recognize the speech in the audio (Spanish, Spain). A different backend,
# e.g. r.recognize_ibm(...), could be substituted here.
result = r.recognize_google(clean_file, language="es-ES")
print("")
print(result)

# Append the text to the output file; the encoding is explicit because the
# transcript is Spanish text and may contain non-ASCII characters.
with open('video.txt', 'a', encoding="utf-8") as out_file:
    out_file.write("RECOGNIZED SPEECH: \n")
    out_file.write(result)
print("\nEscritura terminada")