0

I am doing a project in which I need to extract the audio from a video and then transcribe that audio using google speech recognizer. The problem comes that when extracting the audio from the video, instead of extracting the piece that I want to extract, it extracts the entire audio so I can not transcribe that because the audios are 8 minutes and the maximum that google allows is 3. I attach the code below in case it is helpful.

import numpy 
from numpy import clip
import speech_recognition as sr
import moviepy.editor as mp
from moviepy.editor import *

#lectura de video
clip = mp.VideoFileClip("video.mp4")

clip_final=clip.subclip(0,100)

#extraemos el audio
clip_final.audio.write_audiofile("video.wav")

#iniciamos el speechrecognition
r = sr.Recognizer()

#leemos el archivo audio extraido
audio = sr.AudioFile("video.wav")
print("audio extraido")

#lectura de audio
with audio as source:
    r.adjust_for_ambient_noise(source)
    clean_file = r.record(source)

print("lectura terminada")
#reconocemos la voz del audio
result = r.recognize_google(clean_file,language="es-ES")
#result = r.recognize_ibm(clean_file, username="apkikey", password= "your API Key")


print("")
print(result)
#escribimos el texto en un nuevo archivo txt
with open('video.txt','a+') as file:
    file.write("RECOGNIZED SPEECH: \n") 
    file.write(result)
    #file.write(" "+result)

print("\nEscritura terminada")
bahrep
  • 29,961
  • 12
  • 103
  • 150
Letie18
  • 1
  • 1

0 Answers0