0

I am trying to combine image files and audio files to create videos.

Libraries used:

from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
from moviepy.editor import AudioFileClip, ImageClip, VideoClip
from PIL import Image, ImageDraw, ImageFont

I have the text content in English and Korean on a csv file. I loaded them into a dataframe at first:

# Loading csv into pandas dataframe
df = pd.read_csv('/content/phrases.csv')

# Creating Korean and English text list from datafram
input_text = df['Korean'].tolist()
input_translation = df['English'].tolist()

# Setting Language for TTS
language = 'ko' # zh = Chinese, en = English, ko = Korean

Then, I create TTS audio files (Korean and English) and joined them. This is the function I used and it works fine:

# Generating Korean and English TTS Audio and combining them into an mp3 file
def generateAudio(itext, itranslation): 
    for i in range(len(itext)):

        file_name = itext[i]

        normal = gTTS(text = itext[i], lang=language, slow=False)
        audio_normal = f'/content/temp/{file_name}_normal.mp3'
        normal.save(audio_normal)

        slow = gTTS(text = itext[i], lang=language, slow=True)
        audio_slow = f'/content/temp/{file_name}_slow.mp3'
        slow.save(audio_slow)

        english = gTTS(text = itranslation[i], lang='en', slow=False)
        audio_english = f'/content/temp/{file_name}_english.mp3'
        english.save(audio_english)

        playlist_songs = [AudioSegment.from_mp3(audio_normal), AudioSegment.silent(duration=1000), AudioSegment.from_mp3(audio_slow), AudioSegment.silent(duration=1000), AudioSegment.from_mp3(audio_english)]

        combined = AudioSegment.empty()
        for song in playlist_songs:
            combined += song
        combined.export(f'/content/mp3/{file_name}.mp3', format="mp3")

generateAudio(input_text, input_translation)

Next, I used this function to create images also for each sentence in the dataframe and it also works fine:

def genImage(input_text):
    width = 512
    height = 512
    font = ImageFont.truetype("/content/NotoSerifKR-Regular.otf", size=30)
    i = 0
    for line in input_text:
        file_name = input_text[i]
        message = input_text[i] + "\n" + input_translation[i]
        i += 1
        img = Image.new('RGB', (width, height), color='#9599DF')
        imgDraw = ImageDraw.Draw(img)

        textWidth, textHeight = imgDraw.textsize(message, font=font)
        xText = (width - textWidth) / 2
        yText = (height - textHeight) / 2

        imgDraw.text((xText, yText), message, font=font, fill=(241, 242, 228))
        img.save(f'/content/img/{file_name}.png')
genImage(input_text)

Finally, I want to merge the images in png format and audios in mp3 format. So I used this function but I am getting error

def mp3PNGMerge(input_text):
    
    for i in range(len(input_text)):
      file_name = input_text[i]
      audio_clip = AudioFileClip(f'/content/mp3/{file_name}.mp3')
      image_clip = ImageClip(f'/content/img/{file_name}.png')
      
      video_clip = VideoClip(image_clip)
      video_clip.set_audio(audio_clip)
      video_clip.duration = audio_clip.duration
      
      video_clip.fps = 30
      video_clip.write_videofile(f'/content/video/{file_name}.mp4')

mp3PNGMerge(input_text)

I am getting this error:

TypeError: 'ImageClip' object is not callable

AutumnRain
  • 25
  • 4

0 Answers0