I am trying to combine image files and audio files to create videos.
Libraries used:
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
from moviepy.editor import AudioFileClip, ImageClip, VideoClip
from PIL import Image, ImageDraw, ImageFont
I have the text content in English and Korean on a csv file. I loaded them into a dataframe at first:
# Loading csv into pandas dataframe
df = pd.read_csv('/content/phrases.csv')
# Creating Korean and English text list from datafram
input_text = df['Korean'].tolist()
input_translation = df['English'].tolist()
# Setting Language for TTS
language = 'ko' # zh = Chinese, en = English, ko = Korean
Then, I create TTS audio files (Korean and English) and joined them. This is the function I used and it works fine:
# Generating Korean and English TTS Audio and combining them into an mp3 file
def generateAudio(itext, itranslation):
for i in range(len(itext)):
file_name = itext[i]
normal = gTTS(text = itext[i], lang=language, slow=False)
audio_normal = f'/content/temp/{file_name}_normal.mp3'
normal.save(audio_normal)
slow = gTTS(text = itext[i], lang=language, slow=True)
audio_slow = f'/content/temp/{file_name}_slow.mp3'
slow.save(audio_slow)
english = gTTS(text = itranslation[i], lang='en', slow=False)
audio_english = f'/content/temp/{file_name}_english.mp3'
english.save(audio_english)
playlist_songs = [AudioSegment.from_mp3(audio_normal), AudioSegment.silent(duration=1000), AudioSegment.from_mp3(audio_slow), AudioSegment.silent(duration=1000), AudioSegment.from_mp3(audio_english)]
combined = AudioSegment.empty()
for song in playlist_songs:
combined += song
combined.export(f'/content/mp3/{file_name}.mp3', format="mp3")
generateAudio(input_text, input_translation)
Next, I used this function to create images also for each sentence in the dataframe and it also works fine:
def genImage(input_text):
width = 512
height = 512
font = ImageFont.truetype("/content/NotoSerifKR-Regular.otf", size=30)
i = 0
for line in input_text:
file_name = input_text[i]
message = input_text[i] + "\n" + input_translation[i]
i += 1
img = Image.new('RGB', (width, height), color='#9599DF')
imgDraw = ImageDraw.Draw(img)
textWidth, textHeight = imgDraw.textsize(message, font=font)
xText = (width - textWidth) / 2
yText = (height - textHeight) / 2
imgDraw.text((xText, yText), message, font=font, fill=(241, 242, 228))
img.save(f'/content/img/{file_name}.png')
genImage(input_text)
Finally, I want to merge the images in png format and audios in mp3 format. So I used this function but I am getting error
def mp3PNGMerge(input_text):
for i in range(len(input_text)):
file_name = input_text[i]
audio_clip = AudioFileClip(f'/content/mp3/{file_name}.mp3')
image_clip = ImageClip(f'/content/img/{file_name}.png')
video_clip = VideoClip(image_clip)
video_clip.set_audio(audio_clip)
video_clip.duration = audio_clip.duration
video_clip.fps = 30
video_clip.write_videofile(f'/content/video/{file_name}.mp4')
mp3PNGMerge(input_text)
I am getting this error:
TypeError: 'ImageClip' object is not callable