I want to compress my movies automatically. So I've written a mediainfo wrapper class in python, to generate a xml output, which I then parse to a movieinfo class, with a list of audio and subtitle tracks.
__author__ = 'dominik'
class Error(Exception):
""" Error class
"""
class ValidationError(Error):
""" Invalid or missing xml items
"""
class MovieInfo(object):
""" Description of movie file
"""
def __init__(self, media_info):
self._video_track = None
self._audio_tracks = []
self._subtitle_tracks = []
self.valid_movie = True
for track in media_info.tracks:
if track.track_type == "Audio":
self._audio_tracks.append(AudioTrack(track))
elif track.track_type == "Text":
self._subtitle_tracks.append(SubtitleTrack(track))
elif track.track_type == "Video":
self._video_track = VideoTrack(track)
@property
def audio_tracks(self):
if not hasattr(self, "_audio_tracks"):
self._audio_tracks = []
if len(self._audio_tracks) != 0:
return self._audio_tracks
@property
def subtitle_tracks(self):
if not hasattr(self, "_subtitle_tracks"):
self._subtitle_tracks = []
if len(self._subtitle_tracks) != 0:
return self._subtitle_tracks
class Track(object):
""" Abstract track class for audio and subtitle tracks
"""
__KNOWN_LANGUAGE_CODES = {"en": "ENG", "de": "DE"}
def __init__(self, track, valid_codecs):
self._valid = True
track_id = int(track.id)
codec_id = self._determine_codec(track.codec_id, valid_codecs)
language = self._determine_language(track.language)
self._id = track_id
self._codec_id = codec_id
self._language = language
def _determine_codec(self, track_codec, types):
result = types.get(track_codec, None)
if result is None:
self._valid = False
return result
def _determine_language(self, track_language, types=__KNOWN_LANGUAGE_CODES):
result = types.get(track_language, None)
if result is None:
self._valid = False
return result
class AudioTrack(Track):
""" Audio track class
"""
__KNOWN_AUDIO_CODECS = {"A_DTS": "DTS", "A_AC3": "AC3"}
def __init__(self, track):
self._type = 1
Track.__init__(self, track, self.__KNOWN_AUDIO_CODECS)
class SubtitleTrack(Track):
""" Subtitle track class
"""
__KNOWN_SUBTITLE_CODECS = {"S_VOBSUB": "VOBSUB"}
def __init__(self, track):
self._type = 2
if track.forced == "Yes":
self._forced = True
else:
self._forced = False
Track.__init__(self, track, self.__KNOWN_SUBTITLE_CODECS)
class VideoTrack(object):
""" Video track class (only one video track in movie info!)
"""
def __init__(self, track):
self._type = 0
self._framerate = float(track.frame_rate)
self._width = track.width
self._height = track.height
Here is the mediainfo class (it's the pymediainfo class):
from subprocess import Popen
import os
from tempfile import mkstemp
from bs4 import BeautifulSoup, NavigableString
from setuptools.compat import unicode
class Track(object):
""" Hold the track information
"""
def __getattr__(self, item):
try:
return object.__getattribute__(self, item)
except:
pass
return None
def __init__(self, xml_track):
self.xml_track = xml_track
self.track_type = xml_track.attrs["type"]
for child in self.xml_track.children:
if not isinstance(child, NavigableString):
node_name = child.name.lower().strip()
node_value = unicode(child.string)
node_other_name = "other_%s" % node_name
if getattr(self, node_name) is None:
setattr(self, node_name, node_value)
else:
if getattr(self, node_other_name) is None:
setattr(self, node_other_name, [node_value, ])
else:
getattr(self, node_other_name).append(node_value)
for key in [c for c in self.__dict__.keys() if c.startswith("other_")]:
try:
primary = key.replace("other_", "")
setattr(self, primary, int(getattr(self, primary)))
except:
for value in getattr(self, key):
try:
actual = getattr(self, primary)
setattr(self, primary, int(value))
getattr(self, key).append(actual)
break
except:
pass
def __repr__(self):
return("<Track id='{0}', type='{1}'>".format(self.id, self.track_type))
def to_data(self):
data = {}
for k, v in self.__dict__.items():
if k != 'xml_track':
data[k] = v
return data
class Mediainfo(object):
""" MediaInfo wrapper
"""
def __init__(self, xml):
self.xml_dom = xml
if isinstance(xml, str):
self.xml_dom = BeautifulSoup(xml, "xml")
def _populate_tracks(self):
if self.xml_dom is None:
return
for xml_track in self.xml_dom.Mediainfo.File.find_all("track"):
self._tracks.append(Track(xml_track))
@property
def tracks(self):
if not hasattr(self, "_tracks"):
self._tracks = []
if len(self._tracks) == 0:
self._populate_tracks()
return self._tracks
@staticmethod
def parse(filename):
filehandler_out, filename_out = mkstemp(".xml", "mediainfo-")
filehandler_err, filename_err = mkstemp(".error", "mediainfo-")
filepointer_out = os.fdopen(filehandler_out, "r+b")
filepointer_err = os.fdopen(filehandler_err, "r+b")
mediainfo_command = ["mediainfo", "-f", "--Output=XML", filename]
p = Popen(mediainfo_command, stdout=filepointer_out, stderr=filepointer_err)
p.wait()
filepointer_out.seek(0)
xml_dom = BeautifulSoup(filepointer_out.read(), "xml")
filepointer_out.close()
filepointer_err.close()
print(xml_dom)
return Mediainfo(xml_dom)
def to_data(self):
data = {'tracks': []}
for track in self.tracks:
data['tracks'].append(track.to_data())
return data
This class gives me every track in the xml and then I parse the relevant info in movieinfo.
Ok now I have a list of audiotracks e.g. 3 tracks one in german language and DTS, one in german and AC3 and one in english and AC3. Now I want to get the ids from the tracks in the format "1,2,3" to give it to handbrake cli.
My problem is the order of the tracks. If there is a german DTS track this schould be the first track, the second track should be also the first, but compressed to aac and the third track should be one english track in AAC. If there is only a german AC3 track then the first track should be this track but compressed to AAC, and the second track should englisch and AAC. I don't know exactly how I can achive that, can you help me? I'm new to python, and come from C, C++ and C#. In C# this is very easy to get with lambda.