5

I have installed pytube to extract captions from some YouTube videos. Both of the following snippets give me the XML captions.

# Look up a caption track by its code ('a.en' is the auto-generated English
# track) and print the raw XML caption document.
from pytube import YouTube
yt = YouTube('https://www.youtube.com/watch?v=4ZQQofkz9eE')
caption = yt.captions['a.en']
print(caption.xml_captions)

and also as mentioned in the docs

# Same lookup, but selecting the track by language code as shown in the docs.
yt = YouTube('http://youtube.com/watch?v=2lAe1cqCOXo')
caption = yt.captions.get_by_language_code('en')
# Bare expression: the XML is only displayed when run in an interactive session.
caption.xml_captions

But in both cases I get the XML output, and when I use

print(caption.generate_srt_captions())

I get an error like the following. Can you help me extract the captions in SRT format?

KeyError
~/anaconda3/envs/myenv/lib/python3.6/site-packages/pytube/captions.py in 
generate_srt_captions(self)
49         recompiles them into the "SubRip Subtitle" format.
50         """
51         return self.xml_caption_to_srt(self.xml_captions)
52 
53     @staticmethod

~/anaconda3/envs/myenv/lib/python3.6/site-packages/pytube/captions.py in 
xml_caption_to_srt(self, xml_captions)
81             except KeyError:
82                 duration = 0.0
83             start = float(child.attrib["start"])
84             end = start + duration
85             sequence_number = i + 1  # convert from 0-indexed to 1.

KeyError: 'start'
Sandra
  • 91
  • 8

4 Answers

6

This is a bug in the library itself. Everything below is done in pytube 11.01. In the captions.py file on line 76 replace:

for i, child in enumerate(list(root)):

to:

for i, child in enumerate(list(root.findall('body/p'))):

Then on line 83, replace:

duration = float(child.attrib["dur"])

to:

duration = float(child.attrib["d"])

Then on line 86, replace:

start = float(child.attrib["start"])

to:

start = float(child.attrib["t"])

If only the sequence numbers and timestamps are displayed but no subtitle text, replace line 77:

text = child.text or ""

to:

text = ''.join(child.itertext()).strip()
if not text:
    continue

It worked for me, python 3.9, pytube 11.01. Good luck!

  • This worked perfectly! I found my file path to be here: `/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pytube/captions.py` – JayRizzo Dec 26 '22 at 00:37
6

I did some work on the source code of the file captions.py. Just replace the entire code of this file with this code:

import math
import os
import time
import xml.etree.ElementTree as ElementTree
from html import unescape
from typing import Dict, Optional

from pytube import request
from pytube.helpers import safe_filename, target_directory


class Caption:
    """Container for a single caption track.

    In this variant all timing values (the ``t`` and ``d`` attributes of
    each ``<p>`` element, and the argument of
    :meth:`float_to_srt_time_format`) are handled as milliseconds.
    """

    def __init__(self, caption_track: Dict):
        """Construct a :class:`Caption <Caption>`.

        :param dict caption_track:
            Caption track metadata. ``name`` and ``vssId`` are required
            keys; ``baseUrl`` is read if present.
        """
        self.url = caption_track.get("baseUrl")

        # The display name is stored either as ``simpleText`` or as a list
        # of ``runs``; with runs, the last entry carrying ``text`` wins.
        name_dict = caption_track['name']
        if 'simpleText' in name_dict:
            self.name = name_dict['simpleText']
        else:
            for el in name_dict['runs']:
                if 'text' in el:
                    self.name = el['text']

        # ``vssId`` looks like ".en" or "a.en"; drop the surrounding dots so
        # the code reads "en" / "a.en".
        self.code = caption_track["vssId"].strip('.')

    @property
    def xml_captions(self) -> str:
        """Download the xml caption tracks from :attr:`url`."""
        return request.get(self.url)

    def generate_srt_captions(self) -> str:
        """Download the xml captions and convert them to "SubRip Subtitle"."""
        return self.xml_caption_to_srt(self.xml_captions)

    @staticmethod
    def float_to_srt_time_format(d: float) -> str:
        """Convert a time in milliseconds into the srt ``HH:MM:SS,mmm`` form.

        The value is rounded to a whole millisecond *before* being split
        into fields, so a fraction that rounds up carries into the seconds
        instead of producing a malformed ``,1.000`` suffix. Hours are not
        wrapped at 24.

        :param float d:
            Time offset in milliseconds.
        :rtype: str
        :returns:
            SubRip Subtitle (str) formatted time duration.

        float_to_srt_time_format(3890) -> '00:00:03,890'
        """
        total_ms = int(round(d))
        total_seconds, millis = divmod(total_ms, 1000)
        total_minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(total_minutes, 60)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"

    def xml_caption_to_srt(self, xml_captions: str) -> str:
        """Convert xml caption tracks to "SubRip Subtitle (srt)".

        Paragraphs whose text is empty after stripping are skipped and do
        not consume a sequence number.

        :param str xml_captions:
            XML formatted caption tracks; cues are read from ``body/p``.
        :rtype: str
        :returns:
            The srt document, cues separated by a blank line.
        """
        # Look the paragraphs up once; they are also indexed below.
        paragraphs = ElementTree.fromstring(xml_captions).findall('body/p')
        segments = []
        for i, child in enumerate(paragraphs):
            text = ''.join(child.itertext()).strip()
            if not text:
                continue
            caption = unescape(text.replace("\n", " ").replace("  ", " "))
            try:
                duration = float(child.attrib["d"])
            except KeyError:
                duration = 0.0
            start = float(child.attrib["t"])
            # Heuristic kept from the original: a cue ends where the
            # paragraph two positions ahead starts. NOTE(review): this
            # presumably targets tracks that interleave blank paragraphs
            # between text paragraphs -- confirm for manually authored
            # tracks. Fall back to start + duration when that paragraph is
            # missing or has no usable ``t``.
            try:
                end = float(paragraphs[i + 2].attrib["t"])
            except (IndexError, KeyError, ValueError):
                end = start + duration
            segments.append(
                "{seq}\n{start} --> {end}\n{text}\n".format(
                    seq=len(segments) + 1,
                    start=self.float_to_srt_time_format(start),
                    end=self.float_to_srt_time_format(end),
                    text=caption,
                )
            )

        return "\n".join(segments).strip()

    def download(
        self,
        title: str,
        srt: bool = True,
        output_path: Optional[str] = None,
        filename_prefix: Optional[str] = None,
    ) -> str:
        """Write the caption track to disk.

        :param title:
            Output filename stem; a trailing ``.srt`` or ``.xml`` is removed.
        :type title: str
        :param srt:
            True to write srt, False to write the raw xml. Defaults to True.
        :type srt: bool
        :param output_path:
            (optional) Directory passed to ``target_directory``.
        :type output_path: str or None
        :param filename_prefix:
            (optional) A string that will be prepended to the filename.
        :type filename_prefix: str or None

        :rtype: str
        :returns:
            Path of the written file.
        """
        if title.endswith((".srt", ".xml")):
            filename = title.rsplit(".", 1)[0]
        else:
            filename = title

        if filename_prefix:
            filename = f"{safe_filename(filename_prefix)}{filename}"

        filename = safe_filename(filename)

        # Tag the file with the track code, then the format extension.
        filename += f" ({self.code})"
        filename += ".srt" if srt else ".xml"

        file_path = os.path.join(target_directory(output_path), filename)

        with open(file_path, "w", encoding="utf-8") as file_handle:
            if srt:
                file_handle.write(self.generate_srt_captions())
            else:
                file_handle.write(self.xml_captions)

        return file_path

    def __repr__(self):
        """Printable object representation."""
        return '<Caption lang="{s.name}" code="{s.code}">'.format(s=self)
0
# Apparently YouTube changed their captions format.

# Here's my version of the xml_caption_to_srt function in captions.py

I found this good
   

     def xml_caption_to_srt(self, xml_captions: str) -> str:
         """Convert xml caption tracks to "SubRip Subtitle (srt)".

         Cues are the ``<p>`` children of the first ``<body>`` element. A
         paragraph without child elements contributes its own text; otherwise
         the texts of its ``<s>`` children are joined with spaces. The ``t``
         and ``d`` attributes are divided by 1000 before being handed to
         ``self.float_to_srt_time_format``. Paragraphs that end up with no
         text are skipped and do not consume a sequence number.

         :param str xml_captions:
             XML formatted caption tracks.
         :rtype: str
         :returns:
             The srt document, or an empty string when there is no ``<body>``.
         """
         root = ElementTree.fromstring(xml_captions)
         body = next(root.iter("body"), None)
         if body is None:
             return ""

         segments = []
         for child in body:
             if child.tag != 'p':
                 continue
             # Element.text is None for empty elements; treat that as "".
             if len(child) == 0:
                 pieces = [child.text or ""]
             else:
                 pieces = [s.text or "" for s in child if s.tag == 's']
             caption = unescape(
                 " ".join(pieces).replace("\n", " ").replace("  ", " ")
             ).strip()
             if not caption:
                 continue
             try:
                 duration = float(child.attrib["d"]) / 1000.0
             except KeyError:
                 duration = 0.0
             start = float(child.attrib["t"]) / 1000.0
             end = start + duration
             segments.append(
                 "{seq}\n{start} --> {end}\n{text}\n".format(
                     seq=len(segments) + 1,
                     start=self.float_to_srt_time_format(start),
                     end=self.float_to_srt_time_format(end),
                     text=caption,
                 )
             )
         return "\n".join(segments).strip()
    
        
0

To further support and add to yaroslav-miklin's example.

I found the file on my mac under:

/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pytube/captions.py

This is the functioning code with yaroslav-miklin's changes.

import math
import os
import time
import xml.etree.ElementTree as ElementTree
from html import unescape
from typing import Dict, Optional
from pytube import request
from pytube.helpers import safe_filename, target_directory

class Caption:
    """Container for caption tracks."""

    def __init__(self, caption_track: Dict):
        """Construct a :class:`Caption <Caption>`.

        :param dict caption_track:
            Caption track data extracted from ``watch_html``.
        """
        self.url = caption_track.get("baseUrl")

        # Certain videos have runs instead of simpleText
        #  this handles that edge case
        name_dict = caption_track['name']
        if 'simpleText' in name_dict:
            self.name = name_dict['simpleText']
        else:
            for el in name_dict['runs']:
                if 'text' in el:
                    self.name = el['text']

        # Use "vssId" instead of "languageCode", fix issue #779
        # Strip the surrounding '.' for backwards compatibility, e.g.:
        # English -> vssId: .en, languageCode: en
        # English (auto-generated) -> vssId: a.en, languageCode: en
        self.code = caption_track["vssId"].strip('.')

    @property
    def xml_captions(self) -> str:
        """Download the xml caption tracks."""
        return request.get(self.url)

    def generate_srt_captions(self) -> str:
        """Generate "SubRip Subtitle" captions.

        Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
        recompiles them into the "SubRip Subtitle" format.
        """
        return self.xml_caption_to_srt(self.xml_captions)

    @staticmethod
    def float_to_srt_time_format(d: float) -> str:
        """Convert decimal durations (in seconds) into proper srt format.

        The value is rounded to a whole millisecond *before* being split
        into fields, so a fraction that rounds up carries into the seconds
        instead of producing a malformed ``,1.000`` suffix. Hours are not
        wrapped at 24.

        :param float d:
            Time offset in seconds.
        :rtype: str
        :returns:
            SubRip Subtitle (str) formatted time duration.

        float_to_srt_time_format(3.89) -> '00:00:03,890'
        """
        total_ms = int(round(d * 1000))
        total_seconds, millis = divmod(total_ms, 1000)
        total_minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(total_minutes, 60)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"

    def xml_caption_to_srt(self, xml_captions: str) -> str:
        """Convert xml caption tracks to "SubRip Subtitle (srt)".

        Cues are read from ``body/p``; each paragraph's text is the
        concatenation of all text nested inside it. Paragraphs with no text
        are skipped and do not consume a sequence number, so the output is
        numbered 1, 2, 3, ... without gaps.

        :param str xml_captions:
            XML formatted caption tracks.
        :rtype: str
        :returns:
            The srt document, cues separated by a blank line.
        """
        segments = []
        root = ElementTree.fromstring(xml_captions)
        for child in root.findall('body/p'):
            text = ''.join(child.itertext()).strip()
            if not text:
                continue
            caption = unescape(text.replace("\n", " ").replace("  ", " "))
            # The "t" (start) and "d" (duration) attributes are expressed in
            # milliseconds, while float_to_srt_time_format expects seconds.
            try:
                duration = float(child.attrib["d"]) / 1000.0
            except KeyError:
                duration = 0.0
            start = float(child.attrib["t"]) / 1000.0
            end = start + duration
            segments.append(
                "{seq}\n{start} --> {end}\n{text}\n".format(
                    seq=len(segments) + 1,  # count emitted cues only
                    start=self.float_to_srt_time_format(start),
                    end=self.float_to_srt_time_format(end),
                    text=caption,
                )
            )
        return "\n".join(segments).strip()

    def download(
        self,
        title: str,
        srt: bool = True,
        output_path: Optional[str] = None,
        filename_prefix: Optional[str] = None,
    ) -> str:
        """Write the caption track to disk.

        :param title:
            Output filename (stem only) for writing the caption file.
            A trailing ``.srt`` or ``.xml`` is removed.
        :type title: str
        :param srt:
            Set to True to download srt, false to download xml. Defaults to True.
        :type srt: bool
        :param output_path:
            (optional) Output path for writing the caption file. If one is
            not specified, defaults to the current working directory.
        :type output_path: str or None
        :param filename_prefix:
            (optional) A string that will be prepended to the filename.
            For example a number in a playlist or the name of a series.
            If one is not specified, nothing will be prepended
            This is separate from filename so you can use the default
            filename but still add a prefix.
        :type filename_prefix: str or None

        :rtype: str
        :returns:
            Path of the written file.
        """
        if title.endswith((".srt", ".xml")):
            filename = title.rsplit(".", 1)[0]
        else:
            filename = title

        if filename_prefix:
            filename = f"{safe_filename(filename_prefix)}{filename}"

        filename = safe_filename(filename)

        # Tag the file with the track code, then the format extension.
        filename += f" ({self.code})"
        filename += ".srt" if srt else ".xml"

        file_path = os.path.join(target_directory(output_path), filename)

        with open(file_path, "w", encoding="utf-8") as file_handle:
            if srt:
                file_handle.write(self.generate_srt_captions())
            else:
                file_handle.write(self.xml_captions)

        return file_path

    def __repr__(self):
        """Printable object representation."""
        return '<Caption lang="{s.name}" code="{s.code}">'.format(s=self)

After saving the file I was now able to run the following code to get my desired result.

# Fetch the auto-generated English track ('a.en') and print the video title
# followed by the captions converted to SRT.
from pytube import YouTube
yt = YouTube('https://www.youtube.com/watch?v=pvDOnqGbghU')
caption = yt.captions['a.en']
print(f"{yt.title}:\n {caption.generate_srt_captions()}")
Tom MacDonald - Angels (Lyrics):
...
165
22:34:40,000 --> 23:37:20,000
and the truth ain't pretty listen up

167
23:04:00,000 --> 00:08:00,000
this is a tough one you get saved by

169
23:37:20,000 --> 00:58:39,000
your angels or become one

171
00:08:00,000 --> 01:40:00,000
crash the whip and ditch the car burn a

173
00:58:39,000 --> 02:25:19,000
bridge follow the stars you'll find

175
01:40:00,000 --> 02:58:39,000
monsters in the dark but nothing's worth

177
02:25:19,000 --> 03:49:20,000
it till it's hard

JayRizzo
  • 3,234
  • 3
  • 33
  • 49