Merge branch 'ted_subtitles'

This commit is contained in:
Ismaël Mejía 2013-11-02 19:50:45 +01:00
commit 38db46794f
5 changed files with 97 additions and 14 deletions

View file

@ -141,9 +141,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
raise ExtractorError(u'Unable to extract video URL')
# subtitles
video_subtitles = self.extract_subtitles(video_id)
video_subtitles = self.extract_subtitles(video_id, webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id)
self._list_available_subtitles(video_id, webpage)
return
return {
@ -157,7 +157,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'age_limit': age_limit,
}
def _get_available_subtitles(self, video_id):
def _get_available_subtitles(self, video_id, webpage):
try:
sub_list = self._download_webpage(
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,

View file

@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
return any([self._downloader.params.get('writesubtitles', False),
self._downloader.params.get('writeautomaticsub')])
def _list_available_subtitles(self, video_id, webpage=None):
def _list_available_subtitles(self, video_id, webpage):
""" outputs the available subtitles for the video """
sub_lang_list = self._get_available_subtitles(video_id)
sub_lang_list = self._get_available_subtitles(video_id, webpage)
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
sub_lang = ",".join(list(sub_lang_list.keys()))
self.to_screen(u'%s: Available subtitles for video: %s' %
@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
self.to_screen(u'%s: Available automatic captions for video: %s' %
(video_id, auto_lang))
def extract_subtitles(self, video_id, video_webpage=None):
def extract_subtitles(self, video_id, webpage):
"""
returns {sub_lang: sub} ,{} if subtitles not found or None if the
subtitles aren't requested.
@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
return None
available_subs_list = {}
if self._downloader.params.get('writeautomaticsub', False):
available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
if self._downloader.params.get('writesubtitles', False):
available_subs_list.update(self._get_available_subtitles(video_id))
available_subs_list.update(self._get_available_subtitles(video_id, webpage))
if not available_subs_list: # error, it didn't get the available subtitles
return {}
@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
return
return sub
def _get_available_subtitles(self, video_id):
def _get_available_subtitles(self, video_id, webpage):
"""
returns {sub_lang: url} or {} if not available
Must be redefined by the subclasses

View file

@ -1,10 +1,9 @@
import json
import re
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
class TEDIE(InfoExtractor):
class TEDIE(SubtitlesInfoExtractor):
_VALID_URL=r'''http://www\.ted\.com/
(
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
@ -82,11 +81,21 @@ class TEDIE(InfoExtractor):
'url': stream['file'],
'format': stream['id']
} for stream in info['htmlStreams']]
video_id = info['id']
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, webpage)
return
info = {
'id': info['id'],
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'description': desc,
'subtitles': video_subtitles,
'formats': formats,
}
@ -94,3 +103,14 @@ class TEDIE(InfoExtractor):
info.update(info['formats'][-1])
return info
def _get_available_subtitles(self, video_id, webpage):
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
languages = re.findall(r'(?:<option value=")(\S+)"', options)
if languages:
sub_lang_list = {}
for l in languages:
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
sub_lang_list[l] = url
return sub_lang_list
return {}

View file

@ -1094,7 +1094,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _get_available_subtitles(self, video_id):
def _get_available_subtitles(self, video_id, webpage):
try:
sub_list = self._download_webpage(
'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,