[dailymotion] Added support for subtitles + new InfoExtractor for
generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to get complete subtitle download functionality. This will make it possible to reduce the code in YoutubeIE once it is rewritten.
This commit is contained in:
parent
5898e28272
commit
953e32b2c1
4 changed files with 242 additions and 11 deletions
|
@ -1,14 +1,49 @@
|
|||
import re
|
||||
import json
|
||||
import itertools
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesIE
|
||||
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
get_element_by_attribute,
|
||||
get_element_by_id,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
class DailymotionIE(InfoExtractor):
|
||||
|
||||
class DailyMotionSubtitlesIE(SubtitlesIE):
    """Subtitle hooks for Dailymotion, implemented on top of SubtitlesIE."""

    def _get_available_subtitles(self, video_id):
        """Ask the Dailymotion API which subtitles exist for ``video_id``.

        Returns a dict mapping each language code to the URL of its
        subtitle file. An empty dict is returned, after reporting a
        warning, when the request fails, the answer is not usable JSON,
        or the video has no subtitles.
        """
        request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
        try:
            response = compat_urllib_request.urlopen(request)
            try:
                sub_list = response.read().decode('utf-8')
            finally:
                # Release the connection even if reading or decoding fails.
                response.close()
            # ValueError covers both an undecodable body and invalid JSON.
            info = json.loads(sub_list)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error, ValueError) as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}

        # An answer without 'total' / 'list' (or one that is not a JSON
        # object at all) is treated like "no subtitles" instead of raising.
        total = info.get('total', 0) if isinstance(info, dict) else 0
        if total > 0:
            return dict(
                (entry['language'], entry['url'])
                for entry in info.get('list', []))

        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}

    def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
        """Return the subtitle URL for ``sub_lang``.

        The URL is looked up in a fresh result of
        ``_get_available_subtitles``; ``sub_name`` and ``format`` are not
        used. Raises KeyError when ``sub_lang`` is not among the available
        languages.
        """
        sub_lang_list = self._get_available_subtitles(video_id)
        return sub_lang_list[sub_lang]

    def _request_automatic_caption(self, video_id, webpage):
        """Report that automatic captions are unsupported; return {}."""
        self._downloader.report_warning(u'Automatic Captions not supported by dailymotion')
        return {}
|
||||
|
||||
|
||||
class DailymotionIE(DailyMotionSubtitlesIE): #,InfoExtractor):
|
||||
"""Information Extractor for Dailymotion"""
|
||||
|
||||
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
|
||||
|
@ -18,7 +53,7 @@ class DailymotionIE(InfoExtractor):
|
|||
u'file': u'x33vw9.mp4',
|
||||
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
|
||||
u'info_dict': {
|
||||
u"uploader": u"Alex and Van .",
|
||||
u"uploader": u"Alex and Van .",
|
||||
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
|
||||
}
|
||||
}
|
||||
|
@ -57,17 +92,36 @@ class DailymotionIE(InfoExtractor):
|
|||
|
||||
# TODO: support choosing qualities
|
||||
|
||||
for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
|
||||
'stream_h264_hq_url','stream_h264_url',
|
||||
for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url',
|
||||
'stream_h264_hq_url', 'stream_h264_url',
|
||||
'stream_h264_ld_url']:
|
||||
if info.get(key):#key in info and info[key]:
|
||||
if info.get(key): # key in info and info[key]:
|
||||
max_quality = key
|
||||
self.to_screen(u'Using %s' % key)
|
||||
self.to_screen(u'%s: Using %s' % (video_id, key))
|
||||
break
|
||||
else:
|
||||
raise ExtractorError(u'Unable to extract video URL')
|
||||
video_url = info[max_quality]
|
||||
|
||||
# subtitles
|
||||
video_subtitles = None
|
||||
video_webpage = None
|
||||
|
||||
if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
|
||||
video_subtitles = self._extract_subtitles(video_id)
|
||||
elif self._downloader.params.get('writeautomaticsub', False):
|
||||
video_subtitles = self._request_automatic_caption(video_id, video_webpage)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id)
|
||||
return
|
||||
|
||||
if 'length_seconds' not in info:
|
||||
self._downloader.report_warning(u'unable to extract video duration')
|
||||
video_duration = ''
|
||||
else:
|
||||
video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
|
@ -75,5 +129,6 @@ class DailymotionIE(InfoExtractor):
|
|||
'upload_date': video_upload_date,
|
||||
'title': self._og_search_title(webpage),
|
||||
'ext': video_extension,
|
||||
'subtitles': video_subtitles,
|
||||
'thumbnail': info['thumbnail_url']
|
||||
}]
|
||||
|
|
80
youtube_dl/extractor/subtitles.py
Normal file
80
youtube_dl/extractor/subtitles.py
Normal file
|
@ -0,0 +1,80 @@
|
|||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class SubtitlesIE(InfoExtractor):
    """Base class for extractors that can download subtitles.

    Subclasses redefine ``_get_available_subtitles``,
    ``_get_subtitle_url`` and ``_request_automatic_caption``; the other
    methods are built on top of those three hooks.
    """

    def report_video_subtitles_available(self, video_id, sub_lang_list):
        """Report available subtitles."""
        # Iterating a dict yields its keys, i.e. the language codes.
        sub_lang = ",".join(sub_lang_list)
        self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))

    def _list_available_subtitles(self, video_id):
        """Print the subtitle languages available for ``video_id``."""
        # Tolerate a hook that returns None instead of an empty dict.
        sub_lang_list = self._get_available_subtitles(video_id) or {}
        self.report_video_subtitles_available(video_id, sub_lang_list)

    def _extract_subtitles(self, video_id):
        """
        Return a dictionary: {language: subtitles} or {} if the subtitles
        couldn't be found.

        With the 'writesubtitles' option only one language is fetched: the
        one given by 'subtitleslang', else 'en' when available, else the
        first available language. Otherwise every available language is
        fetched.
        """
        sub_lang_list = self._get_available_subtitles(video_id)
        if not sub_lang_list:
            # There was some error, it didn't get the available subtitles
            return {}

        params = self._downloader.params
        sub_format = params.get('subtitlesformat')

        if params.get('writesubtitles', False):
            sub_lang = params.get('subtitleslang')
            if not sub_lang:
                if 'en' in sub_lang_list:
                    sub_lang = 'en'
                else:
                    sub_lang = next(iter(sub_lang_list))
            if sub_lang not in sub_lang_list:
                self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
                return {}
            # Narrow the download down to the single selected language.
            sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}

        subtitles = {}
        for sub_lang, sub_name in sub_lang_list.items():
            # The mapping's value is handed on as sub_name, UTF-8 encoded.
            subtitle = self._request_subtitle(sub_lang, sub_name.encode('utf-8'), video_id, sub_format)
            if subtitle:
                subtitles[sub_lang] = subtitle
        return subtitles

    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
        """ Return the subtitle as a string or None if they are not found """
        self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
        url = self._get_subtitle_url(sub_lang, sub_name, video_id, format)
        try:
            response = compat_urllib_request.urlopen(url)
            try:
                sub = response.read().decode('utf-8')
            finally:
                # Release the connection even if reading or decoding fails.
                response.close()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return None
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return None
        return sub

    def _get_available_subtitles(self, video_id):
        """Get available subtitles. Redefine in subclasses.

        Returns a mapping of language to subtitle URL. This base
        implementation knows of no subtitles and returns an empty dict.
        """
        return {}

    def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
        """Return the url for the given subtitle. Redefine in subclasses.

        This base implementation returns None.
        """
        return None

    def _request_automatic_caption(self, video_id, webpage):
        """Request automatic caption. Redefine in subclasses.

        This base implementation returns None; the return format is left
        to the subclass.
        """
        return None
|
Loading…
Add table
Add a link
Reference in a new issue