Merge remote-tracking branch 'jtwaleson/master'
This commit is contained in:
commit
784b6d3a9b
164 changed files with 574 additions and 570 deletions
|
@ -529,4 +529,4 @@ def gen_extractors():
|
|||
|
||||
def get_info_extractor(ie_name):
|
||||
"""Returns the info extractor class with the given ie_name"""
|
||||
return globals()[ie_name+'IE']
|
||||
return globals()[ie_name + 'IE']
|
||||
|
|
|
@ -5,6 +5,7 @@ import re
|
|||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AdultSwimIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
|
||||
_TEST = {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#coding: utf-8
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
|
|
@ -70,11 +70,13 @@ class AppleTrailersIE(InfoExtractor):
|
|||
uploader_id = mobj.group('company')
|
||||
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
|
||||
def fix_html(s):
|
||||
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
|
||||
s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
|
||||
# The ' in the onClick attributes are not escaped, it couldn't be parsed
|
||||
# like: http://trailers.apple.com/trailers/wb/gravity/
|
||||
|
||||
def _clean_json(m):
|
||||
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
|
||||
s = re.sub(self._JSON_RE, _clean_json, s)
|
||||
|
|
|
@ -192,4 +192,3 @@ class ARDIE(InfoExtractor):
|
|||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ from ..utils import (
|
|||
qualities,
|
||||
)
|
||||
|
||||
# There are different sources of video in arte.tv, the extraction process
|
||||
# There are different sources of video in arte.tv, the extraction process
|
||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
||||
# add tests.
|
||||
|
||||
|
|
|
@ -12,17 +12,17 @@ class AudiomackIE(InfoExtractor):
|
|||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
|
||||
IE_NAME = 'audiomack'
|
||||
_TESTS = [
|
||||
#hosted on audiomack
|
||||
# hosted on audiomack
|
||||
{
|
||||
'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
|
||||
'info_dict':
|
||||
{
|
||||
'id' : 'roosh-williams/extraordinary',
|
||||
'id': 'roosh-williams/extraordinary',
|
||||
'ext': 'mp3',
|
||||
'title': 'Roosh Williams - Extraordinary'
|
||||
}
|
||||
},
|
||||
#hosted on soundcloud via audiomack
|
||||
# hosted on soundcloud via audiomack
|
||||
{
|
||||
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
|
||||
'file': '172419696.mp3',
|
||||
|
@ -49,7 +49,7 @@ class AudiomackIE(InfoExtractor):
|
|||
raise ExtractorError("Unable to deduce api url of song")
|
||||
realurl = api_response["url"]
|
||||
|
||||
#Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# - if so, pass the work off to the soundcloud extractor
|
||||
if SoundcloudIE.suitable(realurl):
|
||||
return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
|
||||
|
|
|
@ -18,7 +18,7 @@ class BambuserIE(InfoExtractor):
|
|||
_TEST = {
|
||||
'url': 'http://bambuser.com/v/4050584',
|
||||
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
|
||||
#u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
|
||||
# u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
|
||||
'info_dict': {
|
||||
'id': '4050584',
|
||||
'ext': 'flv',
|
||||
|
|
|
@ -83,12 +83,12 @@ class BandcampIE(InfoExtractor):
|
|||
initial_url = mp3_info['url']
|
||||
re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
|
||||
m_url = re.match(re_url, initial_url)
|
||||
#We build the url we will use to get the final track url
|
||||
# We build the url we will use to get the final track url
|
||||
# This url is build in Bandcamp in the script download_bunde_*.js
|
||||
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
|
||||
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
|
||||
# If we could correctly generate the .rand field the url would be
|
||||
#in the "download_url" key
|
||||
# in the "download_url" key
|
||||
final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
|
||||
|
||||
return {
|
||||
|
|
|
@ -195,7 +195,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||
duration = int(item.get('duration'))
|
||||
|
||||
media_selection = self._download_xml(
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||
programme_id, 'Downloading media selection XML')
|
||||
|
||||
for media in self._extract_medias(media_selection):
|
||||
|
@ -220,4 +220,4 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ class BeegIE(InfoExtractor):
|
|||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
|
||||
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'<meta name="description" content="([^"]*)"',
|
||||
webpage, 'description', fatal=False)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#coding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
|
|
@ -112,4 +112,4 @@ class CanalplusIE(InfoExtractor):
|
|||
'like_count': int(infos.find('NB_LIKES').text),
|
||||
'comment_count': int(infos.find('NB_COMMENTS').text),
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -84,4 +84,4 @@ class CBSNewsIE(InfoExtractor):
|
|||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,7 +92,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||
req.add_header('Referer', url)
|
||||
|
||||
playlist = self._download_xml(req, video_id)
|
||||
|
||||
|
||||
formats = []
|
||||
for i in playlist.find('smilRoot/body'):
|
||||
if 'AD' not in i.attrib['id']:
|
||||
|
|
|
@ -5,6 +5,7 @@ import re
|
|||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class Channel9IE(InfoExtractor):
|
||||
'''
|
||||
Common extractor for channel9.msdn.com.
|
||||
|
@ -31,7 +32,7 @@ class Channel9IE(InfoExtractor):
|
|||
'session_code': 'KOS002',
|
||||
'session_day': 'Day 1',
|
||||
'session_room': 'Arena 1A',
|
||||
'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ],
|
||||
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -44,7 +45,7 @@ class Channel9IE(InfoExtractor):
|
|||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
'duration': 1540,
|
||||
'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
|
||||
'authors': [ 'Mike Wilmot' ],
|
||||
'authors': ['Mike Wilmot'],
|
||||
},
|
||||
}
|
||||
]
|
||||
|
@ -83,7 +84,7 @@ class Channel9IE(InfoExtractor):
|
|||
'format_id': x.group('quality'),
|
||||
'format_note': x.group('note'),
|
||||
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
|
||||
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
|
||||
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
|
||||
'preference': self._known_formats.index(x.group('quality')),
|
||||
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
|
||||
} for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
|
||||
|
@ -202,17 +203,17 @@ class Channel9IE(InfoExtractor):
|
|||
|
||||
if slides is not None:
|
||||
d = common.copy()
|
||||
d.update({ 'title': title + '-Slides', 'url': slides })
|
||||
d.update({'title': title + '-Slides', 'url': slides})
|
||||
result.append(d)
|
||||
|
||||
if zip_ is not None:
|
||||
d = common.copy()
|
||||
d.update({ 'title': title + '-Zip', 'url': zip_ })
|
||||
d.update({'title': title + '-Zip', 'url': zip_})
|
||||
result.append(d)
|
||||
|
||||
if len(formats) > 0:
|
||||
d = common.copy()
|
||||
d.update({ 'title': title, 'formats': formats })
|
||||
d.update({'title': title, 'formats': formats})
|
||||
result.append(d)
|
||||
|
||||
return result
|
||||
|
@ -270,5 +271,5 @@ class Channel9IE(InfoExtractor):
|
|||
else:
|
||||
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
|
||||
|
||||
else: # Assuming list
|
||||
else: # Assuming list
|
||||
return self._extract_list(content_path)
|
||||
|
|
|
@ -77,7 +77,7 @@ class CinemassacreIE(InfoExtractor):
|
|||
if videolist_url:
|
||||
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
|
||||
formats = []
|
||||
baseurl = vidurl[:vidurl.rfind('/')+1]
|
||||
baseurl = vidurl[:vidurl.rfind('/') + 1]
|
||||
for video in videolist.findall('.//video'):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
|
|
|
@ -39,6 +39,7 @@ class ClipsyndicateIE(InfoExtractor):
|
|||
transform_source=fix_xml_ampersands)
|
||||
|
||||
track_doc = pdoc.find('trackList/track')
|
||||
|
||||
def find_param(name):
|
||||
node = find_xpath_attr(track_doc, './/param', 'name', name)
|
||||
if node is not None:
|
||||
|
|
|
@ -423,17 +423,18 @@ class InfoExtractor(object):
|
|||
"""Report attempt to log in."""
|
||||
self.to_screen('Logging in')
|
||||
|
||||
#Methods for following #608
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
def url_result(url, ie=None, video_id=None):
|
||||
"""Returns a url that points to a page that should be processed"""
|
||||
#TODO: ie should be the class used for getting the info
|
||||
# TODO: ie should be the class used for getting the info
|
||||
video_info = {'_type': 'url',
|
||||
'url': url,
|
||||
'ie_key': ie}
|
||||
if video_id is not None:
|
||||
video_info['id'] = video_id
|
||||
return video_info
|
||||
|
||||
@staticmethod
|
||||
def playlist_result(entries, playlist_id=None, playlist_title=None):
|
||||
"""Returns a playlist"""
|
||||
|
@ -517,7 +518,7 @@ class InfoExtractor(object):
|
|||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
|
||||
|
||||
|
||||
return (username, password)
|
||||
|
||||
def _get_tfa_info(self):
|
||||
|
|
|
@ -54,7 +54,7 @@ class CrackedIE(InfoExtractor):
|
|||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url':video_url,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
|
@ -62,4 +62,4 @@ class CrackedIE(InfoExtractor):
|
|||
'comment_count': comment_count,
|
||||
'height': height,
|
||||
'width': width,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -69,11 +69,9 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
|
|||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(data)
|
||||
iv = bytes_to_intlist(iv)
|
||||
|
@ -99,8 +97,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
|
|||
return shaHash + [0] * 12
|
||||
|
||||
key = obfuscate_key(id)
|
||||
|
||||
class Counter:
|
||||
__value = iv
|
||||
|
||||
def next_value(self):
|
||||
temp = self.__value
|
||||
self.__value = inc(self.__value)
|
||||
|
@ -183,7 +183,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||
|
||||
return output
|
||||
|
||||
def _real_extract(self,url):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
|
||||
|
@ -226,10 +226,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||
formats = []
|
||||
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt+'p'
|
||||
video_format = fmt + 'p'
|
||||
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
|
||||
# urlencode doesn't work!
|
||||
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
|
||||
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
|
||||
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
|
||||
streamdata = self._download_xml(
|
||||
|
@ -248,8 +248,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||
subtitles = {}
|
||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||
sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
|
||||
video_id, note='Downloading subtitles for '+sub_name)
|
||||
sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,\
|
||||
video_id, note='Downloading subtitles for ' + sub_name)
|
||||
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
||||
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
||||
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
||||
|
@ -274,14 +274,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||
return
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'uploader': video_uploader,
|
||||
'thumbnail': video_thumbnail,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#coding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
|||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
@staticmethod
|
||||
def _build_request(url):
|
||||
|
@ -27,6 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||
request.add_header('Cookie', 'ff=off')
|
||||
return request
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
"""Information Extractor for Dailymotion"""
|
||||
|
||||
|
|
|
@ -26,13 +26,13 @@ class DefenseGouvFrIE(InfoExtractor):
|
|||
video_id = self._search_regex(
|
||||
r"flashvars.pvg_id=\"(\d+)\";",
|
||||
webpage, 'ID')
|
||||
|
||||
|
||||
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
||||
+ video_id)
|
||||
info = self._download_webpage(json_url, title,
|
||||
'Downloading JSON config')
|
||||
video_url = json.loads(info)['renditions'][0]['url']
|
||||
|
||||
|
||||
return {'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
|
|
|
@ -27,7 +27,7 @@ class DotsubIE(InfoExtractor):
|
|||
video_id = mobj.group('id')
|
||||
info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
|
||||
info = self._download_json(info_url, video_id)
|
||||
date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds
|
||||
date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in miliseconds
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -40,7 +40,7 @@ class FC2IE(InfoExtractor):
|
|||
|
||||
info_url = (
|
||||
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.','%2E')))
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))
|
||||
|
||||
info_webpage = self._download_webpage(
|
||||
info_url, video_id, note='Downloading info page')
|
||||
|
|
|
@ -57,4 +57,4 @@ class FirstTVIE(InfoExtractor):
|
|||
'duration': int_or_none(duration),
|
||||
'like_count': int_or_none(like_count),
|
||||
'dislike_count': int_or_none(dislike_count),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,8 +17,8 @@ class FlickrIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '5645318632',
|
||||
'ext': 'mp4',
|
||||
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
|
||||
"uploader_id": "forestwander-nature-pictures",
|
||||
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
|
||||
"uploader_id": "forestwander-nature-pictures",
|
||||
"title": "Dark Hollow Waterfalls"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,4 +92,4 @@ class FourTubeIE(InfoExtractor):
|
|||
'duration': duration,
|
||||
'age_limit': 18,
|
||||
'webpage_url': webpage_url,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -733,7 +733,7 @@ class GenericIE(InfoExtractor):
|
|||
'title': video_title,
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
|
||||
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
||||
if match:
|
||||
return {
|
||||
|
@ -748,7 +748,7 @@ class GenericIE(InfoExtractor):
|
|||
# Look for embedded blip.tv player
|
||||
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||
if mobj:
|
||||
return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
|
||||
return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
|
||||
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
|
||||
if mobj:
|
||||
return self.url_result(mobj.group(1), 'BlipTV')
|
||||
|
@ -1025,4 +1025,3 @@ class GenericIE(InfoExtractor):
|
|||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
|
|
@ -397,4 +397,4 @@ class GloboIE(InfoExtractor):
|
|||
'uploader_id': uploader_id,
|
||||
'like_count': like_count,
|
||||
'formats': formats
|
||||
}
|
||||
}
|
||||
|
|
|
@ -69,7 +69,7 @@ class GorillaVidIE(InfoExtractor):
|
|||
(?:id="[^"]+"\s+)?
|
||||
value="([^"]*)"
|
||||
''', webpage))
|
||||
|
||||
|
||||
if fields['op'] == 'download1':
|
||||
post = compat_urllib_parse.urlencode(fields)
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ class HornBunnyIE(InfoExtractor):
|
|||
webpage2 = self._download_webpage(redirect_url, video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'flvMask:(.*?);', webpage2, 'video_url')
|
||||
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
|
|
@ -13,7 +13,7 @@ class HowcastIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '390161',
|
||||
'ext': 'mp4',
|
||||
'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
|
||||
'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
|
||||
'title': 'How to Tie a Square Knot Properly',
|
||||
}
|
||||
}
|
||||
|
|
|
@ -71,7 +71,7 @@ class ImdbListIE(InfoExtractor):
|
|||
},
|
||||
'playlist_count': 7,
|
||||
}
|
||||
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
|
|
|
@ -32,7 +32,7 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||
def _clean_query(query):
|
||||
NEEDED_ARGS = ['publishedid', 'customerid']
|
||||
query_dic = compat_urlparse.parse_qs(query)
|
||||
cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS)
|
||||
cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
|
||||
# Other player ids return m3u8 urls
|
||||
cleaned_dic['playerid'] = '247'
|
||||
cleaned_dic['videokbrate'] = '100000'
|
||||
|
|
|
@ -54,7 +54,7 @@ class IPrimaIE(InfoExtractor):
|
|||
|
||||
player_url = (
|
||||
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
|
||||
(floor(random()*1073741824), floor(random()*1073741824))
|
||||
(floor(random() * 1073741824), floor(random() * 1073741824))
|
||||
)
|
||||
|
||||
req = compat_urllib_request.Request(player_url)
|
||||
|
|
|
@ -102,7 +102,7 @@ class IviIE(InfoExtractor):
|
|||
compilation = result['compilation']
|
||||
title = result['title']
|
||||
|
||||
title = '%s - %s' % (compilation, title) if compilation is not None else title
|
||||
title = '%s - %s' % (compilation, title) if compilation is not None else title
|
||||
|
||||
previews = result['preview']
|
||||
previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
|
||||
|
@ -152,17 +152,17 @@ class IviCompilationIE(InfoExtractor):
|
|||
compilation_id = mobj.group('compilationid')
|
||||
season_id = mobj.group('seasonid')
|
||||
|
||||
if season_id is not None: # Season link
|
||||
if season_id is not None: # Season link
|
||||
season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
|
||||
playlist_id = '%s/season%s' % (compilation_id, season_id)
|
||||
playlist_title = self._html_search_meta('title', season_page, 'title')
|
||||
entries = self._extract_entries(season_page, compilation_id)
|
||||
else: # Compilation link
|
||||
else: # Compilation link
|
||||
compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
|
||||
playlist_id = compilation_id
|
||||
playlist_title = self._html_search_meta('title', compilation_page, 'title')
|
||||
seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
|
||||
if len(seasons) == 0: # No seasons in this compilation
|
||||
if len(seasons) == 0: # No seasons in this compilation
|
||||
entries = self._extract_entries(compilation_page, compilation_id)
|
||||
else:
|
||||
entries = []
|
||||
|
@ -172,4 +172,4 @@ class IviCompilationIE(InfoExtractor):
|
|||
compilation_id, 'Downloading season %s web page' % season_id)
|
||||
entries.extend(self._extract_entries(season_page, compilation_id))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
|
|
@ -45,4 +45,3 @@ class JadoreCettePubIE(InfoExtractor):
|
|||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ class JeuxVideoIE(InfoExtractor):
|
|||
xml_link = self._html_search_regex(
|
||||
r'<param name="flashvars" value="config=(.*?)" />',
|
||||
webpage, 'config URL')
|
||||
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||
xml_link, 'video ID')
|
||||
|
@ -38,7 +38,7 @@ class JeuxVideoIE(InfoExtractor):
|
|||
xml_link, title, 'Downloading XML config')
|
||||
info_json = config.find('format.json').text
|
||||
info = json.loads(info_json)['versions'][0]
|
||||
|
||||
|
||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||
|
||||
return {
|
||||
|
|
|
@ -10,7 +10,7 @@ _md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
|||
|
||||
class KankanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
|
||||
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
|
||||
'file': '48863.flv',
|
||||
|
|
|
@ -63,4 +63,4 @@ class KontrTubeIE(InfoExtractor):
|
|||
'duration': duration,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,4 +30,3 @@ class Ku6IE(InfoExtractor):
|
|||
'title': title,
|
||||
'url': downloadUrl
|
||||
}
|
||||
|
||||
|
|
|
@ -75,4 +75,3 @@ class Laola1TvIE(InfoExtractor):
|
|||
'categories': categories,
|
||||
'ext': 'mp4',
|
||||
}
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ class LifeNewsIE(InfoExtractor):
|
|||
r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False)
|
||||
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
|
@ -71,4 +71,4 @@ class LifeNewsIE(InfoExtractor):
|
|||
if len(videos) == 1:
|
||||
return make_entry(video_id, videos[0])
|
||||
else:
|
||||
return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)]
|
||||
return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
|
||||
|
|
|
@ -19,8 +19,7 @@ class LiveLeakIE(InfoExtractor):
|
|||
'uploader': 'ljfriel2',
|
||||
'title': 'Most unlucky car accident'
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
||||
'info_dict': {
|
||||
|
@ -30,8 +29,7 @@ class LiveLeakIE(InfoExtractor):
|
|||
'uploader': 'ARD_Stinkt',
|
||||
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
||||
'md5': '42c6d97d54f1db107958760788c5f48f',
|
||||
'info_dict': {
|
||||
|
|
|
@ -109,7 +109,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||
'password': password,
|
||||
'remember': 'false',
|
||||
'stayPut': 'false'
|
||||
}
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
|
||||
|
||||
|
@ -117,7 +117,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
|
||||
if m is not None:
|
||||
response = m.group('json')
|
||||
response_json = json.loads(response)
|
||||
response_json = json.loads(response)
|
||||
state = response_json['state']
|
||||
|
||||
if state == 'notlogged':
|
||||
|
@ -187,7 +187,7 @@ class LyndaCourseIE(InfoExtractor):
|
|||
mobj = re.match(self._VALID_URL, url)
|
||||
course_path = mobj.group('coursepath')
|
||||
course_id = mobj.group('courseid')
|
||||
|
||||
|
||||
page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
||||
course_id, 'Downloading course JSON')
|
||||
course_json = json.loads(page)
|
||||
|
@ -221,4 +221,4 @@ class LyndaCourseIE(InfoExtractor):
|
|||
|
||||
course_title = course_json['Title']
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
||||
return self.playlist_result(entries, course_id, course_title)
|
||||
|
|
|
@ -53,4 +53,4 @@ class M6IE(InfoExtractor):
|
|||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ from ..utils import (
|
|||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class MalemotionIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
|
||||
_TEST = {
|
||||
|
|
|
@ -7,7 +7,7 @@ from .common import InfoExtractor
|
|||
|
||||
class MDRIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
|
||||
|
||||
|
||||
# No tests, MDR regularily deletes its videos
|
||||
_TEST = {
|
||||
'url': 'http://www.mdr.de/fakt/video189002.html',
|
||||
|
|
|
@ -22,7 +22,7 @@ class MetacafeIE(InfoExtractor):
|
|||
# Youtube video
|
||||
{
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
|
||||
'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
|
||||
'info_dict': {
|
||||
'id': '_aUehQsCQtM',
|
||||
'ext': 'mp4',
|
||||
|
|
|
@ -55,4 +55,4 @@ class MojvideoIE(InfoExtractor):
|
|||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,7 +54,7 @@ class MonikerIE(InfoExtractor):
|
|||
|
||||
title = os.path.splitext(data['fname'])[0]
|
||||
|
||||
#Could be several links with different quality
|
||||
# Could be several links with different quality
|
||||
links = re.findall(r'"file" : "?(.+?)",', webpage)
|
||||
# Assume the links are ordered in quality
|
||||
formats = [{
|
||||
|
|
|
@ -111,4 +111,4 @@ class MooshareIE(InfoExtractor):
|
|||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -72,7 +72,7 @@ class MotherlessIE(InfoExtractor):
|
|||
like_count = str_to_int(self._html_search_regex(
|
||||
r'<strong>Favorited</strong>\s+([^<]+)<',
|
||||
webpage, 'like count', fatal=False))
|
||||
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
|
||||
if 'Ago' in upload_date:
|
||||
|
|
|
@ -27,7 +27,7 @@ class MoviezineIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, video_id)
|
||||
jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
|
||||
|
||||
formats =[{
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'),
|
||||
'quality': 0,
|
||||
|
|
|
@ -24,4 +24,4 @@ class MovShareIE(NovaMovIE):
|
|||
'title': 'dissapeared image',
|
||||
'description': 'optical illusion dissapeared image magic illusion',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@ class MporaIE(InfoExtractor):
|
|||
r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
|
||||
False, default=None)
|
||||
vcodec = src['type'].partition('/')[2]
|
||||
|
||||
|
||||
formats.append({
|
||||
'format_id': encoding_id + '-' + vcodec,
|
||||
'url': src['src'],
|
||||
|
|
|
@ -60,7 +60,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||
url = response.geturl()
|
||||
# Transform the url to get the best quality:
|
||||
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
|
||||
return [{'url': url,'ext': 'mp4'}]
|
||||
return [{'url': url, 'ext': 'mp4'}]
|
||||
|
||||
def _extract_video_formats(self, mdoc, mtvn_id):
|
||||
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
|
||||
|
@ -240,15 +240,15 @@ class MTVIE(MTVServicesInfoExtractor):
|
|||
uri = mobj.groupdict().get('mgid')
|
||||
if uri is None:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
|
||||
# Some videos come from Vevo.com
|
||||
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
|
||||
webpage, re.DOTALL)
|
||||
if m_vevo:
|
||||
vevo_id = m_vevo.group(1);
|
||||
vevo_id = m_vevo.group(1)
|
||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||
|
||||
|
||||
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
|
||||
return self._get_videos_info(uri)
|
||||
|
||||
|
|
|
@ -73,4 +73,3 @@ class MuenchenTVIE(InfoExtractor):
|
|||
'is_live': True,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
|
|
@ -72,4 +72,4 @@ class MusicPlayOnIE(InfoExtractor):
|
|||
'duration': int_or_none(duration),
|
||||
'view_count': int_or_none(view_count),
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ class MuzuTVIE(InfoExtractor):
|
|||
player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
|
||||
video_id, u'Downloading player info')
|
||||
video_info = json.loads(player_info_page)['videos'][0]
|
||||
for quality in ['1080' , '720', '480', '360']:
|
||||
for quality in ['1080', '720', '480', '360']:
|
||||
if video_info.get('v%s' % quality):
|
||||
break
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ class MyVideoIE(InfoExtractor):
|
|||
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
|
||||
# Released into the Public Domain by Tristan Fischer on 2013-05-19
|
||||
# https://github.com/rg3/youtube-dl/pull/842
|
||||
def __rc4crypt(self,data, key):
|
||||
def __rc4crypt(self, data, key):
|
||||
x = 0
|
||||
box = list(range(256))
|
||||
for i in list(range(256)):
|
||||
|
@ -49,10 +49,10 @@ class MyVideoIE(InfoExtractor):
|
|||
out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
|
||||
return out
|
||||
|
||||
def __md5(self,s):
|
||||
def __md5(self, s):
|
||||
return hashlib.md5(s).hexdigest().encode()
|
||||
|
||||
def _real_extract(self,url):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
|
@ -173,4 +173,3 @@ class MyVideoIE(InfoExtractor):
|
|||
'play_path': video_playpath,
|
||||
'player_url': video_swfobj,
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ class NaverIE(InfoExtractor):
|
|||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
vid = m_id.group(1)
|
||||
key = m_id.group(2)
|
||||
query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
|
||||
query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key, })
|
||||
query_urls = compat_urllib_parse.urlencode({
|
||||
'masterVid': vid,
|
||||
'protocol': 'p2p',
|
||||
|
@ -65,7 +65,7 @@ class NaverIE(InfoExtractor):
|
|||
if domain.startswith('rtmp'):
|
||||
f.update({
|
||||
'ext': 'flv',
|
||||
'rtmp_protocol': '1', # rtmpt
|
||||
'rtmp_protocol': '1', # rtmpt
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
|
|
@ -39,7 +39,6 @@ class NBAIE(InfoExtractor):
|
|||
duration = parse_duration(
|
||||
self._html_search_meta('duration', webpage, 'duration', fatal=False))
|
||||
|
||||
|
||||
return {
|
||||
'id': shortened_video_id,
|
||||
'url': video_url,
|
||||
|
|
|
@ -91,4 +91,4 @@ class NDRIE(InfoExtractor):
|
|||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,12 +23,12 @@ class NewgroundsIE(InfoExtractor):
|
|||
mobj = re.match(self._VALID_URL, url)
|
||||
music_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, music_id)
|
||||
|
||||
|
||||
title = self._html_search_regex(
|
||||
r',"name":"([^"]+)",', webpage, 'music title')
|
||||
uploader = self._html_search_regex(
|
||||
r',"artist":"([^"]+)",', webpage, 'music uploader')
|
||||
|
||||
|
||||
music_url_json_string = self._html_search_regex(
|
||||
r'({"url":"[^"]+"),', webpage, 'music url') + '}'
|
||||
music_url_json = json.loads(music_url_json_string)
|
||||
|
|
|
@ -89,4 +89,4 @@ class NewstubeIE(InfoExtractor):
|
|||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -93,4 +93,4 @@ class NFBIE(InfoExtractor):
|
|||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
|||
path_url, video_id, 'Downloading final video url')
|
||||
video_url = path_doc.find('path').text
|
||||
else:
|
||||
video_url = initial_video_url
|
||||
video_url = initial_video_url
|
||||
|
||||
join = compat_urlparse.urljoin
|
||||
return {
|
||||
|
|
|
@ -163,4 +163,4 @@ class NocoIE(InfoExtractor):
|
|||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,4 +66,4 @@ class NovaMovIE(InfoExtractor):
|
|||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,4 +25,4 @@ class NowVideoIE(NovaMovIE):
|
|||
'title': 'youtubedl test video _BaW_jenozKc.mp4',
|
||||
'description': 'Description',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -145,4 +145,4 @@ class NTVIE(InfoExtractor):
|
|||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -71,4 +71,4 @@ class NuvidIE(InfoExtractor):
|
|||
'upload_date': upload_date,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -74,4 +74,4 @@ class NYTimesIE(InfoExtractor):
|
|||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,4 +97,3 @@ class OoyalaIE(InfoExtractor):
|
|||
}
|
||||
else:
|
||||
return self._extract_result(videos_info[0], videos_more_info)
|
||||
|
||||
|
|
|
@ -178,4 +178,4 @@ class ORFFM4IE(InfoExtractor):
|
|||
'title': data['title'],
|
||||
'description': data['subtitle'],
|
||||
'entries': entries
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ import re
|
|||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PodomaticIE(InfoExtractor):
|
||||
IE_NAME = 'podomatic'
|
||||
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
|
||||
|
|
|
@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor):
|
|||
comment_count = self._extract_count(
|
||||
r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
|
||||
|
||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||
if webpage.find('"encrypted":true') != -1:
|
||||
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
||||
|
|
|
@ -38,7 +38,7 @@ class PornotubeIE(InfoExtractor):
|
|||
video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
|
||||
video_url = compat_urllib_parse.unquote(video_url)
|
||||
|
||||
#Get the uploaded date
|
||||
# Get the uploaded date
|
||||
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
|
||||
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
|
||||
if upload_date:
|
||||
|
|
|
@ -280,4 +280,4 @@ class ProSiebenSat1IE(InfoExtractor):
|
|||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -119,4 +119,4 @@ class RaiIE(SubtitlesInfoExtractor):
|
|||
if captions.endswith(STL_EXT):
|
||||
captions = captions[:-len(STL_EXT)] + SRT_EXT
|
||||
subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
|
||||
return subtitles
|
||||
return subtitles
|
||||
|
|
|
@ -41,4 +41,3 @@ class RingTVIE(InfoExtractor):
|
|||
'thumbnail': thumbnail_url,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ class RtlXlIE(InfoExtractor):
|
|||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
|
||||
|
||||
video_urlpart = videopath.split('/flash/')[1][:-4]
|
||||
video_urlpart = videopath.split('/flash/')[1][:-5]
|
||||
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
|
||||
|
||||
formats.extend([
|
||||
|
|
|
@ -122,7 +122,7 @@ class RTLnowIE(InfoExtractor):
|
|||
playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
|
||||
|
||||
videoinfo = playerdata.find('./playlist/videoinfo')
|
||||
|
||||
|
||||
formats = []
|
||||
for filename in videoinfo.findall('filename'):
|
||||
mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
|
||||
|
@ -153,4 +153,4 @@ class RTLnowIE(InfoExtractor):
|
|||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,7 +54,6 @@ def _decrypt_url(png):
|
|||
return url
|
||||
|
||||
|
||||
|
||||
class RTVEALaCartaIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:alacarta'
|
||||
IE_DESC = 'RTVE a la carta'
|
||||
|
|
|
@ -191,4 +191,4 @@ class RUTVIE(InfoExtractor):
|
|||
'view_count': view_count,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,4 +53,4 @@ class SciVeeIE(InfoExtractor):
|
|||
'description': description,
|
||||
'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,5 +67,3 @@ class ServingSysIE(InfoExtractor):
|
|||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
|
@ -54,4 +54,4 @@ class SharedIE(InfoExtractor):
|
|||
'filesize': filesize,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
import json
|
||||
import hashlib
|
||||
|
@ -12,15 +11,15 @@ from ..utils import (
|
|||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SmotriIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com'
|
||||
IE_NAME = 'smotri'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||
_NETRC_MACHINE = 'smotri'
|
||||
|
||||
_TESTS = [
|
||||
|
@ -35,7 +34,6 @@ class SmotriIE(InfoExtractor):
|
|||
'uploader': 'rbc2008',
|
||||
'uploader_id': 'rbc08',
|
||||
'upload_date': '20131118',
|
||||
'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
|
||||
'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
|
||||
},
|
||||
},
|
||||
|
@ -50,7 +48,6 @@ class SmotriIE(InfoExtractor):
|
|||
'uploader': 'Support Photofile@photofile',
|
||||
'uploader_id': 'support-photofile',
|
||||
'upload_date': '20070704',
|
||||
'description': 'test, видео test',
|
||||
'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
|
||||
},
|
||||
},
|
||||
|
@ -66,7 +63,6 @@ class SmotriIE(InfoExtractor):
|
|||
'uploader_id': 'timoxa40',
|
||||
'upload_date': '20100404',
|
||||
'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
|
||||
'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': 'qwerty',
|
||||
|
@ -85,7 +81,6 @@ class SmotriIE(InfoExtractor):
|
|||
'upload_date': '20101001',
|
||||
'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
|
||||
'age_limit': 18,
|
||||
'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '333'
|
||||
|
@ -102,17 +97,11 @@ class SmotriIE(InfoExtractor):
|
|||
'uploader': 'HannahL',
|
||||
'uploader_id': 'lisaha95',
|
||||
'upload_date': '20090331',
|
||||
'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
|
||||
'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_SUCCESS = 0
|
||||
_PASSWORD_NOT_VERIFIED = 1
|
||||
_PASSWORD_DETECTED = 2
|
||||
_VIDEO_NOT_FOUND = 3
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
|
@ -137,44 +126,44 @@ class SmotriIE(InfoExtractor):
|
|||
return self._html_search_meta(name, html, display_name)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
real_video_id = mobj.group('realvideoid')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Download video JSON data
|
||||
video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
|
||||
video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON')
|
||||
video_json = json.loads(video_json_page)
|
||||
video_form = {
|
||||
'ticket': video_id,
|
||||
'video_url': '1',
|
||||
'frame_url': '1',
|
||||
'devid': 'LoadupFlashPlayer',
|
||||
'getvideoinfo': '1',
|
||||
}
|
||||
|
||||
status = video_json['status']
|
||||
if status == self._VIDEO_NOT_FOUND:
|
||||
request = compat_urllib_request.Request(
|
||||
'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
video = self._download_json(request, video_id, 'Downloading video JSON')
|
||||
|
||||
if video.get('_moderate_no') or not video.get('moderated'):
|
||||
raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True)
|
||||
|
||||
if video.get('error'):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
|
||||
# video-password set
|
||||
video_password = self._downloader.params.get('videopassword', None)
|
||||
if not video_password:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
||||
video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)')
|
||||
video_json = json.loads(video_json_page)
|
||||
status = video_json['status']
|
||||
if status == self._PASSWORD_NOT_VERIFIED:
|
||||
raise ExtractorError('Video password is invalid', expected=True)
|
||||
|
||||
if status != self._SUCCESS:
|
||||
raise ExtractorError('Unexpected status value %s' % status)
|
||||
|
||||
# Extract the URL of the video
|
||||
video_url = video_json['file_data']
|
||||
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
|
||||
title = video['title']
|
||||
thumbnail = video['_imgURL']
|
||||
upload_date = unified_strdate(video['added'])
|
||||
uploader = video['userNick']
|
||||
uploader_id = video['userLogin']
|
||||
duration = int_or_none(video['duration'])
|
||||
|
||||
# Video JSON does not provide enough meta data
|
||||
# We will extract some from the video web page instead
|
||||
video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
|
||||
video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
|
||||
webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
|
||||
webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
|
||||
|
||||
# Warning if video is unavailable
|
||||
warning = self._html_search_regex(
|
||||
r'<div class="videoUnModer">(.*?)</div>', video_page,
|
||||
r'<div class="videoUnModer">(.*?)</div>', webpage,
|
||||
'warning message', default=None)
|
||||
if warning is not None:
|
||||
self._downloader.report_warning(
|
||||
|
@ -182,84 +171,32 @@ class SmotriIE(InfoExtractor):
|
|||
(video_id, warning))
|
||||
|
||||
# Adult content
|
||||
if re.search('EroConfirmText">', video_page) is not None:
|
||||
if re.search('EroConfirmText">', webpage) is not None:
|
||||
self.report_age_confirmation()
|
||||
confirm_string = self._html_search_regex(
|
||||
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
|
||||
video_page, 'confirm string')
|
||||
confirm_url = video_page_url + '&confirm=%s' % confirm_string
|
||||
video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
|
||||
webpage, 'confirm string')
|
||||
confirm_url = webpage_url + '&confirm=%s' % confirm_string
|
||||
webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
|
||||
adult_content = True
|
||||
else:
|
||||
adult_content = False
|
||||
|
||||
# Extract the rest of meta data
|
||||
video_title = self._search_meta('name', video_page, 'title')
|
||||
if not video_title:
|
||||
video_title = os.path.splitext(url_basename(video_url))[0]
|
||||
|
||||
video_description = self._search_meta('description', video_page)
|
||||
END_TEXT = ' на сайте Smotri.com'
|
||||
if video_description and video_description.endswith(END_TEXT):
|
||||
video_description = video_description[:-len(END_TEXT)]
|
||||
START_TEXT = 'Смотреть онлайн ролик '
|
||||
if video_description and video_description.startswith(START_TEXT):
|
||||
video_description = video_description[len(START_TEXT):]
|
||||
video_thumbnail = self._search_meta('thumbnail', video_page)
|
||||
|
||||
upload_date_str = self._search_meta('uploadDate', video_page, 'upload date')
|
||||
if upload_date_str:
|
||||
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
|
||||
video_upload_date = (
|
||||
(
|
||||
upload_date_m.group('year') +
|
||||
upload_date_m.group('month') +
|
||||
upload_date_m.group('day')
|
||||
)
|
||||
if upload_date_m else None
|
||||
)
|
||||
else:
|
||||
video_upload_date = None
|
||||
|
||||
duration_str = self._search_meta('duration', video_page)
|
||||
if duration_str:
|
||||
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
|
||||
video_duration = (
|
||||
(
|
||||
(int(duration_m.group('hours')) * 60 * 60) +
|
||||
(int(duration_m.group('minutes')) * 60) +
|
||||
int(duration_m.group('seconds'))
|
||||
)
|
||||
if duration_m else None
|
||||
)
|
||||
else:
|
||||
video_duration = None
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
|
||||
video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
|
||||
|
||||
video_uploader_id = self._html_search_regex(
|
||||
'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
|
||||
video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
|
||||
|
||||
video_view_count = self._html_search_regex(
|
||||
view_count = self._html_search_regex(
|
||||
'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
|
||||
video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
|
||||
webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'uploader_id': video_uploader_id,
|
||||
'duration': video_duration,
|
||||
'view_count': int_or_none(video_view_count),
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'view_count': int_or_none(view_count),
|
||||
'age_limit': 18 if adult_content else 0,
|
||||
'video_page_url': video_page_url
|
||||
}
|
||||
|
||||
|
||||
|
@ -275,7 +212,7 @@ class SmotriCommunityIE(InfoExtractor):
|
|||
},
|
||||
'playlist_mincount': 4,
|
||||
}
|
||||
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
community_id = mobj.group('communityid')
|
||||
|
|
|
@ -69,7 +69,7 @@ class SohuIE(InfoExtractor):
|
|||
(allot, prot, clipsURL[i], su[i]))
|
||||
part_str = self._download_webpage(
|
||||
part_url, video_id,
|
||||
note=u'Downloading part %d of %d' % (i+1, part_count))
|
||||
note=u'Downloading part %d of %d' % (i + 1, part_count))
|
||||
|
||||
part_info = part_str.split('|')
|
||||
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
|
||||
|
|
|
@ -224,14 +224,14 @@ class SoundcloudIE(InfoExtractor):
|
|||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group('uploader')
|
||||
# extract simple title (uploader + slug of song title)
|
||||
slug_title = mobj.group('title')
|
||||
slug_title = mobj.group('title')
|
||||
token = mobj.group('token')
|
||||
full_title = resolve_title = '%s/%s' % (uploader, slug_title)
|
||||
if token:
|
||||
resolve_title += '/%s' % token
|
||||
|
||||
|
||||
self.report_resolve(full_title)
|
||||
|
||||
|
||||
url = 'http://soundcloud.com/%s' % resolve_title
|
||||
info_json_url = self._resolv_url(url)
|
||||
info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
|
||||
|
|
|
@ -77,4 +77,4 @@ class SpiegeltvIE(InfoExtractor):
|
|||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,4 +89,4 @@ class Sport5IE(InfoExtractor):
|
|||
'duration': duration,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -93,4 +93,3 @@ class SportDeutschlandIE(InfoExtractor):
|
|||
'rtmp_live': asset.get('live'),
|
||||
'timestamp': parse_iso8601(asset.get('date')),
|
||||
}
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
|||
|
||||
sub_lang_list = {}
|
||||
for sub_lang in requested_langs:
|
||||
if not sub_lang in available_subs_list:
|
||||
if sub_lang not in available_subs_list:
|
||||
self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
|
||||
continue
|
||||
sub_lang_list[sub_lang] = available_subs_list[sub_lang]
|
||||
|
|
|
@ -80,7 +80,7 @@ class SWRMediathekIE(InfoExtractor):
|
|||
|
||||
if media_type == 'Video':
|
||||
fmt.update({
|
||||
'format_note': ['144p', '288p', '544p', '720p'][quality-1],
|
||||
'format_note': ['144p', '288p', '544p', '720p'][quality - 1],
|
||||
'vcodec': codec,
|
||||
})
|
||||
elif media_type == 'Audio':
|
||||
|
|
|
@ -121,7 +121,7 @@ class TeacherTubeUserIE(InfoExtractor):
|
|||
urls = []
|
||||
webpage = self._download_webpage(url, user_id)
|
||||
urls.extend(re.findall(self._MEDIA_RE, webpage))
|
||||
|
||||
|
||||
pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
|
||||
for p in pages:
|
||||
more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
|
||||
|
|
|
@ -33,7 +33,7 @@ class TeamcocoIE(InfoExtractor):
|
|||
|
||||
display_id = mobj.group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
|
||||
video_id = mobj.group("video_id")
|
||||
if not video_id:
|
||||
video_id = self._html_search_regex(
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue