Merge remote-tracking branch 'jtwaleson/master'

This commit is contained in:
Philipp Hagemeister 2014-11-23 21:33:31 +01:00
commit 784b6d3a9b
164 changed files with 574 additions and 570 deletions

View file

@ -529,4 +529,4 @@ def gen_extractors():
def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name"""
return globals()[ie_name+'IE']
return globals()[ie_name + 'IE']

View file

@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
class AdultSwimIE(InfoExtractor):
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
_TEST = {

View file

@ -1,4 +1,4 @@
#coding: utf-8
# coding: utf-8
from __future__ import unicode_literals

View file

@ -70,11 +70,13 @@ class AppleTrailersIE(InfoExtractor):
uploader_id = mobj.group('company')
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
def fix_html(s):
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
# The ' in the onClick attributes are not escaped, it couldn't be parsed
# like: http://trailers.apple.com/trailers/wb/gravity/
def _clean_json(m):
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
s = re.sub(self._JSON_RE, _clean_json, s)

View file

@ -192,4 +192,3 @@ class ARDIE(InfoExtractor):
'upload_date': upload_date,
'thumbnail': thumbnail,
}

View file

@ -13,7 +13,7 @@ from ..utils import (
qualities,
)
# There are different sources of video in arte.tv, the extraction process
# There are different sources of video in arte.tv, the extraction process
# is different for each one. The videos usually expire in 7 days, so we can't
# add tests.

View file

@ -12,17 +12,17 @@ class AudiomackIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
IE_NAME = 'audiomack'
_TESTS = [
#hosted on audiomack
# hosted on audiomack
{
'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
'info_dict':
{
'id' : 'roosh-williams/extraordinary',
'id': 'roosh-williams/extraordinary',
'ext': 'mp3',
'title': 'Roosh Williams - Extraordinary'
}
},
#hosted on soundcloud via audiomack
# hosted on soundcloud via audiomack
{
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
'file': '172419696.mp3',
@ -49,7 +49,7 @@ class AudiomackIE(InfoExtractor):
raise ExtractorError("Unable to deduce api url of song")
realurl = api_response["url"]
#Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# - if so, pass the work off to the soundcloud extractor
if SoundcloudIE.suitable(realurl):
return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}

View file

@ -18,7 +18,7 @@ class BambuserIE(InfoExtractor):
_TEST = {
'url': 'http://bambuser.com/v/4050584',
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
#u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
# u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
'info_dict': {
'id': '4050584',
'ext': 'flv',

View file

@ -83,12 +83,12 @@ class BandcampIE(InfoExtractor):
initial_url = mp3_info['url']
re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
m_url = re.match(re_url, initial_url)
#We build the url we will use to get the final track url
# We build the url we will use to get the final track url
# This url is build in Bandcamp in the script download_bunde_*.js
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
# If we could correctly generate the .rand field the url would be
#in the "download_url" key
# in the "download_url" key
final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
return {

View file

@ -195,7 +195,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
duration = int(item.get('duration'))
media_selection = self._download_xml(
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
programme_id, 'Downloading media selection XML')
for media in self._extract_medias(media_selection):
@ -220,4 +220,4 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}
}

View file

@ -40,7 +40,7 @@ class BeegIE(InfoExtractor):
title = self._html_search_regex(
r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
description = self._html_search_regex(
r'<meta name="description" content="([^"]*)"',
webpage, 'description', fatal=False)

View file

@ -1,4 +1,4 @@
#coding: utf-8
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor

View file

@ -112,4 +112,4 @@ class CanalplusIE(InfoExtractor):
'like_count': int(infos.find('NB_LIKES').text),
'comment_count': int(infos.find('NB_COMMENTS').text),
'formats': formats,
}
}

View file

@ -84,4 +84,4 @@ class CBSNewsIE(InfoExtractor):
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
}

View file

@ -92,7 +92,7 @@ class CeskaTelevizeIE(InfoExtractor):
req.add_header('Referer', url)
playlist = self._download_xml(req, video_id)
formats = []
for i in playlist.find('smilRoot/body'):
if 'AD' not in i.attrib['id']:

View file

@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import ExtractorError
class Channel9IE(InfoExtractor):
'''
Common extractor for channel9.msdn.com.
@ -31,7 +32,7 @@ class Channel9IE(InfoExtractor):
'session_code': 'KOS002',
'session_day': 'Day 1',
'session_room': 'Arena 1A',
'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ],
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
},
},
{
@ -44,7 +45,7 @@ class Channel9IE(InfoExtractor):
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
'duration': 1540,
'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
'authors': [ 'Mike Wilmot' ],
'authors': ['Mike Wilmot'],
},
}
]
@ -83,7 +84,7 @@ class Channel9IE(InfoExtractor):
'format_id': x.group('quality'),
'format_note': x.group('note'),
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
'preference': self._known_formats.index(x.group('quality')),
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
} for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
@ -202,17 +203,17 @@ class Channel9IE(InfoExtractor):
if slides is not None:
d = common.copy()
d.update({ 'title': title + '-Slides', 'url': slides })
d.update({'title': title + '-Slides', 'url': slides})
result.append(d)
if zip_ is not None:
d = common.copy()
d.update({ 'title': title + '-Zip', 'url': zip_ })
d.update({'title': title + '-Zip', 'url': zip_})
result.append(d)
if len(formats) > 0:
d = common.copy()
d.update({ 'title': title, 'formats': formats })
d.update({'title': title, 'formats': formats})
result.append(d)
return result
@ -270,5 +271,5 @@ class Channel9IE(InfoExtractor):
else:
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
else: # Assuming list
else: # Assuming list
return self._extract_list(content_path)

View file

@ -77,7 +77,7 @@ class CinemassacreIE(InfoExtractor):
if videolist_url:
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
formats = []
baseurl = vidurl[:vidurl.rfind('/')+1]
baseurl = vidurl[:vidurl.rfind('/') + 1]
for video in videolist.findall('.//video'):
src = video.get('src')
if not src:

View file

@ -39,6 +39,7 @@ class ClipsyndicateIE(InfoExtractor):
transform_source=fix_xml_ampersands)
track_doc = pdoc.find('trackList/track')
def find_param(name):
node = find_xpath_attr(track_doc, './/param', 'name', name)
if node is not None:

View file

@ -423,17 +423,18 @@ class InfoExtractor(object):
"""Report attempt to log in."""
self.to_screen('Logging in')
#Methods for following #608
# Methods for following #608
@staticmethod
def url_result(url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
# TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
if video_id is not None:
video_info['id'] = video_id
return video_info
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
@ -517,7 +518,7 @@ class InfoExtractor(object):
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
return (username, password)
def _get_tfa_info(self):

View file

@ -54,7 +54,7 @@ class CrackedIE(InfoExtractor):
return {
'id': video_id,
'url':video_url,
'url': video_url,
'title': title,
'description': description,
'timestamp': timestamp,
@ -62,4 +62,4 @@ class CrackedIE(InfoExtractor):
'comment_count': comment_count,
'height': height,
'width': width,
}
}

View file

@ -69,11 +69,9 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(login_request, None, False, 'Wrong login info')
def _real_initialize(self):
self._login()
def _decrypt_subtitles(self, data, iv, id):
data = bytes_to_intlist(data)
iv = bytes_to_intlist(iv)
@ -99,8 +97,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
return shaHash + [0] * 12
key = obfuscate_key(id)
class Counter:
__value = iv
def next_value(self):
temp = self.__value
self.__value = inc(self.__value)
@ -183,7 +183,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return output
def _real_extract(self,url):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
@ -226,10 +226,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
formats = []
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt+'p'
video_format = fmt + 'p'
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
# urlencode doesn't work!
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
streamdata = self._download_xml(
@ -248,8 +248,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
subtitles = {}
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
video_id, note='Downloading subtitles for '+sub_name)
sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,\
video_id, note='Downloading subtitles for ' + sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
@ -274,14 +274,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return
return {
'id': video_id,
'title': video_title,
'id': video_id,
'title': video_title,
'description': video_description,
'thumbnail': video_thumbnail,
'uploader': video_uploader,
'thumbnail': video_thumbnail,
'uploader': video_uploader,
'upload_date': video_upload_date,
'subtitles': subtitles,
'formats': formats,
'subtitles': subtitles,
'formats': formats,
}

View file

@ -1,4 +1,4 @@
#coding: utf-8
# coding: utf-8
from __future__ import unicode_literals
import re
@ -18,6 +18,7 @@ from ..utils import (
unescapeHTML,
)
class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod
def _build_request(url):
@ -27,6 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
request.add_header('Cookie', 'ff=off')
return request
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""

View file

@ -26,13 +26,13 @@ class DefenseGouvFrIE(InfoExtractor):
video_id = self._search_regex(
r"flashvars.pvg_id=\"(\d+)\";",
webpage, 'ID')
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
+ video_id)
info = self._download_webpage(json_url, title,
'Downloading JSON config')
video_url = json.loads(info)['renditions'][0]['url']
return {'id': video_id,
'ext': 'mp4',
'url': video_url,

View file

@ -27,7 +27,7 @@ class DotsubIE(InfoExtractor):
video_id = mobj.group('id')
info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
info = self._download_json(info_url, video_id)
date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds
date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in miliseconds
return {
'id': video_id,

View file

@ -40,7 +40,7 @@ class FC2IE(InfoExtractor):
info_url = (
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.','%2E')))
format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))
info_webpage = self._download_webpage(
info_url, video_id, note='Downloading info page')

View file

@ -57,4 +57,4 @@ class FirstTVIE(InfoExtractor):
'duration': int_or_none(duration),
'like_count': int_or_none(like_count),
'dislike_count': int_or_none(dislike_count),
}
}

View file

@ -17,8 +17,8 @@ class FlickrIE(InfoExtractor):
'info_dict': {
'id': '5645318632',
'ext': 'mp4',
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
"uploader_id": "forestwander-nature-pictures",
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
"uploader_id": "forestwander-nature-pictures",
"title": "Dark Hollow Waterfalls"
}
}

View file

@ -92,4 +92,4 @@ class FourTubeIE(InfoExtractor):
'duration': duration,
'age_limit': 18,
'webpage_url': webpage_url,
}
}

View file

@ -733,7 +733,7 @@ class GenericIE(InfoExtractor):
'title': video_title,
'id': video_id,
}
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return {
@ -748,7 +748,7 @@ class GenericIE(InfoExtractor):
# Look for embedded blip.tv player
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
if mobj:
return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
if mobj:
return self.url_result(mobj.group(1), 'BlipTV')
@ -1025,4 +1025,3 @@ class GenericIE(InfoExtractor):
'_type': 'playlist',
'entries': entries,
}

View file

@ -397,4 +397,4 @@ class GloboIE(InfoExtractor):
'uploader_id': uploader_id,
'like_count': like_count,
'formats': formats
}
}

View file

@ -69,7 +69,7 @@ class GorillaVidIE(InfoExtractor):
(?:id="[^"]+"\s+)?
value="([^"]*)"
''', webpage))
if fields['op'] == 'download1':
post = compat_urllib_parse.urlencode(fields)

View file

@ -37,7 +37,7 @@ class HornBunnyIE(InfoExtractor):
webpage2 = self._download_webpage(redirect_url, video_id)
video_url = self._html_search_regex(
r'flvMask:(.*?);', webpage2, 'video_url')
duration = parse_duration(self._search_regex(
r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
webpage, 'duration', fatal=False))

View file

@ -13,7 +13,7 @@ class HowcastIE(InfoExtractor):
'info_dict': {
'id': '390161',
'ext': 'mp4',
'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
'title': 'How to Tie a Square Knot Properly',
}
}

View file

@ -71,7 +71,7 @@ class ImdbListIE(InfoExtractor):
},
'playlist_count': 7,
}
def _real_extract(self, url):
list_id = self._match_id(url)
webpage = self._download_webpage(url, list_id)

View file

@ -32,7 +32,7 @@ class InternetVideoArchiveIE(InfoExtractor):
def _clean_query(query):
NEEDED_ARGS = ['publishedid', 'customerid']
query_dic = compat_urlparse.parse_qs(query)
cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS)
cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
# Other player ids return m3u8 urls
cleaned_dic['playerid'] = '247'
cleaned_dic['videokbrate'] = '100000'

View file

@ -54,7 +54,7 @@ class IPrimaIE(InfoExtractor):
player_url = (
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
(floor(random()*1073741824), floor(random()*1073741824))
(floor(random() * 1073741824), floor(random() * 1073741824))
)
req = compat_urllib_request.Request(player_url)

View file

@ -102,7 +102,7 @@ class IviIE(InfoExtractor):
compilation = result['compilation']
title = result['title']
title = '%s - %s' % (compilation, title) if compilation is not None else title
title = '%s - %s' % (compilation, title) if compilation is not None else title
previews = result['preview']
previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
@ -152,17 +152,17 @@ class IviCompilationIE(InfoExtractor):
compilation_id = mobj.group('compilationid')
season_id = mobj.group('seasonid')
if season_id is not None: # Season link
if season_id is not None: # Season link
season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
playlist_id = '%s/season%s' % (compilation_id, season_id)
playlist_title = self._html_search_meta('title', season_page, 'title')
entries = self._extract_entries(season_page, compilation_id)
else: # Compilation link
else: # Compilation link
compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
playlist_id = compilation_id
playlist_title = self._html_search_meta('title', compilation_page, 'title')
seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
if len(seasons) == 0: # No seasons in this compilation
if len(seasons) == 0: # No seasons in this compilation
entries = self._extract_entries(compilation_page, compilation_id)
else:
entries = []
@ -172,4 +172,4 @@ class IviCompilationIE(InfoExtractor):
compilation_id, 'Downloading season %s web page' % season_id)
entries.extend(self._extract_entries(season_page, compilation_id))
return self.playlist_result(entries, playlist_id, playlist_title)
return self.playlist_result(entries, playlist_id, playlist_title)

View file

@ -45,4 +45,3 @@ class JadoreCettePubIE(InfoExtractor):
'title': title,
'description': description,
}

View file

@ -29,7 +29,7 @@ class JeuxVideoIE(InfoExtractor):
xml_link = self._html_search_regex(
r'<param name="flashvars" value="config=(.*?)" />',
webpage, 'config URL')
video_id = self._search_regex(
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
xml_link, 'video ID')
@ -38,7 +38,7 @@ class JeuxVideoIE(InfoExtractor):
xml_link, title, 'Downloading XML config')
info_json = config.find('format.json').text
info = json.loads(info_json)['versions'][0]
video_url = 'http://video720.jeuxvideo.com/' + info['file']
return {

View file

@ -10,7 +10,7 @@ _md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
class KankanIE(InfoExtractor):
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
_TEST = {
'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
'file': '48863.flv',

View file

@ -63,4 +63,4 @@ class KontrTubeIE(InfoExtractor):
'duration': duration,
'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count),
}
}

View file

@ -30,4 +30,3 @@ class Ku6IE(InfoExtractor):
'title': title,
'url': downloadUrl
}

View file

@ -75,4 +75,3 @@ class Laola1TvIE(InfoExtractor):
'categories': categories,
'ext': 'mp4',
}

View file

@ -52,7 +52,7 @@ class LifeNewsIE(InfoExtractor):
r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)
upload_date = self._html_search_regex(
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False)
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
if upload_date is not None:
upload_date = unified_strdate(upload_date)
@ -71,4 +71,4 @@ class LifeNewsIE(InfoExtractor):
if len(videos) == 1:
return make_entry(video_id, videos[0])
else:
return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)]
return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]

View file

@ -19,8 +19,7 @@ class LiveLeakIE(InfoExtractor):
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident'
}
},
{
}, {
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
'info_dict': {
@ -30,8 +29,7 @@ class LiveLeakIE(InfoExtractor):
'uploader': 'ARD_Stinkt',
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
}
},
{
}, {
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
'md5': '42c6d97d54f1db107958760788c5f48f',
'info_dict': {

View file

@ -109,7 +109,7 @@ class LyndaIE(SubtitlesInfoExtractor):
'password': password,
'remember': 'false',
'stayPut': 'false'
}
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
@ -117,7 +117,7 @@ class LyndaIE(SubtitlesInfoExtractor):
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
if m is not None:
response = m.group('json')
response_json = json.loads(response)
response_json = json.loads(response)
state = response_json['state']
if state == 'notlogged':
@ -187,7 +187,7 @@ class LyndaCourseIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
course_path = mobj.group('coursepath')
course_id = mobj.group('courseid')
page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
course_id, 'Downloading course JSON')
course_json = json.loads(page)
@ -221,4 +221,4 @@ class LyndaCourseIE(InfoExtractor):
course_title = course_json['Title']
return self.playlist_result(entries, course_id, course_title)
return self.playlist_result(entries, course_id, course_title)

View file

@ -53,4 +53,4 @@ class M6IE(InfoExtractor):
'duration': duration,
'view_count': view_count,
'formats': formats,
}
}

View file

@ -7,6 +7,7 @@ from ..utils import (
compat_urllib_parse,
)
class MalemotionIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
_TEST = {

View file

@ -7,7 +7,7 @@ from .common import InfoExtractor
class MDRIE(InfoExtractor):
_VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
# No tests, MDR regularily deletes its videos
_TEST = {
'url': 'http://www.mdr.de/fakt/video189002.html',

View file

@ -22,7 +22,7 @@ class MetacafeIE(InfoExtractor):
# Youtube video
{
'add_ie': ['Youtube'],
'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
'info_dict': {
'id': '_aUehQsCQtM',
'ext': 'mp4',

View file

@ -55,4 +55,4 @@ class MojvideoIE(InfoExtractor):
'title': title,
'thumbnail': thumbnail,
'duration': duration,
}
}

View file

@ -54,7 +54,7 @@ class MonikerIE(InfoExtractor):
title = os.path.splitext(data['fname'])[0]
#Could be several links with different quality
# Could be several links with different quality
links = re.findall(r'"file" : "?(.+?)",', webpage)
# Assume the links are ordered in quality
formats = [{

View file

@ -111,4 +111,4 @@ class MooshareIE(InfoExtractor):
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
}

View file

@ -72,7 +72,7 @@ class MotherlessIE(InfoExtractor):
like_count = str_to_int(self._html_search_regex(
r'<strong>Favorited</strong>\s+([^<]+)<',
webpage, 'like count', fatal=False))
upload_date = self._html_search_regex(
r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
if 'Ago' in upload_date:

View file

@ -27,7 +27,7 @@ class MoviezineIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
formats =[{
formats = [{
'format_id': 'sd',
'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'),
'quality': 0,

View file

@ -24,4 +24,4 @@ class MovShareIE(NovaMovIE):
'title': 'dissapeared image',
'description': 'optical illusion dissapeared image magic illusion',
}
}
}

View file

@ -44,7 +44,7 @@ class MporaIE(InfoExtractor):
r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
False, default=None)
vcodec = src['type'].partition('/')[2]
formats.append({
'format_id': encoding_id + '-' + vcodec,
'url': src['src'],

View file

@ -60,7 +60,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
url = response.geturl()
# Transform the url to get the best quality:
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
return [{'url': url,'ext': 'mp4'}]
return [{'url': url, 'ext': 'mp4'}]
def _extract_video_formats(self, mdoc, mtvn_id):
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
@ -240,15 +240,15 @@ class MTVIE(MTVServicesInfoExtractor):
uri = mobj.groupdict().get('mgid')
if uri is None:
webpage = self._download_webpage(url, video_id)
# Some videos come from Vevo.com
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
webpage, re.DOTALL)
if m_vevo:
vevo_id = m_vevo.group(1);
vevo_id = m_vevo.group(1)
self.to_screen('Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
return self._get_videos_info(uri)

View file

@ -73,4 +73,3 @@ class MuenchenTVIE(InfoExtractor):
'is_live': True,
'thumbnail': thumbnail,
}

View file

@ -72,4 +72,4 @@ class MusicPlayOnIE(InfoExtractor):
'duration': int_or_none(duration),
'view_count': int_or_none(view_count),
'formats': formats,
}
}

View file

@ -37,7 +37,7 @@ class MuzuTVIE(InfoExtractor):
player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
video_id, u'Downloading player info')
video_info = json.loads(player_info_page)['videos'][0]
for quality in ['1080' , '720', '480', '360']:
for quality in ['1080', '720', '480', '360']:
if video_info.get('v%s' % quality):
break

View file

@ -33,7 +33,7 @@ class MyVideoIE(InfoExtractor):
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
# Released into the Public Domain by Tristan Fischer on 2013-05-19
# https://github.com/rg3/youtube-dl/pull/842
def __rc4crypt(self,data, key):
def __rc4crypt(self, data, key):
x = 0
box = list(range(256))
for i in list(range(256)):
@ -49,10 +49,10 @@ class MyVideoIE(InfoExtractor):
out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
return out
def __md5(self,s):
def __md5(self, s):
return hashlib.md5(s).hexdigest().encode()
def _real_extract(self,url):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
@ -173,4 +173,3 @@ class MyVideoIE(InfoExtractor):
'play_path': video_playpath,
'player_url': video_swfobj,
}

View file

@ -40,7 +40,7 @@ class NaverIE(InfoExtractor):
raise ExtractorError('couldn\'t extract vid and key')
vid = m_id.group(1)
key = m_id.group(2)
query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key, })
query_urls = compat_urllib_parse.urlencode({
'masterVid': vid,
'protocol': 'p2p',
@ -65,7 +65,7 @@ class NaverIE(InfoExtractor):
if domain.startswith('rtmp'):
f.update({
'ext': 'flv',
'rtmp_protocol': '1', # rtmpt
'rtmp_protocol': '1', # rtmpt
})
formats.append(f)
self._sort_formats(formats)

View file

@ -39,7 +39,6 @@ class NBAIE(InfoExtractor):
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration', fatal=False))
return {
'id': shortened_video_id,
'url': video_url,

View file

@ -91,4 +91,4 @@ class NDRIE(InfoExtractor):
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
}

View file

@ -23,12 +23,12 @@ class NewgroundsIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
music_id = mobj.group('id')
webpage = self._download_webpage(url, music_id)
title = self._html_search_regex(
r',"name":"([^"]+)",', webpage, 'music title')
uploader = self._html_search_regex(
r',"artist":"([^"]+)",', webpage, 'music uploader')
music_url_json_string = self._html_search_regex(
r'({"url":"[^"]+"),', webpage, 'music url') + '}'
music_url_json = json.loads(music_url_json_string)

View file

@ -89,4 +89,4 @@ class NewstubeIE(InfoExtractor):
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
}

View file

@ -93,4 +93,4 @@ class NFBIE(InfoExtractor):
'uploader': uploader,
'uploader_id': uploader_id,
'formats': formats,
}
}

View file

@ -31,7 +31,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
path_url, video_id, 'Downloading final video url')
video_url = path_doc.find('path').text
else:
video_url = initial_video_url
video_url = initial_video_url
join = compat_urlparse.urljoin
return {

View file

@ -163,4 +163,4 @@ class NocoIE(InfoExtractor):
'uploader_id': uploader_id,
'duration': duration,
'formats': formats,
}
}

View file

@ -66,4 +66,4 @@ class NovaMovIE(InfoExtractor):
'url': video_url,
'title': title,
'description': description
}
}

View file

@ -25,4 +25,4 @@ class NowVideoIE(NovaMovIE):
'title': 'youtubedl test video _BaW_jenozKc.mp4',
'description': 'Description',
}
}
}

View file

@ -145,4 +145,4 @@ class NTVIE(InfoExtractor):
'duration': duration,
'view_count': view_count,
'formats': formats,
}
}

View file

@ -71,4 +71,4 @@ class NuvidIE(InfoExtractor):
'upload_date': upload_date,
'age_limit': 18,
'formats': formats,
}
}

View file

@ -74,4 +74,4 @@ class NYTimesIE(InfoExtractor):
'duration': duration,
'formats': formats,
'thumbnails': thumbnails,
}
}

View file

@ -97,4 +97,3 @@ class OoyalaIE(InfoExtractor):
}
else:
return self._extract_result(videos_info[0], videos_more_info)

View file

@ -178,4 +178,4 @@ class ORFFM4IE(InfoExtractor):
'title': data['title'],
'description': data['subtitle'],
'entries': entries
}
}

View file

@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import int_or_none
class PodomaticIE(InfoExtractor):
IE_NAME = 'podomatic'
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

View file

@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor):
comment_count = self._extract_count(
r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))

View file

@ -38,7 +38,7 @@ class PornotubeIE(InfoExtractor):
video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
video_url = compat_urllib_parse.unquote(video_url)
#Get the uploaded date
# Get the uploaded date
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
if upload_date:

View file

@ -280,4 +280,4 @@ class ProSiebenSat1IE(InfoExtractor):
'upload_date': upload_date,
'duration': duration,
'formats': formats,
}
}

View file

@ -119,4 +119,4 @@ class RaiIE(SubtitlesInfoExtractor):
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
return subtitles
return subtitles

View file

@ -41,4 +41,3 @@ class RingTVIE(InfoExtractor):
'thumbnail': thumbnail_url,
'description': description,
}

View file

@ -44,7 +44,7 @@ class RtlXlIE(InfoExtractor):
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
video_urlpart = videopath.split('/flash/')[1][:-4]
video_urlpart = videopath.split('/flash/')[1][:-5]
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
formats.extend([

View file

@ -122,7 +122,7 @@ class RTLnowIE(InfoExtractor):
playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
videoinfo = playerdata.find('./playlist/videoinfo')
formats = []
for filename in videoinfo.findall('filename'):
mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
@ -153,4 +153,4 @@ class RTLnowIE(InfoExtractor):
'upload_date': upload_date,
'duration': duration,
'formats': formats,
}
}

View file

@ -54,7 +54,6 @@ def _decrypt_url(png):
return url
class RTVEALaCartaIE(InfoExtractor):
IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta'

View file

@ -191,4 +191,4 @@ class RUTVIE(InfoExtractor):
'view_count': view_count,
'duration': duration,
'formats': formats,
}
}

View file

@ -53,4 +53,4 @@ class SciVeeIE(InfoExtractor):
'description': description,
'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id,
'formats': formats,
}
}

View file

@ -67,5 +67,3 @@ class ServingSysIE(InfoExtractor):
'title': title,
'entries': entries,
}

View file

@ -54,4 +54,4 @@ class SharedIE(InfoExtractor):
'filesize': filesize,
'title': title,
'thumbnail': thumbnail,
}
}

View file

@ -1,7 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import os.path
import re
import json
import hashlib
@ -12,15 +11,15 @@ from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
url_basename,
int_or_none,
unified_strdate,
)
class SmotriIE(InfoExtractor):
IE_DESC = 'Smotri.com'
IE_NAME = 'smotri'
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
_NETRC_MACHINE = 'smotri'
_TESTS = [
@ -35,7 +34,6 @@ class SmotriIE(InfoExtractor):
'uploader': 'rbc2008',
'uploader_id': 'rbc08',
'upload_date': '20131118',
'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
},
},
@ -50,7 +48,6 @@ class SmotriIE(InfoExtractor):
'uploader': 'Support Photofile@photofile',
'uploader_id': 'support-photofile',
'upload_date': '20070704',
'description': 'test, видео test',
'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
},
},
@ -66,7 +63,6 @@ class SmotriIE(InfoExtractor):
'uploader_id': 'timoxa40',
'upload_date': '20100404',
'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
},
'params': {
'videopassword': 'qwerty',
@ -85,7 +81,6 @@ class SmotriIE(InfoExtractor):
'upload_date': '20101001',
'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
'age_limit': 18,
'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
},
'params': {
'videopassword': '333'
@ -102,17 +97,11 @@ class SmotriIE(InfoExtractor):
'uploader': 'HannahL',
'uploader_id': 'lisaha95',
'upload_date': '20090331',
'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
},
},
]
_SUCCESS = 0
_PASSWORD_NOT_VERIFIED = 1
_PASSWORD_DETECTED = 2
_VIDEO_NOT_FOUND = 3
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(
@ -137,44 +126,44 @@ class SmotriIE(InfoExtractor):
return self._html_search_meta(name, html, display_name)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
real_video_id = mobj.group('realvideoid')
video_id = self._match_id(url)
# Download video JSON data
video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON')
video_json = json.loads(video_json_page)
video_form = {
'ticket': video_id,
'video_url': '1',
'frame_url': '1',
'devid': 'LoadupFlashPlayer',
'getvideoinfo': '1',
}
status = video_json['status']
if status == self._VIDEO_NOT_FOUND:
request = compat_urllib_request.Request(
'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
video = self._download_json(request, video_id, 'Downloading video JSON')
if video.get('_moderate_no') or not video.get('moderated'):
raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True)
if video.get('error'):
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
# video-password set
video_password = self._downloader.params.get('videopassword', None)
if not video_password:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)')
video_json = json.loads(video_json_page)
status = video_json['status']
if status == self._PASSWORD_NOT_VERIFIED:
raise ExtractorError('Video password is invalid', expected=True)
if status != self._SUCCESS:
raise ExtractorError('Unexpected status value %s' % status)
# Extract the URL of the video
video_url = video_json['file_data']
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
title = video['title']
thumbnail = video['_imgURL']
upload_date = unified_strdate(video['added'])
uploader = video['userNick']
uploader_id = video['userLogin']
duration = int_or_none(video['duration'])
# Video JSON does not provide enough meta data
# We will extract some from the video web page instead
video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
# Warning if video is unavailable
warning = self._html_search_regex(
r'<div class="videoUnModer">(.*?)</div>', video_page,
r'<div class="videoUnModer">(.*?)</div>', webpage,
'warning message', default=None)
if warning is not None:
self._downloader.report_warning(
@ -182,84 +171,32 @@ class SmotriIE(InfoExtractor):
(video_id, warning))
# Adult content
if re.search('EroConfirmText">', video_page) is not None:
if re.search('EroConfirmText">', webpage) is not None:
self.report_age_confirmation()
confirm_string = self._html_search_regex(
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
video_page, 'confirm string')
confirm_url = video_page_url + '&confirm=%s' % confirm_string
video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
webpage, 'confirm string')
confirm_url = webpage_url + '&confirm=%s' % confirm_string
webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
adult_content = True
else:
adult_content = False
# Extract the rest of meta data
video_title = self._search_meta('name', video_page, 'title')
if not video_title:
video_title = os.path.splitext(url_basename(video_url))[0]
video_description = self._search_meta('description', video_page)
END_TEXT = ' на сайте Smotri.com'
if video_description and video_description.endswith(END_TEXT):
video_description = video_description[:-len(END_TEXT)]
START_TEXT = 'Смотреть онлайн ролик '
if video_description and video_description.startswith(START_TEXT):
video_description = video_description[len(START_TEXT):]
video_thumbnail = self._search_meta('thumbnail', video_page)
upload_date_str = self._search_meta('uploadDate', video_page, 'upload date')
if upload_date_str:
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
video_upload_date = (
(
upload_date_m.group('year') +
upload_date_m.group('month') +
upload_date_m.group('day')
)
if upload_date_m else None
)
else:
video_upload_date = None
duration_str = self._search_meta('duration', video_page)
if duration_str:
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
video_duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
)
if duration_m else None
)
else:
video_duration = None
video_uploader = self._html_search_regex(
'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
video_uploader_id = self._html_search_regex(
'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
video_view_count = self._html_search_regex(
view_count = self._html_search_regex(
'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)
return {
'id': video_id,
'url': video_url,
'title': video_title,
'thumbnail': video_thumbnail,
'description': video_description,
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'duration': video_duration,
'view_count': int_or_none(video_view_count),
'title': title,
'thumbnail': thumbnail,
'uploader': uploader,
'upload_date': upload_date,
'uploader_id': uploader_id,
'duration': duration,
'view_count': int_or_none(view_count),
'age_limit': 18 if adult_content else 0,
'video_page_url': video_page_url
}
@ -275,7 +212,7 @@ class SmotriCommunityIE(InfoExtractor):
},
'playlist_mincount': 4,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
community_id = mobj.group('communityid')

View file

@ -69,7 +69,7 @@ class SohuIE(InfoExtractor):
(allot, prot, clipsURL[i], su[i]))
part_str = self._download_webpage(
part_url, video_id,
note=u'Downloading part %d of %d' % (i+1, part_count))
note=u'Downloading part %d of %d' % (i + 1, part_count))
part_info = part_str.split('|')
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])

View file

@ -224,14 +224,14 @@ class SoundcloudIE(InfoExtractor):
# extract uploader (which is in the url)
uploader = mobj.group('uploader')
# extract simple title (uploader + slug of song title)
slug_title = mobj.group('title')
slug_title = mobj.group('title')
token = mobj.group('token')
full_title = resolve_title = '%s/%s' % (uploader, slug_title)
if token:
resolve_title += '/%s' % token
self.report_resolve(full_title)
url = 'http://soundcloud.com/%s' % resolve_title
info_json_url = self._resolv_url(url)
info = self._download_json(info_json_url, full_title, 'Downloading info JSON')

View file

@ -77,4 +77,4 @@ class SpiegeltvIE(InfoExtractor):
'description': description,
'duration': duration,
'thumbnails': thumbnails
}
}

View file

@ -89,4 +89,4 @@ class Sport5IE(InfoExtractor):
'duration': duration,
'categories': categories,
'formats': formats,
}
}

View file

@ -93,4 +93,3 @@ class SportDeutschlandIE(InfoExtractor):
'rtmp_live': asset.get('live'),
'timestamp': parse_iso8601(asset.get('date')),
}

View file

@ -50,7 +50,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
sub_lang_list = {}
for sub_lang in requested_langs:
if not sub_lang in available_subs_list:
if sub_lang not in available_subs_list:
self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
continue
sub_lang_list[sub_lang] = available_subs_list[sub_lang]

View file

@ -80,7 +80,7 @@ class SWRMediathekIE(InfoExtractor):
if media_type == 'Video':
fmt.update({
'format_note': ['144p', '288p', '544p', '720p'][quality-1],
'format_note': ['144p', '288p', '544p', '720p'][quality - 1],
'vcodec': codec,
})
elif media_type == 'Audio':

View file

@ -121,7 +121,7 @@ class TeacherTubeUserIE(InfoExtractor):
urls = []
webpage = self._download_webpage(url, user_id)
urls.extend(re.findall(self._MEDIA_RE, webpage))
pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
for p in pages:
more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)

View file

@ -33,7 +33,7 @@ class TeamcocoIE(InfoExtractor):
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
video_id = mobj.group("video_id")
if not video_id:
video_id = self._html_search_regex(

Some files were not shown because too many files have changed in this diff Show more