Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).
This commit is contained in:
Philipp Hagemeister 2014-08-28 00:58:24 +02:00
parent 259454525f
commit 22a6f15061
27 changed files with 345 additions and 449 deletions

View file

@ -7,6 +7,15 @@ from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
IE_NAME = 'AcademicEarth:Course'
_TEST = {
'url': 'http://academicearth.org/playlists/laws-of-nature/',
'info_dict': {
'id': 'laws-of-nature',
'title': 'Laws of Nature',
'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
},
'playlist_count': 4,
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)

View file

@ -21,7 +21,7 @@ class AolIE(InfoExtractor):
(?:$|\?)
'''
_TEST = {
_TESTS = [{
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
'md5': '18ef68f48740e86ae94b98da815eec42',
'info_dict': {
@ -30,7 +30,14 @@ class AolIE(InfoExtractor):
'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
},
'add_ie': ['FiveMin'],
}
}, {
'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
'info_dict': {
'id': '152147',
'title': 'Brace Yourself - Today\'s Weirdest News',
},
'playlist_mincount': 10,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -59,6 +59,13 @@ class BambuserChannelIE(InfoExtractor):
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request
_STEP = 50
_TEST = {
'url': 'http://bambuser.com/channel/pixelversity',
'info_dict': {
'title': 'pixelversity',
},
'playlist_mincount': 60,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -73,10 +80,10 @@ class BambuserChannelIE(InfoExtractor):
req = compat_urllib_request.Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
info_json = self._download_webpage(req, user,
'Downloading page %d' % i)
results = json.loads(info_json)['result']
if len(results) == 0:
data = self._download_json(
req, user, 'Downloading page %d' % i)
results = data['result']
if not results:
break
last_id = results[-1]['vid']
urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)

View file

@ -96,7 +96,7 @@ class BandcampAlbumIE(InfoExtractor):
IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
_TEST = {
_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
'playlist': [
{
@ -118,7 +118,13 @@ class BandcampAlbumIE(InfoExtractor):
'playlistend': 2
},
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
}
}, {
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
'info_dict': {
'title': 'Hierophany of the Open Grave',
},
'playlist_mincount': 9,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -34,6 +34,13 @@ class CSpanIE(InfoExtractor):
'title': 'International Health Care Models',
'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
}
}, {
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
'info_dict': {
'id': '342759',
'title': 'General Motors Ignition Switch Recall',
},
'playlist_duration_sum': 14855,
}]
def _real_extract(self, url):

View file

@ -1,3 +1,6 @@
#coding: utf-8
from __future__ import unicode_literals
import re
import json
import itertools
@ -99,8 +102,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
webpage)
if m_vevo is not None:
vevo_id = m_vevo.group('id')
self.to_screen(u'Vevo video detected: %s' % vevo_id)
return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')
self.to_screen('Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
age_limit = self._rta_search(webpage)
@ -111,7 +114,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
embed_page = self._download_webpage(embed_url, video_id,
u'Downloading embed page')
'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
@ -136,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'height': height,
})
if not formats:
raise ExtractorError(u'Unable to extract video URL')
raise ExtractorError('Unable to extract video URL')
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
@ -145,7 +148,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
return
view_count = self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, 'view count', fatal=False)
if view_count is not None:
view_count = str_to_int(view_count)
@ -167,28 +170,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
video_id, note=False)
except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
return {}
info = json.loads(sub_list)
if (info['total'] > 0):
sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
return sub_lang_list
self._downloader.report_warning(u'video doesn\'t have subtitles')
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = u'dailymotion:playlist'
IE_NAME = 'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
_TESTS = [{
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': {
'title': 'SPORT',
},
'playlist_mincount': 20,
}]
def _extract_entries(self, id):
video_ids = []
for pagenum in itertools.count(1):
request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum)
id, 'Downloading page %s' % pagenum)
video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
@ -211,9 +221,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = u'dailymotion:user'
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {
'id': 'nqtv',
'title': 'Rémi Gaillard',
},
'playlist_mincount': 100,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -221,7 +239,7 @@ class DailymotionUserIE(DailymotionPlaylistIE):
webpage = self._download_webpage(url, user)
full_user = unescapeHTML(self._html_search_regex(
r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
webpage, u'user', flags=re.DOTALL))
webpage, 'user'))
return {
'_type': 'playlist',

View file

@ -12,10 +12,11 @@ from ..utils import (
class EveryonesMixtapeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
_TEST = {
_TESTS = [{
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
'file': '5bfseWNmlds.mp4',
"info_dict": {
'id': '5bfseWNmlds',
'ext': 'mp4',
"title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
"uploader": "FKR.TV",
"uploader_id": "frenchkissrecords",
@ -25,7 +26,14 @@ class EveryonesMixtapeIE(InfoExtractor):
'params': {
'skip_download': True, # This is simply YouTube
}
}
}, {
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
'info_dict': {
'id': 'm7m0jJAbMQi',
'title': 'Driving',
},
'playlist_count': 24
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -351,6 +351,20 @@ class GenericIE(InfoExtractor):
'description': 're:'
},
'playlist_mincount': 11,
},
# Multiple brightcove videos
# https://github.com/rg3/youtube-dl/issues/2283
{
'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
'info_dict': {
'id': 'always-never',
'title': 'Always / Never - The New Yorker',
},
'playlist_count': 3,
'params': {
'extract_flat': False,
'skip_download': True,
}
}
]

View file

@ -63,6 +63,14 @@ class ImdbListIE(InfoExtractor):
IE_NAME = 'imdb:list'
IE_DESC = 'Internet Movie Database lists'
_VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
_TEST = {
'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
'info_dict': {
'id': 'JFs9NWw6XI0',
'title': 'March 23, 2012 Releases',
},
'playlist_count': 7,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -46,6 +46,30 @@ class InstagramUserIE(InfoExtractor):
_VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
_TEST = {
'url': 'http://instagram.com/porsche',
'info_dict': {
'id': 'porsche',
'title': 'porsche',
},
'playlist_mincount': 2,
'playlist': [{
'info_dict': {
'id': '614605558512799803_462752227',
'ext': 'mp4',
'title': '#Porsche Intelligent Performance.',
'thumbnail': 're:^https?://.*\.jpg',
'uploader': 'Porsche',
'uploader_id': 'porsche',
'timestamp': 1387486713,
'upload_date': '20131219',
},
}],
'params': {
'extract_flat': True,
'skip_download': True,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -127,6 +127,21 @@ class IviCompilationIE(InfoExtractor):
IE_DESC = 'ivi.ru compilations'
IE_NAME = 'ivi:compilation'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
_TESTS = [{
'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
'info_dict': {
'id': 'dvoe_iz_lartsa',
'title': 'Двое из ларца (2006 - 2008)',
},
'playlist_mincount': 24,
}, {
'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
'info_dict': {
'id': 'dvoe_iz_lartsa/season1',
'title': 'Двое из ларца (2006 - 2008) 1 сезон',
},
'playlist_mincount': 12,
}]
def _extract_entries(self, html, compilation_id):
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')

View file

@ -12,18 +12,27 @@ class KhanAcademyIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
IE_NAME = 'KhanAcademy'
_TEST = {
_TESTS = [{
'url': 'http://www.khanacademy.org/video/one-time-pad',
'file': 'one-time-pad.mp4',
'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
'info_dict': {
'id': 'one-time-pad',
'ext': 'mp4',
'title': 'The one-time pad',
'description': 'The perfect cipher',
'duration': 176,
'uploader': 'Brit Cruise',
'upload_date': '20120411',
}
}
}, {
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
'info_dict': {
'id': 'cryptography',
'title': 'Journey into cryptography',
'description': 'How have humans protected their secret messages through history? What has changed today?',
},
'playlist_mincount': 3,
}]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)

View file

@ -19,7 +19,7 @@ from ..utils import (
class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream'
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
_TEST = {
_TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b',
'info_dict': {
@ -31,7 +31,13 @@ class LivestreamIE(InfoExtractor):
'view_count': int,
'thumbnail': 're:^http://.*\.jpg$'
}
}
}, {
'url': 'http://new.livestream.com/tedx/cityenglish',
'info_dict': {
'title': 'TEDCity2.0 (English)',
},
'playlist_mincount': 4,
}]
def _parse_smil(self, video_id, smil_url):
formats = []
@ -111,34 +117,37 @@ class LivestreamIE(InfoExtractor):
event_name = mobj.group('event_name')
webpage = self._download_webpage(url, video_id or event_name)
og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
if og_video is None:
config_json = self._search_regex(
r'window.config = ({.*?});', webpage, 'window config')
info = json.loads(config_json)['event']
def is_relevant(vdata, vid):
result = vdata['type'] == 'video'
if video_id is not None:
result = result and compat_str(vdata['data']['id']) == vid
return result
videos = [self._extract_video_info(video_data['data'])
for video_data in info['feed']['data']
if is_relevant(video_data, video_id)]
if video_id is None:
# This is an event page:
return self.playlist_result(videos, info['id'], info['full_name'])
else:
if videos:
return videos[0]
else:
og_video = self._og_search_video_url(
webpage, 'player url', fatal=False, default=None)
if og_video is not None:
query_str = compat_urllib_parse_urlparse(og_video).query
query = compat_urlparse.parse_qs(query_str)
api_url = query['play_url'][0].replace('.smil', '')
info = json.loads(self._download_webpage(
api_url, video_id, 'Downloading video info'))
return self._extract_video_info(info)
if 'play_url' in query:
api_url = query['play_url'][0].replace('.smil', '')
info = json.loads(self._download_webpage(
api_url, video_id, 'Downloading video info'))
return self._extract_video_info(info)
config_json = self._search_regex(
r'window.config = ({.*?});', webpage, 'window config')
info = json.loads(config_json)['event']
def is_relevant(vdata, vid):
result = vdata['type'] == 'video'
if video_id is not None:
result = result and compat_str(vdata['data']['id']) == vid
return result
videos = [self._extract_video_info(video_data['data'])
for video_data in info['feed']['data']
if is_relevant(video_data, video_id)]
if video_id is None:
# This is an event page:
return self.playlist_result(videos, info['id'], info['full_name'])
else:
if not videos:
raise ExtractorError('Cannot find video %s' % video_id)
return videos[0]
# The original version of Livestream uses a different system
@ -148,7 +157,7 @@ class LivestreamOriginalIE(InfoExtractor):
(?P<user>[^/]+)/(?P<type>video|folder)
(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
'''
_TEST = {
_TESTS = [{
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
'info_dict': {
'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
@ -159,7 +168,13 @@ class LivestreamOriginalIE(InfoExtractor):
# rtmp
'skip_download': True,
},
}
}, {
'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
'info_dict': {
'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
},
'playlist_mincount': 4,
}]
def _extract_video(self, user, video_id):
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
@ -182,15 +197,19 @@ class LivestreamOriginalIE(InfoExtractor):
def _extract_folder(self, url, folder_id):
webpage = self._download_webpage(url, folder_id)
urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
paths = orderedSet(re.findall(
r'''(?x)(?:
<li\s+class="folder">\s*<a\s+href="|
<a\s+href="(?=https?://livestre\.am/)
)([^"]+)"''', webpage))
return {
'_type': 'playlist',
'id': folder_id,
'entries': [{
'_type': 'url',
'url': video_url,
} for video_url in urls],
'url': compat_urlparse.urljoin(url, p),
} for p in paths],
}
def _real_extract(self, url):

View file

@ -46,7 +46,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
class NHLIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@ -72,7 +72,7 @@ class NHLIE(NHLBaseInfoExtractor):
class NHLVideocenterIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com:videocenter'
IE_DESC = 'NHL videocenter category'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
_TEST = {
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
'info_dict': {

View file

@ -74,6 +74,13 @@ class RutubeChannelIE(InfoExtractor):
IE_NAME = 'rutube:channel'
IE_DESC = 'Rutube channels'
_VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://rutube.ru/tags/video/1800/',
'info_dict': {
'id': '1800',
},
'playlist_mincount': 68,
}]
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
@ -101,6 +108,7 @@ class RutubeMovieIE(RutubeChannelIE):
IE_NAME = 'rutube:movie'
IE_DESC = 'Rutube movies'
_VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
_TESTS = []
_MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
_PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
@ -119,5 +127,12 @@ class RutubePersonIE(RutubeChannelIE):
IE_NAME = 'rutube:person'
IE_DESC = 'Rutube person videos'
_VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
_TESTS = [{
'url': 'http://rutube.ru/video/person/313878/',
'info_dict': {
'id': '313878',
},
'playlist_mincount': 37,
}]
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'

View file

@ -267,6 +267,14 @@ class SmotriCommunityIE(InfoExtractor):
IE_DESC = 'Smotri.com community videos'
IE_NAME = 'smotri:community'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
_TEST = {
'url': 'http://smotri.com/community/video/kommuna',
'info_dict': {
'id': 'kommuna',
'title': 'КПРФ',
},
'playlist_mincount': 4,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -289,6 +297,14 @@ class SmotriUserIE(InfoExtractor):
IE_DESC = 'Smotri.com user videos'
IE_NAME = 'smotri:user'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
_TESTS = [{
'url': 'http://smotri.com/user/inspector',
'info_dict': {
'id': 'inspector',
'title': 'Inspector',
},
'playlist_mincount': 9,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -28,7 +28,8 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
(?!sets/)(?P<title>[\w\d-]+)/?
(?!sets/|likes/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
@ -221,13 +222,16 @@ class SoundcloudIE(InfoExtractor):
class SoundcloudSetIE(SoundcloudIE):
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
IE_NAME = 'soundcloud:set'
# it's in tests/test_playlists.py
_TESTS = []
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
'info_dict': {
'title': 'The Royal Concept EP',
},
'playlist_mincount': 6,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
# extract uploader (which is in the url)
uploader = mobj.group(1)
@ -246,20 +250,32 @@ class SoundcloudSetIE(SoundcloudIE):
self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
return
self.report_extraction(full_title)
return {'_type': 'playlist',
'entries': [self._extract_info_dict(track) for track in info['tracks']],
'id': info['id'],
'title': info['title'],
}
return {
'_type': 'playlist',
'entries': [self._extract_info_dict(track) for track in info['tracks']],
'id': info['id'],
'title': info['title'],
}
class SoundcloudUserIE(SoundcloudIE):
_VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
IE_NAME = 'soundcloud:user'
# it's in tests/test_playlists.py
_TESTS = []
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band',
'info_dict': {
'id': '9615865',
'title': 'The Royal Concept',
},
'playlist_mincount': 12
}, {
'url': 'https://soundcloud.com/the-concept-band/likes',
'info_dict': {
'id': '9615865',
'title': 'The Royal Concept',
},
'playlist_mincount': 1,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -301,9 +317,18 @@ class SoundcloudUserIE(SoundcloudIE):
class SoundcloudPlaylistIE(SoundcloudIE):
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
IE_NAME = 'soundcloud:playlist'
_TESTS = [
# it's in tests/test_playlists.py
_TESTS = []
{
'url': 'http://api.soundcloud.com/playlists/4110309',
'info_dict': {
'id': '4110309',
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
'description': 're:.*?TILT Brass - Bowery Poetry Club',
},
'playlist_count': 6,
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -106,6 +106,13 @@ class TeacherTubeUserIE(InfoExtractor):
\s*
<a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
'''
_TEST = {
'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
'info_dict': {
'id': 'rbhagwati2'
},
'playlist_mincount': 179,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -58,6 +58,13 @@ class TEDIE(SubtitlesInfoExtractor):
'uploader': 'Gabby Giffords and Mark Kelly',
'description': 'md5:5174aed4d0f16021b704120360f72b92',
},
}, {
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
'info_dict': {
'id': '10',
'title': 'Who are the hackers?',
},
'playlist_mincount': 6,
}]
_NATIVE_FORMATS = {

View file

@ -42,6 +42,13 @@ class ToypicsIE(InfoExtractor):
class ToypicsUserIE(InfoExtractor):
IE_DESC = 'Toypics user profile'
_VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
_TEST = {
'url': 'http://videos.toypics.net/Mikey',
'info_dict': {
'id': 'Mikey',
},
'playlist_mincount': 9917,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -68,21 +68,36 @@ class UstreamIE(InfoExtractor):
class UstreamChannelIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
IE_NAME = 'ustream:channel'
_TEST = {
'url': 'http://www.ustream.tv/channel/channeljapan',
'info_dict': {
'id': '10874166',
},
'playlist_mincount': 54,
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
slug = m.group('slug')
webpage = self._download_webpage(url, slug)
display_id = m.group('slug')
webpage = self._download_webpage(url, display_id)
channel_id = get_meta_content('ustream:channel_id', webpage)
BASE = 'http://www.ustream.tv'
next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
video_ids = []
while next_url:
reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
reply = self._download_json(
compat_urlparse.urljoin(BASE, next_url), display_id,
note='Downloading video information (next: %d)' % (len(video_ids) + 1))
video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
next_url = reply['nextUrl']
urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
return self.playlist_result(url_entries, channel_id)
entries = [
self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
for vid in video_ids]
return {
'_type': 'playlist',
'id': channel_id,
'display_id': display_id,
'entries': entries,
}

View file

@ -65,6 +65,13 @@ class VineUserIE(InfoExtractor):
IE_NAME = 'vine:user'
_VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
_VINE_BASE_URL = "https://vine.co/"
_TEST = {
'url': 'https://vine.co/Visa',
'info_dict': {
'id': 'Visa',
},
'playlist_mincount': 47,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View file

@ -77,9 +77,17 @@ class XTubeIE(InfoExtractor):
'age_limit': 18,
}
class XTubeUserIE(InfoExtractor):
IE_DESC = 'XTube user profile'
_VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
_TEST = {
'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
'info_dict': {
'id': 'greenshowers',
},
'playlist_mincount': 155,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)