Move playlist tests to extractors.

From now on, test_download runs these tests. That means the playlist tests benefit not only from the networking setup there, but also from the other test suites (for example, test_all_urls, which finds problems with _VALID_URL patterns).
2014-08-28 00:58:24 +02:00 · 2014-08-28 00:58:24 +02:00 · 22a6f15061
commit 22a6f15061
parent 259454525f
27 changed files with 345 additions and 449 deletions
--- a/youtube_dl/extractor/academicearth.py
+++ b/youtube_dl/extractor/academicearth.py
@ -7,6 +7,15 @@ from .common import InfoExtractor
 class AcademicEarthCourseIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
    IE_NAME = 'AcademicEarth:Course'
+    _TEST = {
+        'url': 'http://academicearth.org/playlists/laws-of-nature/',
+        'info_dict': {
+            'id': 'laws-of-nature',
+            'title': 'Laws of Nature',
+            'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
+        },
+        'playlist_count': 4,
+    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@ -21,7 +21,7 @@ class AolIE(InfoExtractor):
        (?:$|\?)
    '''

-    _TEST = {
+    _TESTS = [{
        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
        'md5': '18ef68f48740e86ae94b98da815eec42',
        'info_dict': {
@ -30,7 +30,14 @@ class AolIE(InfoExtractor):
            'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
        },
        'add_ie': ['FiveMin'],
-    }
+    }, {
+        'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
+        'info_dict': {
+            'id': '152147',
+            'title': 'Brace Yourself - Today\'s Weirdest News',
+        },
+        'playlist_mincount': 10,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/bambuser.py
+++ b/youtube_dl/extractor/bambuser.py
@ -59,6 +59,13 @@ class BambuserChannelIE(InfoExtractor):
    _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
    # The maximum number we can get with each request
    _STEP = 50
+    _TEST = {
+        'url': 'http://bambuser.com/channel/pixelversity',
+        'info_dict': {
+            'title': 'pixelversity',
+        },
+        'playlist_mincount': 60,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -73,10 +80,10 @@ class BambuserChannelIE(InfoExtractor):
            req = compat_urllib_request.Request(req_url)
            # Without setting this header, we wouldn't get any result
            req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
-            info_json = self._download_webpage(req, user,
-                'Downloading page %d' % i)
-            results = json.loads(info_json)['result']
-            if len(results) == 0:
+            data = self._download_json(
+                req, user, 'Downloading page %d' % i)
+            results = data['result']
+            if not results:
                break
            last_id = results[-1]['vid']
            urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@ -96,7 +96,7 @@ class BandcampAlbumIE(InfoExtractor):
    IE_NAME = 'Bandcamp:album'
    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
        'playlist': [
            {
@ -118,7 +118,13 @@ class BandcampAlbumIE(InfoExtractor):
            'playlistend': 2
        },
        'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
-    }
+    }, {
+        'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
+        'info_dict': {
+            'title': 'Hierophany of the Open Grave',
+        },
+        'playlist_mincount': 9,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@ -34,6 +34,13 @@ class CSpanIE(InfoExtractor):
            'title': 'International Health Care Models',
            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
        }
+    }, {
+        'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
+        'info_dict': {
+            'id': '342759',
+            'title': 'General Motors Ignition Switch Recall',
+        },
+        'playlist_duration_sum': 14855,
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -1,3 +1,6 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
 import re
 import json
 import itertools
@ -99,8 +102,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            webpage)
        if m_vevo is not None:
            vevo_id = m_vevo.group('id')
-            self.to_screen(u'Vevo video detected: %s' % vevo_id)
-            return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')
+            self.to_screen('Vevo video detected: %s' % vevo_id)
+            return self.url_result('vevo:%s' % vevo_id, ie='Vevo')

        age_limit = self._rta_search(webpage)

@ -111,7 +114,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):

        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
-                                            u'Downloading embed page')
+                                            'Downloading embed page')
        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
            'video info', flags=re.MULTILINE)
        info = json.loads(info)
@ -136,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
                    'height': height,
                })
        if not formats:
-            raise ExtractorError(u'Unable to extract video URL')
+            raise ExtractorError('Unable to extract video URL')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)
@ -145,7 +148,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            return

        view_count = self._search_regex(
-            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
+            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, 'view count', fatal=False)
        if view_count is not None:
            view_count = str_to_int(view_count)

@ -167,28 +170,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                video_id, note=False)
        except ExtractorError as err:
-            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
            return {}
        info = json.loads(sub_list)
        if (info['total'] > 0):
            sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
            return sub_lang_list
-        self._downloader.report_warning(u'video doesn\'t have subtitles')
+        self._downloader.report_warning('video doesn\'t have subtitles')
        return {}


 class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
-    IE_NAME = u'dailymotion:playlist'
+    IE_NAME = 'dailymotion:playlist'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
+    _TESTS = [{
+        'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
+        'info_dict': {
+            'title': 'SPORT',
+        },
+        'playlist_mincount': 20,
+    }]

    def _extract_entries(self, id):
        video_ids = []
        for pagenum in itertools.count(1):
            request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
            webpage = self._download_webpage(request,
-                                             id, u'Downloading page %s' % pagenum)
+                                             id, 'Downloading page %s' % pagenum)

            video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))

@ -211,9 +221,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):


 class DailymotionUserIE(DailymotionPlaylistIE):
-    IE_NAME = u'dailymotion:user'
+    IE_NAME = 'dailymotion:user'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
+    _TESTS = [{
+        'url': 'https://www.dailymotion.com/user/nqtv',
+        'info_dict': {
+            'id': 'nqtv',
+            'title': 'Rémi Gaillard',
+        },
+        'playlist_mincount': 100,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -221,7 +239,7 @@ class DailymotionUserIE(DailymotionPlaylistIE):
        webpage = self._download_webpage(url, user)
        full_user = unescapeHTML(self._html_search_regex(
            r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
-            webpage, u'user', flags=re.DOTALL))
+            webpage, 'user'))

        return {
            '_type': 'playlist',
--- a/youtube_dl/extractor/everyonesmixtape.py
+++ b/youtube_dl/extractor/everyonesmixtape.py
@ -12,10 +12,11 @@ from ..utils import (
 class EveryonesMixtapeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
-        'file': '5bfseWNmlds.mp4',
        "info_dict": {
+            'id': '5bfseWNmlds',
+            'ext': 'mp4',
            "title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
            "uploader": "FKR.TV",
            "uploader_id": "frenchkissrecords",
@ -25,7 +26,14 @@ class EveryonesMixtapeIE(InfoExtractor):
        'params': {
            'skip_download': True,  # This is simply YouTube
        }
-    }
+    }, {
+        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
+        'info_dict': {
+            'id': 'm7m0jJAbMQi',
+            'title': 'Driving',
+        },
+        'playlist_count': 24
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -351,6 +351,20 @@ class GenericIE(InfoExtractor):
                'description': 're:'
            },
            'playlist_mincount': 11,
+        },
+        # Multiple brightcove videos
+        # https://github.com/rg3/youtube-dl/issues/2283
+        {
+            'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+            'info_dict': {
+                'id': 'always-never',
+                'title': 'Always / Never - The New Yorker',
+            },
+            'playlist_count': 3,
+            'params': {
+                'extract_flat': False,
+                'skip_download': True,
+            }
        }
    ]

--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@ -63,6 +63,14 @@ class ImdbListIE(InfoExtractor):
    IE_NAME = 'imdb:list'
    IE_DESC = 'Internet Movie Database lists'
    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    _TEST = {
+        'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
+        'info_dict': {
+            'id': 'JFs9NWw6XI0',
+            'title': 'March 23, 2012 Releases',
+        },
+        'playlist_count': 7,
+    }
    
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@ -46,6 +46,30 @@ class InstagramUserIE(InfoExtractor):
    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
    IE_DESC = 'Instagram user profile'
    IE_NAME = 'instagram:user'
+    _TEST = {
+        'url': 'http://instagram.com/porsche',
+        'info_dict': {
+            'id': 'porsche',
+            'title': 'porsche',
+        },
+        'playlist_mincount': 2,
+        'playlist': [{
+            'info_dict': {
+                'id': '614605558512799803_462752227',
+                'ext': 'mp4',
+                'title': '#Porsche Intelligent Performance.',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'uploader': 'Porsche',
+                'uploader_id': 'porsche',
+                'timestamp': 1387486713,
+                'upload_date': '20131219',
+            },
+        }],
+        'params': {
+            'extract_flat': True,
+            'skip_download': True,
+        }
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@ -127,6 +127,21 @@ class IviCompilationIE(InfoExtractor):
    IE_DESC = 'ivi.ru compilations'
    IE_NAME = 'ivi:compilation'
    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+    _TESTS = [{
+        'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
+        'info_dict': {
+            'id': 'dvoe_iz_lartsa',
+            'title': 'Двое из ларца (2006 - 2008)',
+        },
+        'playlist_mincount': 24,
+    }, {
+        'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
+        'info_dict': {
+            'id': 'dvoe_iz_lartsa/season1',
+            'title': 'Двое из ларца (2006 - 2008) 1 сезон',
+        },
+        'playlist_mincount': 12,
+    }]

    def _extract_entries(self, html, compilation_id):
        return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
--- a/youtube_dl/extractor/khanacademy.py
+++ b/youtube_dl/extractor/khanacademy.py
@ -12,18 +12,27 @@ class KhanAcademyIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
    IE_NAME = 'KhanAcademy'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.khanacademy.org/video/one-time-pad',
-        'file': 'one-time-pad.mp4',
        'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
        'info_dict': {
+            'id': 'one-time-pad',
+            'ext': 'mp4',
            'title': 'The one-time pad',
            'description': 'The perfect cipher',
            'duration': 176,
            'uploader': 'Brit Cruise',
            'upload_date': '20120411',
        }
-    }
+    }, {
+        'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
+        'info_dict': {
+            'id': 'cryptography',
+            'title': 'Journey into cryptography',
+            'description': 'How have humans protected their secret messages through history? What has changed today?',
+        },
+        'playlist_mincount': 3,
+    }]

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@ -19,7 +19,7 @@ from ..utils import (
 class LivestreamIE(InfoExtractor):
    IE_NAME = 'livestream'
    _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        'md5': '53274c76ba7754fb0e8d072716f2292b',
        'info_dict': {
@ -31,7 +31,13 @@ class LivestreamIE(InfoExtractor):
            'view_count': int,
            'thumbnail': 're:^http://.*\.jpg$'
        }
-    }
+    }, {
+        'url': 'http://new.livestream.com/tedx/cityenglish',
+        'info_dict': {
+            'title': 'TEDCity2.0 (English)',
+        },
+        'playlist_mincount': 4,
+    }]

    def _parse_smil(self, video_id, smil_url):
        formats = []
@ -111,34 +117,37 @@ class LivestreamIE(InfoExtractor):
        event_name = mobj.group('event_name')
        webpage = self._download_webpage(url, video_id or event_name)

-        og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
-        if og_video is None:
-            config_json = self._search_regex(
-                r'window.config = ({.*?});', webpage, 'window config')
-            info = json.loads(config_json)['event']
-
-            def is_relevant(vdata, vid):
-                result = vdata['type'] == 'video'
-                if video_id is not None:
-                    result = result and compat_str(vdata['data']['id']) == vid
-                return result
-
-            videos = [self._extract_video_info(video_data['data'])
-                      for video_data in info['feed']['data']
-                      if is_relevant(video_data, video_id)]
-            if video_id is None:
-                # This is an event page:
-                return self.playlist_result(videos, info['id'], info['full_name'])
-            else:
-                if videos:
-                    return videos[0]
-        else:
+        og_video = self._og_search_video_url(
+            webpage, 'player url', fatal=False, default=None)
+        if og_video is not None:
            query_str = compat_urllib_parse_urlparse(og_video).query
            query = compat_urlparse.parse_qs(query_str)
-            api_url = query['play_url'][0].replace('.smil', '')
-            info = json.loads(self._download_webpage(
-                api_url, video_id, 'Downloading video info'))
-            return self._extract_video_info(info)
+            if 'play_url' in query:
+                api_url = query['play_url'][0].replace('.smil', '')
+                info = json.loads(self._download_webpage(
+                    api_url, video_id, 'Downloading video info'))
+                return self._extract_video_info(info)
+
+        config_json = self._search_regex(
+            r'window.config = ({.*?});', webpage, 'window config')
+        info = json.loads(config_json)['event']
+
+        def is_relevant(vdata, vid):
+            result = vdata['type'] == 'video'
+            if video_id is not None:
+                result = result and compat_str(vdata['data']['id']) == vid
+            return result
+
+        videos = [self._extract_video_info(video_data['data'])
+                  for video_data in info['feed']['data']
+                  if is_relevant(video_data, video_id)]
+        if video_id is None:
+            # This is an event page:
+            return self.playlist_result(videos, info['id'], info['full_name'])
+        else:
+            if not videos:
+                raise ExtractorError('Cannot find video %s' % video_id)
+            return videos[0]


 # The original version of Livestream uses a different system
@ -148,7 +157,7 @@ class LivestreamOriginalIE(InfoExtractor):
        (?P<user>[^/]+)/(?P<type>video|folder)
        (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
        '''
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
        'info_dict': {
            'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
@ -159,7 +168,13 @@ class LivestreamOriginalIE(InfoExtractor):
            # rtmp
            'skip_download': True,
        },
-    }
+    }, {
+        'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+        'info_dict': {
+            'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+        },
+        'playlist_mincount': 4,
+    }]

    def _extract_video(self, user, video_id):
        api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
@ -182,15 +197,19 @@ class LivestreamOriginalIE(InfoExtractor):

    def _extract_folder(self, url, folder_id):
        webpage = self._download_webpage(url, folder_id)
-        urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
+        paths = orderedSet(re.findall(
+            r'''(?x)(?:
+                <li\s+class="folder">\s*<a\s+href="|
+                <a\s+href="(?=https?://livestre\.am/)
+            )([^"]+)"''', webpage))

        return {
            '_type': 'playlist',
            'id': folder_id,
            'entries': [{
                '_type': 'url',
-                'url': video_url,
-            } for video_url in urls],
+                'url': compat_urlparse.urljoin(url, p),
+            } for p in paths],
        }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@ -46,7 +46,7 @@ class NHLBaseInfoExtractor(InfoExtractor):

 class NHLIE(NHLBaseInfoExtractor):
    IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@ -72,7 +72,7 @@ class NHLIE(NHLBaseInfoExtractor):
 class NHLVideocenterIE(NHLBaseInfoExtractor):
    IE_NAME = 'nhl.com:videocenter'
    IE_DESC = 'NHL videocenter category'
-    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
+    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
    _TEST = {
        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
        'info_dict': {
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@ -74,6 +74,13 @@ class RutubeChannelIE(InfoExtractor):
    IE_NAME = 'rutube:channel'
    IE_DESC = 'Rutube channels'
    _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://rutube.ru/tags/video/1800/',
+        'info_dict': {
+            'id': '1800',
+        },
+        'playlist_mincount': 68,
+    }]

    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'

@ -101,6 +108,7 @@ class RutubeMovieIE(RutubeChannelIE):
    IE_NAME = 'rutube:movie'
    IE_DESC = 'Rutube movies'
    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+    _TESTS = []

    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
@ -119,5 +127,12 @@ class RutubePersonIE(RutubeChannelIE):
    IE_NAME = 'rutube:person'
    IE_DESC = 'Rutube person videos'
    _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://rutube.ru/video/person/313878/',
+        'info_dict': {
+            'id': '313878',
+        },
+        'playlist_mincount': 37,
+    }]

    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@ -267,6 +267,14 @@ class SmotriCommunityIE(InfoExtractor):
    IE_DESC = 'Smotri.com community videos'
    IE_NAME = 'smotri:community'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
+    _TEST = {
+        'url': 'http://smotri.com/community/video/kommuna',
+        'info_dict': {
+            'id': 'kommuna',
+            'title': 'КПРФ',
+        },
+        'playlist_mincount': 4,
+    }
    
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -289,6 +297,14 @@ class SmotriUserIE(InfoExtractor):
    IE_DESC = 'Smotri.com user videos'
    IE_NAME = 'smotri:user'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
+    _TESTS = [{
+        'url': 'http://smotri.com/user/inspector',
+        'info_dict': {
+            'id': 'inspector',
+            'title': 'Inspector',
+        },
+        'playlist_mincount': 9,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@ -28,7 +28,8 @@ class SoundcloudIE(InfoExtractor):
    _VALID_URL = r'''(?x)^(?:https?://)?
                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
                            (?P<uploader>[\w\d-]+)/
-                            (?!sets/)(?P<title>[\w\d-]+)/?
+                            (?!sets/|likes/?(?:$|[?#]))
+                            (?P<title>[\w\d-]+)/?
                            (?P<token>[^?]+?)?(?:[?].*)?$)
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
@ -221,13 +222,16 @@ class SoundcloudIE(InfoExtractor):
 class SoundcloudSetIE(SoundcloudIE):
    _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
    IE_NAME = 'soundcloud:set'
-    # it's in tests/test_playlists.py
-    _TESTS = []
+    _TESTS = [{
+        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
+        'info_dict': {
+            'title': 'The Royal Concept EP',
+        },
+        'playlist_mincount': 6,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
@ -246,20 +250,32 @@ class SoundcloudSetIE(SoundcloudIE):
                self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
            return

-        self.report_extraction(full_title)
-        return {'_type': 'playlist',
-                'entries': [self._extract_info_dict(track) for track in info['tracks']],
-                'id': info['id'],
-                'title': info['title'],
-                }
+        return {
+            '_type': 'playlist',
+            'entries': [self._extract_info_dict(track) for track in info['tracks']],
+            'id': info['id'],
+            'title': info['title'],
+        }


 class SoundcloudUserIE(SoundcloudIE):
    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
    IE_NAME = 'soundcloud:user'
-
-    # it's in tests/test_playlists.py
-    _TESTS = []
+    _TESTS = [{
+        'url': 'https://soundcloud.com/the-concept-band',
+        'info_dict': {
+            'id': '9615865',
+            'title': 'The Royal Concept',
+        },
+        'playlist_mincount': 12
+    }, {
+        'url': 'https://soundcloud.com/the-concept-band/likes',
+        'info_dict': {
+            'id': '9615865',
+            'title': 'The Royal Concept',
+        },
+        'playlist_mincount': 1,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -301,9 +317,18 @@ class SoundcloudUserIE(SoundcloudIE):
 class SoundcloudPlaylistIE(SoundcloudIE):
    _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
    IE_NAME = 'soundcloud:playlist'
+    _TESTS = [

-     # it's in tests/test_playlists.py
-    _TESTS = []
+        {
+            'url': 'http://api.soundcloud.com/playlists/4110309',
+            'info_dict': {
+                'id': '4110309',
+                'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
+                'description': 're:.*?TILT Brass - Bowery Poetry Club',
+            },
+            'playlist_count': 6,
+        }
+    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/teachertube.py
+++ b/youtube_dl/extractor/teachertube.py
@ -106,6 +106,13 @@ class TeacherTubeUserIE(InfoExtractor):
        \s*
        <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
    '''
+    _TEST = {
+        'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
+        'info_dict': {
+            'id': 'rbhagwati2'
+        },
+        'playlist_mincount': 179,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@ -58,6 +58,13 @@ class TEDIE(SubtitlesInfoExtractor):
            'uploader': 'Gabby Giffords and Mark Kelly',
            'description': 'md5:5174aed4d0f16021b704120360f72b92',
        },
+    }, {
+        'url': 'http://www.ted.com/playlists/who_are_the_hackers',
+        'info_dict': {
+            'id': '10',
+            'title': 'Who are the hackers?',
+        },
+        'playlist_mincount': 6,
    }]

    _NATIVE_FORMATS = {
--- a/youtube_dl/extractor/toypics.py
+++ b/youtube_dl/extractor/toypics.py
@ -42,6 +42,13 @@ class ToypicsIE(InfoExtractor):
 class ToypicsUserIE(InfoExtractor):
    IE_DESC = 'Toypics user profile'
    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+    _TEST = {
+        'url': 'http://videos.toypics.net/Mikey',
+        'info_dict': {
+            'id': 'Mikey',
+        },
+        'playlist_mincount': 9917,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@ -68,21 +68,36 @@ class UstreamIE(InfoExtractor):
 class UstreamChannelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
    IE_NAME = 'ustream:channel'
+    _TEST = {
+        'url': 'http://www.ustream.tv/channel/channeljapan',
+        'info_dict': {
+            'id': '10874166',
+        },
+        'playlist_mincount': 54,
+    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
-        slug = m.group('slug')
-        webpage = self._download_webpage(url, slug)
+        display_id = m.group('slug')
+        webpage = self._download_webpage(url, display_id)
        channel_id = get_meta_content('ustream:channel_id', webpage)

        BASE = 'http://www.ustream.tv'
        next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
        video_ids = []
        while next_url:
-            reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
+            reply = self._download_json(
+                compat_urlparse.urljoin(BASE, next_url), display_id,
+                note='Downloading video information (next: %d)' % (len(video_ids) + 1))
            video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
            next_url = reply['nextUrl']

-        urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
-        url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
-        return self.playlist_result(url_entries, channel_id)
+        entries = [
+            self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
+            for vid in video_ids]
+        return {
+            '_type': 'playlist',
+            'id': channel_id,
+            'display_id': display_id,
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@ -65,6 +65,13 @@ class VineUserIE(InfoExtractor):
    IE_NAME = 'vine:user'
    _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
    _VINE_BASE_URL = "https://vine.co/"
+    _TEST = {
+        'url': 'https://vine.co/Visa',
+        'info_dict': {
+            'id': 'Visa',
+        },
+        'playlist_mincount': 47,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@ -77,9 +77,17 @@ class XTubeIE(InfoExtractor):
            'age_limit': 18,
        }

+
 class XTubeUserIE(InfoExtractor):
    IE_DESC = 'XTube user profile'
    _VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
+    _TEST = {
+        'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
+        'info_dict': {
+            'id': 'greenshowers',
+        },
+        'playlist_mincount': 155,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)