From d8d24a922adb0646f9f78b2fc4a287a72fa7ff73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 27 Jun 2015 00:36:23 +0600
Subject: [PATCH 1/6] [youtube] Extract formats from multiple DASH manifests
 (Closes #6093)

The DASH manifest pointed to by dashmpd from the video webpage and the one pointed to by
get_video_info's dashmpd may differ (namely, in their itag sets): some itags are missing from
the DASH manifest pointed to by the webpage's dashmpd, and some are missing from the DASH
manifest pointed to by get_video_info's dashmpd.
The general idea is to take the union of the itags of both DASH manifests (for an example of a
video with such 'manifest behavior', see https://github.com/rg3/youtube-dl/issues/6093).
---
 youtube_dl/extractor/youtube.py | 77 ++++++++++++++++++++-------------
 1 file changed, 46 insertions(+), 31 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index a3da56c14..fa1a2e544 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -853,6 +853,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         else:
             player_url = None
 
+        dash_mpds = []
+
+        def add_dash_mpd(video_info):
+            dash_mpd = video_info.get('dashmpd')
+            if dash_mpd and dash_mpd[0] not in dash_mpds:
+                dash_mpds.append(dash_mpd[0])
+
         # Get video info
         embed_webpage = None
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
@@ -873,34 +880,40 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 note='Refetching age-gated info webpage',
                 errnote='unable to download video info webpage')
             video_info = compat_parse_qs(video_info_webpage)
+            add_dash_mpd(video_info)
         else:
             age_gate = False
-            try:
-                # Try looking directly into the video webpage
-                mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
-                if not mobj:
-                    raise ValueError('Could not find ytplayer.config')  # caught below
+            # Try looking directly into the video webpage
+            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
+            if mobj:
                 json_code = uppercase_escape(mobj.group(1))
                 ytplayer_config = json.loads(json_code)
                 args = ytplayer_config['args']
-                # Convert to the same format returned by compat_parse_qs
-                video_info = dict((k, [v]) for k, v in args.items())
-                if not args.get('url_encoded_fmt_stream_map'):
-                    raise ValueError('No stream_map present')  # caught below
-            except ValueError:
-                # We fallback to the get_video_info pages (used by the embed page)
-                self.report_video_info_webpage_download(video_id)
-                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                    video_info_url = (
-                        '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
-                        % (proto, video_id, el_type))
-                    video_info_webpage = self._download_webpage(
-                        video_info_url,
-                        video_id, note=False,
-                        errnote='unable to download video info webpage')
-                    video_info = compat_parse_qs(video_info_webpage)
-                    if 'token' in video_info:
-                        break
+                if args.get('url_encoded_fmt_stream_map'):
+                    # Convert to the same format returned by compat_parse_qs
+                    video_info = dict((k, [v]) for k, v in args.items())
+                    add_dash_mpd(video_info)
+            # We also try looking in get_video_info since it may contain different dashmpd
+            # URL that points to a DASH manifest with possibly different itag set (some itags
+            # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
+            # manifest pointed by get_video_info's dashmpd).
+            # The general idea is to take a union of itags of both DASH manifests (for example
+            # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
+            self.report_video_info_webpage_download(video_id)
+            for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
+                video_info_url = (
+                    '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                    % (proto, video_id, el_type))
+                video_info_webpage = self._download_webpage(
+                    video_info_url,
+                    video_id, note=False,
+                    errnote='unable to download video info webpage')
+                get_video_info = compat_parse_qs(video_info_webpage)
+                add_dash_mpd(get_video_info)
+                if not video_info:
+                    video_info = get_video_info
+                if 'token' in get_video_info:
+                    break
         if 'token' not in video_info:
             if 'reason' in video_info:
                 raise ExtractorError(
@@ -1118,24 +1131,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         # Look for the DASH manifest
         if self._downloader.params.get('youtube_include_dash_manifest', True):
-            dash_mpd = video_info.get('dashmpd')
-            if dash_mpd:
-                dash_manifest_url = dash_mpd[0]
+            for dash_manifest_url in dash_mpds:
+                dash_formats = {}
                 try:
-                    dash_formats = self._parse_dash_manifest(
-                        video_id, dash_manifest_url, player_url, age_gate)
+                    for df in self._parse_dash_manifest(
+                            video_id, dash_manifest_url, player_url, age_gate):
+                        # Do not overwrite DASH format found in some previous DASH manifest
+                        if df['format_id'] not in dash_formats:
+                            dash_formats[df['format_id']] = df
                 except (ExtractorError, KeyError) as e:
                     self.report_warning(
                         'Skipping DASH manifest: %r' % e, video_id)
-                else:
+                if dash_formats:
                     # Remove the formats we found through non-DASH, they
                     # contain less info and it can be wrong, because we use
                     # fixed values (for example the resolution). See
                     # https://github.com/rg3/youtube-dl/issues/5774 for an
                     # example.
-                    dash_keys = set(df['format_id'] for df in dash_formats)
+                    dash_keys = set(df['format_id'] for df in dash_formats.values())
                     formats = [f for f in formats if f['format_id'] not in dash_keys]
-                    formats.extend(dash_formats)
+                    formats.extend(dash_formats.values())
 
         # Check for malformed aspect ratio
         stretched_m = re.search(

From d80265ccd6ff08ef273d83aff847a457f051071f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 27 Jun 2015 02:48:50 +0600
Subject: [PATCH 2/6] [youtube] Simplify non-DASH formats exclusion

---
 youtube_dl/extractor/youtube.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index fa1a2e544..46841617a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1148,8 +1148,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     # fixed values (for example the resolution). See
                     # https://github.com/rg3/youtube-dl/issues/5774 for an
                     # example.
-                    dash_keys = set(df['format_id'] for df in dash_formats.values())
-                    formats = [f for f in formats if f['format_id'] not in dash_keys]
+                    formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
                     formats.extend(dash_formats.values())
 
         # Check for malformed aspect ratio

From bc93bdb5bbf48b121977a5088ab9607f0fbeb83e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 27 Jun 2015 13:19:46 +0600
Subject: [PATCH 3/6] [youtube] Fix reference before assignment for video_info

---
 youtube_dl/extractor/youtube.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 46841617a..45e5cb80e 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -883,6 +883,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             add_dash_mpd(video_info)
         else:
             age_gate = False
+            video_info = None
             # Try looking directly into the video webpage
             mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
             if mobj:

From 0a3cf9ad3dc1a038018ff347473c0e2d8fc10a69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 27 Jun 2015 14:31:18 +0600
Subject: [PATCH 4/6] [youtube] Skip get_video_info requests when
 --youtube-skip-dash-manifest is specified

---
 youtube_dl/extractor/youtube.py | 43 +++++++++++++++++----------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 45e5cb80e..2c9ad5e92 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -894,27 +894,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     # Convert to the same format returned by compat_parse_qs
                     video_info = dict((k, [v]) for k, v in args.items())
                     add_dash_mpd(video_info)
-            # We also try looking in get_video_info since it may contain different dashmpd
-            # URL that points to a DASH manifest with possibly different itag set (some itags
-            # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
-            # manifest pointed by get_video_info's dashmpd).
-            # The general idea is to take a union of itags of both DASH manifests (for example
-            # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
-            self.report_video_info_webpage_download(video_id)
-            for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                video_info_url = (
-                    '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
-                    % (proto, video_id, el_type))
-                video_info_webpage = self._download_webpage(
-                    video_info_url,
-                    video_id, note=False,
-                    errnote='unable to download video info webpage')
-                get_video_info = compat_parse_qs(video_info_webpage)
-                add_dash_mpd(get_video_info)
-                if not video_info:
-                    video_info = get_video_info
-                if 'token' in get_video_info:
-                    break
+            if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
+                # We also try looking in get_video_info since it may contain different dashmpd
+                # URL that points to a DASH manifest with possibly different itag set (some itags
+                # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
+                # manifest pointed by get_video_info's dashmpd).
+                # The general idea is to take a union of itags of both DASH manifests (for example
+                # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
+                self.report_video_info_webpage_download(video_id)
+                for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
+                    video_info_url = (
+                        '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                        % (proto, video_id, el_type))
+                    video_info_webpage = self._download_webpage(
+                        video_info_url,
+                        video_id, note=False,
+                        errnote='unable to download video info webpage')
+                    get_video_info = compat_parse_qs(video_info_webpage)
+                    add_dash_mpd(get_video_info)
+                    if not video_info:
+                        video_info = get_video_info
+                    if 'token' in get_video_info:
+                        break
         if 'token' not in video_info:
             if 'reason' in video_info:
                 raise ExtractorError(

From b2575b38e76e14b7552a43cd707a03fe78748f40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 27 Jun 2015 14:38:41 +0600
Subject: [PATCH 5/6] [options] Clarify --youtube-skip-dash-manifest

---
 youtube_dl/options.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 6aeca61ee..e3dfb7af9 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -346,7 +346,7 @@ def parseOpts(overrideArguments=None):
     video_format.add_option(
         '--youtube-skip-dash-manifest',
         action='store_false', dest='youtube_include_dash_manifest',
-        help='Do not download the DASH manifest on YouTube videos')
+        help='Do not download the DASH manifests and related data on YouTube videos')
     video_format.add_option(
         '--merge-output-format',
         action='store', dest='merge_output_format', metavar='FORMAT', default=None,

From da77d856a1310b52975abbad82121a1b4c6597a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 27 Jun 2015 14:55:46 +0600
Subject: [PATCH 6/6] [youtube] Add test for #6093

---
 youtube_dl/extractor/youtube.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 2c9ad5e92..20e1781f8 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -518,6 +518,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'skip_download': 'requires avconv',
             }
         },
+        # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
+        {
+            'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
+            'info_dict': {
+                'id': 'FIl7x6_3R5Y',
+                'ext': 'mp4',
+                'title': 'md5:7b81415841e02ecd4313668cde88737a',
+                'description': 'md5:116377fd2963b81ec4ce64b542173306',
+                'upload_date': '20150625',
+                'uploader_id': 'dorappi2000',
+                'uploader': 'dorappi2000',
+                'formats': 'mincount:33',
+            },
+        }
     ]
 
     def __init__(self, *args, **kwargs):