Use a wrapper around xml.etree.ElementTree.fromstring in Python 2.x (#7178)

In Python 2.x, the attributes of elements parsed by xml.etree.ElementTree.fromstring aren't unicode objects, so they couldn't be used directly in info_dict fields (for example, '--write-description' doesn't work with bytes).
2015-10-25 20:04:55 +01:00 · 2015-10-25 20:04:55 +01:00 · 36e6f62cd0
commit 36e6f62cd0
parent 755ff8d22c
11 changed files with 61 additions and 21 deletions
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@ -2,7 +2,6 @@
 from __future__ import unicode_literals

 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -14,7 +13,10 @@ from ..utils import (
    remove_end,
    unescapeHTML,
 )
-from ..compat import compat_HTTPError
+from ..compat import (
+    compat_etree_fromstring,
+    compat_HTTPError,
+)


 class BBCCoUkIE(InfoExtractor):
@ -344,7 +346,7 @@ class BBCCoUkIE(InfoExtractor):
                url, programme_id, 'Downloading media selection XML')
        except ExtractorError as ee:
            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
-                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
+                media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
            else:
                raise
        return self._process_media_selector(media_selection, programme_id)
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 import itertools
 import json
-import xml.etree.ElementTree as ET

 from .common import InfoExtractor
+from ..compat import (
+    compat_etree_fromstring,
+)
 from ..utils import (
    int_or_none,
    unified_strdate,
@ -88,7 +90,7 @@ class BiliBiliIE(InfoExtractor):
        except ValueError:
            pass

-        lq_doc = ET.fromstring(lq_page)
+        lq_doc = compat_etree_fromstring(lq_page)
        lq_durls = lq_doc.findall('./durl')

        hq_doc = self._download_xml(
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -3,10 +3,10 @@ from __future__ import unicode_literals

 import re
 import json
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
    compat_parse_qs,
    compat_str,
    compat_urllib_parse,
@ -119,7 +119,7 @@ class BrightcoveIE(InfoExtractor):
        object_str = fix_xml_ampersands(object_str)

        try:
-            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+            object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
        except compat_xml_parse_error:
            return

--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -10,7 +10,6 @@ import re
 import socket
 import sys
 import time
-import xml.etree.ElementTree

 from ..compat import (
    compat_cookiejar,
@ -23,6 +22,7 @@ from ..compat import (
    compat_urllib_request,
    compat_urlparse,
    compat_str,
+    compat_etree_fromstring,
 )
 from ..utils import (
    NO_DEFAULT,
@ -461,7 +461,7 @@ class InfoExtractor(object):
            return xml_string
        if transform_source:
            xml_string = transform_source(xml_string)
-        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+        return compat_etree_fromstring(xml_string.encode('utf-8'))

    def _download_json(self, url_or_request, video_id,
                       note='Downloading JSON metadata',
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@ -5,12 +5,12 @@ import re
 import json
 import base64
 import zlib
-import xml.etree.ElementTree

 from hashlib import sha1
 from math import pow, sqrt, floor
 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
    compat_urllib_parse,
    compat_urllib_parse_unquote,
    compat_urllib_request,
@ -234,7 +234,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
        return output

    def _extract_subtitles(self, subtitle):
-        sub_root = xml.etree.ElementTree.fromstring(subtitle)
+        sub_root = compat_etree_fromstring(subtitle)
        return [{
            'ext': 'srt',
            'data': self._convert_subtitles_to_srt(sub_root),
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@ -1,10 +1,10 @@
 from __future__ import unicode_literals

 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
    compat_urllib_request,
 )
 from ..utils import (
@ -97,7 +97,7 @@ class VevoIE(InfoExtractor):
        if last_version['version'] == -1:
            raise ExtractorError('Unable to extract last version of the video')

-        renditions = xml.etree.ElementTree.fromstring(last_version['data'])
+        renditions = compat_etree_fromstring(last_version['data'])
        formats = []
        # Already sorted from worst to best quality
        for rend in renditions.findall('rendition'):
@ -114,7 +114,7 @@ class VevoIE(InfoExtractor):

    def _formats_from_smil(self, smil_xml):
        formats = []
-        smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
+        smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))
        els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
        for el in els:
            src = el.attrib['src']