Merge pull request #7296 from jaimeMF/xml_attrib_unicode
Use a wrapper around xml.etree.ElementTree.fromstring in Python 2.x (…
This commit is contained in:
commit
30eecc6a04
13 changed files with 89 additions and 48 deletions
|
@@ -14,8 +14,8 @@ from ..utils import (
|
|||
parse_duration,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
parse_xml,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
|
@@ -161,7 +161,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = parse_xml(webpage)
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
return GenericIE()._extract_rss(url, video_id, doc)
|
||||
|
||||
|
|
|
@@ -2,7 +2,6 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
@@ -14,7 +13,10 @@ from ..utils import (
|
|||
remove_end,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
)
|
||||
|
||||
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
|
@@ -344,7 +346,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||
url, programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
|
||||
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
|
|
|
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
|
|||
import re
|
||||
import itertools
|
||||
import json
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
|
@@ -88,7 +90,7 @@ class BiliBiliIE(InfoExtractor):
|
|||
except ValueError:
|
||||
pass
|
||||
|
||||
lq_doc = ET.fromstring(lq_page)
|
||||
lq_doc = compat_etree_fromstring(lq_page)
|
||||
lq_durls = lq_doc.findall('./durl')
|
||||
|
||||
hq_doc = self._download_xml(
|
||||
|
|
|
@@ -3,10 +3,10 @@ from __future__ import unicode_literals
|
|||
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
|
@@ -119,7 +119,7 @@ class BrightcoveIE(InfoExtractor):
|
|||
object_str = fix_xml_ampersands(object_str)
|
||||
|
||||
try:
|
||||
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||
object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
return
|
||||
|
||||
|
|
|
@@ -10,7 +10,6 @@ import re
|
|||
import socket
|
||||
import sys
|
||||
import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
|
@@ -23,6 +22,7 @@ from ..compat import (
|
|||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
compat_etree_fromstring,
|
||||
)
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
|
@@ -461,7 +461,7 @@ class InfoExtractor(object):
|
|||
return xml_string
|
||||
if transform_source:
|
||||
xml_string = transform_source(xml_string)
|
||||
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||
return compat_etree_fromstring(xml_string.encode('utf-8'))
|
||||
|
||||
def _download_json(self, url_or_request, video_id,
|
||||
note='Downloading JSON metadata',
|
||||
|
|
|
@@ -5,12 +5,12 @@ import re
|
|||
import json
|
||||
import base64
|
||||
import zlib
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
|
@@ -234,7 +234,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||
return output
|
||||
|
||||
def _extract_subtitles(self, subtitle):
|
||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||
sub_root = compat_etree_fromstring(subtitle)
|
||||
return [{
|
||||
'ext': 'srt',
|
||||
'data': self._convert_subtitles_to_srt(sub_root),
|
||||
|
|
|
@@ -9,6 +9,7 @@ import sys
|
|||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
|
@@ -21,7 +22,6 @@ from ..utils import (
|
|||
HEADRequest,
|
||||
is_html,
|
||||
orderedSet,
|
||||
parse_xml,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
|
@@ -1238,7 +1238,7 @@ class GenericIE(InfoExtractor):
|
|||
|
||||
# Is it an RSS feed, a SMIL file or a XSPF playlist?
|
||||
try:
|
||||
doc = parse_xml(webpage)
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
|
||||
|
|
|
@@ -1,10 +1,10 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
|
@@ -97,7 +97,7 @@ class VevoIE(InfoExtractor):
|
|||
if last_version['version'] == -1:
|
||||
raise ExtractorError('Unable to extract last version of the video')
|
||||
|
||||
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
|
||||
renditions = compat_etree_fromstring(last_version['data'])
|
||||
formats = []
|
||||
# Already sorted from worst to best quality
|
||||
for rend in renditions.findall('rendition'):
|
||||
|
@@ -114,7 +114,7 @@ class VevoIE(InfoExtractor):
|
|||
|
||||
def _formats_from_smil(self, smil_xml):
|
||||
formats = []
|
||||
smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
|
||||
smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))
|
||||
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
||||
for el in els:
|
||||
src = el.attrib['src']
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue