From dfe5fa49aed02cf36ba9f743b11b0903554b5e56 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Thu, 7 Jul 2016 17:37:29 +0800
Subject: [PATCH] [compat] Fix compat_shlex_split for non-ASCII input

Closes #9871
---
NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch. Hunk line counts and the diffstat
are consistent, but confirm context-line indentation against the tree
before applying.

NOTE(review): the feature probe added to youtube_dl/compat.py relies on an
`assert` statement. Python drops assert statements under -O/-OO, so in that
mode the probe is never evaluated and shlex.split is bound unconditionally;
affected Python 2 builds would then skip the UTF-8 workaround. An explicit
`if ...: raise` check would avoid this - candidate for a follow-up.

 test/test_compat.py  | 1 +
 youtube_dl/compat.py | 8 +++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index 1d7ac9f16..b57424948 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -88,6 +88,7 @@ class TestCompat(unittest.TestCase):
     def test_compat_shlex_split(self):
         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
         self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
+        self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
 
     def test_compat_etree_fromstring(self):
         xml = '''
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 67db1c7c6..83b96d38f 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import binascii
@@ -2594,15 +2595,16 @@ except ImportError: # Python < 3.3
             return "'" + s.replace("'", "'\"'\"'") + "'"
 
 
-if sys.version_info >= (2, 7, 3):
+try:
+    assert shlex.split('中文') == ['中文']
     compat_shlex_split = shlex.split
-else:
+except (AssertionError, UnicodeWarning, UnicodeEncodeError):
     # Working around shlex issue with unicode strings on some python 2
     # versions (see http://bugs.python.org/issue1548891)
     def compat_shlex_split(s, comments=False, posix=True):
         if isinstance(s, compat_str):
             s = s.encode('utf-8')
-        return shlex.split(s, comments, posix)
+        return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
 
 
 def compat_ord(c):