[commonmistakes] Detect BOMs at the beginning of URLs

Reported at https://bugzilla.redhat.com/show_bug.cgi?id=1093517 .
This commit is contained in:
Philipp Hagemeister 2015-02-10 01:39:43 +01:00
parent 834bf069d2
commit c73fae1e2e
2 changed files with 18 additions and 1 deletions

View file

@ -27,3 +27,20 @@ class CommonMistakesIE(InfoExtractor):
if not self._downloader.params.get('verbose'):
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
raise ExtractorError(msg, expected=True)
class UnicodeBOMIE(InfoExtractor):
IE_DESC = False
_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
_TESTS = [{
'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
'only_matching': True,
}]
def _real_extract(self, url):
real_url = self._match_id(url)
self.report_warning(
'Your URL starts with a Byte Order Mark (BOM). '
'Removing the BOM and looking for "%s" ...' % real_url)
return self.url_result(real_url)