[extractor/common] remove duplicated formats and subtitles in smil manifests
This commit is contained in:
parent
1bedf4de06
commit
d413095f7e
1 changed file with 7 additions and 1 deletion
|
@ -1186,6 +1186,7 @@ class InfoExtractor(object):
|
||||||
http_count = 0
|
http_count = 0
|
||||||
m3u8_count = 0
|
m3u8_count = 0
|
||||||
|
|
||||||
|
src_urls = []
|
||||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||||
for video in videos:
|
for video in videos:
|
||||||
src = video.get('src')
|
src = video.get('src')
|
||||||
|
@ -1222,6 +1223,9 @@ class InfoExtractor(object):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||||
|
if src_url in src_urls:
|
||||||
|
continue
|
||||||
|
src_urls.append(src_url)
|
||||||
|
|
||||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
@ -1267,11 +1271,13 @@ class InfoExtractor(object):
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||||
|
urls = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||||
src = textstream.get('src')
|
src = textstream.get('src')
|
||||||
if not src:
|
if not src or src in urls:
|
||||||
continue
|
continue
|
||||||
|
urls.append(src)
|
||||||
ext = textstream.get('ext') or determine_ext(src)
|
ext = textstream.get('ext') or determine_ext(src)
|
||||||
if not ext:
|
if not ext:
|
||||||
type_ = textstream.get('type')
|
type_ = textstream.get('type')
|
||||||
|
|
Loading…
Reference in a new issue