[udn] Add new extractor
This commit is contained in:
parent
de5c545648
commit
418c5cc3fc
5 changed files with 103 additions and 0 deletions
|
@ -557,6 +557,7 @@ from .udemy import (
|
|||
UdemyIE,
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .udn import UDNEmbedIE
|
||||
from .ultimedia import UltimediaIE
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
|
|
|
@ -26,6 +26,7 @@ from ..utils import (
|
|||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_basename,
|
||||
url_infer_protocol,
|
||||
xpath_text,
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
|
@ -34,6 +35,7 @@ from .ooyala import OoyalaIE
|
|||
from .rutv import RUTVIE
|
||||
from .smotri import SmotriIE
|
||||
from .condenast import CondeNastIE
|
||||
from .udn import UDNEmbedIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
|
@ -650,6 +652,17 @@ class GenericIE(InfoExtractor):
|
|||
'title': "PFT Live: New leader in the 'new-look' defense",
|
||||
'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
|
||||
},
|
||||
},
|
||||
# UDN embed
|
||||
{
|
||||
'url': 'http://www.udn.com/news/story/7314/822787',
|
||||
'md5': 'de06b4c90b042c128395a88f0384817e',
|
||||
'info_dict': {
|
||||
'id': '300040',
|
||||
'ext': 'mp4',
|
||||
'title': '生物老師男變女 全校挺"做自己"',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
|
@ -1268,6 +1281,13 @@ class GenericIE(InfoExtractor):
|
|||
if nbc_sports_url:
|
||||
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
||||
|
||||
# Look for UDN embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
url_infer_protocol(url, mobj.group('url')), 'UDNEmbed')
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
|
|
66
youtube_dl/extractor/udn.py
Normal file
66
youtube_dl/extractor/udn.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
url_infer_protocol,
|
||||
js_to_json
|
||||
)
|
||||
|
||||
|
||||
class UDNEmbedIE(InfoExtractor):
    """Extractor for video.udn.com embedded news players.

    The embed page defines a JavaScript ``options`` object.  Its ``video``
    mapping associates a video type with an API URL; the body returned by
    that API URL is used as the media URL of the corresponding format.  When
    the mapping carries a non-empty ``youtube`` entry, extraction is
    delegated to the Youtube extractor instead.
    """

    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://video.udn.com/embed/news/300040',
        'md5': 'de06b4c90b042c128395a88f0384817e',
        'info_dict': {
            'id': '300040',
            'ext': 'mp4',
            'title': '生物老師男變女 全校挺"做自己"',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': '//video.udn.com/embed/news/300040',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (or a Youtube url_result) for ``url``.

        ``url`` must match ``_VALID_URL``; protocol-relative URLs are
        accepted and fetched over http.
        """
        video_id = self._match_id(url)

        # _VALID_URL also matches scheme-less '//video.udn.com/...' URLs,
        # which cannot be downloaded as is; give them an explicit scheme.
        if url.startswith('//'):
            url = 'http:' + url

        page = self._download_webpage(url, video_id)

        options = json.loads(js_to_json(self._html_search_regex(
            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))

        video_urls = options['video']

        youtube_url = video_urls.get('youtube')
        if youtube_url:
            return self.url_result(youtube_url, 'Youtube')

        formats = []
        for video_type, api_url in video_urls.items():
            # The 'youtube' entry is not an API URL, and empty entries have
            # nothing to request.
            if video_type == 'youtube' or not api_url:
                continue
            formats.append({
                # The API response body is the actual media URL.
                'url': self._download_webpage(
                    url_infer_protocol(url, api_url), video_id,
                    'retrieve url for %s video' % video_type),
                'format_id': video_type,
                'preference': 0 if video_type == 'mp4' else -1,
            })

        self._sort_formats(formats)

        # The first gallery item, when present, provides the thumbnail.
        gallery = options.get('gallery')
        thumbnail = gallery[0].get('original') if gallery else None

        return {
            'id': video_id,
            'formats': formats,
            'title': options['title'],
            'thumbnail': thumbnail,
        }
|
Loading…
Add table
Add a link
Reference in a new issue