Add infrastructure for paged lists
This commit makes it possible to download playlist pages as needed instead of all at once. Before this commit, youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download took quite some time - now it's almost instantaneous. As an example, the youtube:user extractor has been converted. Fixes #2175
This commit is contained in:
parent
c91778f8c0
commit
b7ab059084
4 changed files with 92 additions and 25 deletions
|
@ -6,6 +6,7 @@ import datetime
|
|||
import email.utils
|
||||
import errno
|
||||
import gzip
|
||||
import itertools
|
||||
import io
|
||||
import json
|
||||
import locale
|
||||
|
@ -1161,3 +1162,46 @@ def check_executable(exe, args=[]):
|
|||
except OSError:
|
||||
return False
|
||||
return exe
|
||||
|
||||
|
||||
class PagedList(object):
    """Sliceable view over a list whose entries are fetched page by page.

    pagefunc is called as pagefunc(pagenum) with a zero-based page number
    and must return an iterable holding the entries of that page.  Every
    page except the last one is expected to contain exactly pagesize
    entries; a shorter page is treated as the end of the list.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with index in [start, end) as a list.

        Only the pages overlapping the requested range are fetched.
        end=None means "up to the last entry".  An empty or inverted
        range (end <= start) returns [] without fetching anything.
        """
        # Without this guard a range such as (10, 10) with pagesize 10
        # would never hit a stop condition and would download every page.
        if end is not None and end <= start:
            return []

        res = []
        # Start at the page containing `start`; earlier pages are skipped.
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize

            page_results = list(self._pagefunc(pagenum))

            # Offset of `start` inside this page (only on the first page).
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset of `end` inside this page (only on the last page).
            # The lower bound is strict: end == firstid means the range
            # stopped on the previous page and nothing here is wanted.
            endv = (
                end - firstid
                if (end is not None and firstid < end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if the current page is not "full",
            # i.e. does not contain pagesize entries, then we can assume
            # that this page is the last one - there are no more ids on
            # further pages - so there is no need to query again.
            # (A page trimmed by endv also ends up here, which is fine:
            # the requested range is complete in that case.)
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not
            # interesting, break out early as well.
            if end == nextfirstid:
                break
        return res
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue