[netzkino] Add new extractor (Fixes #4669)

This commit is contained in:
Philipp Hagemeister 2015-01-09 23:59:18 +01:00
parent b8da6b9fc6
commit dd622d7c4e
5 changed files with 116 additions and 0 deletions

View file

@@ -205,6 +205,10 @@ def get_element_by_attribute(attribute, value, html):
def clean_html(html):
"""Clean an HTML snippet into a readable string"""
if html is None: # Convenience for sanitizing descriptions etc.
return html
# Newline vs <br />
html = html.replace('\n', ' ')
html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)