Mercurial > libervia-web
changeset 363:187126b63170
tools: remove unused method that was copy/pasted from nltk module
author | souliane <souliane@mailoo.org> |
---|---|
date | Wed, 19 Feb 2014 16:42:37 +0100 |
parents | 019e1e706e74 |
children | 4cf735b40304 |
files | browser_side/tools.py |
diffstat | 1 files changed, 0 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/browser_side/tools.py	Wed Feb 19 16:38:13 2014 +0100
+++ b/browser_side/tools.py	Wed Feb 19 16:42:37 2014 +0100
@@ -32,30 +32,6 @@
     return html.replace('<', '&lt;').replace('>', '&gt;')
 
 
-def html_clean(html):
-    """
-    Remove HTML markup from the given string.
-    Copied from nltk.clean_html (http://www.nltk.org/)
-
-    @param html: the HTML string to be cleaned
-    @type html: C{string}
-    @rtype: C{string}
-    """
-
-    # First we remove inline JavaScript/CSS:
-    cleaned = re.sub(r"(?is)<(script|style).*?>.*?(</\1>)", "", html.strip())
-    # Then we remove html comments. This has to be done before removing regular
-    # tags since comments can contain '>' characters.
-    cleaned = re.sub(r"(?s)<!--(.*?)-->[\n]?", "", cleaned)
-    # Next we can remove the remaining tags:
-    cleaned = re.sub(r"(?s)<.*?>", " ", cleaned)
-    # Finally, we deal with whitespace
-    cleaned = re.sub(r"&nbsp;", " ", cleaned)
-    cleaned = re.sub(r"  ", " ", cleaned)
-    cleaned = re.sub(r"  ", " ", cleaned)
-    return cleaned.strip()
-
-
 def html_strip(html):
     """Strip leading/trailing white spaces, HTML line breaks and &nbsp; sequences."""
     cleaned = re.sub(r"^(<br/?>|&nbsp;|\s)+", "", html)
```