# HG changeset patch # User souliane # Date 1392824557 -3600 # Node ID 187126b63170829eea601cea11f76be14251e7b4 # Parent 019e1e706e74e1d6e6aa9f80a59a42714ba45e0e tools: remove unused method that was copy/pasted from nltk module diff -r 019e1e706e74 -r 187126b63170 browser_side/tools.py --- a/browser_side/tools.py Wed Feb 19 16:38:13 2014 +0100 +++ b/browser_side/tools.py Wed Feb 19 16:42:37 2014 +0100 @@ -32,30 +32,6 @@ return html.replace('<', '<').replace('>', '>') -def html_clean(html): - """ - Remove HTML markup from the given string. - Copied from nltk.clean_html (http://www.nltk.org/) - - @param html: the HTML string to be cleaned - @type html: C{string} - @rtype: C{string} - """ - - # First we remove inline JavaScript/CSS: - cleaned = re.sub(r"(?is)<(script|style).*?>.*?()", "", html.strip()) - # Then we remove html comments. This has to be done before removing regular - # tags since comments can contain '>' characters. - cleaned = re.sub(r"(?s)[\n]?", "", cleaned) - # Next we can remove the remaining tags: - cleaned = re.sub(r"(?s)<.*?>", " ", cleaned) - # Finally, we deal with whitespace - cleaned = re.sub(r" ", " ", cleaned) - cleaned = re.sub(r" ", " ", cleaned) - cleaned = re.sub(r" ", " ", cleaned) - return cleaned.strip() - - def html_strip(html): """Strip leading/trailing white spaces, HTML line breaks and   sequences.""" cleaned = re.sub(r"^(
| |\s)+", "", html)