changeset 363:187126b63170

tools: remove unused method that was copy/pasted from nltk module
author souliane <souliane@mailoo.org>
date Wed, 19 Feb 2014 16:42:37 +0100
parents 019e1e706e74
children 4cf735b40304
files browser_side/tools.py
diffstat 1 files changed, 0 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/browser_side/tools.py	Wed Feb 19 16:38:13 2014 +0100
+++ b/browser_side/tools.py	Wed Feb 19 16:42:37 2014 +0100
@@ -32,30 +32,6 @@
     return html.replace('<', '&lt;').replace('>', '&gt;')
 
 
-def html_clean(html):
-    """
-    Remove HTML markup from the given string.
-    Copied from nltk.clean_html (http://www.nltk.org/)
-
-    @param html: the HTML string to be cleaned
-    @type html: C{string}
-    @rtype: C{string}
-    """
-
-    # First we remove inline JavaScript/CSS:
-    cleaned = re.sub(r"(?is)<(script|style).*?>.*?(</\1>)", "", html.strip())
-    # Then we remove html comments. This has to be done before removing regular
-    # tags since comments can contain '>' characters.
-    cleaned = re.sub(r"(?s)<!--(.*?)-->[\n]?", "", cleaned)
-    # Next we can remove the remaining tags:
-    cleaned = re.sub(r"(?s)<.*?>", " ", cleaned)
-    # Finally, we deal with whitespace
-    cleaned = re.sub(r"&nbsp;", " ", cleaned)
-    cleaned = re.sub(r"  ", " ", cleaned)
-    cleaned = re.sub(r"  ", " ", cleaned)
-    return cleaned.strip()
-
-
 def html_strip(html):
     """Strip leading/trailing white spaces, HTML line breaks and &nbsp; sequences."""
     cleaned = re.sub(r"^(<br/?>|&nbsp;|\s)+", "", html)