libervia-web: browser_side/tools.py comparison

browser_side: the status panel is now based on a new class, LightTextEditor, which uses the HTML5 "contenteditable" attribute

author	souliane <souliane@mailoo.org>
date	Fri, 07 Feb 2014 20:14:11 +0100
parents	ce5b33f499c5
children	f488692c4903

comparison

equal deleted inserted replaced

-:82f9e92379b0
+:f1ba38043d78
 from pyjamas.ui.DragWidget import DragWidget
 from pyjamas.ui.FileUpload import FileUpload
 from pyjamas import Window
 from nativedom import NativeDOM
 from sat_frontends.tools import xmltools
+import re
 dom = NativeDOM()
 def html_sanitize(html):
 """Naive sanitization of HTML"""
 return html.replace('<', '&lt;').replace('>', '&gt;')
def html_clean(html):
    """
    Remove HTML markup from the given string.
    Adapted from nltk.clean_html (http://www.nltk.org/)
    @param html: the HTML string to be cleaned
    @type html: C{string}
    @return: the text with script/style elements, comments and tags removed,
        "&nbsp;" turned into a space, runs of spaces collapsed to a single
        space, and leading/trailing whitespace stripped
    @rtype: C{string}
    """
    # First we remove inline JavaScript/CSS:
    cleaned = re.sub(r"(?is)<(script|style).*?>.*?(</\1>)", "", html.strip())
    # Then we remove html comments. This has to be done before removing regular
    # tags since comments can contain '>' characters.
    cleaned = re.sub(r"(?s)<!--(.*?)-->[\n]?", "", cleaned)
    # Next we can remove the remaining tags (each one leaves a space behind so
    # that adjacent words do not get glued together):
    cleaned = re.sub(r"(?s)<.*?>", " ", cleaned)
    # Finally, we deal with whitespace
    cleaned = cleaned.replace("&nbsp;", " ")
    # Collapse a run of spaces of any length in one pass; replacing pairs of
    # spaces a fixed number of times leaves long runs only partly collapsed.
    cleaned = re.sub(r" {2,}", " ", cleaned)
    return cleaned.strip()
 def inlineRoot(xhtml):
 """ make root element inline """
 doc = dom.parseString(xhtml)

Mercurial > libervia-web