Mercurial > libervia-web
diff browser_side/tools.py @ 347:f1ba38043d78
browser_side: status panel is based on a new class LightTextEditor which uses the HTML5 "contenteditable" attribute
author | souliane <souliane@mailoo.org> |
---|---|
date | Fri, 07 Feb 2014 20:14:11 +0100 |
parents | ce5b33f499c5 |
children | f488692c4903 |
line wrap: on
line diff
--- a/browser_side/tools.py Fri Feb 07 20:08:28 2014 +0100
+++ b/browser_side/tools.py Fri Feb 07 20:14:11 2014 +0100
@@ -22,6 +22,7 @@
 from pyjamas import Window
 from nativedom import NativeDOM
 from sat_frontends.tools import xmltools
+import re
 
 dom = NativeDOM()
 
@@ -31,6 +32,30 @@
     return html.replace('<', '&lt;').replace('>', '&gt;')
 
 
+def html_clean(html):
+    """
+    Remove HTML markup from the given string.
+    Copied from nltk.clean_html (http://www.nltk.org/)
+
+    @param html: the HTML string to be cleaned
+    @type html: C{string}
+    @rtype: C{string}
+    """
+
+    # First we remove inline JavaScript/CSS:
+    cleaned = re.sub(r"(?is)<(script|style).*?>.*?(</\1>)", "", html.strip())
+    # Then we remove html comments. This has to be done before removing regular
+    # tags since comments can contain '>' characters.
+    cleaned = re.sub(r"(?s)<!--(.*?)-->[\n]?", "", cleaned)
+    # Next we can remove the remaining tags:
+    cleaned = re.sub(r"(?s)<.*?>", " ", cleaned)
+    # Finally, we deal with whitespace
+    cleaned = re.sub(r"&nbsp;", " ", cleaned)
+    cleaned = re.sub(r"  ", " ", cleaned)
+    cleaned = re.sub(r"  ", " ", cleaned)
+    return cleaned.strip()
+
+
 def inlineRoot(xhtml):
     """ make root element inline """
     doc = dom.parseString(xhtml)