comparison browser_side/tools.py @ 347:f1ba38043d78

browser_side: status panel is based on a new class LightTextEditor which uses the HTML5 "contenteditable" attribute
author souliane <souliane@mailoo.org>
date Fri, 07 Feb 2014 20:14:11 +0100
parents ce5b33f499c5
children f488692c4903
comparison
equal deleted inserted replaced
346:82f9e92379b0 347:f1ba38043d78
20 from pyjamas.ui.DragWidget import DragWidget 20 from pyjamas.ui.DragWidget import DragWidget
21 from pyjamas.ui.FileUpload import FileUpload 21 from pyjamas.ui.FileUpload import FileUpload
22 from pyjamas import Window 22 from pyjamas import Window
23 from nativedom import NativeDOM 23 from nativedom import NativeDOM
24 from sat_frontends.tools import xmltools 24 from sat_frontends.tools import xmltools
25 import re
25 26
26 dom = NativeDOM() 27 dom = NativeDOM()
27 28
28 29
29 def html_sanitize(html): 30 def html_sanitize(html):
30 """Naive sanitization of HTML""" 31 """Naive sanitization of HTML"""
31 return html.replace('<', '&lt;').replace('>', '&gt;') 32 return html.replace('<', '&lt;').replace('>', '&gt;')
33
34
35 def html_clean(html):
36 """
37 Remove HTML markup from the given string.
38 Copied from nltk.clean_html (http://www.nltk.org/)
39
40 @param html: the HTML string to be cleaned
41 @type html: C{string}
42 @rtype: C{string}
43 """
44
45 # First we remove inline JavaScript/CSS:
46 cleaned = re.sub(r"(?is)<(script|style).*?>.*?(</\1>)", "", html.strip())
47 # Then we remove html comments. This has to be done before removing regular
48 # tags since comments can contain '>' characters.
49 cleaned = re.sub(r"(?s)<!--(.*?)-->[\n]?", "", cleaned)
50 # Next we can remove the remaining tags:
51 cleaned = re.sub(r"(?s)<.*?>", " ", cleaned)
52 # Finally, we deal with whitespace
53 cleaned = re.sub(r"&nbsp;", " ", cleaned)
54 cleaned = re.sub(r"  ", " ", cleaned)
55 cleaned = re.sub(r"  ", " ", cleaned)
56 return cleaned.strip()
32 57
33 58
34 def inlineRoot(xhtml): 59 def inlineRoot(xhtml):
35 """ make root element inline """ 60 """ make root element inline """
36 doc = dom.parseString(xhtml) 61 doc = dom.parseString(xhtml)