Mercurial > libervia-backend
comparison src/plugins/plugin_misc_text_syntaxes.py @ 705:6c8a119dcc94
plugin text syntaxes: clean_xhtml now accepts lxml's HtmlElement to avoid parsing the same XML twice
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 14 Nov 2013 17:53:47 +0100 |
parents | a25db3fe3959 |
children | 312a2842b2b8 |
comparison
equal
deleted
inserted
replaced
704:3c304929af74 | 705:6c8a119dcc94 |
---|---|
20 from logging import debug, info, error, warning | 20 from logging import debug, info, error, warning |
21 | 21 |
22 from wokkel import disco, pubsub | 22 from wokkel import disco, pubsub |
23 from twisted.internet import defer | 23 from twisted.internet import defer |
24 from twisted.internet.threads import deferToThread | 24 from twisted.internet.threads import deferToThread |
| | 25 from sat.core import exceptions |
25 from lxml import html | 26 from lxml import html |
26 from lxml.html import clean | 27 from lxml.html import clean |
27 import re | 28 import re |
28 | 29 |
29 | 30 |
129 """ | 130 """ |
130 return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile) | 131 return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile) |
131 | 132 |
132 def clean_xhtml(self, xhtml): | 133 def clean_xhtml(self, xhtml): |
133 """ Clean XHTML text by removing potentially dangerous/malicious parts | 134 """ Clean XHTML text by removing potentially dangerous/malicious parts |
134 @param xhtml: raw xhtml text to clean | 135 @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) |
135 """ | 136 """ |
136 def blocking_cleaning(xhtml): | 137 def blocking_cleaning(xhtml): |
137 """ Clean XHTML and style attributes """ | 138 """ Clean XHTML and style attributes """ |
138 | 139 |
139 def clean_style(styles_raw): | 140 def clean_style(styles_raw): |
155 if value == "none": | 156 if value == "none": |
156 continue | 157 continue |
157 cleaned_styles.append((key, value)) | 158 cleaned_styles.append((key, value)) |
158 return "; ".join(["%s: %s" % (key, value) for key, value in cleaned_styles]) | 159 return "; ".join(["%s: %s" % (key, value) for key, value in cleaned_styles]) |
159 | 160 |
160 xhtml_elt = html.fromstring(xhtml) | 161 if isinstance(xhtml, basestring): |
| | 162 xhtml_elt = html.fromstring(xhtml) |
| | 163 elif isinstance(xhtml, html.HtmlElement): |
| | 164 xhtml_elt = xhtml |
| | 165 else: |
| | 166 error("Only strings and HtmlElements can be cleaned") |
| | 167 raise exceptions.DataError |
161 cleaner = clean.Cleaner(style=False, | 168 cleaner = clean.Cleaner(style=False, |
162 add_nofollow=False, | 169 add_nofollow=False, |
163 safe_attrs=SAFE_ATTRS) | 170 safe_attrs=SAFE_ATTRS) |
164 xhtml_elt = cleaner.clean_html(xhtml_elt) | 171 xhtml_elt = cleaner.clean_html(xhtml_elt) |
165 for elt in xhtml_elt.xpath("//*[@style]"): | 172 for elt in xhtml_elt.xpath("//*[@style]"): |
166 elt.set("style", clean_style(elt.get('style'))) | 173 elt.set("style", clean_style(elt.get('style'))) |
167 return html.tostring(xhtml_elt, method='xml') | 174 return html.tostring(xhtml_elt, method='xml') |
168 | 175 |
169 d = deferToThread(blocking_cleaning, xhtml) | 176 return deferToThread(blocking_cleaning, xhtml) |
170 return d | |
171 | 177 |
172 def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True): | 178 def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True): |
173 """ Convert a text between two syntaxes | 179 """ Convert a text between two syntaxes |
174 @param text: text to convert | 180 @param text: text to convert |
175 @param syntax_from: source syntax (e.g. "markdown") | 181 @param syntax_from: source syntax (e.g. "markdown") |