Mercurial > libervia-backend
comparison sat/plugins/plugin_misc_text_syntaxes.py @ 2781:816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 19 Jan 2019 11:39:02 +0100 |
parents | 85d3240a400f |
children | b17e6fa1e607 |
comparison
equal
deleted
inserted
replaced
2780:85d3240a400f | 2781:816be0a23877 |
---|---|
238 log.error( | 238 log.error( |
239 u"Error while {action}: {failure}".format(action=action, failure=failure) | 239 u"Error while {action}: {failure}".format(action=action, failure=failure) |
240 ) | 240 ) |
241 return failure | 241 return failure |
242 | 242 |
243 def cleanStyle(self, styles): | |
244 """Clean unsafe CSS styles | |
245 | |
246 Remove styles not in the whitelist, or where the value doesn't match the regex | |
247 @param styles(unicode): CSS styles | |
248 @return (unicode): cleaned styles | |
249 """ | |
250 styles = styles.split(";") | |
251 cleaned_styles = [] | |
252 for style in styles: | |
253 try: | |
254 key, value = style.split(":") | |
255 except ValueError: | |
256 continue | |
257 key = key.lower().strip() | |
258 if key not in STYLES_WHITELIST: | |
259 continue | |
260 value = value.lower().strip() | |
261 if not STYLES_ACCEPTED_VALUE.match(value): | |
262 continue | |
263 if value == "none": | |
264 continue | |
265 cleaned_styles.append((key, value)) | |
266 return "; ".join( | |
267 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] | |
268 ) | |
269 | |
243 def cleanXHTML(self, xhtml): | 270 def cleanXHTML(self, xhtml): |
244 """ Clean XHTML text by removing potentially dangerous/malicious parts | 271 """Clean XHTML text by removing potentially dangerous/malicious parts |
245 @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) | 272 |
246 """ | 273 @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean |
247 | 274 @return (unicode): cleaned XHTML |
248 def blocking_cleaning(xhtml): | 275 """ |
249 """ Clean XHTML and style attributes """ | 276 |
250 | 277 if isinstance(xhtml, basestring): |
251 def clean_style(styles_raw): | 278 xhtml_elt = html.fromstring(xhtml) |
252 """" Remove styles not in the whitelist, | 279 elif isinstance(xhtml, html.HtmlElement): |
253 or where the value doesn't match the regex """ | 280 xhtml_elt = xhtml |
254 styles = styles_raw.split(";") | 281 else: |
255 cleaned_styles = [] | 282 log.error("Only strings and HtmlElements can be cleaned") |
256 for style in styles: | 283 raise exceptions.DataError |
257 try: | 284 cleaner = clean.Cleaner( |
258 key, value = style.split(":") | 285 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS |
259 except ValueError: | 286 ) |
260 continue | 287 xhtml_elt = cleaner.clean_html(xhtml_elt) |
261 key = key.lower().strip() | 288 for elt in xhtml_elt.xpath("//*[@style]"): |
262 if key not in STYLES_WHITELIST: | 289 elt.set("style", self.cleanStyle(elt.get("style"))) |
263 continue | 290 return html.tostring(xhtml_elt, encoding=unicode, method="xml") |
264 value = value.lower().strip() | |
265 if not STYLES_ACCEPTED_VALUE.match(value): | |
266 continue | |
267 if value == "none": | |
268 continue | |
269 cleaned_styles.append((key, value)) | |
270 return "; ".join( | |
271 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] | |
272 ) | |
273 | |
274 if isinstance(xhtml, basestring): | |
275 xhtml_elt = html.fromstring(xhtml) | |
276 elif isinstance(xhtml, html.HtmlElement): | |
277 xhtml_elt = xhtml | |
278 else: | |
279 log.error("Only strings and HtmlElements can be cleaned") | |
280 raise exceptions.DataError | |
281 cleaner = clean.Cleaner( | |
282 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS | |
283 ) | |
284 xhtml_elt = cleaner.clean_html(xhtml_elt) | |
285 for elt in xhtml_elt.xpath("//*[@style]"): | |
286 elt.set("style", clean_style(elt.get("style"))) | |
287 return html.tostring(xhtml_elt, encoding=unicode, method="xml") | |
288 | |
289 d = deferToThread(blocking_cleaning, xhtml) | |
290 d.addErrback(self._logError, action=u"cleaning syntax") | |
291 return d | |
292 | 291 |
293 def convert( | 292 def convert( |
294 self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None | 293 self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None |
295 ): | 294 ): |
296 """Convert a text between two syntaxes | 295 """Convert a text between two syntaxes |
297 | 296 |
298 @param text: text to convert | 297 @param text: text to convert |
299 @param syntax_from: source syntax (e.g. "markdown") | 298 @param syntax_from: source syntax (e.g. "markdown") |
300 @param syntax_to: dest syntax (e.g.: "XHTML") | 299 @param syntax_to: dest syntax (e.g.: "XHTML") |
301 @param safe: clean resulting XHTML to avoid malicious code if True | 300 @param safe: clean resulting XHTML to avoid malicious code if True |
302 @param profile: needed only when syntax_from or syntax_to is set to _SYNTAX_CURRENT | 301 @param profile: needed only when syntax_from or syntax_to is set to |
302 _SYNTAX_CURRENT | |
303 @return(unicode): converted text | 303 @return(unicode): converted text |
304 """ | 304 """ |
305 # FIXME: convert should be able to handle domish.Element directly | 305 # FIXME: convert should be able to handle domish.Element directly |
306 # when dealing with XHTML | 306 # when dealing with XHTML |
307 # TODO: a way for parser to return parsing errors/warnings | 307 # TODO: a way for parser to return parsing errors/warnings |