libervia-backend: sat/plugins/plugin_misc_text

comparison sat/plugins/plugin_misc_text_syntaxes.py @ 2781:816be0a23877

plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)

author	Goffi <goffi@goffi.org>
date	Sat, 19 Jan 2019 11:39:02 +0100
parents	85d3240a400f
children	b17e6fa1e607

comparison

equal deleted inserted replaced

-:85d3240a400f
+:816be0a23877
 log.error(
 u"Error while {action}: {failure}".format(action=action, failure=failure)
 )
 return failure
+def cleanStyle(self, styles):
+"""Clean unsafe CSS styles
+Remove styles not in the whitelist, or where the value doesn't match the regex
+@param styles(unicode): CSS styles
+@return (unicode): cleaned styles
+"""
+styles = styles.split(";")
+cleaned_styles = []
+for style in styles:
+try:
+key, value = style.split(":")
+except ValueError:
+continue
+key = key.lower().strip()
+if key not in STYLES_WHITELIST:
+continue
+value = value.lower().strip()
+if not STYLES_ACCEPTED_VALUE.match(value):
+continue
+if value == "none":
+continue
+cleaned_styles.append((key, value))
+return "; ".join(
+["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]
+)
 def cleanXHTML(self, xhtml):
-""" Clean XHTML text by removing potentially dangerous/malicious parts
+"""Clean XHTML text by removing potentially dangerous/malicious parts
-@param xhtml: raw xhtml text to clean (or lxml's HtmlElement)
-"""
+@param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean
+@return (unicode): cleaned XHTML
-def blocking_cleaning(xhtml):
+"""
-""" Clean XHTML and style attributes """
+if isinstance(xhtml, basestring):
-def clean_style(styles_raw):
+xhtml_elt = html.fromstring(xhtml)
-"""" Remove styles not in the whitelist,
+elif isinstance(xhtml, html.HtmlElement):
-or where the value doesn't match the regex """
+xhtml_elt = xhtml
-styles = styles_raw.split(";")
+else:
-cleaned_styles = []
+log.error("Only strings and HtmlElements can be cleaned")
-for style in styles:
+raise exceptions.DataError
-try:
+cleaner = clean.Cleaner(
-key, value = style.split(":")
+style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS
-except ValueError:
+)
-continue
+xhtml_elt = cleaner.clean_html(xhtml_elt)
-key = key.lower().strip()
+for elt in xhtml_elt.xpath("//*[@style]"):
-if key not in STYLES_WHITELIST:
+elt.set("style", self.cleanStyle(elt.get("style")))
-continue
+return html.tostring(xhtml_elt, encoding=unicode, method="xml")
-value = value.lower().strip()
-if not STYLES_ACCEPTED_VALUE.match(value):
-continue
-if value == "none":
-continue
-cleaned_styles.append((key, value))
-return "; ".join(
-["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]
-)
-if isinstance(xhtml, basestring):
-xhtml_elt = html.fromstring(xhtml)
-elif isinstance(xhtml, html.HtmlElement):
-xhtml_elt = xhtml
-else:
-log.error("Only strings and HtmlElements can be cleaned")
-raise exceptions.DataError
-cleaner = clean.Cleaner(
-style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS
-)
-xhtml_elt = cleaner.clean_html(xhtml_elt)
-for elt in xhtml_elt.xpath("//*[@style]"):
-elt.set("style", clean_style(elt.get("style")))
-return html.tostring(xhtml_elt, encoding=unicode, method="xml")
-d = deferToThread(blocking_cleaning, xhtml)
-d.addErrback(self._logError, action=u"cleaning syntax")
-return d
 def convert(
 self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None
 ):
 """Convert a text between two syntaxes
 @param text: text to convert
 @param syntax_from: source syntax (e.g. "markdown")
 @param syntax_to: dest syntax (e.g.: "XHTML")
 @param safe: clean resulting XHTML to avoid malicious code if True
-@param profile: needed only when syntax_from or syntax_to is set to _SYNTAX_CURRENT
+@param profile: needed only when syntax_from or syntax_to is set to
+_SYNTAX_CURRENT
 @return(unicode): converted text
 """
 # FIXME: convert should be able to handle domish.Element directly
 #        when dealing with XHTML
 # TODO: a way for parser to return parsing errors/warnings

Mercurial > libervia-backend

comparison sat/plugins/plugin_misc_text_syntaxes.py @ 2781:816be0a23877