changeset 2781:816be0a23877

plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
author Goffi <goffi@goffi.org>
date Sat, 19 Jan 2019 11:39:02 +0100
parents 85d3240a400f
children b17e6fa1e607
files sat/plugins/plugin_misc_text_syntaxes.py
diffstat 1 files changed, 47 insertions(+), 47 deletions(-) [+]
line wrap: on
line diff
--- a/sat/plugins/plugin_misc_text_syntaxes.py	Sat Jan 19 11:39:02 2019 +0100
+++ b/sat/plugins/plugin_misc_text_syntaxes.py	Sat Jan 19 11:39:02 2019 +0100
@@ -240,55 +240,54 @@
         )
         return failure
 
+    def cleanStyle(self, styles):
+        """Clean unsafe CSS styles
+
+        Remove styles not in the whitelist, or where the value doesn't match the regex
+        @param styles(unicode): CSS styles
+        @return (unicode): cleaned styles
+        """
+        styles = styles.split(";")
+        cleaned_styles = []
+        for style in styles:
+            try:
+                key, value = style.split(":")
+            except ValueError:
+                continue
+            key = key.lower().strip()
+            if key not in STYLES_WHITELIST:
+                continue
+            value = value.lower().strip()
+            if not STYLES_ACCEPTED_VALUE.match(value):
+                continue
+            if value == "none":
+                continue
+            cleaned_styles.append((key, value))
+        return "; ".join(
+            ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]
+        )
+
     def cleanXHTML(self, xhtml):
-        """ Clean XHTML text by removing potentially dangerous/malicious parts
-        @param xhtml: raw xhtml text to clean (or lxml's HtmlElement)
+        """Clean XHTML text by removing potentially dangerous/malicious parts
+
+        @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean
+        @return (unicode): cleaned XHTML
         """
 
-        def blocking_cleaning(xhtml):
-            """ Clean XHTML and style attributes """
-
-            def clean_style(styles_raw):
-                """" Remove styles not in the whitelist,
-                or where the value doesn't match the regex """
-                styles = styles_raw.split(";")
-                cleaned_styles = []
-                for style in styles:
-                    try:
-                        key, value = style.split(":")
-                    except ValueError:
-                        continue
-                    key = key.lower().strip()
-                    if key not in STYLES_WHITELIST:
-                        continue
-                    value = value.lower().strip()
-                    if not STYLES_ACCEPTED_VALUE.match(value):
-                        continue
-                    if value == "none":
-                        continue
-                    cleaned_styles.append((key, value))
-                return "; ".join(
-                    ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]
-                )
-
-            if isinstance(xhtml, basestring):
-                xhtml_elt = html.fromstring(xhtml)
-            elif isinstance(xhtml, html.HtmlElement):
-                xhtml_elt = xhtml
-            else:
-                log.error("Only strings and HtmlElements can be cleaned")
-                raise exceptions.DataError
-            cleaner = clean.Cleaner(
-                style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS
-            )
-            xhtml_elt = cleaner.clean_html(xhtml_elt)
-            for elt in xhtml_elt.xpath("//*[@style]"):
-                elt.set("style", clean_style(elt.get("style")))
-            return html.tostring(xhtml_elt, encoding=unicode, method="xml")
-
-        d = deferToThread(blocking_cleaning, xhtml)
-        d.addErrback(self._logError, action=u"cleaning syntax")
-        return d
+        if isinstance(xhtml, basestring):
+            xhtml_elt = html.fromstring(xhtml)
+        elif isinstance(xhtml, html.HtmlElement):
+            xhtml_elt = xhtml
+        else:
+            log.error("Only strings and HtmlElements can be cleaned")
+            raise exceptions.DataError
+        cleaner = clean.Cleaner(
+            style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS
+        )
+        xhtml_elt = cleaner.clean_html(xhtml_elt)
+        for elt in xhtml_elt.xpath("//*[@style]"):
+            elt.set("style", self.cleanStyle(elt.get("style")))
+        return html.tostring(xhtml_elt, encoding=unicode, method="xml")
 
     def convert(
         self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None
@@ -299,7 +298,8 @@
         @param syntax_from: source syntax (e.g. "markdown")
         @param syntax_to: dest syntax (e.g.: "XHTML")
         @param safe: clean resulting XHTML to avoid malicious code if True
-        @param profile: needed only when syntax_from or syntax_to is set to _SYNTAX_CURRENT
+        @param profile: needed only when syntax_from or syntax_to is set to
+            _SYNTAX_CURRENT
         @return(unicode): converted text
         """
         # FIXME: convert should be abled to handle domish.Element directly