# HG changeset patch # User Goffi # Date 1547894342 -3600 # Node ID 816be0a23877779dc055956811922dfc81fd0a0e # Parent 85d3240a400f534093d6a5b683c9ae61ba9e881d plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that) diff -r 85d3240a400f -r 816be0a23877 sat/plugins/plugin_misc_text_syntaxes.py --- a/sat/plugins/plugin_misc_text_syntaxes.py Sat Jan 19 11:39:02 2019 +0100 +++ b/sat/plugins/plugin_misc_text_syntaxes.py Sat Jan 19 11:39:02 2019 +0100 @@ -240,55 +240,54 @@ ) return failure + def cleanStyle(self, styles): + """"Clean unsafe CSS styles + + Remove styles not in the whitelist, or where the value doesn't match the regex + @param styles_raw(unicode): CSS styles + @return (unicode): cleaned styles + """ + styles = styles.split(";") + cleaned_styles = [] + for style in styles: + try: + key, value = style.split(":") + except ValueError: + continue + key = key.lower().strip() + if key not in STYLES_WHITELIST: + continue + value = value.lower().strip() + if not STYLES_ACCEPTED_VALUE.match(value): + continue + if value == "none": + continue + cleaned_styles.append((key, value)) + return "; ".join( + ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] + ) + def cleanXHTML(self, xhtml): - """ Clean XHTML text by removing potentially dangerous/malicious parts - @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) + """Clean XHTML text by removing potentially dangerous/malicious parts + + @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean + @return (unicode): cleaned XHTML """ - def blocking_cleaning(xhtml): - """ Clean XHTML and style attributes """ - - def clean_style(styles_raw): - """" Remove styles not in the whitelist, - or where the value doesn't match the regex """ - styles = styles_raw.split(";") - cleaned_styles = [] - for style in styles: - try: - key, value = style.split(":") - except ValueError: - continue - key = key.lower().strip() - if key not in STYLES_WHITELIST: - continue - value = value.lower().strip() - if not STYLES_ACCEPTED_VALUE.match(value): - continue - if value == "none": - continue - cleaned_styles.append((key, value)) - return "; ".join( - ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] - ) - - if isinstance(xhtml, basestring): - xhtml_elt = html.fromstring(xhtml) - elif isinstance(xhtml, html.HtmlElement): - xhtml_elt = xhtml - else: - log.error("Only strings and HtmlElements can be cleaned") - raise exceptions.DataError - cleaner = clean.Cleaner( - style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS - ) - xhtml_elt = cleaner.clean_html(xhtml_elt) - for elt in xhtml_elt.xpath("//*[@style]"): - elt.set("style", clean_style(elt.get("style"))) - return html.tostring(xhtml_elt, encoding=unicode, method="xml") - - d = deferToThread(blocking_cleaning, xhtml) - d.addErrback(self._logError, action=u"cleaning syntax") - return d + if isinstance(xhtml, basestring): + xhtml_elt = html.fromstring(xhtml) + elif isinstance(xhtml, html.HtmlElement): + xhtml_elt = xhtml + else: + log.error("Only strings and HtmlElements can be cleaned") + raise exceptions.DataError + cleaner = clean.Cleaner( + style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS + ) + xhtml_elt = cleaner.clean_html(xhtml_elt) + for elt in xhtml_elt.xpath("//*[@style]"): + elt.set("style", self.cleanStyle(elt.get("style"))) + return html.tostring(xhtml_elt, encoding=unicode, method="xml") def convert( self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None @@ -299,7 +298,8 @@ @param syntax_from: source syntax (e.g. "markdown") @param syntax_to: dest syntax (e.g.: "XHTML") @param safe: clean resulting XHTML to avoid malicious code if True - @param profile: needed only when syntax_from or syntax_to is set to _SYNTAX_CURRENT + @param profile: needed only when syntax_from or syntax_to is set to + _SYNTAX_CURRENT @return(unicode): converted text """ # FIXME: convert should be abled to handle domish.Element directly