Mercurial > libervia-backend
changeset 705:6c8a119dcc94
plugin text syntaxes: clean_xhtml now accepts lxml's HtmlElement to avoid parsing the same XML twice
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 14 Nov 2013 17:53:47 +0100 |
parents | 3c304929af74 |
children | 80e9d3ecb272 |
files | src/plugins/plugin_misc_text_syntaxes.py |
diffstat | 1 files changed, 10 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/plugin_misc_text_syntaxes.py Thu Nov 14 17:51:35 2013 +0100
+++ b/src/plugins/plugin_misc_text_syntaxes.py Thu Nov 14 17:53:47 2013 +0100
@@ -22,6 +22,7 @@
 from wokkel import disco, pubsub
 from twisted.internet import defer
 from twisted.internet.threads import deferToThread
+from sat.core import exceptions
 from lxml import html
 from lxml.html import clean
 import re
@@ -131,7 +132,7 @@
 
     def clean_xhtml(self, xhtml):
         """ Clean XHTML text by removing potentially dangerous/malicious parts
-        @param xhtml: raw xhtml text to clean
+        @param xhtml: raw xhtml text to clean (or lxml's HtmlElement)
         """
         def blocking_cleaning(xhtml):
             """ Clean XHTML and style attributes """
@@ -157,7 +158,13 @@
                     cleaned_styles.append((key, value))
                 return "; ".join(["%s: %s" % (key, value) for key, value in cleaned_styles])
 
-            xhtml_elt = html.fromstring(xhtml)
+            if isinstance(xhtml, basestring):
+                xhtml_elt = html.fromstring(xhtml)
+            elif isinstance(xhtml, html.HtmlElement):
+                xhtml_elt = xhtml
+            else:
+                error("Only strings and HtmlElements can be cleaned")
+                raise exceptions.DataError
             cleaner = clean.Cleaner(style=False,
                                     add_nofollow=False,
                                     safe_attrs=SAFE_ATTRS)
@@ -166,8 +173,7 @@
                 elt.set("style", clean_style(elt.get('style')))
             return html.tostring(xhtml_elt, method='xml')
 
-        d = deferToThread(blocking_cleaning, xhtml)
-        return d
+        return deferToThread(blocking_cleaning, xhtml)
 
     def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True):
         """ Convert a text between two syntaxes