changeset 674:fb0b1100c908

plugin text_syntaxes: fixed clean_xhtml (it now returns XHTML instead of HTML)
author Goffi <goffi@goffi.org>
date Sat, 09 Nov 2013 10:16:35 +0100
parents 903c4749de15
children abb9a5104de8
files src/plugins/plugin_misc_text_syntaxes.py
diffstat 1 files changed, 12 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/src/plugins/plugin_misc_text_syntaxes.py	Fri Nov 08 16:35:51 2013 +0100
+++ b/src/plugins/plugin_misc_text_syntaxes.py	Sat Nov 09 10:16:35 2013 +0100
@@ -22,7 +22,9 @@
 from wokkel import disco, pubsub
 from twisted.internet import defer
 from twisted.internet.threads import deferToThread
-from lxml.html import defs, clean
+from lxml import html
+from lxml.html import clean
+
 
 CATEGORY = "Composition"
 NAME = "Syntax"
@@ -123,11 +125,15 @@
         @param xhtml: raw xhtml text to clean
         """
         # FIXME: styles are allowed but not cleaned, they have to be cleaned (whitelist ? cssutils ?) !
-        safe_attrs = defs.safe_attrs.union(('style',))
-        cleaner = clean.Cleaner(style=False,
-                                add_nofollow=False,
-                                safe_attrs=safe_attrs)
-        d = deferToThread(cleaner.clean_html, xhtml)
+        def blocking_cleaning(xhtml):
+            safe_attrs = html.defs.safe_attrs.union(('style',))
+            xhtml_elt = html.fromstring(xhtml)
+            cleaner = clean.Cleaner(style=False,
+                                    add_nofollow=False,
+                                    safe_attrs=safe_attrs)
+            return html.tostring(cleaner.clean_html(xhtml_elt), method='xml')
+
+        d = deferToThread(blocking_cleaning, xhtml)
         return d
 
     def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True):