changeset 665:6a64e0a759e6

plugin text syntaxes: this plugin manages rich text syntax conversions and cleaning.
author Goffi <goffi@goffi.org>
date Tue, 05 Nov 2013 22:40:46 +0100 (2013-11-05)
parents cac98ca76479
children 2a7185b8452c
files src/plugins/plugin_misc_text_syntaxes.py
diffstat 1 files changed, 187 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/plugin_misc_text_syntaxes.py	Tue Nov 05 22:40:46 2013 +0100
@@ -0,0 +1,187 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# SAT plugin for managing various text syntaxes
+# Copyright (C) 2009, 2010, 2011, 2012, 2013 Jérôme Poisson (goffi@goffi.org)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from logging import debug, info, warning, error
+
+from wokkel import disco, pubsub
+from twisted.internet import defer
+from twisted.internet.threads import deferToThread
+from lxml.html import defs, clean
+
+# Category and name of the parameter holding the syntax selected by a profile
+CATEGORY = "Composition"
+NAME = "Syntax"
+# Name of the pivot syntax; also the initial default value of the parameter
+_SYNTAX_XHTML = "XHTML"
+
+
+# Plugin metadata; "main" is the name of the class implementing the plugin.
+# NOTE(review): presumably read by the SàT plugin loader, and `_` is assumed
+# to be the gettext function installed as a builtin by the host -- confirm.
+PLUGIN_INFO = {
+    "name": "Text syntaxes",
+    "import_name": "TEXT-SYNTAXES",
+    "type": "MISC",
+    "protocols": [],
+    "dependencies": [],
+    "main": "TextSyntaxes",
+    "handler": "no",
+    "description": _("""Management of various text syntaxes (XHTML-IM, Markdown, etc)""")
+}
+
+class UnknownSyntax(Exception):
+    pass
+
+class TextSyntaxes(object):
+    """ Text conversion class
+    XHTML utf-8 is used as intermediate language for conversions
+    """
+
+    OPT_DEFAULT = "DEFAULT"
+    OPT_HIDDEN = "HIDDEN"
+    OPT_NO_THREAD = "NO_THREAD"
+    SYNTAX_XHTML = _SYNTAX_XHTML
+    SYNTAX_MARKDOWN = "markdown"
+
+    params = """
+    <params>
+    <individual>
+    <category name="%(category_name)s" label="%(category_label)s">
+        <param name="%(name)s" label="%(label)s"
+           value="%(default)s" type="list" security="0">
+            %(options)s
+        </param>
+    </category>
+    </individual>
+    </params>
+    """
+
+    params_data = {
+        'category_name': CATEGORY,
+        'category_label': _(CATEGORY), #FIXME: gof:  vérifier que gettext gère ça
+        'name': NAME,
+        'label': _(NAME),
+        'default': _SYNTAX_XHTML,
+        'syntaxes': {},
+        }
+
+    def __init__(self, host):
+        info(_("Text syntaxes plugin initialization"))
+        self.host = host
+        self.syntaxes = {}
+        self.addSyntax(self.SYNTAX_XHTML, lambda xhtml: defer.succeed(xhtml), lambda xhtml: defer.succeed(xhtml),
+                       TextSyntaxes.OPT_NO_THREAD)
+        try:
+            import markdown, html2text
+            self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, html2text.html2text, [TextSyntaxes.OPT_DEFAULT])
+        except ImportError:
+            warning("markdown or html2text not found, can't use Markdown syntax")
+        host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssb', out_sign='s',
+                              async=True, method=self.convert)
+
+        # TODO: gof: nettoyage XHTML avec lxml
+
+    def _updateParamOptions(self):
+        data_synt = TextSyntaxes.params_data['syntaxes']
+        syntaxes = []
+
+        for syntax in data_synt.keys():
+            flags = data_synt[syntax]["flags"]
+            if TextSyntaxes.OPT_HIDDEN not in flags:
+                syntaxes.append(syntax)
+
+        syntaxes.sort(key=unicode.lower)
+        options = []
+
+        for syntax in syntaxes:
+            options.append(u'<option value="%s" />' % syntax)
+
+        TextSyntaxes.params_data["options"] = u'\n'.join(options)
+        self.host.memory.updateParams(TextSyntaxes.params % TextSyntaxes.params_data)
+
+    def getFavoriteSyntax(self, profile):
+        """ Return the selected syntax for the given profile
+
+        @param profile: %(doc_profile)s
+        @return: profile selected syntax
+        """
+        return self.host.memory.getParamA(CATEGORY, NAME , profile_key=profile)
+
+    def clean_xhtml(self, xhtml):
+        """ Clean XHTML text by removing potentially dangerous/malicious parts
+        @param xhtml: raw xhtml text to clean
+        """
+        # FIXME: styles are allowed but not cleaned, they have to be cleaned (whitelist ? cssutils ?) !
+        safe_attrs = defs.safe_attrs.union(('style',))
+        cleaner = clean.Cleaner(style=False,
+                                add_nofollow=False,
+                                safe_attrs=safe_attrs)
+        d = deferToThread(cleaner.clean_html, xhtml)
+        return d
+
+    def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True):
+        """ Convert a text between two syntaxes
+        @param text: text to convert
+        @param syntax_from: source syntax (e.g. "markdown")
+        @param syntax_to: dest syntax (e.g.: "XHTML")
+        @param safe: clean resulting XHTML to avoid malicious code if True
+        @return: converted text """
+
+        syntaxes = TextSyntaxes.params_data['syntaxes']
+        if syntax_from not in syntaxes:
+            raise UnknownSyntax(syntax_from)
+        if syntax_to not in syntaxes:
+            raise UnknownSyntax(syntax_to)
+        d = None
+
+        if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_from]["flags"]:
+            d = syntaxes[syntax_from]["to"](text)
+        else:
+            d = deferToThread(syntaxes[syntax_from]["to"], text)
+
+        #TODO: keep only body element and change it to a div here ?
+
+        if safe:
+            d.addCallback(self.clean_xhtml)
+
+        if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]:
+            d.addCallback(syntaxes[syntax_to]["from"])
+        else:
+            d.addCallback(lambda xhtml: deferToThread(syntaxes[syntax_to]["from"], xhtml))
+
+        return d
+
+    def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags = None):
+        """ Add a new syntax to the manager
+        @param name: unique name of the syntax
+        @param to_xhtml_cb: callback to convert from syntax to XHTML
+        @param from_xhtml_cb: callback to convert from XHTML to syntax
+        @param flags: set of optional flags, can be:
+            TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one)
+            TextSyntaxes.OPT_HIDDEN: do not show in parameters
+            TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback must then return a deferred)
+
+        """
+        name = unicode(name)
+        flags = flags or []
+        if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags:
+            raise ValueError("%s and %s are mutually exclusive" % (TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT))
+
+        syntaxes = TextSyntaxes.params_data['syntaxes']
+        syntaxes[name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags}
+        if TextSyntaxes.OPT_DEFAULT in flags:
+            syntaxes = TextSyntaxes.params_data['default'] = name
+
+        self._updateParamOptions()
+