Mercurial > libervia-backend
changeset 665:6a64e0a759e6
plugin text syntaxes: this plugin manages rich text syntax conversions and cleaning.
author | Goffi <goffi@goffi.org> |
---|---|
date | Tue, 05 Nov 2013 22:40:46 +0100 |
parents | cac98ca76479 |
children | 2a7185b8452c |
files | src/plugins/plugin_misc_text_syntaxes.py |
diffstat | 1 files changed, 187 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/plugin_misc_text_syntaxes.py Tue Nov 05 22:40:46 2013 +0100 @@ -0,0 +1,187 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# SAT plugin for managing various text syntaxes +# Copyright (C) 2009, 2010, 2011, 2012, 2013 Jérôme Poisson (goffi@goffi.org) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from logging import debug, info, error + +from wokkel import disco, pubsub +from twisted.internet import defer +from twisted.internet.threads import deferToThread +from lxml.html import defs, clean + +CATEGORY = "Composition" +NAME = "Syntax" +_SYNTAX_XHTML = "XHTML" + + +PLUGIN_INFO = { + "name": "Text syntaxes", + "import_name": "TEXT-SYNTAXES", + "type": "MISC", + "protocols": [], + "dependencies": [], + "main": "TextSyntaxes", + "handler": "no", + "description": _("""Management of various text syntaxes (XHTML-IM, Markdown, etc)""") +} + +class UnknownSyntax(Exception): + pass + +class TextSyntaxes(object): + """ Text conversion class + XHTML utf-8 is used as intermediate language for conversions + """ + + OPT_DEFAULT = "DEFAULT" + OPT_HIDDEN = "HIDDEN" + OPT_NO_THREAD = "NO_THREAD" + SYNTAX_XHTML = _SYNTAX_XHTML + SYNTAX_MARKDOWN = "markdown" + + params = """ + <params> + <individual> + <category name="%(category_name)s" label="%(category_label)s"> + <param name="%(name)s" label="%(label)s" + 
value="%(default)s" type="list" security="0"> + %(options)s + </param> + </category> + </individual> + </params> + """ + + params_data = { + 'category_name': CATEGORY, + 'category_label': _(CATEGORY), #FIXME: gof: vérifier que gettext gère ça + 'name': NAME, + 'label': _(NAME), + 'default': _SYNTAX_XHTML, + 'syntaxes': {}, + } + + def __init__(self, host): + info(_("Text syntaxes plugin initialization")) + self.host = host + self.syntaxes = {} + self.addSyntax(self.SYNTAX_XHTML, lambda xhtml: defer.succeed(xhtml), lambda xhtml: defer.succeed(xhtml), + TextSyntaxes.OPT_NO_THREAD) + try: + import markdown, html2text + self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, html2text.html2text, [TextSyntaxes.OPT_DEFAULT]) + except ImportError: + warning("markdown or html2text not found, can't use Markdown syntax") + host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssb', out_sign='s', + async=True, method=self.convert) + + # TODO: gof: nettoyage XHTML avec lxml + + def _updateParamOptions(self): + data_synt = TextSyntaxes.params_data['syntaxes'] + syntaxes = [] + + for syntax in data_synt.keys(): + flags = data_synt[syntax]["flags"] + if TextSyntaxes.OPT_HIDDEN not in flags: + syntaxes.append(syntax) + + syntaxes.sort(key=unicode.lower) + options = [] + + for syntax in syntaxes: + options.append(u'<option value="%s" />' % syntax) + + TextSyntaxes.params_data["options"] = u'\n'.join(options) + self.host.memory.updateParams(TextSyntaxes.params % TextSyntaxes.params_data) + + def getFavoriteSyntax(self, profile): + """ Return the selected syntax for the given profile + + @param profile: %(doc_profile)s + @return: profile selected syntax + """ + return self.host.memory.getParamA(CATEGORY, NAME , profile_key=profile) + + def clean_xhtml(self, xhtml): + """ Clean XHTML text by removing potentially dangerous/malicious parts + @param xhtml: raw xhtml text to clean + """ + # FIXME: styles are allowed but not cleaned, they have to be cleaned (whitelist ? cssutils ?) ! 
+ safe_attrs = defs.safe_attrs.union(('style',)) + cleaner = clean.Cleaner(style=False, + add_nofollow=False, + safe_attrs=safe_attrs) + d = deferToThread(cleaner.clean_html, xhtml) + return d + + def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True): + """ Convert a text between two syntaxes + @param text: text to convert + @param syntax_from: source syntax (e.g. "markdown") + @param syntax_to: dest syntax (e.g.: "XHTML") + @param safe: clean resulting XHTML to avoid malicious code if True + @return: converted text """ + + syntaxes = TextSyntaxes.params_data['syntaxes'] + if syntax_from not in syntaxes: + raise UnknownSyntax(syntax_from) + if syntax_to not in syntaxes: + raise UnknownSyntax(syntax_to) + d = None + + if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_from]["flags"]: + d = syntaxes[syntax_from]["to"](text) + else: + d = deferToThread(syntaxes[syntax_from]["to"], text) + + #TODO: keep only body element and change it to a div here ? + + if safe: + d.addCallback(self.clean_xhtml) + + if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]: + d.addCallback(syntaxes[syntax_to]["from"]) + else: + d.addCallback(lambda xhtml: deferToThread(syntaxes[syntax_to]["from"], xhtml)) + + return d + + def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags = None): + """ Add a new syntax to the manager + @param name: unique name of the syntax + @param to_xhtml_cb: callback to convert from syntax to XHTML + @param from_xhtml_cb: callback to convert from XHTML to syntax + @param flags: set of optional flags, can be: + TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one) + TextSyntaxes.OPT_HIDDEN: do not show in parameters + TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback must then return a deferred) + + """ + name = unicode(name) + flags = flags or [] + if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags: + raise ValueError("%s and %s are mutually exclusive" % 
(TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT)) + + syntaxes = TextSyntaxes.params_data['syntaxes'] + syntaxes[name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags} + if TextSyntaxes.OPT_DEFAULT in flags: + syntaxes = TextSyntaxes.params_data['default'] = name + + self._updateParamOptions() +