view src/plugins/plugin_misc_text_syntaxes.py @ 675:abb9a5104de8

new xml tools module (sat.tools.frontends.xml): - inlineRoot display the first XHTML tag as inline.
author Goffi <goffi@goffi.org>
date Sat, 09 Nov 2013 10:17:53 +0100
parents fb0b1100c908
children e98db42cd78c
line wrap: on
line source

#!/usr/bin/python
# -*- coding: utf-8 -*-

# SAT plugin for managing various text syntaxes
# Copyright (C) 2009, 2010, 2011, 2012, 2013 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from logging import debug, info, warning, error

from wokkel import disco, pubsub
from twisted.internet import defer
from twisted.internet.threads import deferToThread
from lxml import html
from lxml.html import clean


# Category and name of the parameter which stores the favorite syntax
CATEGORY = "Composition"
NAME = "Syntax"
# XHTML is the default parameter value and the default conversion target
_SYNTAX_XHTML = "XHTML"


# Plugin description dictionary
# NOTE(review): presumably read by the plugin loader, which is not shown in this file
PLUGIN_INFO = {
    "name": "Text syntaxes",
    "import_name": "TEXT-SYNTAXES",
    "type": "MISC",
    "protocols": [],
    "dependencies": [],
    "main": "TextSyntaxes",
    "handler": "no",
    "description": _("Management of various text syntaxes (XHTML-IM, Markdown, etc)"),
}

class UnknownSyntax(Exception):
    """Raised when a conversion is requested with a syntax which is not registered."""

class TextSyntaxes(object):
    """ Text conversion class
    XHTML utf-8 is used as intermediate language for conversions
    """

    OPT_DEFAULT = "DEFAULT"
    OPT_HIDDEN = "HIDDEN"
    OPT_NO_THREAD = "NO_THREAD"
    SYNTAX_XHTML = _SYNTAX_XHTML
    SYNTAX_MARKDOWN = "markdown"

    params = """
    <params>
    <individual>
    <category name="%(category_name)s" label="%(category_label)s">
        <param name="%(name)s" label="%(label)s"
           value="%(default)s" type="list" security="0">
            %(options)s
        </param>
    </category>
    </individual>
    </params>
    """

    params_data = {
        'category_name': CATEGORY,
        'category_label': _(CATEGORY), #FIXME: gof:  vérifier que gettext gère ça
        'name': NAME,
        'label': _(NAME),
        'default': _SYNTAX_XHTML,
        'syntaxes': {},
        }

    def __init__(self, host):
        info(_("Text syntaxes plugin initialization"))
        self.host = host
        self.syntaxes = {}
        self.addSyntax(self.SYNTAX_XHTML, lambda xhtml: defer.succeed(xhtml), lambda xhtml: defer.succeed(xhtml),
                       TextSyntaxes.OPT_NO_THREAD)
        try:
            import markdown, html2text
            self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, html2text.html2text, [TextSyntaxes.OPT_DEFAULT])
        except ImportError:
            warning("markdown or html2text not found, can't use Markdown syntax")
        host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssb', out_sign='s',
                              async=True, method=self.convert)

        # TODO: gof: nettoyage XHTML avec lxml

    def _updateParamOptions(self):
        data_synt = TextSyntaxes.params_data['syntaxes']
        syntaxes = []

        for syntax in data_synt.keys():
            flags = data_synt[syntax]["flags"]
            if TextSyntaxes.OPT_HIDDEN not in flags:
                syntaxes.append(syntax)

        syntaxes.sort(key=unicode.lower)
        options = []

        for syntax in syntaxes:
            options.append(u'<option value="%s" />' % syntax)

        TextSyntaxes.params_data["options"] = u'\n'.join(options)
        self.host.memory.updateParams(TextSyntaxes.params % TextSyntaxes.params_data)

    def getFavoriteSyntax(self, profile):
        """ Return the selected syntax for the given profile

        @param profile: %(doc_profile)s
        @return: profile selected syntax
        """
        return self.host.memory.getParamA(CATEGORY, NAME , profile_key=profile)

    def clean_xhtml(self, xhtml):
        """ Clean XHTML text by removing potentially dangerous/malicious parts
        @param xhtml: raw xhtml text to clean
        """
        # FIXME: styles are allowed but not cleaned, they have to be cleaned (whitelist ? cssutils ?) !
        def blocking_cleaning(xhtml):
            safe_attrs = html.defs.safe_attrs.union(('style',))
            xhtml_elt = html.fromstring(xhtml)
            cleaner = clean.Cleaner(style=False,
                                    add_nofollow=False,
                                    safe_attrs=safe_attrs)
            return html.tostring(cleaner.clean_html(xhtml_elt), method='xml')

        d = deferToThread(blocking_cleaning, xhtml)
        return d

    def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True):
        """ Convert a text between two syntaxes
        @param text: text to convert
        @param syntax_from: source syntax (e.g. "markdown")
        @param syntax_to: dest syntax (e.g.: "XHTML")
        @param safe: clean resulting XHTML to avoid malicious code if True
        @return: converted text """

        syntaxes = TextSyntaxes.params_data['syntaxes']
        if syntax_from not in syntaxes:
            raise UnknownSyntax(syntax_from)
        if syntax_to not in syntaxes:
            raise UnknownSyntax(syntax_to)
        d = None

        if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_from]["flags"]:
            d = syntaxes[syntax_from]["to"](text)
        else:
            d = deferToThread(syntaxes[syntax_from]["to"], text)

        #TODO: keep only body element and change it to a div here ?

        if safe:
            d.addCallback(self.clean_xhtml)

        if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]:
            d.addCallback(syntaxes[syntax_to]["from"])
        else:
            d.addCallback(lambda xhtml: deferToThread(syntaxes[syntax_to]["from"], xhtml))

        return d

    def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags = None):
        """ Add a new syntax to the manager
        @param name: unique name of the syntax
        @param to_xhtml_cb: callback to convert from syntax to XHTML
        @param from_xhtml_cb: callback to convert from XHTML to syntax
        @param flags: set of optional flags, can be:
            TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one)
            TextSyntaxes.OPT_HIDDEN: do not show in parameters
            TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback must then return a deferred)

        """
        name = unicode(name)
        flags = flags or []
        if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags:
            raise ValueError("%s and %s are mutually exclusive" % (TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT))

        syntaxes = TextSyntaxes.params_data['syntaxes']
        syntaxes[name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags}
        if TextSyntaxes.OPT_DEFAULT in flags:
            syntaxes = TextSyntaxes.params_data['default'] = name

        self._updateParamOptions()