Mercurial > libervia-backend
view src/plugins/plugin_misc_text_syntaxes.py @ 675:abb9a5104de8
new xml tools module (sat.tools.frontends.xml):
- inlineRoot display the first XHTML tag as inline.
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 09 Nov 2013 10:17:53 +0100 |
parents | fb0b1100c908 |
children | e98db42cd78c |
line wrap: on
line source
#!/usr/bin/python
# -*- coding: utf-8 -*-

# SAT plugin for managing various text syntaxes
# Copyright (C) 2009, 2010, 2011, 2012, 2013 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from logging import debug, info, error, warning
from wokkel import disco, pubsub
from twisted.internet import defer
from twisted.internet.threads import deferToThread
from lxml import html
from lxml.html import clean


# Category and name of the parameter holding the syntax selected by a profile
CATEGORY = "Composition"
NAME = "Syntax"
# Name of the pivot syntax, every conversion goes through it
_SYNTAX_XHTML = "XHTML"

PLUGIN_INFO = {
    "name": "Text syntaxes",
    "import_name": "TEXT-SYNTAXES",
    "type": "MISC",
    "protocols": [],
    "dependencies": [],
    "main": "TextSyntaxes",
    "handler": "no",
    "description": _("""Management of various text syntaxes (XHTML-IM, Markdown, etc)""")
}


class UnknownSyntax(Exception):
    """Raised by TextSyntaxes.convert when a syntax name is not registered"""
    pass


class TextSyntaxes(object):
    """ Text conversion class
    XHTML utf-8 is used as intermediate language for conversions

    The syntaxes registry (params_data['syntaxes']) is stored at class level,
    so it is shared by every instance of this class.
    """

    OPT_DEFAULT = "DEFAULT"
    OPT_HIDDEN = "HIDDEN"
    OPT_NO_THREAD = "NO_THREAD"
    SYNTAX_XHTML = _SYNTAX_XHTML
    SYNTAX_MARKDOWN = "markdown"

    # XML template of the parameter, filled with params_data in _updateParamOptions
    params = """
    <params>
    <individual>
    <category name="%(category_name)s" label="%(category_label)s">
        <param name="%(name)s" label="%(label)s" value="%(default)s" type="list" security="0">
            %(options)s
        </param>
    </category>
    </individual>
    </params>
    """

    params_data = {
        'category_name': CATEGORY,
        'category_label': _(CATEGORY),  # FIXME: gof: check that gettext handles this
        'name': NAME,
        'label': _(NAME),
        'default': _SYNTAX_XHTML,
        # syntax name => {"to": callback, "from": callback, "flags": flags}
        'syntaxes': {},
    }

    def __init__(self, host):
        """ Register the built-in syntaxes and the "syntaxConvert" bridge method
        @param host: host instance, its bridge and memory attributes are used
        """
        info(_("Text syntaxes plugin initialization"))
        self.host = host
        self.syntaxes = {}
        # XHTML is the intermediate syntax: both conversions are the identity,
        # already wrapped in a fired deferred (hence OPT_NO_THREAD).
        # Flags are given as a list: with a bare string, the "in flags" tests
        # would be substring tests.
        self.addSyntax(self.SYNTAX_XHTML, defer.succeed, defer.succeed, [TextSyntaxes.OPT_NO_THREAD])
        # Only the imports are guarded, so an error raised while registering
        # the syntax is not mistaken for a missing library
        try:
            import markdown
            import html2text
        except ImportError:
            warning("markdown or html2text not found, can't use Markdown syntax")
        else:
            self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, html2text.html2text, [TextSyntaxes.OPT_DEFAULT])
        # "async" is a reserved word since Python 3.7: passing it through a
        # dict sends the very same keyword argument while keeping this file parsable
        host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssb', out_sign='s',
                              method=self.convert, **{"async": True})

    # TODO: gof: XHTML cleaning with lxml

    def _updateParamOptions(self):
        """ Rebuild the options of the syntax parameter and send them to host.memory
        Syntaxes flagged with OPT_HIDDEN are not listed
        """
        data_synt = TextSyntaxes.params_data['syntaxes']
        # visible syntaxes, sorted case insensitively (names are unicode, see addSyntax)
        syntaxes = sorted((syntax for syntax, data in data_synt.items()
                           if TextSyntaxes.OPT_HIDDEN not in data["flags"]),
                          key=unicode.lower)
        options = [u'<option value="%s" />' % syntax for syntax in syntaxes]
        TextSyntaxes.params_data["options"] = u'\n'.join(options)
        self.host.memory.updateParams(TextSyntaxes.params % TextSyntaxes.params_data)

    def getFavoriteSyntax(self, profile):
        """ Return the selected syntax for the given profile
        @param profile: %(doc_profile)s
        @return: profile selected syntax
        """
        return self.host.memory.getParamA(CATEGORY, NAME, profile_key=profile)

    def clean_xhtml(self, xhtml):
        """ Clean XHTML text by removing potentially dangerous/malicious parts
        The cleaning is run in a thread
        @param xhtml: raw xhtml text to clean
        @return: deferred firing with the cleaned XHTML, serialised as XML
        """
        # FIXME: styles are allowed but not cleaned, they have to be cleaned (whitelist ? cssutils ?) !

        def blocking_cleaning(xhtml):
            # "style" attributes are kept on top of lxml's default safe attributes
            safe_attrs = html.defs.safe_attrs.union(('style',))
            xhtml_elt = html.fromstring(xhtml)
            cleaner = clean.Cleaner(style=False,
                                    add_nofollow=False,
                                    safe_attrs=safe_attrs)
            return html.tostring(cleaner.clean_html(xhtml_elt), method='xml')

        return deferToThread(blocking_cleaning, xhtml)

    def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True):
        """ Convert a text between two syntaxes
        @param text: text to convert
        @param syntax_from: source syntax (e.g. "markdown")
        @param syntax_to: dest syntax (e.g.: "XHTML")
        @param safe: clean resulting XHTML to avoid malicious code if True
        @return: deferred firing with the converted text
        @raise UnknownSyntax: syntax_from or syntax_to is not registered
        """
        syntaxes = TextSyntaxes.params_data['syntaxes']
        for syntax in (syntax_from, syntax_to):
            if syntax not in syntaxes:
                raise UnknownSyntax(syntax)

        # first step: source syntax => XHTML
        to_xhtml = syntaxes[syntax_from]["to"]
        if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_from]["flags"]:
            d = to_xhtml(text)
        else:
            d = deferToThread(to_xhtml, text)

        # TODO: keep only body element and change it to a div here ?

        # the cleaning is done on the intermediate XHTML, before the final conversion
        if safe:
            d.addCallback(self.clean_xhtml)

        # second step: XHTML => destination syntax
        from_xhtml = syntaxes[syntax_to]["from"]
        if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]:
            d.addCallback(from_xhtml)
        else:
            d.addCallback(lambda xhtml: deferToThread(from_xhtml, xhtml))
        return d

    def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags=None):
        """ Add a new syntax to the manager
        @param name: unique name of the syntax
        @param to_xhtml_cb: callback to convert from syntax to XHTML
        @param from_xhtml_cb: callback to convert from XHTML to syntax
        @param flags: list or set of optional flags (not a bare string), can be:
            TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one)
            TextSyntaxes.OPT_HIDDEN: do not show in parameters
            TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback must then return a deferred)
        @raise ValueError: OPT_HIDDEN and OPT_DEFAULT are both set
        """
        name = unicode(name)
        flags = flags or []
        if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags:
            raise ValueError("%s and %s are mutually exclusive" % (TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT))

        syntaxes = TextSyntaxes.params_data['syntaxes']
        syntaxes[name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags}
        if TextSyntaxes.OPT_DEFAULT in flags:
            TextSyntaxes.params_data['default'] = name

        # the parameter is refreshed so the new syntax can be selected
        self._updateParamOptions()