comparison src/plugins/plugin_misc_text_syntaxes.py @ 665:6a64e0a759e6

plugin text syntaxes: this plugin manages rich text syntax conversions and cleaning.
author Goffi <goffi@goffi.org>
date Tue, 05 Nov 2013 22:40:46 +0100
parents
children fb0b1100c908
comparison
equal deleted inserted replaced
664:cac98ca76479 665:6a64e0a759e6
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
4 # SAT plugin for managing various text syntaxes
5 # Copyright (C) 2009, 2010, 2011, 2012, 2013 Jérôme Poisson (goffi@goffi.org)
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
16
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
from logging import debug, info, warning, error

from wokkel import disco, pubsub
from twisted.internet import defer
from twisted.internet.threads import deferToThread
from lxml.html import defs, clean
26
# Category and name of the parameter holding the user's favorite syntax
CATEGORY = "Composition"
NAME = "Syntax"
# Name of the pivot syntax (initial default value of the parameter)
_SYNTAX_XHTML = "XHTML"


# Plugin metadata (presumably read by the plugin loader -- confirm against host)
PLUGIN_INFO = {
    "name": "Text syntaxes",
    "import_name": "TEXT-SYNTAXES",
    "type": "MISC",
    "protocols": [],
    "dependencies": [],
    "main": "TextSyntaxes",
    "handler": "no",
    "description": _("Management of various text syntaxes (XHTML-IM, Markdown, etc)"),
}
42
class UnknownSyntax(Exception):
    """Raised when a conversion involves a syntax which is not registered"""
45
class TextSyntaxes(object):
    """ Text conversion class

    XHTML utf-8 is used as intermediate language for conversions: a text is
    first converted from its source syntax to XHTML, then from XHTML to the
    destination syntax.
    Registered syntaxes are stored at class level, in params_data['syntaxes'].
    """

    # flags accepted by addSyntax
    OPT_DEFAULT = "DEFAULT"
    OPT_HIDDEN = "HIDDEN"
    OPT_NO_THREAD = "NO_THREAD"
    # names of the syntaxes registered by __init__
    SYNTAX_XHTML = _SYNTAX_XHTML
    SYNTAX_MARKDOWN = "markdown"

    # XML template of the "favorite syntax" parameter, filled with params_data
    params = """
    <params>
    <individual>
    <category name="%(category_name)s" label="%(category_label)s">
        <param name="%(name)s" label="%(label)s"
            value="%(default)s" type="list" security="0">
            %(options)s
        </param>
    </category>
    </individual>
    </params>
    """

    # values used to fill the template; 'syntaxes' is the syntaxes registry
    # (name => {"to": callback, "from": callback, "flags": list}),
    # 'options' is added by _updateParamOptions
    params_data = {
        'category_name': CATEGORY,
        'category_label': _(CATEGORY),  # FIXME: gof: check that gettext handles this
        'name': NAME,
        'label': _(NAME),
        'default': _SYNTAX_XHTML,
        'syntaxes': {},
    }

    def __init__(self, host):
        """ Register the built-in syntaxes and the "syntaxConvert" bridge method

        Markdown is registered (as default syntax) only if both markdown and
        html2text modules can be imported.
        @param host: host object, its "bridge" and "memory" attributes are used
        """
        info(_("Text syntaxes plugin initialization"))
        self.host = host
        # NOTE: nothing in this class fills this dict, the registry actually
        # used is TextSyntaxes.params_data['syntaxes']
        self.syntaxes = {}
        # XHTML is the pivot syntax: both conversions are the identity
        self.addSyntax(self.SYNTAX_XHTML, defer.succeed, defer.succeed,
                       [TextSyntaxes.OPT_NO_THREAD])
        try:
            import markdown
            import html2text
        except ImportError:
            warning("markdown or html2text not found, can't use Markdown syntax")
        else:
            # registration is kept out of the try block so that only a missing
            # module is reported as "not found"
            self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, html2text.html2text,
                           [TextSyntaxes.OPT_DEFAULT])
        # "async" is given through a dict because it is a reserved word
        # starting with Python 3.7; the call itself is unchanged
        host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssb', out_sign='s',
                              method=self.convert, **{'async': True})

    # TODO: gof: XHTML cleaning with lxml

    def _updateParamOptions(self):
        """ Rebuild the options of the syntax parameter and update host params

        Syntaxes flagged with OPT_HIDDEN are not listed, the other ones are
        sorted case insensitively.
        """
        registered = TextSyntaxes.params_data['syntaxes']
        visible = sorted((name for name, data in registered.items()
                          if TextSyntaxes.OPT_HIDDEN not in data["flags"]),
                         key=unicode.lower)
        TextSyntaxes.params_data["options"] = u'\n'.join(u'<option value="%s" />' % name
                                                         for name in visible)
        self.host.memory.updateParams(TextSyntaxes.params % TextSyntaxes.params_data)

    def getFavoriteSyntax(self, profile):
        """ Return the selected syntax for the given profile

        @param profile: %(doc_profile)s
        @return: profile selected syntax
        """
        return self.host.memory.getParamA(CATEGORY, NAME, profile_key=profile)

    def clean_xhtml(self, xhtml):
        """ Clean XHTML text by removing potentially dangerous/malicious parts

        The cleaning is done with lxml's Cleaner, in a thread.
        @param xhtml: raw xhtml text to clean
        @return: deferred firing with the result of Cleaner.clean_html
        """
        # FIXME: styles are allowed but not cleaned, they have to be cleaned (whitelist ? cssutils ?) !
        safe_attrs = defs.safe_attrs.union(('style',))
        cleaner = clean.Cleaner(style=False,
                                add_nofollow=False,
                                safe_attrs=safe_attrs)
        return deferToThread(cleaner.clean_html, xhtml)

    def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True):
        """ Convert a text between two syntaxes

        @param text: text to convert
        @param syntax_from: source syntax (e.g. "markdown")
        @param syntax_to: dest syntax (e.g.: "XHTML")
        @param safe: clean resulting XHTML to avoid malicious code if True
        @return: deferred firing with the converted text
        @raise UnknownSyntax: (synchronously) if syntax_from or syntax_to
            is not a registered syntax
        """
        syntaxes = TextSyntaxes.params_data['syntaxes']
        # syntax_from is checked first, as callers may rely on which name is reported
        for syntax in (syntax_from, syntax_to):
            if syntax not in syntaxes:
                raise UnknownSyntax(syntax)

        source = syntaxes[syntax_from]
        dest = syntaxes[syntax_to]
        to_xhtml = source["to"]
        from_xhtml = dest["from"]

        # first step: source syntax => XHTML
        if TextSyntaxes.OPT_NO_THREAD in source["flags"]:
            # the callback returns a deferred itself
            d = to_xhtml(text)
        else:
            d = deferToThread(to_xhtml, text)

        #TODO: keep only body element and change it to a div here ?

        if safe:
            d.addCallback(self.clean_xhtml)

        # second step: XHTML => destination syntax
        if TextSyntaxes.OPT_NO_THREAD in dest["flags"]:
            d.addCallback(from_xhtml)
        else:
            d.addCallback(lambda xhtml: deferToThread(from_xhtml, xhtml))

        return d

    def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags=None):
        """ Add a new syntax to the manager

        @param name: unique name of the syntax
        @param to_xhtml_cb: callback to convert from syntax to XHTML
        @param from_xhtml_cb: callback to convert from XHTML to syntax
        @param flags: set of optional flags (a single flag given as a string is accepted too), can be:
            TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one)
            TextSyntaxes.OPT_HIDDEN: do not show in parameters
            TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback must then return a deferred)
        @raise ValueError: OPT_HIDDEN and OPT_DEFAULT are both in flags
        """
        name = unicode(name)
        flags = flags or []
        if isinstance(flags, basestring):
            # a bare string would turn the "in" tests below into substring searches
            flags = [flags]
        if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags:
            raise ValueError("%s and %s are mutually exclusive" % (TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT))

        TextSyntaxes.params_data['syntaxes'][name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags}
        if TextSyntaxes.OPT_DEFAULT in flags:
            TextSyntaxes.params_data['default'] = name

        self._updateParamOptions()
187