Mercurial > libervia-backend
comparison sat/plugins/plugin_xep_0071.py @ 2562:26edcf3a30eb
core, setup: huge cleaning:
- moved directories from src and frontends/src to sat and sat_frontends, which is the recommended naming convention
- move twisted directory to root
- removed all hacks from setup.py, and added missing dependencies, it is now clean
- use https URL for website in setup.py
- removed "Environment :: X11 Applications :: GTK", as wix is deprecated and removed
- renamed sat.sh to sat and fixed its installation
- added python_requires to specify Python version needed
- replaced glib2reactor, which uses deprecated code, by gtk3reactor
sat can now be installed directly from virtualenv without using --system-site-packages anymore \o/
author | Goffi <goffi@goffi.org> |
---|---|
date | Mon, 02 Apr 2018 19:44:50 +0200 |
parents | src/plugins/plugin_xep_0071.py@0046283a285d |
children | 56f94936df1e |
comparison
equal
deleted
inserted
replaced
2561:bd30dc3ffe5a | 2562:26edcf3a30eb |
---|---|
1 #!/usr/bin/env python2 | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # SAT plugin for XHTML-IM (xep-0071) | |
5 # Copyright (C) 2009-2018 Jérôme Poisson (goffi@goffi.org) | |
6 | |
7 # This program is free software: you can redistribute it and/or modify | |
8 # it under the terms of the GNU Affero General Public License as published by | |
9 # the Free Software Foundation, either version 3 of the License, or | |
10 # (at your option) any later version. | |
11 | |
12 # This program is distributed in the hope that it will be useful, | |
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 # GNU Affero General Public License for more details. | |
16 | |
17 # You should have received a copy of the GNU Affero General Public License | |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | |
20 from sat.core.i18n import _ | |
21 from sat.core.constants import Const as C | |
22 from sat.core import exceptions | |
23 from sat.core.log import getLogger | |
24 log = getLogger(__name__) | |
25 from sat.tools.common import data_format | |
26 | |
27 from twisted.internet import defer | |
28 from wokkel import disco, iwokkel | |
29 from zope.interface import implements | |
30 # from lxml import etree | |
31 try: | |
32 from lxml import html | |
33 except ImportError: | |
34 raise exceptions.MissingModule(u"Missing module lxml, please download/install it from http://lxml.de/") | |
35 try: | |
36 from twisted.words.protocols.xmlstream import XMPPHandler | |
37 except ImportError: | |
38 from wokkel.subprotocols import XMPPHandler | |
39 | |
# namespace of the <html/> wrapper element added to (and looked for in) message stanzas
NS_XHTML_IM = 'http://jabber.org/protocol/xhtml-im'
# namespace of the <body/> elements carried inside the <html/> wrapper
NS_XHTML = 'http://www.w3.org/1999/xhtml'

# plugin metadata; the meaning of each C.PI_* key is defined in sat.core.constants
PLUGIN_INFO = {
    C.PI_NAME: "XHTML-IM Plugin",
    C.PI_IMPORT_NAME: "XEP-0071",
    C.PI_TYPE: "XEP",
    C.PI_PROTOCOLS: ["XEP-0071"],
    # the TEXT-SYNTAXES plugin is retrieved in XEP_0071.__init__
    C.PI_DEPENDENCIES: ["TEXT-SYNTAXES"],
    C.PI_MAIN: "XEP_0071",
    C.PI_HANDLER: "yes",
    C.PI_DESCRIPTION: _("""Implementation of XHTML-IM""")
}
53 | |
# XHTML-IM whitelist: each tag kept by XHTML2XHTML_IM is mapped to the set of
# attributes which may stay on it (every other attribute is removed)
allowed = {
    "a": {"href", "style", "type"},
    "blockquote": {"style"},
    "body": {"style"},
    "br": set(),
    "cite": {"style"},
    "em": set(),
    "img": {"alt", "height", "src", "style", "width"},
    "li": {"style"},
    "ol": {"style"},
    "p": {"style"},
    "span": {"style"},
    "strong": set(),
    "ul": {"style"},
}

# CSS properties which _purgeStyle keeps in a "style" attribute
styles_allowed = [
    "background-color",
    "color",
    "font-family",
    "font-size",
    "font-style",
    "font-weight",
    "margin-left",
    "margin-right",
    "text-align",
    "text-decoration",
]

# tags that we have to kill (we don't keep content)
blacklist = ['script']
73 | |
74 | |
class XEP_0071(object):
    """XHTML-IM (XEP-0071) plugin

    Registers the "XHTML-IM" syntax in the TEXT-SYNTAXES plugin and hooks the
    "MessageReceived" and "sendMessage" triggers, so that XHTML-IM bodies are
    extracted from incoming messages and added to outgoing ones.
    """
    SYNTAX_XHTML_IM = "XHTML-IM"

    def __init__(self, host):
        """
        @param host: main backend object, it must give access to the plugins
            (host.plugins) and to the triggers manager (host.trigger)
        """
        log.info(_("XHTML-IM plugin initialization"))
        self.host = host
        self._s = self.host.plugins["TEXT-SYNTAXES"]
        # XHTML-IM => XHTML needs no change (identity function),
        # XHTML => XHTML-IM is restricted to the subset by XHTML2XHTML_IM
        self._s.addSyntax(self.SYNTAX_XHTML_IM, lambda xhtml: xhtml, self.XHTML2XHTML_IM, [self._s.OPT_HIDDEN])
        host.trigger.add("MessageReceived", self.messageReceivedTrigger)
        host.trigger.add("sendMessage", self.sendMessageTrigger)

    def getHandler(self, client):
        """Return the handler which advertises XHTML-IM in service discovery"""
        return XEP_0071_handler(self)

    def _messagePostTreat(self, data, message_elt, body_elts, client):
        """Callback which manage the post treatment of the message in case of XHTML-IM found

        @param data: data send by MessageReceived trigger through post_treat deferred
        @param message_elt: whole <message> stanza
        @param body_elts: XHTML-IM body elements found
        @param client: client session, given as is to the "xhtml_post_treat" trigger
        @return: deferred firing the data with the extra parameter updated
        """
        # TODO: check if text only body is empty, then try to convert XHTML-IM to pure text and show a warning message
        def converted(xhtml, lang):
            # the key is suffixed with the language when there is one
            if lang:
                data['extra']['xhtml_{}'.format(lang)] = xhtml
            else:
                data['extra']['xhtml'] = xhtml

        defers = []
        for body_elt in body_elts:
            lang = body_elt.getAttribute((C.NS_XML, 'lang'), '')
            treat_d = defer.succeed(None)  # deferred used for treatments
            # NOTE(review): the body is skipped when the trigger point returns
            #   a true value; confirm this is the intended meaning of the
            #   return value of trigger.point
            if self.host.trigger.point("xhtml_post_treat", client, message_elt, body_elt, lang, treat_d):
                continue
            # body_elt is bound as default argument: a plain closure would be
            # evaluated when the callback is run, and if treat_d has been paused
            # by a trigger, body_elt would then be the last element of the loop
            treat_d.addCallback(lambda dummy, body_elt=body_elt: self._s.convert(body_elt.toXml(), self.SYNTAX_XHTML_IM, safe=True))
            treat_d.addCallback(converted, lang)
            defers.append(treat_d)

        d_list = defer.DeferredList(defers)
        d_list.addCallback(lambda dummy: data)
        return d_list

    def _fill_body_text(self, text, data, lang):
        """Add a plain text body to the message data and to the stanza

        @param text: plain text version of the message
        @param data: message data, data['message'] and data['xml'] are updated
        @param lang: language of the body, a false value if there is no language
        """
        data['message'][lang or ''] = text
        message_elt = data['xml']
        body_elt = message_elt.addElement("body", content=text)
        if lang:
            body_elt[(C.NS_XML, 'lang')] = lang

    def _check_body_text(self, data, lang, markup, syntax, defers):
        """check if simple text message exists, and fill if needed

        @param data: message data
        @param lang: language to check, a false value if there is no language
        @param markup: rich content to convert to text if the body is missing
        @param syntax: syntax of the markup
        @param defers: list where the conversion deferred is appended, if any
        """
        if (lang or '') not in data['message']:
            d = self._s.convert(markup, syntax, self._s.SYNTAX_TEXT)
            d.addCallback(self._fill_body_text, data, lang)
            defers.append(d)

    def _sendMessageAddRich(self, data, client):
        """ Construct XHTML-IM node and add it XML element

        @param data: message data as sent by sendMessage callback
        @param client: client session, its profile is used to get the current syntax
        @return: deferred firing data once all conversions are done
        @raise exceptions.InternalError: neither "xhtml" nor "rich" is in data['extra']
        """
        # at this point, either ['extra']['rich'] or ['extra']['xhtml'] exists
        # but both can't exist at the same time
        message_elt = data['xml']
        html_elt = message_elt.addElement((NS_XHTML_IM, 'html'))

        def syntax_converted(xhtml_im, lang):
            # one <body/> is added per language
            body_elt = html_elt.addElement((NS_XHTML, 'body'))
            if lang:
                body_elt[(C.NS_XML, 'lang')] = lang
                data['extra']['xhtml_{}'.format(lang)] = xhtml_im
            else:
                data['extra']['xhtml'] = xhtml_im
            body_elt.addRawXml(xhtml_im)

        syntax = self._s.getCurrentSyntax(client.profile)
        defers = []
        if u'xhtml' in data['extra']:
            # we have directly XHTML
            for lang, xhtml in data_format.getSubDict('xhtml', data['extra']):
                self._check_body_text(data, lang, xhtml, self._s.SYNTAX_XHTML, defers)
                d = self._s.convert(xhtml, self._s.SYNTAX_XHTML, self.SYNTAX_XHTML_IM)
                d.addCallback(syntax_converted, lang)
                defers.append(d)
        elif u'rich' in data['extra']:
            # we have rich syntax to convert
            for lang, rich_data in data_format.getSubDict('rich', data['extra']):
                self._check_body_text(data, lang, rich_data, syntax, defers)
                d = self._s.convert(rich_data, syntax, self.SYNTAX_XHTML_IM)
                d.addCallback(syntax_converted, lang)
                defers.append(d)
        else:
            # the exception was only instantiated before, hiding the error
            raise exceptions.InternalError(u"xhtml or rich should be present at this point")
        d_list = defer.DeferredList(defers)
        d_list.addCallback(lambda dummy: data)
        return d_list

    def messageReceivedTrigger(self, client, message, post_treat):
        """ Check presence of XHTML-IM in message

        @param client: client session
        @param message: received <message> stanza
        @param post_treat: deferred where the post treatment is added
            if an XHTML-IM <html/> element is found
        @return: always True
        """
        try:
            html_elt = next(message.elements(NS_XHTML_IM, 'html'))
        except StopIteration:
            # No XHTML-IM
            pass
        else:
            body_elts = html_elt.elements(NS_XHTML, 'body')
            post_treat.addCallback(self._messagePostTreat, message, body_elts, client)
        return True

    def sendMessageTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """ Check presence of rich text in extra

        @param client: client session
        @param data: message data, data['extra'] is checked
        @param pre_xml_treatments: not used here
        @param post_xml_treatments: deferred where the XHTML-IM construction
            is added if rich or XHTML content is found
        @return: always True
        @raise exceptions.DataError: rich and XHTML contents are both present
        """
        rich = {}
        xhtml = {}
        for key, value in data['extra'].items():
            # the prefix and its separator are removed, the remaining is used as key
            if key.startswith('rich'):
                rich[key[5:]] = value
            elif key.startswith('xhtml'):
                xhtml[key[6:]] = value
        if rich and xhtml:
            raise exceptions.DataError(_(u"Can't have XHTML and rich content at the same time"))
        if rich or xhtml:
            if rich:
                data['rich'] = rich
            else:
                data['xhtml'] = xhtml
            post_xml_treatments.addCallback(self._sendMessageAddRich, client)
        return True

    def _purgeStyle(self, styles_raw):
        """ Remove unauthorised styles according to the XEP-0071

        @param styles_raw: raw styles (value of the style attribute)
        @return: allowed declarations only, as "name: value" joined with "; "
        """
        purged = []

        for declaration in styles_raw.split(';'):
            style_tuple = declaration.strip().split(':')
            if len(style_tuple) != 2:
                # empty declaration, or more than one colon: we ignore it
                continue
            name, value = style_tuple
            name = name.strip()
            if name not in styles_allowed:
                continue
            purged.append((name, value.strip()))

        return u'; '.join([u"%s: %s" % data for data in purged])

    def XHTML2XHTML_IM(self, xhtml):
        """ Convert XHTML document to XHTML_IM subset

        @param xhtml: raw xhtml to convert
        @return: serialised XHTML-IM, without forbidden tags, attributes and styles
        """
        # TODO: more clever tag replacement (replace forbidden tags with equivalents when possible)

        parser = html.HTMLParser(remove_comments=True, encoding='utf-8')
        root = html.fromstring(xhtml, parser=parser)
        body_elt = root.find('body')
        if body_elt is None:
            # we use the whole XML as body if no body element is found
            body_elt = html.Element('body')
            body_elt.append(root)
        else:
            body_elt.attrib.clear()

        to_strip = []
        for elem in body_elt.iter():
            if elem.tag not in allowed:
                # stripping is done after the loop, to not modify the tree while iterating
                to_strip.append(elem)
            else:
                # we remove unallowed attributes
                attrib = elem.attrib
                att_to_remove = set(attrib).difference(allowed[elem.tag])
                for att in att_to_remove:
                    del attrib[att]
                if "style" in attrib:
                    attrib["style"] = self._purgeStyle(attrib["style"])

        for elem in to_strip:
            if elem.tag in blacklist:
                # we need to remove the element and all descendants
                log.debug(u"removing black listed tag: %s" % (elem.tag))
                elem.drop_tree()
            else:
                # only the tag is removed, children and text are kept
                elem.drop_tag()
        if len(body_elt) != 1:
            # zero or several children: the body itself is used as root, renamed to <p/>
            root_elt = body_elt
            body_elt.tag = "p"
        else:
            root_elt = body_elt[0]

        return html.tostring(root_elt, encoding='unicode', method='xml')
268 | |
class XEP_0071_handler(XMPPHandler):
    """Handler returned by XEP_0071.getHandler, it advertises the XHTML-IM feature"""
    # class-body declaration (zope.interface) that this class implements iwokkel.IDisco
    implements(iwokkel.IDisco)

    def __init__(self, plugin_parent):
        """
        @param plugin_parent: the XEP_0071 instance which created this handler
        """
        self.plugin_parent = plugin_parent
        self.host = plugin_parent.host

    def getDiscoInfo(self, requestor, target, nodeIdentifier=''):
        """Return the XHTML-IM namespace as only feature, whatever the arguments are"""
        return [disco.DiscoFeature(NS_XHTML_IM)]

    def getDiscoItems(self, requestor, target, nodeIdentifier=''):
        """Return an empty list: no item is advertised"""
        return []