comparison sat/plugins/plugin_xep_0071.py @ 2562:26edcf3a30eb

core, setup: huge cleaning: - moved directories from src and frontends/src to sat and sat_frontends, which is the recommended naming convention - move twisted directory to root - removed all hacks from setup.py, and added missing dependencies, it is now clean - use https URL for website in setup.py - removed "Environment :: X11 Applications :: GTK", as wix is deprecated and removed - renamed sat.sh to sat and fixed its installation - added python_requires to specify Python version needed - replaced glib2reactor which uses deprecated code by gtk3reactor sat can now be installed directly from virtualenv without using --system-site-packages anymore \o/
author Goffi <goffi@goffi.org>
date Mon, 02 Apr 2018 19:44:50 +0200
parents src/plugins/plugin_xep_0071.py@0046283a285d
children 56f94936df1e
comparison
equal deleted inserted replaced
2561:bd30dc3ffe5a 2562:26edcf3a30eb
1 #!/usr/bin/env python2
2 # -*- coding: utf-8 -*-
3
4 # SAT plugin for XHTML-IM (xep-0071)
5 # Copyright (C) 2009-2018 Jérôme Poisson (goffi@goffi.org)
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
16
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 from sat.core.i18n import _
21 from sat.core.constants import Const as C
22 from sat.core import exceptions
23 from sat.core.log import getLogger
24 log = getLogger(__name__)
25 from sat.tools.common import data_format
26
27 from twisted.internet import defer
28 from wokkel import disco, iwokkel
29 from zope.interface import implements
30 # from lxml import etree
31 try:
32 from lxml import html
33 except ImportError:
34 raise exceptions.MissingModule(u"Missing module lxml, please download/install it from http://lxml.de/")
35 try:
36 from twisted.words.protocols.xmlstream import XMPPHandler
37 except ImportError:
38 from wokkel.subprotocols import XMPPHandler
39
# XML namespaces used by this plugin: the <html/> wrapper element added to
# message stanzas, and the XHTML <body/> elements nested inside it.
NS_XHTML_IM = 'http://jabber.org/protocol/xhtml-im'
NS_XHTML = 'http://www.w3.org/1999/xhtml'

# Plugin metadata, keyed by the C.PI_* constants.
PLUGIN_INFO = {
    C.PI_NAME: "XHTML-IM Plugin",
    C.PI_IMPORT_NAME: "XEP-0071",
    C.PI_TYPE: "XEP",
    C.PI_PROTOCOLS: ["XEP-0071"],
    # XEP_0071.__init__ looks this plugin up in host.plugins
    C.PI_DEPENDENCIES: ["TEXT-SYNTAXES"],
    C.PI_MAIN: "XEP_0071",
    C.PI_HANDLER: "yes",
    C.PI_DESCRIPTION: _("""Implementation of XHTML-IM"""),
}
53
# Whitelist used by XEP_0071.XHTML2XHTML_IM: maps each tag that is kept to the
# set of attributes that may stay on it. Tags missing from this mapping are
# stripped, and attributes missing from the tag's set are deleted.
allowed = {
    "a": {"href", "style", "type"},
    "blockquote": {"style"},
    "body": {"style"},
    "br": set(),
    "cite": {"style"},
    "em": set(),
    "img": {"alt", "height", "src", "style", "width"},
    "li": {"style"},
    "ol": {"style"},
    "p": {"style"},
    "span": {"style"},
    "strong": set(),
    "ul": {"style"},
}

# CSS property names that XEP_0071._purgeStyle keeps in a "style" attribute.
styles_allowed = [
    "background-color",
    "color",
    "font-family",
    "font-size",
    "font-style",
    "font-weight",
    "margin-left",
    "margin-right",
    "text-align",
    "text-decoration",
]

# Tags that are removed together with their content (other non-whitelisted
# tags only lose the tag itself).
blacklist = ['script']
73
74
class XEP_0071(object):
    """XHTML-IM (XEP-0071) plugin.

    Registers an "XHTML-IM" syntax with the TEXT-SYNTAXES plugin and hooks the
    "MessageReceived" and "sendMessage" triggers to read/write the
    <html xmlns='http://jabber.org/protocol/xhtml-im'/> payload of messages.
    """
    SYNTAX_XHTML_IM = "XHTML-IM"

    def __init__(self, host):
        """Register the XHTML-IM syntax and the message triggers.

        @param host: object exposing ``plugins`` (with a "TEXT-SYNTAXES" entry)
            and ``trigger``
        """
        log.info(_("XHTML-IM plugin initialization"))
        self.host = host
        self._s = self.host.plugins["TEXT-SYNTAXES"]
        # XHTML-IM -> XHTML is the identity, XHTML -> XHTML-IM goes through the
        # whitelist filter of XHTML2XHTML_IM
        self._s.addSyntax(self.SYNTAX_XHTML_IM, lambda xhtml: xhtml, self.XHTML2XHTML_IM, [self._s.OPT_HIDDEN])
        host.trigger.add("MessageReceived", self.messageReceivedTrigger)
        host.trigger.add("sendMessage", self.sendMessageTrigger)

    def getHandler(self, client):
        """Return a new XEP_0071_handler bound to this plugin instance."""
        return XEP_0071_handler(self)

    def _messagePostTreat(self, data, message_elt, body_elts, client):
        """Callback which manage the post treatment of the message in case of XHTML-IM found

        Each XHTML-IM body is converted with the TEXT-SYNTAXES plugin and the
        result is stored in data['extra'] under 'xhtml' (no language) or
        'xhtml_<lang>'.

        @param data: data send by MessageReceived trigger through post_treat deferred
        @param message_elt: whole <message> stanza
        @param body_elts: XHTML-IM body elements found
        @param client: passed through to the "xhtml_post_treat" trigger point
        @return: DeferredList firing with the data with the extra parameter updated
        """
        # TODO: check if text only body is empty, then try to convert XHTML-IM to pure text and show a warning message
        def converted(xhtml, lang):
            if lang:
                data['extra']['xhtml_{}'.format(lang)] = xhtml
            else:
                data['extra']['xhtml'] = xhtml

        defers = []
        for body_elt in body_elts:
            lang = body_elt.getAttribute((C.NS_XML, 'lang'), '')
            treat_d = defer.succeed(None)  # deferred used for treatments
            # NOTE(review): the body is skipped when the trigger point returns
            # a truthy value -- confirm this polarity against the trigger
            # manager's convention.
            if self.host.trigger.point("xhtml_post_treat", client, message_elt, body_elt, lang, treat_d):
                continue
            # body_elt is bound as a default argument: a plain closure would
            # look the loop variable up when the callback runs, so a callback
            # executed after the loop has advanced (e.g. because a trigger
            # paused treat_d) would convert the wrong body.
            treat_d.addCallback(
                lambda dummy, body_elt=body_elt: self._s.convert(
                    body_elt.toXml(), self.SYNTAX_XHTML_IM, safe=True))
            treat_d.addCallback(converted, lang)
            defers.append(treat_d)

        d_list = defer.DeferredList(defers)
        d_list.addCallback(lambda dummy: data)
        return d_list

    def _fill_body_text(self, text, data, lang):
        """Store a plain text body in the message data and in the stanza.

        @param text: plain text to use as body
        @param data: message data; data['message'] and data['xml'] are updated
        @param lang: language of the body, falsy for the default body
        """
        data['message'][lang or ''] = text
        message_elt = data['xml']
        body_elt = message_elt.addElement("body", content=text)
        if lang:
            body_elt[(C.NS_XML, 'lang')] = lang

    def _check_body_text(self, data, lang, markup, syntax, defers):
        """check if simple text message exists, and fill if needed

        @param data: message data, data['message'] maps language to text body
        @param lang: language to check, falsy for the default body
        @param markup: rich content converted to text when the body is missing
        @param syntax: syntax of markup
        @param defers: list to which the conversion deferred is appended
        """
        if (lang or '') not in data['message']:
            d = self._s.convert(markup, syntax, self._s.SYNTAX_TEXT)
            d.addCallback(self._fill_body_text, data, lang)
            defers.append(d)

    def _sendMessageAddRich(self, data, client):
        """ Construct XHTML-IM node and add it XML element

        @param data: message data as sended by sendMessage callback
        @param client: its profile selects the syntax of 'rich' content
        @return: DeferredList firing with data once all bodies are converted
        @raise exceptions.InternalError: neither 'xhtml' nor 'rich' is present
            in data['extra']
        """
        # at this point, either ['extra']['rich'] or ['extra']['xhtml'] exists
        # but both can't exist at the same time
        message_elt = data['xml']
        html_elt = message_elt.addElement((NS_XHTML_IM, 'html'))

        def syntax_converted(xhtml_im, lang):
            body_elt = html_elt.addElement((NS_XHTML, 'body'))
            if lang:
                body_elt[(C.NS_XML, 'lang')] = lang
                data['extra']['xhtml_{}'.format(lang)] = xhtml_im
            else:
                data['extra']['xhtml'] = xhtml_im
            body_elt.addRawXml(xhtml_im)

        syntax = self._s.getCurrentSyntax(client.profile)
        defers = []
        if u'xhtml' in data['extra']:
            # we have directly XHTML
            for lang, xhtml in data_format.getSubDict('xhtml', data['extra']):
                self._check_body_text(data, lang, xhtml, self._s.SYNTAX_XHTML, defers)
                d = self._s.convert(xhtml, self._s.SYNTAX_XHTML, self.SYNTAX_XHTML_IM)
                d.addCallback(syntax_converted, lang)
                defers.append(d)
        elif u'rich' in data['extra']:
            # we have rich syntax to convert
            for lang, rich_data in data_format.getSubDict('rich', data['extra']):
                self._check_body_text(data, lang, rich_data, syntax, defers)
                d = self._s.convert(rich_data, syntax, self.SYNTAX_XHTML_IM)
                d.addCallback(syntax_converted, lang)
                defers.append(d)
        else:
            # the exception used to be instantiated without being raised, which
            # silently left an empty <html/> element in the stanza
            raise exceptions.InternalError(u"xhtml or rich should be present at this point")
        d_list = defer.DeferredList(defers)
        d_list.addCallback(lambda dummy: data)
        return d_list

    def messageReceivedTrigger(self, client, message, post_treat):
        """ Check presence of XHTML-IM in message

        When an XHTML-IM <html/> element is found, _messagePostTreat is added
        to post_treat with the XHTML <body/> children.

        @param client: forwarded to _messagePostTreat
        @param message: received <message> stanza
        @param post_treat: deferred to which the post treatment is added
        @return: always True
        """
        try:
            # next() builtin instead of the Python 2 only .next() method
            html_elt = next(message.elements(NS_XHTML_IM, 'html'))
        except StopIteration:
            # No XHTML-IM
            pass
        else:
            body_elts = html_elt.elements(NS_XHTML, 'body')
            post_treat.addCallback(self._messagePostTreat, message, body_elts, client)
        return True

    def sendMessageTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """ Check presence of rich text in extra

        Keys of data['extra'] starting with 'rich' or 'xhtml' are collected
        (the text after the 'rich_'/'xhtml_' prefix is used as key); when some
        are found they are stored in data['rich'] or data['xhtml'] and
        _sendMessageAddRich is added to post_xml_treatments.

        @param client: forwarded to _sendMessageAddRich
        @param data: message data, must contain the 'extra' dict
        @param pre_xml_treatments: unused here
        @param post_xml_treatments: deferred to which the XHTML-IM
            construction is added
        @return: always True
        @raise exceptions.DataError: both rich and XHTML contents are present
        """
        rich = {}
        xhtml = {}
        # items() is available on Python 2 and 3 dicts, unlike iteritems()
        for key, value in data['extra'].items():
            if key.startswith('rich'):
                rich[key[5:]] = value
            elif key.startswith('xhtml'):
                xhtml[key[6:]] = value
        if rich and xhtml:
            raise exceptions.DataError(_(u"Can't have XHTML and rich content at the same time"))
        if rich or xhtml:
            if rich:
                data['rich'] = rich
            else:
                data['xhtml'] = xhtml
            post_xml_treatments.addCallback(self._sendMessageAddRich, client)
        return True

    def _purgeStyle(self, styles_raw):
        """ Remove unauthorised styles according to the XEP-0071

        @param styles_raw: raw styles (value of the style attribute)
        @return: the declarations whose property is in styles_allowed,
            formatted as "name: value" and joined with "; "
        """
        purged = []

        for declaration in styles_raw.split(';'):
            style_tuple = declaration.strip().split(':')
            if len(style_tuple) != 2:
                # empty or malformed declaration (this also drops values
                # which contain a colon themselves)
                continue
            name, value = style_tuple
            name = name.strip()
            if name not in styles_allowed:
                continue
            purged.append((name, value.strip()))

        return u'; '.join([u"%s: %s" % data for data in purged])

    def XHTML2XHTML_IM(self, xhtml):
        """ Convert XHTML document to XHTML_IM subset

        Tags missing from ``allowed`` are stripped (their content is kept,
        except for tags in ``blacklist`` which are removed with their content),
        attributes not allowed for a tag are deleted and style attributes are
        filtered with _purgeStyle.

        @param xhtml: raw xhtml to convert
        @return: serialised XHTML-IM markup
        """
        # TODO: more clever tag replacement (replace forbidden tags with equivalents when possible)

        parser = html.HTMLParser(remove_comments=True, encoding='utf-8')
        root = html.fromstring(xhtml, parser=parser)
        body_elt = root.find('body')
        if body_elt is None:
            # we use the whole XML as body if no body element is found
            body_elt = html.Element('body')
            body_elt.append(root)
        else:
            body_elt.attrib.clear()

        to_strip = []
        for elem in body_elt.iter():
            # membership is tested on the dict itself: keys() is a list on
            # Python 2, which made every lookup a linear scan
            if elem.tag not in allowed:
                to_strip.append(elem)
            else:
                # we remove unallowed attributes
                attrib = elem.attrib
                att_to_remove = set(attrib).difference(allowed[elem.tag])
                for att in att_to_remove:
                    del attrib[att]
                if "style" in attrib:
                    attrib["style"] = self._purgeStyle(attrib["style"])

        # elements are dropped only once the iteration is over, so the tree is
        # not modified while it is being walked
        for elem in to_strip:
            if elem.tag in blacklist:
                # we need to remove the element and all descendants
                log.debug(u"removing black listed tag: %s" % (elem.tag))
                elem.drop_tree()
            else:
                elem.drop_tag()

        if len(body_elt) != 1:
            # zero or several children: the body itself, renamed to <p/>, is
            # used as the single root element
            root_elt = body_elt
            body_elt.tag = "p"
        else:
            root_elt = body_elt[0]

        return html.tostring(root_elt, encoding='unicode', method='xml')
268
class XEP_0071_handler(XMPPHandler):
    """Service discovery handler advertising the XHTML-IM feature."""
    implements(iwokkel.IDisco)

    def __init__(self, plugin_parent):
        """
        @param plugin_parent: XEP_0071 plugin instance which created this handler
        """
        # the base initialiser was never called; run it so that the attributes
        # it sets up exist before the handler is attached to a stream
        XMPPHandler.__init__(self)
        self.plugin_parent = plugin_parent
        self.host = plugin_parent.host

    def getDiscoInfo(self, requestor, target, nodeIdentifier=''):
        """Return the XHTML-IM disco feature, whatever the arguments are."""
        return [disco.DiscoFeature(NS_XHTML_IM)]

    def getDiscoItems(self, requestor, target, nodeIdentifier=''):
        """Return an empty list: this handler exposes no disco item."""
        return []