Mercurial > libervia-backend
comparison libervia/backend/plugins/plugin_xep_0071.py @ 4071:4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 02 Jun 2023 11:49:51 +0200 |
parents | sat/plugins/plugin_xep_0071.py@c23cad65ae99 |
children | 0d7bb4df2343 |
comparison
equal
deleted
inserted
replaced
4070:d10748475025 | 4071:4b842c1fb686 |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 | |
4 # SAT plugin for XHTML-IM (xep-0071) | |
5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) | |
6 | |
7 # This program is free software: you can redistribute it and/or modify | |
8 # it under the terms of the GNU Affero General Public License as published by | |
9 # the Free Software Foundation, either version 3 of the License, or | |
10 # (at your option) any later version. | |
11 | |
12 # This program is distributed in the hope that it will be useful, | |
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 # GNU Affero General Public License for more details. | |
16 | |
17 # You should have received a copy of the GNU Affero General Public License | |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | |
20 from libervia.backend.core.i18n import _ | |
21 from libervia.backend.core.constants import Const as C | |
22 from libervia.backend.core import exceptions | |
23 from libervia.backend.core.log import getLogger | |
24 | |
25 log = getLogger(__name__) | |
26 from libervia.backend.tools.common import data_format | |
27 | |
28 from twisted.internet import defer | |
29 from wokkel import disco, iwokkel | |
30 from zope.interface import implementer | |
31 | |
32 # from lxml import etree | |
33 try: | |
34 from lxml import html | |
35 except ImportError: | |
36 raise exceptions.MissingModule( | |
37 "Missing module lxml, please download/install it from http://lxml.de/" | |
38 ) | |
39 try: | |
40 from twisted.words.protocols.xmlstream import XMPPHandler | |
41 except ImportError: | |
42 from wokkel.subprotocols import XMPPHandler | |
43 | |
# Namespace of the <html/> wrapper element added to <message/> stanzas
NS_XHTML_IM = "http://jabber.org/protocol/xhtml-im"
# Namespace of the <body/> elements placed inside the <html/> wrapper
NS_XHTML = "http://www.w3.org/1999/xhtml"

# Plugin metadata (name, import name, type, protocols, dependencies, main class)
PLUGIN_INFO = {
    C.PI_NAME: "XHTML-IM Plugin",
    C.PI_IMPORT_NAME: "XEP-0071",
    C.PI_TYPE: "XEP",
    C.PI_PROTOCOLS: ["XEP-0071"],
    C.PI_DEPENDENCIES: ["TEXT_SYNTAXES"],
    C.PI_MAIN: "XEP_0071",
    C.PI_HANDLER: "yes",
    C.PI_DESCRIPTION: _("Implementation of XHTML-IM"),
}
57 | |
# Tags kept by the XHTML-IM filter, each mapped to the attributes kept on it.
# Any other tag is stripped (its content is kept unless the tag is blacklisted).
allowed = {
    "a": {"href", "style", "type"},
    "blockquote": {"style"},
    "body": {"style"},
    "br": set(),
    "cite": {"style"},
    "em": set(),
    "img": {"alt", "height", "src", "style", "width"},
    "li": {"style"},
    "ol": {"style"},
    "p": {"style"},
    "span": {"style"},
    "strong": set(),
    "ul": {"style"},
}

# CSS properties kept inside a "style" attribute; any other property is dropped
styles_allowed = [
    "background-color",
    "color",
    "font-family",
    "font-size",
    "font-style",
    "font-weight",
    "margin-left",
    "margin-right",
    "text-align",
    "text-decoration",
]

# Tags removed together with their whole content (not just unwrapped)
blacklist = ["script"]
88 | |
89 | |
class XEP_0071(object):
    """XHTML-IM (XEP-0071) plugin.

    Registers an "XHTML-IM" text syntax, extracts XHTML-IM bodies from received
    messages into ``data["extra"]`` and adds an XHTML-IM ``<html/>`` element to
    sent messages carrying rich or XHTML content.
    """

    SYNTAX_XHTML_IM = "XHTML-IM"

    def __init__(self, host):
        """
        @param host: backend host, giving access to plugins and triggers
        """
        log.info(_("XHTML-IM plugin initialization"))
        self.host = host
        self._s = self.host.plugins["TEXT_SYNTAXES"]
        # XHTML-IM is a subset of XHTML: the first converter is the identity,
        # the second one filters XHTML down to that subset.
        self._s.add_syntax(
            self.SYNTAX_XHTML_IM,
            lambda xhtml: xhtml,
            self.XHTML2XHTML_IM,
            [self._s.OPT_HIDDEN],
        )
        host.trigger.add("message_received", self.message_received_trigger)
        host.trigger.add("sendMessage", self.send_message_trigger)

    def get_handler(self, client):
        """Return the protocol handler advertising XHTML-IM support

        @param client: unused
        @return: a new XEP_0071_handler bound to this plugin
        """
        return XEP_0071_handler(self)

    def _message_post_treat(self, data, message_elt, body_elts, client):
        """Callback which manage the post treatment of the message in case of XHTML-IM found

        @param data: data send by message_received trigger through post_treat deferred
        @param message_elt: whole <message> stanza
        @param body_elts: XHTML-IM body elements found
        @param client: passed as is to the "xhtml_post_treat" trigger point
        @return: deferred firing with the data with the extra parameter updated
        """
        # TODO: check if text only body is empty, then try to convert XHTML-IM to pure text and show a warning message
        def converted(xhtml, lang):
            if lang:
                data["extra"]["xhtml_{}".format(lang)] = xhtml
            else:
                data["extra"]["xhtml"] = xhtml

        defers = []
        for body_elt in body_elts:
            lang = body_elt.getAttribute((C.NS_XML, "lang"), "")
            treat_d = defer.succeed(None)  # deferred used for treatments
            # NOTE(review): the default conversion below is skipped when the
            # trigger point returns a true value; the triggers of this file
            # return True to mean "continue", so this test may be inverted —
            # confirm against the trigger manager before changing it.
            if self.host.trigger.point(
                "xhtml_post_treat", client, message_elt, body_elt, lang, treat_d
            ):
                continue
            # body_elt is bound as a default argument: a plain closure would be
            # evaluated when the callback runs, which may be after the loop has
            # moved on if a trigger paused treat_d.
            treat_d.addCallback(
                lambda __, body_elt=body_elt: self._s.convert(
                    body_elt.toXml(), self.SYNTAX_XHTML_IM, safe=True
                )
            )
            treat_d.addCallback(converted, lang)
            defers.append(treat_d)

        d_list = defer.DeferredList(defers)
        d_list.addCallback(lambda __: data)
        return d_list

    def _fill_body_text(self, text, data, lang):
        """Store a plain text body in message data and in the stanza

        @param text: plain text version of the message
        @param data: message data; its "message" dict and "xml" element are updated
        @param lang: language of the body, empty or None if not specified
        """
        data["message"][lang or ""] = text
        message_elt = data["xml"]
        body_elt = message_elt.addElement("body", content=text)
        if lang:
            body_elt[(C.NS_XML, "lang")] = lang

    def _check_body_text(self, data, lang, markup, syntax, defers):
        """check if simple text message exists, and fill if needed

        @param data: message data
        @param lang: language of the body, empty or None if not specified
        @param markup: rich content to convert to text when no text body exists
        @param syntax: syntax of markup
        @param defers: list where the conversion deferred is appended, if any
        """
        if (lang or "") not in data["message"]:
            d = self._s.convert(markup, syntax, self._s.SYNTAX_TEXT)
            d.addCallback(self._fill_body_text, data, lang)
            defers.append(d)

    def _send_message_add_rich(self, data, client):
        """ Construct XHTML-IM node and add it XML element

        @param data: message data as sended by sendMessage callback
        @param client: client whose profile selects the current rich syntax
        @return: deferred firing with data once all conversions are done
        @raise exceptions.InternalError: neither "xhtml" nor "rich" is in extra
        """
        # at this point, either ['extra']['rich'] or ['extra']['xhtml'] exists
        # but both can't exist at the same time
        message_elt = data["xml"]
        html_elt = message_elt.addElement((NS_XHTML_IM, "html"))

        def syntax_converted(xhtml_im, lang):
            body_elt = html_elt.addElement((NS_XHTML, "body"))
            if lang:
                body_elt[(C.NS_XML, "lang")] = lang
                data["extra"]["xhtml_{}".format(lang)] = xhtml_im
            else:
                data["extra"]["xhtml"] = xhtml_im
            body_elt.addRawXml(xhtml_im)

        syntax = self._s.get_current_syntax(client.profile)
        defers = []
        # The sub dicts are materialised before looping: syntax_converted
        # writes to data["extra"], which must not happen while that dict is
        # still being iterated (possible if the helper is lazy and a
        # conversion fires synchronously).
        if "xhtml" in data["extra"]:
            # we have directly XHTML
            for lang, xhtml in list(data_format.get_sub_dict("xhtml", data["extra"])):
                self._check_body_text(data, lang, xhtml, self._s.SYNTAX_XHTML, defers)
                d = self._s.convert(xhtml, self._s.SYNTAX_XHTML, self.SYNTAX_XHTML_IM)
                d.addCallback(syntax_converted, lang)
                defers.append(d)
        elif "rich" in data["extra"]:
            # we have rich syntax to convert
            for lang, rich_data in list(data_format.get_sub_dict("rich", data["extra"])):
                self._check_body_text(data, lang, rich_data, syntax, defers)
                d = self._s.convert(rich_data, syntax, self.SYNTAX_XHTML_IM)
                d.addCallback(syntax_converted, lang)
                defers.append(d)
        else:
            # the exception used to be instantiated but never raised
            raise exceptions.InternalError(
                "xhtml or rich should be present at this point"
            )
        d_list = defer.DeferredList(defers)
        d_list.addCallback(lambda __: data)
        return d_list

    def message_received_trigger(self, client, message, post_treat):
        """ Check presence of XHTML-IM in message

        @param client: passed as is to the post treatment callback
        @param message: received <message> stanza
        @param post_treat: deferred on which the XHTML-IM treatment is chained
        @return: always True
        """
        html_elt = next(message.elements(NS_XHTML_IM, "html"), None)
        if html_elt is not None:
            body_elts = html_elt.elements(NS_XHTML, "body")
            post_treat.addCallback(self._message_post_treat, message, body_elts, client)
        # else: no XHTML-IM
        return True

    def send_message_trigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """ Check presence of rich text in extra

        @param client: passed as is to the treatment callback
        @param data: message data, "extra" keys starting with "rich" or "xhtml"
            are looked for
        @param pre_xml_treatments: unused
        @param post_xml_treatments: deferred on which the XHTML-IM construction
            is chained when rich or XHTML content is found
        @return: always True
        @raise exceptions.DataError: both rich and XHTML contents are present
        """
        rich = {}
        xhtml = {}
        for key, value in data["extra"].items():
            # what follows the prefix and its one char separator is used as
            # sub key (empty for the bare "rich"/"xhtml" keys)
            if key.startswith("rich"):
                rich[key[5:]] = value
            elif key.startswith("xhtml"):
                xhtml[key[6:]] = value
        if rich and xhtml:
            raise exceptions.DataError(
                _("Can't have XHTML and rich content at the same time")
            )
        if rich or xhtml:
            if rich:
                data["rich"] = rich
            else:
                data["xhtml"] = xhtml
            post_xml_treatments.addCallback(self._send_message_add_rich, client)
        return True

    def _purge_style(self, styles_raw):
        """ Remove unauthorised styles according to the XEP-0071

        @param styles_raw: raw styles (value of the style attribute)
        @return: "; " separated "name: value" declarations which are allowed
        """
        purged = []
        for declaration in styles_raw.split(";"):
            parts = declaration.strip().split(":")
            if len(parts) != 2:
                # empty or malformed declaration, or value containing ":"
                continue
            name, value = parts
            name = name.strip()
            if name not in styles_allowed:
                continue
            purged.append("%s: %s" % (name, value.strip()))

        return "; ".join(purged)

    def XHTML2XHTML_IM(self, xhtml):
        """ Convert XHTML document to XHTML_IM subset

        Forbidden tags are unwrapped (blacklisted ones are removed with their
        content), forbidden attributes and styles are removed.
        @param xhtml: raw xhtml to convert
        @return: serialised XHTML-IM, as a unicode string
        """
        # TODO: more clever tag replacement (replace forbidden tags with equivalents when possible)

        parser = html.HTMLParser(remove_comments=True, encoding="utf-8")
        root = html.fromstring(xhtml, parser=parser)
        body_elt = root.find("body")
        if body_elt is None:
            # we use the whole XML as body if no body element is found
            body_elt = html.Element("body")
            body_elt.append(root)
        else:
            body_elt.attrib.clear()

        # Elements are only collected during iteration and dropped afterwards,
        # so that the tree is not modified while walking it.
        to_strip = []
        for elem in body_elt.iter():
            if elem.tag not in allowed:
                to_strip.append(elem)
                continue
            # we remove unallowed attributes
            attrib = elem.attrib
            for att in set(attrib).difference(allowed[elem.tag]):
                del attrib[att]
            if "style" in attrib:
                attrib["style"] = self._purge_style(attrib["style"])

        for elem in to_strip:
            if elem.tag in blacklist:
                # we need to remove the element and all descendants
                log.debug("removing black listed tag: %s" % (elem.tag))
                elem.drop_tree()
            else:
                elem.drop_tag()

        # The wrapper can only be discarded when it holds nothing but a single
        # child: text put directly in it (e.g. by drop_tag() of a forbidden
        # wrapper such as <div>) would otherwise be silently lost.
        has_leading_text = bool((body_elt.text or "").strip())
        if len(body_elt) != 1 or has_leading_text:
            root_elt = body_elt
            body_elt.tag = "p"
        else:
            root_elt = body_elt[0]

        return html.tostring(root_elt, encoding="unicode", method="xml")
296 | |
297 | |
@implementer(iwokkel.IDisco)
class XEP_0071_handler(XMPPHandler):
    """Protocol handler answering service discovery requests for XHTML-IM"""

    def __init__(self, plugin_parent):
        """
        @param plugin_parent: plugin instance creating this handler, its host
            is kept as a shortcut
        """
        self.plugin_parent = plugin_parent
        self.host = self.plugin_parent.host

    def getDiscoInfo(self, requestor, target, nodeIdentifier=""):
        """Advertise the XHTML-IM namespace as a supported feature

        @return: list with a single disco feature, whatever the arguments are
        """
        xhtml_im_feature = disco.DiscoFeature(NS_XHTML_IM)
        return [xhtml_im_feature]

    def getDiscoItems(self, requestor, target, nodeIdentifier=""):
        """No disco item is exposed by this plugin

        @return: a new empty list
        """
        no_items = []
        return no_items