comparison libervia/backend/plugins/plugin_xep_0071.py @ 4071:4b842c1fb686

refactoring: renamed `sat` package to `libervia.backend`
author Goffi <goffi@goffi.org>
date Fri, 02 Jun 2023 11:49:51 +0200
parents sat/plugins/plugin_xep_0071.py@c23cad65ae99
children 0d7bb4df2343
comparison
equal deleted inserted replaced
4070:d10748475025 4071:4b842c1fb686
1 #!/usr/bin/env python3
2
3
4 # SAT plugin for XHTML-IM (XEP-0071)
5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
16
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 from libervia.backend.core.i18n import _
21 from libervia.backend.core.constants import Const as C
22 from libervia.backend.core import exceptions
23 from libervia.backend.core.log import getLogger
24
25 log = getLogger(__name__)
26 from libervia.backend.tools.common import data_format
27
28 from twisted.internet import defer
29 from wokkel import disco, iwokkel
30 from zope.interface import implementer
31
32 # from lxml import etree
33 try:
34 from lxml import html
35 except ImportError:
36 raise exceptions.MissingModule(
37 "Missing module lxml, please download/install it from http://lxml.de/"
38 )
39 try:
40 from twisted.words.protocols.xmlstream import XMPPHandler
41 except ImportError:
42 from wokkel.subprotocols import XMPPHandler
43
# XML namespaces: the XHTML-IM wrapper element and the XHTML body it contains
NS_XHTML_IM = "http://jabber.org/protocol/xhtml-im"
NS_XHTML = "http://www.w3.org/1999/xhtml"

# plugin metadata, keyed by the C.PI_* constants
PLUGIN_INFO = {
    C.PI_NAME: "XHTML-IM Plugin",
    C.PI_IMPORT_NAME: "XEP-0071",
    C.PI_TYPE: "XEP",
    C.PI_PROTOCOLS: ["XEP-0071"],
    C.PI_DEPENDENCIES: ["TEXT_SYNTAXES"],
    C.PI_MAIN: "XEP_0071",
    C.PI_HANDLER: "yes",
    C.PI_DESCRIPTION: _("""Implementation of XHTML-IM"""),
}
57
# tags kept by XHTML2XHTML_IM, each mapped to the attributes it may carry;
# any other attribute found on these tags is removed
allowed = {
    "a": {"href", "style", "type"},
    "blockquote": {"style"},
    "body": {"style"},
    "br": set(),
    "cite": {"style"},
    "em": set(),
    "img": {"alt", "height", "src", "style", "width"},
    "li": {"style"},
    "ol": {"style"},
    "p": {"style"},
    "span": {"style"},
    "strong": set(),
    "ul": {"style"},
}

# CSS properties kept inside a "style" attribute; all others are purged
styles_allowed = [
    "background-color",
    "color",
    "font-family",
    "font-size",
    "font-style",
    "font-weight",
    "margin-left",
    "margin-right",
    "text-align",
    "text-decoration",
]

# tags that we have to kill (we don't keep content)
blacklist = ["script"]
88
89
class XEP_0071(object):
    """XHTML-IM (XEP-0071) plugin.

    Registers the "XHTML-IM" syntax with the TEXT_SYNTAXES plugin and hooks the
    "message_received" and "sendMessage" triggers, so that XHTML-IM bodies are
    extracted from received stanzas and built for sent ones.
    """

    SYNTAX_XHTML_IM = "XHTML-IM"

    def __init__(self, host):
        """Register the XHTML-IM syntax and the message triggers

        @param host: host instance, giving access to plugins and triggers
        """
        log.info(_("XHTML-IM plugin initialization"))
        self.host = host
        self._s = self.host.plugins["TEXT_SYNTAXES"]
        self._s.add_syntax(
            self.SYNTAX_XHTML_IM,
            lambda xhtml: xhtml,
            self.XHTML2XHTML_IM,
            [self._s.OPT_HIDDEN],
        )
        host.trigger.add("message_received", self.message_received_trigger)
        host.trigger.add("sendMessage", self.send_message_trigger)

    def get_handler(self, client):
        """Return the handler advertising XHTML-IM in service discovery"""
        return XEP_0071_handler(self)

    def _message_post_treat(self, data, message_elt, body_elts, client):
        """Callback which manage the post treatment of the message in case of XHTML-IM found

        @param data: data send by message_received trigger through post_treat deferred
        @param message_elt: whole <message> stanza
        @param body_elts: XHTML-IM body elements found
        @param client: client passed along to the "xhtml_post_treat" trigger point
        @return: deferred firing with the data, with the extra parameter updated
        """
        # TODO: check if text only body is empty, then try to convert XHTML-IM to pure text and show a warning message
        def converted(xhtml, lang):
            # one entry per language: "xhtml_<lang>", or plain "xhtml" without lang
            if lang:
                data["extra"]["xhtml_{}".format(lang)] = xhtml
            else:
                data["extra"]["xhtml"] = xhtml

        defers = []
        for body_elt in body_elts:
            lang = body_elt.getAttribute((C.NS_XML, "lang"), "")
            treat_d = defer.succeed(None)  # deferred used for treatments
            # NOTE(review): the body is skipped when trigger.point() returns a
            # true value; confirm this polarity against the trigger manager.
            if self.host.trigger.point(
                "xhtml_post_treat", client, message_elt, body_elt, lang, treat_d
            ):
                continue
            # body_elt is bound as a default argument: if a trigger left treat_d
            # pending, the callback runs after the loop has moved on, and a plain
            # closure would then see the last body element for every language
            treat_d.addCallback(
                lambda __, body_elt=body_elt: self._s.convert(
                    body_elt.toXml(), self.SYNTAX_XHTML_IM, safe=True
                )
            )
            treat_d.addCallback(converted, lang)
            defers.append(treat_d)

        d_list = defer.DeferredList(defers)
        d_list.addCallback(lambda __: data)
        return d_list

    def _fill_body_text(self, text, data, lang):
        """Store a plain text body in data and add it to the stanza

        @param text: plain text to use as body
        @param data: message data, its "message" and "xml" entries are updated
        @param lang: language of the body, may be empty or None
        """
        data["message"][lang or ""] = text
        message_elt = data["xml"]
        body_elt = message_elt.addElement("body", content=text)
        if lang:
            body_elt[(C.NS_XML, "lang")] = lang

    def _check_body_text(self, data, lang, markup, syntax, defers):
        """check if simple text message exists, and fill if needed

        @param data: message data
        @param lang: language of the body to check, may be empty or None
        @param markup: rich content to convert when the text body is missing
        @param syntax: syntax of markup
        @param defers: list where the conversion deferred is appended, if any
        """
        if (lang or "") not in data["message"]:
            d = self._s.convert(markup, syntax, self._s.SYNTAX_TEXT)
            d.addCallback(self._fill_body_text, data, lang)
            defers.append(d)

    def _send_message_add_rich(self, data, client):
        """Construct XHTML-IM node and add it XML element

        @param data: message data as sended by sendMessage callback
        @param client: client whose profile selects the current rich syntax
        @return: deferred firing with data once all conversions are done
        @raise exceptions.InternalError: neither "xhtml" nor "rich" is in extra
        """
        # at this point, either ['extra']['rich'] or ['extra']['xhtml'] exists
        # but both can't exist at the same time
        message_elt = data["xml"]
        html_elt = message_elt.addElement((NS_XHTML_IM, "html"))

        def syntax_converted(xhtml_im, lang):
            # one XHTML <body/> per language, filled with the converted markup
            body_elt = html_elt.addElement((NS_XHTML, "body"))
            if lang:
                body_elt[(C.NS_XML, "lang")] = lang
                data["extra"]["xhtml_{}".format(lang)] = xhtml_im
            else:
                data["extra"]["xhtml"] = xhtml_im
            body_elt.addRawXml(xhtml_im)

        syntax = self._s.get_current_syntax(client.profile)
        defers = []
        if "xhtml" in data["extra"]:
            # we have directly XHTML
            for lang, xhtml in data_format.get_sub_dict("xhtml", data["extra"]):
                self._check_body_text(data, lang, xhtml, self._s.SYNTAX_XHTML, defers)
                d = self._s.convert(xhtml, self._s.SYNTAX_XHTML, self.SYNTAX_XHTML_IM)
                d.addCallback(syntax_converted, lang)
                defers.append(d)
        elif "rich" in data["extra"]:
            # we have rich syntax to convert
            for lang, rich_data in data_format.get_sub_dict("rich", data["extra"]):
                self._check_body_text(data, lang, rich_data, syntax, defers)
                d = self._s.convert(rich_data, syntax, self.SYNTAX_XHTML_IM)
                d.addCallback(syntax_converted, lang)
                defers.append(d)
        else:
            # the exception was previously built but never raised
            raise exceptions.InternalError(
                "xhtml or rich should be present at this point"
            )
        d_list = defer.DeferredList(defers)
        d_list.addCallback(lambda __: data)
        return d_list

    def message_received_trigger(self, client, message, post_treat):
        """Check presence of XHTML-IM in message

        @param client: client receiving the message
        @param message: received <message> stanza
        @param post_treat: deferred where the XHTML-IM post treatment is added
        @return: always True
        """
        try:
            html_elt = next(message.elements(NS_XHTML_IM, "html"))
        except StopIteration:
            # No XHTML-IM
            pass
        else:
            body_elts = html_elt.elements(NS_XHTML, "body")
            post_treat.addCallback(self._message_post_treat, message, body_elts, client)
        return True

    def send_message_trigger(self, client, data, pre_xml_treatments, post_xml_treatments):
        """Check presence of rich text in extra

        @param client: client sending the message
        @param data: message data, its "extra" entry is inspected
        @param pre_xml_treatments: unused here
        @param post_xml_treatments: deferred where the XHTML-IM construction
            is added when rich or XHTML content is found
        @return: always True
        @raise exceptions.DataError: both rich and XHTML content are present
        """
        rich = {}
        xhtml = {}
        for key, value in data["extra"].items():
            # the slice drops the prefix and the following separator character,
            # so the bare "rich"/"xhtml" keys end up under ""
            if key.startswith("rich"):
                rich[key[5:]] = value
            elif key.startswith("xhtml"):
                xhtml[key[6:]] = value
        if rich and xhtml:
            raise exceptions.DataError(
                _("Can't have XHTML and rich content at the same time")
            )
        if rich or xhtml:
            if rich:
                data["rich"] = rich
            else:
                data["xhtml"] = xhtml
            post_xml_treatments.addCallback(self._send_message_add_rich, client)
        return True

    def _purge_style(self, styles_raw):
        """Remove unauthorised styles according to the XEP-0071

        @param styles_raw: raw styles (value of the style attribute)
        @return: "; " separated "name: value" declarations which were kept
        """
        purged = []

        styles = [style.strip().split(":") for style in styles_raw.split(";")]

        for style_tuple in styles:
            # anything which is not exactly "name:value" is dropped, which
            # includes values containing a colon themselves
            if len(style_tuple) != 2:
                continue
            name, value = style_tuple
            name = name.strip()
            if name not in styles_allowed:
                continue
            purged.append((name, value.strip()))

        return "; ".join(["%s: %s" % data for data in purged])

    def XHTML2XHTML_IM(self, xhtml):
        """Convert XHTML document to XHTML_IM subset

        @param xhtml: raw xhtml to convert
        @return: serialised XML restricted to allowed tags, attributes and styles
        """
        # TODO: more clever tag replacement (replace forbidden tags with equivalents when possible)

        parser = html.HTMLParser(remove_comments=True, encoding="utf-8")
        root = html.fromstring(xhtml, parser=parser)
        body_elt = root.find("body")
        if body_elt is None:
            # we use the whole XML as body if no body element is found
            body_elt = html.Element("body")
            body_elt.append(root)
        else:
            body_elt.attrib.clear()

        # elements are only collected here and dropped afterwards, so that the
        # tree is not modified while it is being iterated
        to_strip = []
        for elem in body_elt.iter():
            if elem.tag not in allowed:
                to_strip.append(elem)
            else:
                # we remove unallowed attributes
                attrib = elem.attrib
                att_to_remove = set(attrib).difference(allowed[elem.tag])
                for att in att_to_remove:
                    del attrib[att]
                if "style" in attrib:
                    attrib["style"] = self._purge_style(attrib["style"])

        for elem in to_strip:
            if elem.tag in blacklist:
                # we need to remove the element and all descendants
                log.debug("removing black listed tag: %s" % (elem.tag))
                elem.drop_tree()
            else:
                elem.drop_tag()
        if len(body_elt) != 1:
            # no single child to return: the body itself is serialised, as a <p/>
            root_elt = body_elt
            body_elt.tag = "p"
        else:
            root_elt = body_elt[0]

        return html.tostring(root_elt, encoding="unicode", method="xml")
296
297
@implementer(iwokkel.IDisco)
class XEP_0071_handler(XMPPHandler):
    """Handler answering service discovery requests for XHTML-IM"""

    def __init__(self, plugin_parent):
        """Keep references to the plugin and to its host

        @param plugin_parent: XEP_0071 plugin instance owning this handler
        """
        self.host = plugin_parent.host
        self.plugin_parent = plugin_parent

    def getDiscoInfo(self, requestor, target, nodeIdentifier=""):
        """Return the XHTML-IM namespace as the only disco feature"""
        xhtml_im_feature = disco.DiscoFeature(NS_XHTML_IM)
        return [xhtml_im_feature]

    def getDiscoItems(self, requestor, target, nodeIdentifier=""):
        """Return no disco item"""
        return []