comparison sat/plugins/plugin_xep_0071.py @ 2624:56f94936df1e

code style reformatting using black
author Goffi <goffi@goffi.org>
date Wed, 27 Jun 2018 20:14:46 +0200
parents 26edcf3a30eb
children 378188abe941
comparison
equal deleted inserted replaced
2623:49533de4540b 2624:56f94936df1e
19 19
20 from sat.core.i18n import _ 20 from sat.core.i18n import _
21 from sat.core.constants import Const as C 21 from sat.core.constants import Const as C
22 from sat.core import exceptions 22 from sat.core import exceptions
23 from sat.core.log import getLogger 23 from sat.core.log import getLogger
24
24 log = getLogger(__name__) 25 log = getLogger(__name__)
25 from sat.tools.common import data_format 26 from sat.tools.common import data_format
26 27
27 from twisted.internet import defer 28 from twisted.internet import defer
28 from wokkel import disco, iwokkel 29 from wokkel import disco, iwokkel
29 from zope.interface import implements 30 from zope.interface import implements
31
30 # from lxml import etree 32 # from lxml import etree
31 try: 33 try:
32 from lxml import html 34 from lxml import html
33 except ImportError: 35 except ImportError:
34 raise exceptions.MissingModule(u"Missing module lxml, please download/install it from http://lxml.de/") 36 raise exceptions.MissingModule(
37 u"Missing module lxml, please download/install it from http://lxml.de/"
38 )
35 try: 39 try:
36 from twisted.words.protocols.xmlstream import XMPPHandler 40 from twisted.words.protocols.xmlstream import XMPPHandler
37 except ImportError: 41 except ImportError:
38 from wokkel.subprotocols import XMPPHandler 42 from wokkel.subprotocols import XMPPHandler
39 43
40 NS_XHTML_IM = 'http://jabber.org/protocol/xhtml-im' 44 NS_XHTML_IM = "http://jabber.org/protocol/xhtml-im"
41 NS_XHTML = 'http://www.w3.org/1999/xhtml' 45 NS_XHTML = "http://www.w3.org/1999/xhtml"
42 46
43 PLUGIN_INFO = { 47 PLUGIN_INFO = {
44 C.PI_NAME: "XHTML-IM Plugin", 48 C.PI_NAME: "XHTML-IM Plugin",
45 C.PI_IMPORT_NAME: "XEP-0071", 49 C.PI_IMPORT_NAME: "XEP-0071",
46 C.PI_TYPE: "XEP", 50 C.PI_TYPE: "XEP",
47 C.PI_PROTOCOLS: ["XEP-0071"], 51 C.PI_PROTOCOLS: ["XEP-0071"],
48 C.PI_DEPENDENCIES: ["TEXT-SYNTAXES"], 52 C.PI_DEPENDENCIES: ["TEXT-SYNTAXES"],
49 C.PI_MAIN: "XEP_0071", 53 C.PI_MAIN: "XEP_0071",
50 C.PI_HANDLER: "yes", 54 C.PI_HANDLER: "yes",
51 C.PI_DESCRIPTION: _("""Implementation of XHTML-IM""") 55 C.PI_DESCRIPTION: _("""Implementation of XHTML-IM"""),
52 } 56 }
53 57
54 allowed = { 58 allowed = {
55 "a": set(["href", "style", "type"]), 59 "a": set(["href", "style", "type"]),
56 "blockquote": set(["style"]), 60 "blockquote": set(["style"]),
57 "body": set(["style"]), 61 "body": set(["style"]),
58 "br": set([]), 62 "br": set([]),
59 "cite": set(["style"]), 63 "cite": set(["style"]),
60 "em": set([]), 64 "em": set([]),
61 "img": set(["alt", "height", "src", "style", "width"]), 65 "img": set(["alt", "height", "src", "style", "width"]),
62 "li": set(["style"]), 66 "li": set(["style"]),
63 "ol": set(["style"]), 67 "ol": set(["style"]),
64 "p": set(["style"]), 68 "p": set(["style"]),
65 "span": set(["style"]), 69 "span": set(["style"]),
66 "strong": set([]), 70 "strong": set([]),
67 "ul": set(["style"]), 71 "ul": set(["style"]),
68 } 72 }
69 73
70 styles_allowed = ["background-color", "color", "font-family", "font-size", "font-style", "font-weight", "margin-left", "margin-right", "text-align", "text-decoration"] 74 styles_allowed = [
71 75 "background-color",
72 blacklist = ['script'] # tag that we have to kill (we don't keep content) 76 "color",
77 "font-family",
78 "font-size",
79 "font-style",
80 "font-weight",
81 "margin-left",
82 "margin-right",
83 "text-align",
84 "text-decoration",
85 ]
86
87 blacklist = ["script"] # tag that we have to kill (we don't keep content)
73 88
74 89
75 class XEP_0071(object): 90 class XEP_0071(object):
76 SYNTAX_XHTML_IM = "XHTML-IM" 91 SYNTAX_XHTML_IM = "XHTML-IM"
77 92
78 def __init__(self, host): 93 def __init__(self, host):
79 log.info(_("XHTML-IM plugin initialization")) 94 log.info(_("XHTML-IM plugin initialization"))
80 self.host = host 95 self.host = host
81 self._s = self.host.plugins["TEXT-SYNTAXES"] 96 self._s = self.host.plugins["TEXT-SYNTAXES"]
82 self._s.addSyntax(self.SYNTAX_XHTML_IM, lambda xhtml: xhtml, self.XHTML2XHTML_IM, [self._s.OPT_HIDDEN]) 97 self._s.addSyntax(
98 self.SYNTAX_XHTML_IM,
99 lambda xhtml: xhtml,
100 self.XHTML2XHTML_IM,
101 [self._s.OPT_HIDDEN],
102 )
83 host.trigger.add("MessageReceived", self.messageReceivedTrigger) 103 host.trigger.add("MessageReceived", self.messageReceivedTrigger)
84 host.trigger.add("sendMessage", self.sendMessageTrigger) 104 host.trigger.add("sendMessage", self.sendMessageTrigger)
85 105
86 def getHandler(self, client): 106 def getHandler(self, client):
87 return XEP_0071_handler(self) 107 return XEP_0071_handler(self)
95 @return: the data with the extra parameter updated 115 @return: the data with the extra parameter updated
96 """ 116 """
97 # TODO: check if text only body is empty, then try to convert XHTML-IM to pure text and show a warning message 117 # TODO: check if text only body is empty, then try to convert XHTML-IM to pure text and show a warning message
98 def converted(xhtml, lang): 118 def converted(xhtml, lang):
99 if lang: 119 if lang:
100 data['extra']['xhtml_{}'.format(lang)] = xhtml 120 data["extra"]["xhtml_{}".format(lang)] = xhtml
101 else: 121 else:
102 data['extra']['xhtml'] = xhtml 122 data["extra"]["xhtml"] = xhtml
103 123
104 defers = [] 124 defers = []
105 for body_elt in body_elts: 125 for body_elt in body_elts:
106 lang = body_elt.getAttribute((C.NS_XML, 'lang'), '') 126 lang = body_elt.getAttribute((C.NS_XML, "lang"), "")
107 treat_d = defer.succeed(None) # deferred used for treatments 127 treat_d = defer.succeed(None) #  deferred used for treatments
108 if self.host.trigger.point("xhtml_post_treat", client, message_elt, body_elt, lang, treat_d): 128 if self.host.trigger.point(
129 "xhtml_post_treat", client, message_elt, body_elt, lang, treat_d
130 ):
109 continue 131 continue
110 treat_d.addCallback(lambda dummy: self._s.convert(body_elt.toXml(), self.SYNTAX_XHTML_IM, safe=True)) 132 treat_d.addCallback(
133 lambda dummy: self._s.convert(
134 body_elt.toXml(), self.SYNTAX_XHTML_IM, safe=True
135 )
136 )
111 treat_d.addCallback(converted, lang) 137 treat_d.addCallback(converted, lang)
112 defers.append(treat_d) 138 defers.append(treat_d)
113 139
114 d_list = defer.DeferredList(defers) 140 d_list = defer.DeferredList(defers)
115 d_list.addCallback(lambda dummy: data) 141 d_list.addCallback(lambda dummy: data)
116 return d_list 142 return d_list
117 143
118 def _fill_body_text(self, text, data, lang): 144 def _fill_body_text(self, text, data, lang):
119 data['message'][lang or ''] = text 145 data["message"][lang or ""] = text
120 message_elt = data['xml'] 146 message_elt = data["xml"]
121 body_elt = message_elt.addElement("body", content=text) 147 body_elt = message_elt.addElement("body", content=text)
122 if lang: 148 if lang:
123 body_elt[(C.NS_XML, 'lang')] = lang 149 body_elt[(C.NS_XML, "lang")] = lang
124 150
125 def _check_body_text(self, data, lang, markup, syntax, defers): 151 def _check_body_text(self, data, lang, markup, syntax, defers):
126 """check if simple text message exists, and fill if needed""" 152 """check if simple text message exists, and fill if needed"""
127 if not (lang or '') in data['message']: 153 if not (lang or "") in data["message"]:
128 d = self._s.convert(markup, syntax, self._s.SYNTAX_TEXT) 154 d = self._s.convert(markup, syntax, self._s.SYNTAX_TEXT)
129 d.addCallback(self._fill_body_text, data, lang) 155 d.addCallback(self._fill_body_text, data, lang)
130 defers.append(d) 156 defers.append(d)
131 157
132 def _sendMessageAddRich(self, data, client): 158 def _sendMessageAddRich(self, data, client):
134 160
135 @param data: message data as sended by sendMessage callback 161 @param data: message data as sended by sendMessage callback
136 """ 162 """
137 # at this point, either ['extra']['rich'] or ['extra']['xhtml'] exists 163 # at this point, either ['extra']['rich'] or ['extra']['xhtml'] exists
138 # but both can't exist at the same time 164 # but both can't exist at the same time
139 message_elt = data['xml'] 165 message_elt = data["xml"]
140 html_elt = message_elt.addElement((NS_XHTML_IM, 'html')) 166 html_elt = message_elt.addElement((NS_XHTML_IM, "html"))
141 167
142 def syntax_converted(xhtml_im, lang): 168 def syntax_converted(xhtml_im, lang):
143 body_elt = html_elt.addElement((NS_XHTML, 'body')) 169 body_elt = html_elt.addElement((NS_XHTML, "body"))
144 if lang: 170 if lang:
145 body_elt[(C.NS_XML, 'lang')] = lang 171 body_elt[(C.NS_XML, "lang")] = lang
146 data['extra']['xhtml_{}'.format(lang)] = xhtml_im 172 data["extra"]["xhtml_{}".format(lang)] = xhtml_im
147 else: 173 else:
148 data['extra']['xhtml'] = xhtml_im 174 data["extra"]["xhtml"] = xhtml_im
149 body_elt.addRawXml(xhtml_im) 175 body_elt.addRawXml(xhtml_im)
150 176
151 syntax = self._s.getCurrentSyntax(client.profile) 177 syntax = self._s.getCurrentSyntax(client.profile)
152 defers = [] 178 defers = []
153 if u'xhtml' in data['extra']: 179 if u"xhtml" in data["extra"]:
154 # we have directly XHTML 180 # we have directly XHTML
155 for lang, xhtml in data_format.getSubDict('xhtml', data['extra']): 181 for lang, xhtml in data_format.getSubDict("xhtml", data["extra"]):
156 self._check_body_text(data, lang, xhtml, self._s.SYNTAX_XHTML, defers) 182 self._check_body_text(data, lang, xhtml, self._s.SYNTAX_XHTML, defers)
157 d = self._s.convert(xhtml, self._s.SYNTAX_XHTML, self.SYNTAX_XHTML_IM) 183 d = self._s.convert(xhtml, self._s.SYNTAX_XHTML, self.SYNTAX_XHTML_IM)
158 d.addCallback(syntax_converted, lang) 184 d.addCallback(syntax_converted, lang)
159 defers.append(d) 185 defers.append(d)
160 elif u'rich' in data['extra']: 186 elif u"rich" in data["extra"]:
161 # we have rich syntax to convert 187 # we have rich syntax to convert
162 for lang, rich_data in data_format.getSubDict('rich', data['extra']): 188 for lang, rich_data in data_format.getSubDict("rich", data["extra"]):
163 self._check_body_text(data, lang, rich_data, syntax, defers) 189 self._check_body_text(data, lang, rich_data, syntax, defers)
164 d = self._s.convert(rich_data, syntax, self.SYNTAX_XHTML_IM) 190 d = self._s.convert(rich_data, syntax, self.SYNTAX_XHTML_IM)
165 d.addCallback(syntax_converted, lang) 191 d.addCallback(syntax_converted, lang)
166 defers.append(d) 192 defers.append(d)
167 else: 193 else:
172 198
173 def messageReceivedTrigger(self, client, message, post_treat): 199 def messageReceivedTrigger(self, client, message, post_treat):
174 """ Check presence of XHTML-IM in message 200 """ Check presence of XHTML-IM in message
175 """ 201 """
176 try: 202 try:
177 html_elt = message.elements(NS_XHTML_IM, 'html').next() 203 html_elt = message.elements(NS_XHTML_IM, "html").next()
178 except StopIteration: 204 except StopIteration:
179 # No XHTML-IM 205 # No XHTML-IM
180 pass 206 pass
181 else: 207 else:
182 body_elts = html_elt.elements(NS_XHTML, 'body') 208 body_elts = html_elt.elements(NS_XHTML, "body")
183 post_treat.addCallback(self._messagePostTreat, message, body_elts, client) 209 post_treat.addCallback(self._messagePostTreat, message, body_elts, client)
184 return True 210 return True
185 211
186 def sendMessageTrigger(self, client, data, pre_xml_treatments, post_xml_treatments): 212 def sendMessageTrigger(self, client, data, pre_xml_treatments, post_xml_treatments):
187 """ Check presence of rich text in extra """ 213 """ Check presence of rich text in extra """
188 rich = {} 214 rich = {}
189 xhtml = {} 215 xhtml = {}
190 for key, value in data['extra'].iteritems(): 216 for key, value in data["extra"].iteritems():
191 if key.startswith('rich'): 217 if key.startswith("rich"):
192 rich[key[5:]] = value 218 rich[key[5:]] = value
193 elif key.startswith('xhtml'): 219 elif key.startswith("xhtml"):
194 xhtml[key[6:]] = value 220 xhtml[key[6:]] = value
195 if rich and xhtml: 221 if rich and xhtml:
196 raise exceptions.DataError(_(u"Can't have XHTML and rich content at the same time")) 222 raise exceptions.DataError(
223 _(u"Can't have XHTML and rich content at the same time")
224 )
197 if rich or xhtml: 225 if rich or xhtml:
198 if rich: 226 if rich:
199 data['rich'] = rich 227 data["rich"] = rich
200 else: 228 else:
201 data['xhtml'] = xhtml 229 data["xhtml"] = xhtml
202 post_xml_treatments.addCallback(self._sendMessageAddRich, client) 230 post_xml_treatments.addCallback(self._sendMessageAddRich, client)
203 return True 231 return True
204 232
205 def _purgeStyle(self, styles_raw): 233 def _purgeStyle(self, styles_raw):
206 """ Remove unauthorised styles according to the XEP-0071 234 """ Remove unauthorised styles according to the XEP-0071
207 @param styles_raw: raw styles (value of the style attribute) 235 @param styles_raw: raw styles (value of the style attribute)
208 """ 236 """
209 purged = [] 237 purged = []
210 238
211 styles = [style.strip().split(':') for style in styles_raw.split(';')] 239 styles = [style.strip().split(":") for style in styles_raw.split(";")]
212 240
213 for style_tuple in styles: 241 for style_tuple in styles:
214 if len(style_tuple) != 2: 242 if len(style_tuple) != 2:
215 continue 243 continue
216 name, value = style_tuple 244 name, value = style_tuple
217 name = name.strip() 245 name = name.strip()
218 if name not in styles_allowed: 246 if name not in styles_allowed:
219 continue 247 continue
220 purged.append((name, value.strip())) 248 purged.append((name, value.strip()))
221 249
222 return u'; '.join([u"%s: %s" % data for data in purged]) 250 return u"; ".join([u"%s: %s" % data for data in purged])
223 251
224 def XHTML2XHTML_IM(self, xhtml): 252 def XHTML2XHTML_IM(self, xhtml):
225 """ Convert XHTML document to XHTML_IM subset 253 """ Convert XHTML document to XHTML_IM subset
226 @param xhtml: raw xhtml to convert 254 @param xhtml: raw xhtml to convert
227 """ 255 """
228 # TODO: more clever tag replacement (replace forbidden tags with equivalents when possible) 256 # TODO: more clever tag replacement (replace forbidden tags with equivalents when possible)
229 257
230 parser = html.HTMLParser(remove_comments=True, encoding='utf-8') 258 parser = html.HTMLParser(remove_comments=True, encoding="utf-8")
231 root = html.fromstring(xhtml, parser=parser) 259 root = html.fromstring(xhtml, parser=parser)
232 body_elt = root.find('body') 260 body_elt = root.find("body")
233 if body_elt is None: 261 if body_elt is None:
234 # we use the whole XML as body if no body element is found 262 # we use the whole XML as body if no body element is found
235 body_elt = html.Element('body') 263 body_elt = html.Element("body")
236 body_elt.append(root) 264 body_elt.append(root)
237 else: 265 else:
238 body_elt.attrib.clear() 266 body_elt.attrib.clear()
239 267
240 allowed_tags = allowed.keys() 268 allowed_tags = allowed.keys()
245 else: 273 else:
246 # we remove unallowed attributes 274 # we remove unallowed attributes
247 attrib = elem.attrib 275 attrib = elem.attrib
248 att_to_remove = set(attrib).difference(allowed[elem.tag]) 276 att_to_remove = set(attrib).difference(allowed[elem.tag])
249 for att in att_to_remove: 277 for att in att_to_remove:
250 del(attrib[att]) 278 del (attrib[att])
251 if "style" in attrib: 279 if "style" in attrib:
252 attrib["style"] = self._purgeStyle(attrib["style"]) 280 attrib["style"] = self._purgeStyle(attrib["style"])
253 281
254 for elem in to_strip: 282 for elem in to_strip:
255 if elem.tag in blacklist: 283 if elem.tag in blacklist:
256 #we need to remove the element and all descendants 284 # we need to remove the element and all descendants
257 log.debug(u"removing black listed tag: %s" % (elem.tag)) 285 log.debug(u"removing black listed tag: %s" % (elem.tag))
258 elem.drop_tree() 286 elem.drop_tree()
259 else: 287 else:
260 elem.drop_tag() 288 elem.drop_tag()
261 if len(body_elt) !=1: 289 if len(body_elt) != 1:
262 root_elt = body_elt 290 root_elt = body_elt
263 body_elt.tag = "p" 291 body_elt.tag = "p"
264 else: 292 else:
265 root_elt = body_elt[0] 293 root_elt = body_elt[0]
266 294
267 return html.tostring(root_elt, encoding='unicode', method='xml') 295 return html.tostring(root_elt, encoding="unicode", method="xml")
296
268 297
269 class XEP_0071_handler(XMPPHandler): 298 class XEP_0071_handler(XMPPHandler):
270 implements(iwokkel.IDisco) 299 implements(iwokkel.IDisco)
271 300
272 def __init__(self, plugin_parent): 301 def __init__(self, plugin_parent):
273 self.plugin_parent = plugin_parent 302 self.plugin_parent = plugin_parent
274 self.host = plugin_parent.host 303 self.host = plugin_parent.host
275 304
276 def getDiscoInfo(self, requestor, target, nodeIdentifier=''): 305 def getDiscoInfo(self, requestor, target, nodeIdentifier=""):
277 return [disco.DiscoFeature(NS_XHTML_IM)] 306 return [disco.DiscoFeature(NS_XHTML_IM)]
278 307
279 def getDiscoItems(self, requestor, target, nodeIdentifier=''): 308 def getDiscoItems(self, requestor, target, nodeIdentifier=""):
280 return [] 309 return []