Mercurial > libervia-backend
annotate src/plugins/plugin_xep_0071.py @ 1422:be1fccf4854d
tmp (wokkel): licenses fixes:
the license headers were wrong and have been fixed: original work from Adrien Cossa is directly under AGPL v3 (with his agreement); work derived from Wokkel is sublicensed under AGPL v3, as allowed by the original license, to stay consistent with the rest of the code base.
These files (and only these) can be relicensed again to match the Wokkel license if Ralph plans to merge them upstream...
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 23 Apr 2015 10:57:40 +0200 (2015-04-23) |
parents | 069ad98b360d |
children | 94901070478e |
rev | line source |
---|---|
668 | 1 #!/usr/bin/python |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # SAT plugin for XHTML-IM (xep-0071) | |
1396 | 5 # Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Jérôme Poisson (goffi@goffi.org) |
668 | 6 |
7 # This program is free software: you can redistribute it and/or modify | |
8 # it under the terms of the GNU Affero General Public License as published by | |
9 # the Free Software Foundation, either version 3 of the License, or | |
10 # (at your option) any later version. | |
11 | |
12 # This program is distributed in the hope that it will be useful, | |
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 # GNU Affero General Public License for more details. | |
16 | |
17 # You should have received a copy of the GNU Affero General Public License | |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | |
771 | 20 from sat.core.i18n import _ |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
21 from sat.core import exceptions |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
922
diff
changeset
|
22 from sat.core.log import getLogger |
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
922
diff
changeset
|
23 log = getLogger(__name__) |
668 | 24 |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
922
diff
changeset
|
25 from wokkel import disco, iwokkel |
668 | 26 from zope.interface import implements |
27 # from lxml import etree | |
28 from lxml import html | |
29 try: | |
30 from twisted.words.protocols.xmlstream import XMPPHandler | |
31 except ImportError: | |
32 from wokkel.subprotocols import XMPPHandler | |
33 | |
# XML namespaces used by this plugin: the XHTML-IM wrapper element and the
# XHTML body it contains.
NS_XHTML = 'http://www.w3.org/1999/xhtml'
NS_XHTML_IM = 'http://jabber.org/protocol/xhtml-im'

# Plugin descriptor: names the main class ("XEP_0071") and declares the
# dependency on the TEXT-SYNTAXES plugin.
PLUGIN_INFO = dict(
    name="XHTML-IM Plugin",
    import_name="XEP-0071",
    type="XEP",
    protocols=["XEP-0071"],
    dependencies=["TEXT-SYNTAXES"],
    main="XEP_0071",
    handler="yes",
    description=_("""Implementation of XHTML-IM"""),
)
47 | |
# Whitelist of tags kept by the XHTML-IM conversion, mapped to the set of
# attributes which may stay on each of them (an empty string means that no
# attribute is kept).
allowed = dict(
    (tag_name, set(attribute_names.split()))
    for tag_name, attribute_names in (
        ("a", "href style type"),
        ("blockquote", "style"),
        ("body", "style"),
        ("br", ""),
        ("cite", "style"),
        ("em", ""),
        ("img", "alt height src style width"),
        ("li", "style"),
        ("ol", "style"),
        ("p", "style"),
        ("span", "style"),
        ("strong", ""),
        ("ul", "style"),
    )
)

# CSS properties which may stay inside a "style" attribute
styles_allowed = [
    "background-color",
    "color",
    "font-family",
    "font-size",
    "font-style",
    "font-weight",
    "margin-left",
    "margin-right",
    "text-align",
    "text-decoration",
]

# tags that we have to kill (we don't keep content)
blacklist = ['script']
67 | |
68 | |
class XEP_0071(object):
    """ XHTML-IM (XEP-0071) plugin

    Registers the "XHTML-IM" syntax in the TEXT-SYNTAXES plugin and hooks the
    "MessageReceived" and "sendMessage" triggers, so that rich content is
    extracted from incoming messages and added to outgoing ones.
    """
    SYNTAX_XHTML_IM = "XHTML-IM"

    def __init__(self, host):
        """
        @param host: host instance, its "plugins" mapping and its "trigger"
            manager are used
        """
        log.info(_("XHTML-IM plugin initialization"))
        self.host = host
        self.synt_plg = self.host.plugins["TEXT-SYNTAXES"]
        # XHTML-IM -> XHTML needs no transformation (identity function), while
        # XHTML -> XHTML-IM is done by XHTML2XHTML_IM
        self.synt_plg.addSyntax(self.SYNTAX_XHTML_IM, lambda xhtml: xhtml, self.XHTML2XHTML_IM, [self.synt_plg.OPT_HIDDEN])
        host.trigger.add("MessageReceived", self.messageReceivedTrigger)
        host.trigger.add("sendMessage", self.sendMessageTrigger)

    def getHandler(self, profile):
        """ Return the protocol handler of this plugin
        @param profile: unused
        @return: a new XEP_0071_handler instance
        """
        return XEP_0071_handler(self)

    def _messagePostTreat(self, data, body_elt):
        """ Callback which manage the post treatment of the message in case of XHTML-IM found
        @param data: data send by MessageReceived trigger through post_treat deferred
        @param body_elt: XHTML-IM body element found
        @return: a deferred firing with the data, the extra parameter being updated
        """
        #TODO: check if text only body is empty, then try to convert XHTML-IM to pure text and show a warning message
        def converted(xhtml):
            data['extra']['xhtml'] = xhtml
            return data
        d = self.synt_plg.convert(body_elt.toXml(), self.SYNTAX_XHTML_IM, safe=True)
        d.addCallback(converted)
        return d

    def _sendMessageAddRich(self, mess_data, profile):
        """ Construct XHTML-IM node and add it XML element
        @param mess_data: message data as sended by sendMessage callback
        @param profile: profile used to get the current rich syntax
        @return: a deferred firing with mess_data once the XHTML-IM node is
            added, or mess_data itself if there is no rich content to add
        @raise exceptions.DataError: both 'rich' and 'xhtml' extra are filled
        """
        def syntax_converted(xhtml_im):
            message_elt = mess_data['xml']
            html_elt = message_elt.addElement('html', NS_XHTML_IM)
            body_elt = html_elt.addElement('body', NS_XHTML)
            body_elt.addRawXml(xhtml_im)
            mess_data['extra']['xhtml'] = xhtml_im
            return mess_data

        extra = mess_data['extra']
        rich = extra.get('rich', '')
        xhtml = extra.get('xhtml', '')

        # the conflict is checked first, so no conversion is started for
        # nothing when the data are invalid
        if rich and xhtml:
            raise exceptions.DataError(_("Can't have xhtml and rich content at the same time"))

        if rich:
            syntax = self.synt_plg.getCurrentSyntax(profile)
            d = self.synt_plg.convert(rich, syntax, self.SYNTAX_XHTML_IM)
        elif xhtml:
            d = self.synt_plg.clean_xhtml(xhtml)
        else:
            # the keys are present but empty: there is nothing to add, the
            # message data are given back untouched
            return mess_data

        d.addCallback(syntax_converted)
        return d

    def messageReceivedTrigger(self, message, post_treat, profile):
        """ Check presence of XHTML-IM in message
        @param message: received message element
        @param post_treat: deferred to which the XHTML-IM post treatment is added
        @param profile: unused
        @return: True in every case
        """
        try:
            # next() builtin is used instead of the Python 2 only .next() method
            html_elt = next(message.elements(NS_XHTML_IM, 'html'))
            body_elt = next(html_elt.elements(NS_XHTML, 'body'))
        except StopIteration:
            # No XHTML-IM
            return True
        # OK, we have found rich text
        post_treat.addCallback(self._messagePostTreat, body_elt)
        return True

    def sendMessageTrigger(self, mess_data, pre_xml_treatments, post_xml_treatments, profile):
        """ Check presence of rich text in extra
        @param mess_data: message data, its 'extra' mapping is checked
        @param pre_xml_treatments: unused
        @param post_xml_treatments: deferred to which the XHTML-IM generation is added
        @param profile: given to the XHTML-IM generation callback
        @return: True in every case
        """
        extra = mess_data['extra']
        if 'rich' in extra or 'xhtml' in extra:
            post_xml_treatments.addCallback(self._sendMessageAddRich, profile)
        return True

    def _purgeStyle(self, styles_raw):
        """ Remove unauthorised styles according to the XEP-0071
        @param styles_raw: raw styles (value of the style attribute)
        @return: the kept declarations, joined with "; "
        """
        purged = []

        for declaration in styles_raw.split(';'):
            parts = declaration.strip().split(':')
            if len(parts) != 2:
                # empty or malformed declaration (this includes values
                # containing a colon): it is dropped
                continue
            name, value = parts
            name = name.strip()
            if name not in styles_allowed:
                continue
            purged.append(u"%s: %s" % (name, value.strip()))

        return u'; '.join(purged)

    def XHTML2XHTML_IM(self, xhtml):
        """ Convert XHTML document to XHTML_IM subset
        @param xhtml: raw xhtml to convert
        @return: serialised XHTML-IM, without the <body> root tag
        """
        # TODO: more clever tag replacement (replace forbidden tags with equivalents when possible)

        parser = html.HTMLParser(remove_comments=True, encoding='utf-8')
        root = html.fromstring(xhtml, parser=parser)
        body_elt = root.find('body')
        if body_elt is None:
            # we use the whole XML as body if no body element is found
            body_elt = html.Element('body')
            body_elt.append(root)
        else:
            body_elt.attrib.clear()

        # structural changes are delayed after the iteration, only attributes
        # are modified while walking the tree
        to_strip = []
        for elem in body_elt.iter():
            if elem.tag not in allowed:
                to_strip.append(elem)
                continue
            # we remove unallowed attributes
            attrib = elem.attrib
            for att in set(attrib).difference(allowed[elem.tag]):
                del attrib[att]
            if "style" in attrib:
                attrib["style"] = self._purgeStyle(attrib["style"])

        for elem in to_strip:
            if elem.tag in blacklist:
                #we need to remove the element and all descendants
                log.debug(u"removing black listed tag: %s" % (elem.tag))
                elem.drop_tree()
            else:
                elem.drop_tag()

        # a single child can only be used as root if the body has no text of
        # its own, else this text would be lost
        body_text = body_elt.text
        body_has_text = bool(body_text and body_text.strip())
        if len(body_elt) == 1 and not body_has_text:
            root_elt = body_elt[0]
        else:
            root_elt = body_elt
            body_elt.tag = "p"

        return html.tostring(root_elt, encoding='unicode', method='xml')
668 | 204 |
class XEP_0071_handler(XMPPHandler):
    """ Protocol handler which advertises the XHTML-IM feature in disco """
    implements(iwokkel.IDisco)

    def __init__(self, plugin_parent):
        """
        @param plugin_parent: plugin instance which has created this handler,
            its host is kept too
        """
        self.plugin_parent = plugin_parent
        self.host = self.plugin_parent.host

    def getDiscoInfo(self, requestor, target, nodeIdentifier=''):
        """ Return the disco features of this plugin
        @return: list with the XHTML-IM feature only, arguments are not used
        """
        xhtml_im_feature = disco.DiscoFeature(NS_XHTML_IM)
        return [xhtml_im_feature]

    def getDiscoItems(self, requestor, target, nodeIdentifier=''):
        """ Return the disco items of this plugin
        @return: empty list, arguments are not used
        """
        no_items = []
        return no_items