Mercurial > libervia-backend
annotate src/plugins/plugin_xep_0071.py @ 1812:160b0d4c6520
plugin XEP-0071, XEP-0277: method clean_xhtml has been renamed to cleanXHTML
author | souliane <souliane@mailoo.org> |
---|---|
date | Tue, 19 Jan 2016 11:51:07 +0100 |
parents | d17772b0fe22 |
children | cf53e3cc702a |
rev | line source |
---|---|
668 | 1 #!/usr/bin/python |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # SAT plugin for XHTML-IM (XEP-0071) | |
1766 | 5 # Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org) |
668 | 6 |
7 # This program is free software: you can redistribute it and/or modify | |
8 # it under the terms of the GNU Affero General Public License as published by | |
9 # the Free Software Foundation, either version 3 of the License, or | |
10 # (at your option) any later version. | |
11 | |
12 # This program is distributed in the hope that it will be useful, | |
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 # GNU Affero General Public License for more details. | |
16 | |
17 # You should have received a copy of the GNU Affero General Public License | |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | |
771 | 20 from sat.core.i18n import _ |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
21 from sat.core import exceptions |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
922
diff
changeset
|
22 from sat.core.log import getLogger |
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
922
diff
changeset
|
23 log = getLogger(__name__) |
668 | 24 |
1732
cf11cfc87ef9
xml_tools, plugin XEP-0071, XEP-0277: add method expandNewLinesToXHTML:
souliane <souliane@mailoo.org>
parents:
1542
diff
changeset
|
25 from sat.tools import xml_tools |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
922
diff
changeset
|
26 from wokkel import disco, iwokkel |
668 | 27 from zope.interface import implements |
28 # from lxml import etree | |
1542
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1396
diff
changeset
|
29 try: |
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1396
diff
changeset
|
30 from lxml import html |
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1396
diff
changeset
|
31 except ImportError: |
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1396
diff
changeset
|
32 raise exceptions.MissingModule(u"Missing module lxml, please download/install it from http://lxml.de/") |
668 | 33 try: |
34 from twisted.words.protocols.xmlstream import XMPPHandler | |
35 except ImportError: | |
36 from wokkel.subprotocols import XMPPHandler | |
37 | |
# Namespace of the <html/> wrapper element added to outgoing messages and
# looked up in incoming ones.
NS_XHTML_IM = "http://jabber.org/protocol/xhtml-im"
# Namespace of the <body/> element nested inside that wrapper.
NS_XHTML = "http://www.w3.org/1999/xhtml"
40 | |
# Plugin metadata.
# "main" names the plugin class defined in this module, and "dependencies"
# lists the TEXT-SYNTAXES plugin that XEP_0071.__init__ fetches from
# host.plugins.
# NOTE(review): exact meaning of the other keys is defined by the plugin
# loader, which is not visible here.
PLUGIN_INFO = dict(
    name="XHTML-IM Plugin",
    import_name="XEP-0071",
    type="XEP",
    protocols=["XEP-0071"],
    dependencies=["TEXT-SYNTAXES"],
    main="XEP_0071",
    handler="yes",
    description=_("Implementation of XHTML-IM"),
)
51 | |
# White list used by XEP_0071.XHTML2XHTML_IM: maps each tag which is kept to
# the set of attribute names which may stay on it. Tags which are not listed
# are stripped (their content is kept unless they are black listed).
allowed = {
    "a": {"href", "style", "type"},
    "blockquote": {"style"},
    "body": {"style"},
    "br": set(),
    "cite": {"style"},
    "em": set(),
    "img": {"alt", "height", "src", "style", "width"},
    "li": {"style"},
    "ol": {"style"},
    "p": {"style"},
    "span": {"style"},
    "strong": set(),
    "ul": {"style"},
}

# CSS properties which XEP_0071._purgeStyle keeps inside a "style" attribute
styles_allowed = [
    "background-color",
    "color",
    "font-family",
    "font-size",
    "font-style",
    "font-weight",
    "margin-left",
    "margin-right",
    "text-align",
    "text-decoration",
]

# tags that we have to kill (we don't keep content)
blacklist = ["script"]
71 | |
72 | |
class XEP_0071(object):
    """XHTML-IM (XEP-0071) plugin.

    Registers an "XHTML-IM" syntax with the TEXT-SYNTAXES plugin and hooks the
    "MessageReceived" and "sendMessage" triggers so that rich content is read
    from incoming messages and attached to outgoing ones.
    """
    SYNTAX_XHTML_IM = "XHTML-IM"

    def __init__(self, host):
        """
        @param host: host object; must expose ``plugins`` (with a
            "TEXT-SYNTAXES" entry) and ``trigger``
        """
        log.info(_("XHTML-IM plugin initialization"))
        self.host = host
        self.synt_plg = self.host.plugins["TEXT-SYNTAXES"]
        # XHTML-IM is passed through unchanged in one direction (identity
        # callable) and filtered by XHTML2XHTML_IM in the other.
        self.synt_plg.addSyntax(self.SYNTAX_XHTML_IM, lambda xhtml: xhtml, self.XHTML2XHTML_IM, [self.synt_plg.OPT_HIDDEN])
        host.trigger.add("MessageReceived", self.messageReceivedTrigger)
        host.trigger.add("sendMessage", self.sendMessageTrigger)

    def getHandler(self, profile):
        """Return a new protocol handler bound to this plugin

        @param profile: unused here
        @return: XEP_0071_handler instance
        """
        return XEP_0071_handler(self)

    def _messagePostTreat(self, data, body_elt):
        """ Callback which manage the post treatment of the message in case of XHTML-IM found
        @param data: data send by MessageReceived trigger through post_treat deferred
        @param body_elt: XHTML-IM body element found
        @return: deferred firing the data with the extra parameter updated
        """
        #TODO: check if text only body is empty, then try to convert XHTML-IM to pure text and show a warning message
        def converted(xhtml):
            data['extra']['xhtml'] = xhtml
            return data
        d = self.synt_plg.convert(body_elt.toXml(), self.SYNTAX_XHTML_IM, safe=True)
        d.addCallback(converted)
        return d

    def _sendMessageAddRich(self, mess_data, profile):
        """ Construct XHTML-IM node and add it XML element
        @param mess_data: message data as sended by sendMessage callback
        @param profile: profile used to get the current rich syntax
        @return: deferred firing mess_data once the XHTML-IM node is added,
            or mess_data itself when there is no rich content to add
        @raise exceptions.DataError: both "rich" and "xhtml" extra are set
        """
        extra = mess_data['extra']
        rich = extra.get('rich', '')
        xhtml = extra.get('xhtml', '')

        # reject the invalid combination before any conversion is started
        if rich and xhtml:
            raise exceptions.DataError(_("Can't have xhtml and rich content at the same time"))
        if not rich and not xhtml:
            # sendMessageTrigger only checks that the keys are present: with
            # empty values there is nothing to convert, and no deferred would
            # exist to chain on. This method is used as a deferred callback,
            # so returning the plain value is fine.
            return mess_data

        def syntax_converted(xhtml_im):
            message_elt = mess_data['xml']
            html_elt = message_elt.addElement('html', NS_XHTML_IM)
            body_elt = html_elt.addElement('body', NS_XHTML)
            body_elt.addRawXml(xhtml_im)
            mess_data['extra']['xhtml'] = xhtml_im
            return mess_data

        if rich:
            syntax = self.synt_plg.getCurrentSyntax(profile)
            d = self.synt_plg.convert(rich, syntax, self.SYNTAX_XHTML_IM)
        else:
            d = self.synt_plg.cleanXHTML(xhtml)
        d.addCallback(syntax_converted)
        return d

    def messageReceivedTrigger(self, message, post_treat, profile):
        """ Check presence of XHTML-IM in message
        @param message: received message element
        @param post_treat: deferred on which the post treatment is chained
        @param profile: unused here
        @return: always True
        """
        try:
            # next() builtin instead of the Python 2 only .next() method
            html_elt = next(message.elements(NS_XHTML_IM, 'html'))
            body_elt = next(html_elt.elements(NS_XHTML, 'body'))
        except StopIteration:
            # No XHTML-IM
            return True
        # OK, we have found rich text
        post_treat.addCallback(self._messagePostTreat, body_elt)
        return True

    def sendMessageTrigger(self, mess_data, pre_xml_treatments, post_xml_treatments, profile):
        """ Check presence of rich text in extra
        @param mess_data: message data; its "extra" dict is inspected
        @param pre_xml_treatments: unused here
        @param post_xml_treatments: deferred on which _sendMessageAddRich is chained
        @param profile: profile forwarded to _sendMessageAddRich
        @return: always True
        """
        if 'rich' in mess_data['extra'] or 'xhtml' in mess_data['extra']:
            post_xml_treatments.addCallback(self._sendMessageAddRich, profile)
        return True

    def _purgeStyle(self, styles_raw):
        """ Remove unauthorised styles according to the XEP-0071
        @param styles_raw: raw styles (value of the style attribute)
        @return: the kept declarations, formatted as "name: value" and
            joined with "; "
        """
        purged = []

        styles = [style.strip().split(':') for style in styles_raw.split(';')]

        for style_tuple in styles:
            # anything which is not exactly "name:value" is dropped, this
            # includes values which contain a colon themselves
            if len(style_tuple) != 2:
                continue
            name, value = style_tuple
            name = name.strip()
            if name not in styles_allowed:
                continue
            purged.append((name, value.strip()))

        return u'; '.join([u"%s: %s" % data for data in purged])

    def XHTML2XHTML_IM(self, xhtml):
        """ Convert XHTML document to XHTML_IM subset
        @param xhtml: raw xhtml to convert
        @return: serialised XHTML-IM, without the <body> root tag
        """
        # TODO: more clever tag replacement (replace forbidden tags with equivalents when possible)

        parser = html.HTMLParser(remove_comments=True, encoding='utf-8')
        root = html.fromstring(xhtml, parser=parser)
        body_elt = root.find('body')
        if body_elt is None:
            # we use the whole XML as body if no body element is found
            body_elt = html.Element('body')
            body_elt.append(root)
        else:
            body_elt.attrib.clear()

        to_strip = []
        for elem in body_elt.iter():
            if elem.tag not in allowed:
                to_strip.append(elem)
                continue
            # we remove unallowed attributes
            # NOTE(review): only attribute names and "style" values are
            # filtered, other values (e.g. href/src URLs) are kept as is
            attrib = elem.attrib
            for att in set(attrib).difference(allowed[elem.tag]):
                del attrib[att]
            if "style" in attrib:
                attrib["style"] = self._purgeStyle(attrib["style"])

        for elem in to_strip:
            if elem.tag in blacklist:
                #we need to remove the element and all descendants
                log.debug(u"removing black listed tag: %s" % (elem.tag))
                elem.drop_tree()
            else:
                elem.drop_tag()

        # The single child can only stand for the whole content if no
        # significant text sits directly in the body (before the child or in
        # its tail), else this text would be lost or left outside the root.
        loose_text = bool(body_elt.text and body_elt.text.strip())
        if not loose_text and len(body_elt) == 1:
            tail = body_elt[0].tail
            loose_text = bool(tail and tail.strip())

        if len(body_elt) != 1 or loose_text:
            root_elt = body_elt
            body_elt.tag = "p"
        else:
            root_elt = body_elt[0]

        return html.tostring(root_elt, encoding='unicode', method='xml')
668 | 208 |
class XEP_0071_handler(XMPPHandler):
    """Protocol handler advertising XHTML-IM support through service discovery"""
    implements(iwokkel.IDisco)

    def __init__(self, plugin_parent):
        """
        @param plugin_parent: XEP_0071 plugin instance which created this handler
        """
        # run the base class initialiser, which was skipped before
        XMPPHandler.__init__(self)
        self.plugin_parent = plugin_parent
        self.host = plugin_parent.host

    def getDiscoInfo(self, requestor, target, nodeIdentifier=''):
        """Advertise the XHTML-IM feature

        The arguments are not used: the same feature is returned for every
        request.
        @return: list with a single DiscoFeature for NS_XHTML_IM
        """
        return [disco.DiscoFeature(NS_XHTML_IM)]

    def getDiscoItems(self, requestor, target, nodeIdentifier=''):
        """No disco item is exposed by this plugin

        @return: empty list
        """
        return []