668
|
1 #!/usr/bin/python |
|
2 # -*- coding: utf-8 -*- |
|
3 |
|
4 # SAT plugin for XHTML-IM (xep-0071) |
|
5 # Copyright (C) 2009, 2010, 2011, 2012, 2013 Jérôme Poisson (goffi@goffi.org) |
|
6 |
|
7 # This program is free software: you can redistribute it and/or modify |
|
8 # it under the terms of the GNU Affero General Public License as published by |
|
9 # the Free Software Foundation, either version 3 of the License, or |
|
10 # (at your option) any later version. |
|
11 |
|
12 # This program is distributed in the hope that it will be useful, |
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
15 # GNU Affero General Public License for more details. |
|
16 |
|
17 # You should have received a copy of the GNU Affero General Public License |
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
19 |
|
20 from logging import debug, info, error |
|
21 |
|
22 from wokkel import disco, pubsub, iwokkel |
|
23 from zope.interface import implements |
|
24 # from lxml import etree |
|
25 from lxml import html |
|
26 try: |
|
27 from twisted.words.protocols.xmlstream import XMPPHandler |
|
28 except ImportError: |
|
29 from wokkel.subprotocols import XMPPHandler |
|
30 |
|
# XML namespaces looked up in incoming messages: the XHTML-IM <html/> wrapper
# element and the XHTML <body/> element it contains.
NS_XHTML = "http://www.w3.org/1999/xhtml"
NS_XHTML_IM = "http://jabber.org/protocol/xhtml-im"
|
33 |
|
# Plugin metadata: "main" names the plugin class defined in this module and
# "dependencies" lists the plugins it looks up in host.plugins.
PLUGIN_INFO = {
    "name": "XHTML-IM Plugin",
    "import_name": "XEP-0071",
    "type": "XEP",
    "protocols": ["XEP-0071"],
    "dependencies": ["TEXT-SYNTAXES"],
    "main": "XEP_0071",
    "handler": "yes",
    "description": _("Implementation of XHTML-IM"),
}
|
44 |
|
# Whitelist used by the XHTML -> XHTML-IM conversion: maps each tag that is
# kept to the set of attribute names that may stay on it. Tags absent from this
# mapping are stripped, and attributes absent from the set are deleted.
allowed = {
    "a": {"href", "style", "type"},
    "blockquote": {"style"},
    "body": {"style"},
    "br": set(),
    "cite": {"style"},
    "em": set(),
    "img": {"alt", "height", "src", "style", "width"},
    "li": {"style"},
    "ol": {"style"},
    "p": {"style"},
    "span": {"style"},
    "strong": set(),
    "ul": {"style"},
}

# CSS property names that survive the purge of a "style" attribute value.
styles_allowed = [
    "background-color",
    "color",
    "font-family",
    "font-size",
    "font-style",
    "font-weight",
    "margin-left",
    "margin-right",
    "text-align",
    "text-decoration",
]

# Tags that have to be killed: the element is dropped together with its content.
blacklist = ["script"]
|
64 |
|
65 |
|
class XEP_0071(object):
    """XHTML-IM (XEP-0071) plugin.

    Registers the "XHTML-IM" syntax with the TEXT-SYNTAXES plugin and hooks the
    "MessageReceived" trigger, so that a rich-text body found in an incoming
    message is converted and stored in the message data under
    data['extra']['xhtml'].
    """

    SYNTAX_XHTML_IM = "XHTML-IM"

    def __init__(self, host):
        """Register the XHTML-IM syntax and the message trigger.

        @param host: host object; it must expose "plugins" (containing the
            "TEXT-SYNTAXES" plugin) and "trigger"
        """
        info(_("XHTML-IM plugin initialization"))
        self.host = host
        txt_synt_plg = self.host.plugins["TEXT-SYNTAXES"]
        # NOTE(review): the two callbacks are presumably "to XHTML" (identity)
        # and "from XHTML" (sanitiser) -- confirm against TEXT-SYNTAXES.addSyntax
        txt_synt_plg.addSyntax(self.SYNTAX_XHTML_IM, lambda xhtml: xhtml, self.XHTML2XHTML_IM, [txt_synt_plg.OPT_HIDDEN])
        host.trigger.add("MessageReceived", self.messageReceivedTrigger)

    def getHandler(self, profile):
        """Return a new protocol handler bound to this plugin.

        @param profile: profile the handler is requested for (not used here)
        @return: XEP_0071_handler instance
        """
        return XEP_0071_handler(self)

    def _messagePostTreat(self, data, body_elt):
        """ Callback which manage the post treatment of the message in case of XHTML-IM found
        @param data: data sent by MessageReceived trigger through post_treat deferred
        @param body_elt: XHTML-IM body element found
        @return: deferred firing with the data, its 'extra' parameter updated
            with the converted body under the 'xhtml' key
        """
        #TODO: check if text only body is empty, then try to convert XHTML-IM to pure text and show a warning message
        def converted(xhtml):
            # do not assume that 'extra' is already present: a KeyError raised
            # here would turn the whole post treatment into a failure
            data.setdefault('extra', {})['xhtml'] = xhtml
            return data

        txt_synt_plg = self.host.plugins["TEXT-SYNTAXES"]
        d = txt_synt_plg.convert(body_elt.toXml(), self.SYNTAX_XHTML_IM, safe=True)
        d.addCallback(converted)
        return d

    def messageReceivedTrigger(self, message, post_treat, profile):
        """ Check presence of XHTML-IM in message
        @param message: received message element
        @param post_treat: deferred on which the XHTML-IM post treatment is
            chained when a rich-text body is found
        @param profile: profile which received the message (not used here)
        @return: always True
        """
        try:
            # next() builtin instead of the Python 2 only .next() method
            html_elt = next(message.elements(NS_XHTML_IM, 'html'))
            body_elt = next(html_elt.elements(NS_XHTML, 'body'))
        except StopIteration:
            # No XHTML-IM
            pass
        else:
            # OK, we have found rich text
            post_treat.addCallback(self._messagePostTreat, body_elt)
        return True

    def _purgeStyle(self, styles_raw):
        """ Remove unauthorised styles according to the XEP-0071
        @param styles_raw: raw styles (value of the style attribute)
        @return: unicode string with the kept declarations, formatted as
            "name: value" and joined with "; " (empty if nothing is kept)
        """
        purged = []

        for declaration in styles_raw.split(';'):
            parts = declaration.strip().split(':')
            if len(parts) != 2:
                # empty chunk, missing value or value containing ':': dropped
                continue
            name, value = parts
            name = name.strip()
            if name not in styles_allowed:
                continue
            purged.append(u"%s: %s" % (name, value.strip()))

        return u'; '.join(purged)

    def XHTML2XHTML_IM(self, xhtml):
        """ Convert XHTML document to XHTML_IM subset
        @param xhtml: raw xhtml to convert
        @return: serialised <body/> element containing only whitelisted tags,
            attributes and styles
        """
        # TODO: more clever tag replacement (replace forbidden tags with equivalents when possible)

        parser = html.HTMLParser(remove_comments=True, encoding='utf-8')
        root = html.fromstring(xhtml, parser=parser)
        body_elt = root.find('body')
        if body_elt is None:
            # we use the whole XML as body if no body element is found
            body_elt = html.Element('body')
            body_elt.append(root)
        else:
            body_elt.attrib.clear()

        # First pass: clean the attributes of the kept elements and collect the
        # elements to remove. Removal is deferred so that the tree is not
        # restructured while it is being iterated.
        to_strip = []
        for elem in body_elt.iter():
            if elem.tag not in allowed:
                to_strip.append(elem)
                continue

            # we remove unallowed attributes
            # NOTE(review): except for "style", attribute values are kept
            # verbatim; in particular href/src URLs are not validated here
            attrib = elem.attrib
            for att in set(attrib).difference(allowed[elem.tag]):
                del attrib[att]

            if "style" in attrib:
                purged_style = self._purgeStyle(attrib["style"])
                if purged_style:
                    attrib["style"] = purged_style
                else:
                    # nothing survived the purge: drop the attribute instead of
                    # emitting an empty style=""
                    del attrib["style"]

        # Second pass: actual removal
        for elem in to_strip:
            if elem.tag in blacklist:
                #we need to remove the element and all descendants
                debug(u"removing black listed tag: %s" % (elem.tag))
                elem.drop_tree()
            else:
                # only the tag is removed, its children and text are kept
                elem.drop_tag()

        return html.tostring(body_elt, encoding='unicode', method='xml')
|
165 |
|
class XEP_0071_handler(XMPPHandler):
    """Protocol handler advertising the XHTML-IM feature through service discovery."""
    implements(iwokkel.IDisco)

    def __init__(self, plugin_parent):
        """
        @param plugin_parent: XEP_0071 plugin instance which created this handler
        """
        # Run the base initialiser so that the attributes it manages exist
        # before the handler is attached
        # NOTE(review): per the Twisted/Wokkel docs it takes no argument and
        # sets "parent" and "xmlstream" to None -- confirm for the version in use
        XMPPHandler.__init__(self)
        self.plugin_parent = plugin_parent
        self.host = plugin_parent.host

    def getDiscoInfo(self, requestor, target, nodeIdentifier=''):
        """Return the disco#info entries of this handler.

        @return: list with a single DiscoFeature for the XHTML-IM namespace,
            whatever the requestor, target or node
        """
        return [disco.DiscoFeature(NS_XHTML_IM)]

    def getDiscoItems(self, requestor, target, nodeIdentifier=''):
        """Return the disco#items entries of this handler.

        @return: always an empty list
        """
        return []