Mercurial > libervia-backend
comparison src/plugins/plugin_xep_0071.py @ 668:7bb50096d225
plugin XEP_0071: first draft
a new "xhtml" key is added in bridge newMessage's extra data when rich text is found.
author | Goffi <goffi@goffi.org> |
---|---|
date | Tue, 05 Nov 2013 22:41:45 +0100 |
parents | |
children | 98b2400e17d6 |
comparison
equal
deleted
inserted
replaced
667:a79a6843928c | 668:7bb50096d225 |
---|---|
1 #!/usr/bin/python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # SAT plugin for XHTML-IM (xep-0071) | |
5 # Copyright (C) 2009, 2010, 2011, 2012, 2013 Jérôme Poisson (goffi@goffi.org) | |
6 | |
7 # This program is free software: you can redistribute it and/or modify | |
8 # it under the terms of the GNU Affero General Public License as published by | |
9 # the Free Software Foundation, either version 3 of the License, or | |
10 # (at your option) any later version. | |
11 | |
12 # This program is distributed in the hope that it will be useful, | |
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 # GNU Affero General Public License for more details. | |
16 | |
17 # You should have received a copy of the GNU Affero General Public License | |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | |
20 from logging import debug, info, error | |
21 | |
22 from wokkel import disco, pubsub, iwokkel | |
23 from zope.interface import implements | |
24 # from lxml import etree | |
25 from lxml import html | |
26 try: | |
27 from twisted.words.protocols.xmlstream import XMPPHandler | |
28 except ImportError: | |
29 from wokkel.subprotocols import XMPPHandler | |
30 | |
# XML namespaces: the <html/> wrapper looked up in received messages, and the
# XHTML <body/> element it carries
NS_XHTML = 'http://www.w3.org/1999/xhtml'
NS_XHTML_IM = 'http://jabber.org/protocol/xhtml-im'

# Plugin metadata
PLUGIN_INFO = {
    # identification
    "name": "XHTML-IM Plugin",
    "import_name": "XEP-0071",
    "type": "XEP",
    "protocols": ["XEP-0071"],
    # the syntax conversion is delegated to the TEXT-SYNTAXES plugin
    "dependencies": ["TEXT-SYNTAXES"],
    # name of the plugin class defined in this module
    "main": "XEP_0071",
    # NOTE(review): presumably tells the host to call getHandler - confirm
    "handler": "yes",
    "description": _("Implementation of XHTML-IM"),
}
44 | |
# Elements kept when converting to XHTML-IM, each mapped to the set of
# attributes which may stay on it (anything else is removed)
allowed = dict(
    (tag, set(attributes)) for tag, attributes in (
        ("a", ("href", "style", "type")),
        ("blockquote", ("style",)),
        ("body", ("style",)),
        ("br", ()),
        ("cite", ("style",)),
        ("em", ()),
        ("img", ("alt", "height", "src", "style", "width")),
        ("li", ("style",)),
        ("ol", ("style",)),
        ("p", ("style",)),
        ("span", ("style",)),
        ("strong", ()),
        ("ul", ("style",)),
    )
)

# CSS properties which may stay in a "style" attribute
styles_allowed = [
    "background-color",
    "color",
    "font-family",
    "font-size",
    "font-style",
    "font-weight",
    "margin-left",
    "margin-right",
    "text-align",
    "text-decoration",
]

# tags that we have to kill (we don't keep their content)
blacklist = ['script']
64 | |
65 | |
class XEP_0071(object):
    """ XHTML-IM (XEP-0071) plugin

    Registers the "XHTML-IM" syntax in the TEXT-SYNTAXES plugin and, when a
    received message carries rich text, adds an "xhtml" key to its extra data.
    """
    SYNTAX_XHTML_IM = "XHTML-IM"

    def __init__(self, host):
        """ Register the XHTML-IM syntax and the MessageReceived trigger
        @param host: host instance giving access to plugins and triggers
        """
        info(_("XHTML-IM plugin initialization"))
        self.host = host
        txt_synt_plg = self.host.plugins["TEXT-SYNTAXES"]
        # first callable is the identity, second one reduces XHTML to the XHTML-IM subset
        txt_synt_plg.addSyntax(self.SYNTAX_XHTML_IM, lambda xhtml: xhtml, self.XHTML2XHTML_IM, [txt_synt_plg.OPT_HIDDEN])
        host.trigger.add("MessageReceived", self.messageReceivedTrigger)

    def getHandler(self, profile):
        """ Return the protocol handler of this plugin
        @param profile: profile asking for the handler (not used here)
        @return: a new XEP_0071_handler instance
        """
        return XEP_0071_handler(self)

    def _messagePostTreat(self, data, body_elt):
        """ Callback which manage the post treatment of the message in case of XHTML-IM found
        @param data: data send by MessageReceived trigger through post_treat deferred
        @param body_elt: XHTML-IM body element found
        @return: deferred firing with the data, its extra parameter being updated
        """
        #TODO: check if text only body is empty, then try to convert XHTML-IM to pure text and show a warning message
        def converted(xhtml):
            data['extra']['xhtml'] = xhtml
            return data
        txt_synt_plg = self.host.plugins["TEXT-SYNTAXES"]
        d = txt_synt_plg.convert(body_elt.toXml(), self.SYNTAX_XHTML_IM, safe=True)
        d.addCallback(converted)
        return d

    def messageReceivedTrigger(self, message, post_treat, profile):
        """ Check presence of XHTML-IM in message
        @param message: received message element
        @param post_treat: deferred on which the post treatment is chained
        @param profile: profile which received the message (not used here)
        @return: always True
        """
        try:
            # the next() builtin works with Python 2.6+ and Python 3,
            # where the .next() method of iterators doesn't exist anymore
            html_elt = next(message.elements(NS_XHTML_IM, 'html'))
            body_elt = next(html_elt.elements(NS_XHTML, 'body'))
        except StopIteration:
            # No XHTML-IM
            pass
        else:
            # OK, we have found rich text
            post_treat.addCallback(self._messagePostTreat, body_elt)
        return True

    def _purgeStyle(self, styles_raw):
        """ Remove unauthorised styles according to the XEP-0071
        @param styles_raw: raw styles (value of the style attribute)
        @return: unicode with the kept declarations, joined with "; "
        """
        purged = []

        for declaration in styles_raw.split(';'):
            style_tuple = declaration.split(':')
            if len(style_tuple) != 2:
                # not exactly one name and one value (this includes values
                # containing a colon): the declaration is dropped
                continue
            name, value = [part.strip() for part in style_tuple]
            # CSS property names are case-insensitive
            name = name.lower()
            if not value or name not in styles_allowed:
                continue
            purged.append((name, value))

        return u'; '.join([u"%s: %s" % data for data in purged])

    @staticmethod
    def _isSafeURL(url):
        """ Tell if an URL can be kept in a href or src attribute
        @param url: raw value of the attribute
        @return: False if the URL uses a scheme which executes script
        """
        # whitespaces and control characters are removed before the check, as
        # browsers may ignore them inside the scheme (e.g. "java\\tscript:")
        compact = "".join([char for char in url if ord(char) > 0x20]).lower()
        return not compact.startswith(("javascript:", "vbscript:"))

    def _purgeAttributes(self, elem):
        """ Remove forbidden attributes, unsafe URLs and unauthorised styles
        @param elem: element to clean in place, its tag must be a key of allowed
        """
        attrib = elem.attrib
        for att in set(attrib).difference(allowed[elem.tag]):
            del attrib[att]
        for att in ("href", "src"):
            if att in attrib and not self._isSafeURL(attrib[att]):
                debug(u"removing unsafe URL from attribute: %s" % att)
                del attrib[att]
        if "style" in attrib:
            attrib["style"] = self._purgeStyle(attrib["style"])

    def XHTML2XHTML_IM(self, xhtml):
        """ Convert XHTML document to XHTML_IM subset
        @param xhtml: raw xhtml to convert
        @return: unicode serialisation of the resulting body element
        """
        # TODO: more clever tag replacement (replace forbidden tags with equivalents when possible)

        parser = html.HTMLParser(remove_comments=True, encoding='utf-8')
        root = html.fromstring(xhtml, parser=parser)
        body_elt = root.find('body')
        if body_elt is None:
            # we use the whole XML as body if no body element is found
            body_elt = html.Element('body')
            body_elt.append(root)
        else:
            body_elt.attrib.clear()

        # the tree is not modified while we iterate on it: the elements to
        # remove are collected first, and stripped afterwards
        to_strip = []
        for elem in body_elt.iter():
            if elem.tag not in allowed:
                to_strip.append(elem)
            else:
                self._purgeAttributes(elem)

        for elem in to_strip:
            if elem.tag in blacklist:
                #we need to remove the element and all descendants
                debug(u"removing black listed tag: %s" % (elem.tag))
                elem.drop_tree()
            else:
                # only the tag is removed, its content is kept
                elem.drop_tag()

        return html.tostring(body_elt, encoding='unicode', method='xml')
165 | |
class XEP_0071_handler(XMPPHandler):
    """ Protocol handler declaring the XHTML-IM feature in disco answers """
    implements(iwokkel.IDisco)

    def __init__(self, plugin_parent):
        """
        @param plugin_parent: XEP_0071 plugin instance which created this handler
        """
        self.host = plugin_parent.host
        self.plugin_parent = plugin_parent

    def getDiscoInfo(self, requestor, target, nodeIdentifier=''):
        """ Return the features of this handler
        @return: list with a single DiscoFeature, for the XHTML-IM namespace
        """
        xhtml_im_feature = disco.DiscoFeature(NS_XHTML_IM)
        return [xhtml_im_feature]

    def getDiscoItems(self, requestor, target, nodeIdentifier=''):
        """ Return the items of this handler
        @return: empty list, as no item is declared
        """
        return list()