comparison src/plugins/plugin_xep_0277.py @ 1446:e8c8e467964b

plugins xep-0060, xep-0277: code simplification/cleaning/fix: - plugin xep-0060: moved rsm data to a more general metadata dict, which will contain all data relative to the node/items set. RSM metadata are prefixed with "rsm_" - plugin xep-0060: minor docstring fixes - plugin xep-0060: removed cache to simplify code base - fixed broken getLastMicroblogs - added _getLastMicroblogs as wrapper to getLastMicroblogs, for bridge - removed lxml dependency for this plugin, use native twisted instead - several improvements/fixes in item2mbdata
author Goffi <goffi@goffi.org>
date Sat, 15 Aug 2015 22:13:27 +0200
parents 16b1ba7ccaaa
children 7797dda847ae
comparison
equal deleted inserted replaced
1445:ddc7a39ff9d1 1446:e8c8e467964b
21 from sat.core.constants import Const as C 21 from sat.core.constants import Const as C
22 from sat.core.log import getLogger 22 from sat.core.log import getLogger
23 log = getLogger(__name__) 23 log = getLogger(__name__)
24 from twisted.words.protocols.jabber import jid 24 from twisted.words.protocols.jabber import jid
25 from twisted.internet import defer 25 from twisted.internet import defer
26 from twisted.python import failure
26 from sat.core import exceptions 27 from sat.core import exceptions
27 from sat.tools.xml_tools import ElementParser 28 from sat.tools.xml_tools import ElementParser
28 29
29 from wokkel import pubsub 30 from wokkel import pubsub
30 from feed import atom, date 31 from feed import atom, date
31 from lxml import etree
32 import uuid 32 import uuid
33 from time import time 33 from time import time
34 import urlparse 34 import urlparse
35 from cgi import escape 35 from cgi import escape
36 36
37 NS_MICROBLOG = 'urn:xmpp:microblog:0' 37 NS_MICROBLOG = 'urn:xmpp:microblog:0'
38 NS_ATOM = 'http://www.w3.org/2005/Atom'
38 NS_XHTML = 'http://www.w3.org/1999/xhtml' 39 NS_XHTML = 'http://www.w3.org/1999/xhtml'
39 NS_PUBSUB = 'http://jabber.org/protocol/pubsub' 40 NS_PUBSUB_EVENT = "{}{}".format(pubsub.NS_PUBSUB, "#event")
40 41
41 PLUGIN_INFO = { 42 PLUGIN_INFO = {
42 "name": "Microblogging over XMPP Plugin", 43 "name": "Microblogging over XMPP Plugin",
43 "import_name": "XEP-0277", 44 "import_name": "XEP-0277",
44 "type": "XEP", 45 "type": "XEP",
58 class XEP_0277(object): 59 class XEP_0277(object):
59 60
60 def __init__(self, host): 61 def __init__(self, host):
61 log.info(_("Microblogging plugin initialization")) 62 log.info(_("Microblogging plugin initialization"))
62 self.host = host 63 self.host = host
63 self.host.plugins["XEP-0163"].addPEPEvent("MICROBLOG", NS_MICROBLOG, self.microblogCB, self.sendMicroblog) 64 self.host.plugins["XEP-0163"].addPEPEvent("MICROBLOG", NS_MICROBLOG, self.microblogCB, self.sendMicroblog, notify=False)
64 host.bridge.addMethod("getLastMicroblogs", ".plugin", 65 host.bridge.addMethod("getLastMicroblogs", ".plugin",
65 in_sign='sis', out_sign='aa{ss}', 66 in_sign='sis', out_sign='(aa{ss}a{ss})',
66 method=self.getLastMicroblogs, 67 method=self._getLastMicroblogs,
67 async=True, 68 async=True,
68 doc={'summary': 'retrieve items', 69 doc={'summary': 'retrieve items',
69 'param_0': 'jid: publisher of wanted microblog', 70 'param_0': 'jid: publisher of wanted microblog',
70 'param_1': 'max_items: see XEP-0060 #6.5.7', 71 'param_1': 'max_items: see XEP-0060 #6.5.7',
71 'param_2': '%(doc_profile)s', 72 'param_2': '%(doc_profile)s',
85 for item in itemsEvent.items: 86 for item in itemsEvent.items:
86 self.item2mbdata(item).addCallbacks(manageItem, lambda failure: None) 87 self.item2mbdata(item).addCallbacks(manageItem, lambda failure: None)
87 88
88 ## data/item transformation ## 89 ## data/item transformation ##
89 90
90 def _getDomishInnerContent(self, elt):
91 """Return the inner content of a domish.Element."""
92 result = ''
93 for child in elt.children:
94 try:
95 result += child.toXml() # child id a domish.Element
96 except AttributeError:
97 result += child # child is unicode
98 return result
99
100 def _removeXHTMLMarkups(self, xhtml): 91 def _removeXHTMLMarkups(self, xhtml):
101 """Remove XHTML markups from the given string. 92 """Remove XHTML markups from the given string.
102 93
103 @param xhtml: the XHTML string to be cleaned 94 @param xhtml: the XHTML string to be cleaned
104 @return: a Deferred instance for the cleaned string 95 @return: a Deferred instance for the cleaned string
107 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML, 98 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML,
108 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT, 99 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT,
109 False) 100 False)
110 101
111 @defer.inlineCallbacks 102 @defer.inlineCallbacks
112 def item2mbdata(self, item): 103 def item2mbdata(self, item_elt):
113 """Convert an XML Item to microblog data used in bridge API 104 """Convert an XML Item to microblog data used in bridge API
114 105
115 @param item: domish.Element of microblog item 106 @param item_elt: domish.Element of microblog item
116 @return: microblog data (dictionary) 107 @return: microblog data (dictionary)
117 """ 108 """
118
119 def xpath(elt, path):
120 """Return the XPATH result of an entry element or its descendance."""
121 # XXX: use a wildcard to work with all and even undefined namespaces
122 return elt.xpath('/'.join(["*[local-name() = '%s']" % tag for tag in path.split('/')]))
123
124 def date2float(elt, path):
125 """Convert a date string to float without dealing with the date format."""
126 return unicode(date.rfc3339.tf_from_timestamp(xpath(elt, path)[0].text))
127
128 item_elt = etree.fromstring(item.toXml().encode('utf-8'))
129 item_id = item_elt.get('id', '')
130
131 # XXX: when you raise an exception from inline callbacks, do defer.returnValue(Exception())
132 # to make it catchable by an eventual errback. If you do raise Exception, raise Exception()
133 # or defer.returnValue(Exception), it will explode and then the normal callback is ran.
134
135 if item.uri not in (NS_PUBSUB, NS_PUBSUB + "#event"):
136 log.error(_(u"Unsupported namespace {ns} in pubsub item {id}").format(ns=item.uri, id=item_id))
137 defer.returnValue(exceptions.DataError())
138
139 try:
140 entry_elt = xpath(item_elt, 'entry')[0]
141 except IndexError:
142 log.error(_(u'No atom entry found in the pubsub item %s') % item_id)
143 defer.returnValue(exceptions.DataError())
144
145 microblog_data = {} 109 microblog_data = {}
146 110
147 for key in ['title', 'content']: # process the textual elements 111 def check_conflict(key):
148 for attr_elt in xpath(entry_elt, key): 112 if key in microblog_data:
149 # Return the inner content of a lxml.etree.Element. It is not 113 raise failure.Failure(exceptions.DataError("key {} is already present for item {}").format(key, item_elt['id']))
150 # trivial because the lxml tostring method would return the full 114 return key
151 # content including elt's tag and attributes, and elt.getchildren() 115
152 # would skip a text value which is not within an element... 116 @defer.inlineCallbacks
153 attr_content = self._getDomishInnerContent(ElementParser()(etree.tostring(attr_elt))) 117 def parseElement(elem):
154 if not attr_content.strip(): 118 """Parse title/content elements and fill microblog_data accordingly"""
155 continue # element with empty value 119 type_ = elem.getAttribute('type')
156 content_type = attr_elt.get('type', 'text').lower() 120 if type_ == 'xhtml':
157 if content_type == 'xhtml': 121 data_elt = elem.firstChildElement()
158 # Check for XHTML namespace and decapsulate the content so the user 122 if data_elt.uri != NS_XHTML:
159 # who wants to modify an entry will see the text that he entered. Also 123 raise failure.Failure(exceptions.DataError(_('Content of type XHTML must declare its namespace!')))
160 # this avoids successive encapsulation with a new <div>...</div> at 124 key = check_conflict(u'{}_xhtml'.format(elem.name))
161 # each modification (encapsulation is done in self.data2entry) 125 data = unicode(data_elt)
162 elt = ElementParser()(attr_content) 126 microblog_data[key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(data)
163 if elt.uri != NS_XHTML: 127 else:
164 raise exceptions.DataError(_('Content of type XHTML must declare its namespace!')) 128 key = check_conflict(elem.name)
165 text = self._getDomishInnerContent(elt) 129 microblog_data[key] = unicode(elem)
166 microblog_data['%s_xhtml' % key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(text) 130
167 else: 131
168 microblog_data[key] = attr_content 132 id_ = item_elt.getAttribute('id', '') # there can be no id for transient nodes
169 if key not in microblog_data and ('%s_xhtml' % key) in microblog_data: 133 microblog_data['id'] = id_
170 microblog_data[key] = yield self._removeXHTMLMarkups(microblog_data['%s_xhtml' % key]) 134 if item_elt.uri not in (pubsub.NS_PUBSUB, NS_PUBSUB_EVENT):
171 135 msg = u"Unsupported namespace {ns} in pubsub item {id_}".format(ns=item_elt.uri, id_=id_)
172 try: # check for mandatory elements 136 log.warning(msg)
173 microblog_data['id'] = xpath(entry_elt, 'id')[0].text 137 raise failure.Failure(exceptions.DataError(msg))
174 microblog_data['updated'] = date2float(entry_elt, 'updated') 138
175 assert('title' in microblog_data) # has been processed already 139 try:
176 except IndexError: 140 entry_elt = item_elt.elements(NS_ATOM, 'entry').next()
177 log.error(_(u"Atom entry of pubsub item %s misses a required element") % item_id) 141 except StopIteration:
178 defer.returnValue(exceptions.DataError()) 142 msg = u'No atom entry found in the pubsub item {}'.format(id_)
179 143 raise failure.Failure(exceptions.DataError(msg))
180 if 'content' not in microblog_data: # use the atom title data as the microblog body content 144
145 try:
146 title_elt = entry_elt.elements(NS_ATOM, 'title').next()
147 except StopIteration:
148 msg = u'No atom title found in the pubsub item {}'.format(id_)
149 raise failure.Failure(exceptions.DataError(msg))
150
151 yield parseElement(title_elt)
152
153 for content_elt in entry_elt.elements(NS_ATOM, 'content'):
154 yield parseElement(content_elt)
155
156 # we check that text content is present
157 for key in ('title', 'content'):
158 if key not in microblog_data and ('{}_xhtml'.format(key)) in microblog_data:
159 log.warning(u"item {id_} provide a {key}_xhtml data but not a text one".format(id_, key))
160 # ... and do the conversion if it's not
161 microblog_data[key] = yield self.host.plugins["TEXT-SYNTAXES"].\
162 convert(microblog_data['{}_xhtml'.format(key)],
163 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML,
164 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT,
165 False)
166
167 try:
168 id_elt = entry_elt.elements(NS_ATOM, 'id').next()
169 except StopIteration:
170 msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_)
171 log.warning(msg)
172 microblog_data['atom_id'] = ""
173 else:
174 microblog_data['atom_id'] = unicode(id_elt)
175
176 try:
177 updated_elt = entry_elt.elements(NS_ATOM, 'updated').next()
178 except StopIteration:
179 msg = u'No atom updated element found in the pubsub item {}'.format(id_)
180 raise failure.Failure(exceptions.DataError(msg))
181 microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt)))
182
183 if 'content' not in microblog_data:
184 # use the atom title data as the microblog body content
181 microblog_data['content'] = microblog_data['title'] 185 microblog_data['content'] = microblog_data['title']
182 del microblog_data['title'] 186 del microblog_data['title']
183 if 'title_xhtml' in microblog_data: 187 if 'title_xhtml' in microblog_data:
184 microblog_data['content_xhtml'] = microblog_data['title_xhtml'] 188 microblog_data['content_xhtml'] = microblog_data['title_xhtml']
185 del microblog_data['title_xhtml'] 189 del microblog_data['title_xhtml']
186 190
187 # recommended and optional elements with a fallback value 191 try:
188 try: 192 published_elt = entry_elt.elements(NS_ATOM, 'published').next()
189 microblog_data['published'] = date2float(entry_elt, 'published') 193 except StopIteration:
190 except IndexError:
191 microblog_data['published'] = microblog_data['updated'] 194 microblog_data['published'] = microblog_data['updated']
192 195 else:
193 # other recommended and optional elements 196 microblog_data['published'] = unicode(date.rfc3339.tf_from_timestamp(unicode(published_elt)))
194 try: 197
195 link_elt = xpath(entry_elt, "link")[0] 198
199 for link_elt in entry_elt.elements(NS_ATOM, 'link'):
200 if link_elt.getAttribute('rel') == 'replies' and link_elt.getAttribute('title') == 'comments':
201 key = check_conflict('comments')
202 microblog_data[key] = link_elt['href']
203 try:
204 service, node = self.parseCommentUrl(microblog_data[key])
205 except:
206 log.warning(u"Can't parse url {}".format(microblog_data[key]))
207 del microblog_data[key]
208 else:
209 microblog_data['comments_service'] = service.full()
210 microblog_data['comments_node'] = node
211 else:
212 rel = link_elt.getAttribute('rel','')
213 title = link_elt.getAttribute('title','')
214 href = link_elt.getAttribute('href','')
215 log.warning(u"Unmanaged link element: rel={rel} title={title} href={href}".format(rel=rel, title=title, href=href))
216
217 try:
218 author_elt = entry_elt.elements(NS_ATOM, 'author').next()
219 except StopIteration:
220 log.warning("Can't find author element in item {}".format(id_))
221 else:
196 try: 222 try:
197 assert(link_elt.attrib['title'] == "comments") 223 name_elt = author_elt.elements(NS_ATOM, 'name').next()
198 microblog_data['comments'] = link_elt.attrib['href'] 224 except StopIteration:
199 service, node = self.parseCommentUrl(microblog_data["comments"]) 225 log.warning("No name element found in author element of item {}".format(id_))
200 microblog_data['comments_service'] = service.full() 226 else:
201 microblog_data['comments_node'] = node 227 microblog_data['author'] = unicode(name_elt)
202 except (exceptions.DataError, RuntimeError, KeyError):
203 log.warning(_(u"Can't parse the link element of atom entry %s") % microblog_data['id'])
204 except:
205 pass
206 try:
207 microblog_data['author'] = xpath(entry_elt, 'author/name')[0].text
208 except IndexError:
209 try: # XXX: workaround for Jappix behaviour
210 microblog_data['author'] = xpath(entry_elt, 'author/nick')[0].text
211 except IndexError:
212 log.warning(_(u"Can't find author element in atom entry %s") % microblog_data['id'])
213 228
214 defer.returnValue(microblog_data) 229 defer.returnValue(microblog_data)
215 230
216 @defer.inlineCallbacks 231 @defer.inlineCallbacks
217 def data2entry(self, data, profile): 232 def data2entry(self, data, profile):
218 """Convert a data dict to en entry usable to create an item 233 """Convert a data dict to en entry usable to create an item
219 234
220 @param data: data dict as given by bridge method. 235 @param data: data dict as given by bridge method.
221 @return: deferred which fire domish.Element 236 @return: deferred which fire domish.Element
222 """ 237 """
238 #TODO: rewrite this directly with twisted (i.e. without atom / reparsing)
223 _uuid = unicode(uuid.uuid1()) 239 _uuid = unicode(uuid.uuid1())
224 _entry = atom.Entry() 240 _entry = atom.Entry()
225 _entry.title = '' # reset the default value which is not empty 241 _entry.title = '' # reset the default value which is not empty
226 242
227 elems = {'title': atom.Title, 'content': atom.Content} 243 elems = {'title': atom.Title, 'content': atom.Content}
238 else: # clean the XHTML input 254 else: # clean the XHTML input
239 converted = yield synt.clean_xhtml(data[attr]) 255 converted = yield synt.clean_xhtml(data[attr])
240 elem = elems[key]((u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted)).encode('utf-8')) 256 elem = elems[key]((u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted)).encode('utf-8'))
241 elem.attrs['type'] = 'xhtml' 257 elem.attrs['type'] = 'xhtml'
242 if hasattr(_entry, '%s_xhtml' % key): 258 if hasattr(_entry, '%s_xhtml' % key):
243 raise exceptions.DataError(_("Can't have xhtml and rich content at the same time")) 259 raise failure.Failure(exceptions.DataError(_("Can't have xhtml and rich content at the same time")))
244 setattr(_entry, '%s_xhtml' % key, elem) 260 setattr(_entry, '%s_xhtml' % key, elem)
245 else: # raw text only needs to be escaped to get HTML-safe sequence 261 else: # raw text only needs to be escaped to get HTML-safe sequence
246 elem = elems[key](escape(data[attr]).encode('utf-8')) 262 elem = elems[key](escape(data[attr]).encode('utf-8'))
247 elem.attrs['type'] = 'text' 263 elem.attrs['type'] = 'text'
248 setattr(_entry, key, elem) 264 setattr(_entry, key, elem)
284 """Send XEP-0277's microblog data 300 """Send XEP-0277's microblog data
285 301
286 @param data: must include content 302 @param data: must include content
287 @param profile: profile which send the mood""" 303 @param profile: profile which send the mood"""
288 if 'content' not in data: 304 if 'content' not in data:
289 log.error(_("Microblog data must contain at least 'content' key")) 305 log.error("Microblog data must contain at least 'content' key")
290 raise exceptions.DataError('no "content" key found') 306 raise failure.Failure(exceptions.DataError('no "content" key found'))
291 content = data['content'] 307 content = data['content']
292 if not content: 308 if not content:
293 log.error(_("Microblog data's content value must not be empty")) 309 log.error("Microblog data's content value must not be empty")
294 raise exceptions.DataError('empty content') 310 raise failure.Failure(exceptions.DataError('empty content'))
295 item = yield self.data2entry(data, profile) 311 item = yield self.data2entry(data, profile)
296 ret = yield self.host.plugins["XEP-0060"].publish(None, NS_MICROBLOG, [item], profile_key=profile) 312 ret = yield self.host.plugins["XEP-0060"].publish(None, NS_MICROBLOG, [item], profile_key=profile)
297 defer.returnValue(ret) 313 defer.returnValue(ret)
298 314
299 ## get ## 315 ## get ##
300 316
317 def _getLastMicroblogs(self, pub_jid_s, max_items=10, profile_key=C.PROF_KEY_NONE):
318 return self.getLastMicroblogs(jid.JID(pub_jid_s), max_items, profile_key)
319
320 @defer.inlineCallbacks
301 def getLastMicroblogs(self, pub_jid, max_items=10, profile_key=C.PROF_KEY_NONE): 321 def getLastMicroblogs(self, pub_jid, max_items=10, profile_key=C.PROF_KEY_NONE):
302 """Get the last published microblogs 322 """Get the last published microblogs
303 323
304 @param pub_jid: jid of the publisher 324 @param pub_jid(jid.JID): jid of the publisher
305 @param max_items: how many microblogs we want to get 325 @param max_items: how many microblogs we want to get
306 @param profile_key: profile key 326 @param profile_key: profile key
307 327
308 @return: a deferred couple with the list of items and RSM information. 328 @return: a deferred couple with the list of items and metadatas.
309 """ 329 """
310 d = self.host.plugins["XEP-0060"].getItems(jid.JID(pub_jid), NS_MICROBLOG, max_items=max_items, profile_key=profile_key) 330 items, metadata = yield self.host.plugins["XEP-0060"].getItems(pub_jid, NS_MICROBLOG, max_items=max_items, profile_key=profile_key)
311 d.addCallback(lambda res: (defer.DeferredList(map(self.item2mbdata, res[0]), consumeErrors=True), res[1])) 331 dlist_result = yield defer.DeferredList(map(self.item2mbdata, items), consumeErrors=True)
312 d.addCallback(lambda res: ([value for (success, value) in res[0] if success], res[1])) 332 items_data = []
313 return d 333 for success, value in dlist_result:
334 if success:
335 items_data.append(value)
336 else:
337 log.warning(u"Error while parsing microblog data: {}".format(value.value))
338 defer.returnValue((items_data, metadata))
314 339
315 def parseCommentUrl(self, node_url): 340 def parseCommentUrl(self, node_url):
316 """Determine the fields comments_service and comments_node of a microblog data 341 """Parse a XMPP URI
342
343 Determine the fields comments_service and comments_node of a microblog data
317 from the href attribute of an entry's link element. For example this input: 344 from the href attribute of an entry's link element. For example this input:
318 xmpp:sat-pubsub.libervia.org?node=urn%3Axmpp%3Acomments%3A_c5c4a142-2279-4b2a-ba4c-1bc33aa87634__urn%3Axmpp%3Agroupblog%3Asouliane%40libervia.org 345 xmpp:sat-pubsub.libervia.org?node=urn%3Axmpp%3Acomments%3A_c5c4a142-2279-4b2a-ba4c-1bc33aa87634__urn%3Axmpp%3Agroupblog%3Asouliane%40libervia.org
319 will return (JID(u'sat-pubsub.libervia.org'), 'urn:xmpp:comments:_c5c4a142-2279-4b2a-ba4c-1bc33aa87634__urn:xmpp:groupblog:souliane@libervia.org') 346 will return (JID(u'sat-pubsub.libervia.org'), 'urn:xmpp:comments:_c5c4a142-2279-4b2a-ba4c-1bc33aa87634__urn:xmpp:groupblog:souliane@libervia.org')
320 @return: a tuple (JID, str) 347 @return: a tuple (JID, str)
321 """ 348 """
326 for query in queries: 353 for query in queries:
327 parsed_queries.update(urlparse.parse_qs(query)) 354 parsed_queries.update(urlparse.parse_qs(query))
328 node = parsed_queries.get('node', [''])[0] 355 node = parsed_queries.get('node', [''])[0]
329 356
330 if not node: 357 if not node:
331 raise exceptions.DataError('Invalid comments link') 358 raise failure.Failure(exceptions.DataError('Invalid comments link'))
332 359
333 return (service, node) 360 return (service, node)
334 361
335 ## configure ## 362 ## configure ##
336 363