libervia-backend: src/plugins/plugin_xep

comparison src/plugins/plugin_xep_0277.py @ 1446:e8c8e467964b

plugins xep-0060, xep-0277: code simplification/cleaning/fix: - plugin xep-0060: moved rsm data to a more general metadata dict, which will contain all data related to the node/items set. RSM metadata are prefixed with "rsm_" - plugin xep-0060: minor docstring fixes - plugin xep-0060: removed cache to simplify code base - fixed broken getLastMicroblogs - added _getLastMicroblogs as wrapper to getLastMicroblogs, for bridge - removed lxml dependency for this plugin, use native twisted instead - several improvements/fixes in item2mbdata

author	Goffi <goffi@goffi.org>
date	Sat, 15 Aug 2015 22:13:27 +0200
parents	16b1ba7ccaaa
children	7797dda847ae

comparison

equal deleted inserted replaced

-:ddc7a39ff9d1
+:e8c8e467964b
 from sat.core.constants import Const as C
 from sat.core.log import getLogger
 log = getLogger(__name__)
 from twisted.words.protocols.jabber import jid
 from twisted.internet import defer
+from twisted.python import failure
 from sat.core import exceptions
 from sat.tools.xml_tools import ElementParser
 from wokkel import pubsub
 from feed import atom, date
-from lxml import etree
 import uuid
 from time import time
 import urlparse
 from cgi import escape
 NS_MICROBLOG = 'urn:xmpp:microblog:0'
+NS_ATOM = 'http://www.w3.org/2005/Atom'
 NS_XHTML = 'http://www.w3.org/1999/xhtml'
-NS_PUBSUB = 'http://jabber.org/protocol/pubsub'
+NS_PUBSUB_EVENT = "{}{}".format(pubsub.NS_PUBSUB, "#event")
 PLUGIN_INFO = {
 "name": "Microblogging over XMPP Plugin",
 "import_name": "XEP-0277",
 "type": "XEP",
 class XEP_0277(object):
 def __init__(self, host):
 log.info(_("Microblogging plugin initialization"))
 self.host = host
-self.host.plugins["XEP-0163"].addPEPEvent("MICROBLOG", NS_MICROBLOG, self.microblogCB, self.sendMicroblog)
+self.host.plugins["XEP-0163"].addPEPEvent("MICROBLOG", NS_MICROBLOG, self.microblogCB, self.sendMicroblog, notify=False)
 host.bridge.addMethod("getLastMicroblogs", ".plugin",
-in_sign='sis', out_sign='aa{ss}',
+in_sign='sis', out_sign='(aa{ss}a{ss})',
-method=self.getLastMicroblogs,
+method=self._getLastMicroblogs,
 async=True,
 doc={'summary': 'retrieve items',
 'param_0': 'jid: publisher of wanted microblog',
 'param_1': 'max_items: see XEP-0060 #6.5.7',
 'param_2': '%(doc_profile)s',
 for item in itemsEvent.items:
 self.item2mbdata(item).addCallbacks(manageItem, lambda failure: None)
 ## data/item transformation ##
-def _getDomishInnerContent(self, elt):
-"""Return the inner content of a domish.Element."""
-result = ''
-for child in elt.children:
-try:
-result += child.toXml()  # child id a domish.Element
-except AttributeError:
-result += child  # child is unicode
-return result
 def _removeXHTMLMarkups(self, xhtml):
 """Remove XHTML markups from the given string.
 @param xhtml: the XHTML string to be cleaned
 @return: a Deferred instance for the cleaned string
 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML,
 self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT,
 False)
 @defer.inlineCallbacks
-def item2mbdata(self, item):
+def item2mbdata(self, item_elt):
 """Convert an XML Item to microblog data used in bridge API
-@param item: domish.Element of microblog item
+@param item_elt: domish.Element of microblog item
 @return: microblog data (dictionary)
 """
-def xpath(elt, path):
-"""Return the XPATH result of an entry element or its descendance."""
-# XXX: use a wildcard to work with all and even undefined namespaces
-return elt.xpath('/'.join(["*[local-name() = '%s']" % tag for tag in path.split('/')]))
-def date2float(elt, path):
-"""Convert a date string to float without dealing with the date format."""
-return unicode(date.rfc3339.tf_from_timestamp(xpath(elt, path)[0].text))
-item_elt = etree.fromstring(item.toXml().encode('utf-8'))
-item_id = item_elt.get('id', '')
-# XXX: when you raise an exception from inline callbacks, do defer.returnValue(Exception())
-# to make it catchable by an eventual errback. If you do raise Exception, raise Exception()
-# or defer.returnValue(Exception), it will explode and then the normal callback is ran.
-if item.uri not in (NS_PUBSUB, NS_PUBSUB + "#event"):
-log.error(_(u"Unsupported namespace {ns} in pubsub item {id}").format(ns=item.uri, id=item_id))
-defer.returnValue(exceptions.DataError())
-try:
-entry_elt = xpath(item_elt, 'entry')[0]
-except IndexError:
-log.error(_(u'No atom entry found in the pubsub item %s') % item_id)
-defer.returnValue(exceptions.DataError())
 microblog_data = {}
-for key in ['title', 'content']:  # process the textual elements
+def check_conflict(key):
-for attr_elt in xpath(entry_elt, key):
+if key in microblog_data:
-# Return the inner content of a lxml.etree.Element. It is not
+raise failure.Failure(exceptions.DataError("key {} is already present for item {}").format(key, item_elt['id']))
-# trivial because the lxml tostring method would return the full
+return key
-# content including elt's tag and attributes, and elt.getchildren()
-# would skip a text value which is not within an element...
+@defer.inlineCallbacks
-attr_content = self._getDomishInnerContent(ElementParser()(etree.tostring(attr_elt)))
+def parseElement(elem):
-if not attr_content.strip():
+"""Parse title/content elements and fill microblog_data accordingly"""
-continue  # element with empty value
+type_ = elem.getAttribute('type')
-content_type = attr_elt.get('type', 'text').lower()
+if type_ == 'xhtml':
-if content_type == 'xhtml':
+data_elt = elem.firstChildElement()
-# Check for XHTML namespace and decapsulate the content so the user
+if data_elt.uri != NS_XHTML:
-# who wants to modify an entry will see the text that he entered. Also
+raise failure.Failure(exceptions.DataError(_('Content of type XHTML must declare its namespace!')))
-# this avoids successive encapsulation with a new <div>...</div> at
+key = check_conflict(u'{}_xhtml'.format(elem.name))
-# each modification (encapsulation is done in self.data2entry)
+data = unicode(data_elt)
-elt = ElementParser()(attr_content)
+microblog_data[key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(data)
-if elt.uri != NS_XHTML:
+else:
-raise exceptions.DataError(_('Content of type XHTML must declare its namespace!'))
+key = check_conflict(elem.name)
-text = self._getDomishInnerContent(elt)
+microblog_data[key] = unicode(elem)
-microblog_data['%s_xhtml' % key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(text)
-else:
-microblog_data[key] = attr_content
+id_ = item_elt.getAttribute('id', '') # there can be no id for transient nodes
-if key not in microblog_data and ('%s_xhtml' % key) in microblog_data:
+microblog_data['id'] = id_
-microblog_data[key] = yield self._removeXHTMLMarkups(microblog_data['%s_xhtml' % key])
+if item_elt.uri not in (pubsub.NS_PUBSUB, NS_PUBSUB_EVENT):
+msg = u"Unsupported namespace {ns} in pubsub item {id_}".format(ns=item_elt.uri, id_=id_)
-try:  # check for mandatory elements
+log.warning(msg)
-microblog_data['id'] = xpath(entry_elt, 'id')[0].text
+raise failure.Failure(exceptions.DataError(msg))
-microblog_data['updated'] = date2float(entry_elt, 'updated')
-assert('title' in microblog_data)  # has been processed already
+try:
-except IndexError:
+entry_elt = item_elt.elements(NS_ATOM, 'entry').next()
-log.error(_(u"Atom entry of pubsub item %s misses a required element") % item_id)
+except StopIteration:
-defer.returnValue(exceptions.DataError())
+msg = u'No atom entry found in the pubsub item {}'.format(id_)
+raise failure.Failure(exceptions.DataError(msg))
-if 'content' not in microblog_data:  # use the atom title data as the microblog body content
+try:
+title_elt = entry_elt.elements(NS_ATOM, 'title').next()
+except StopIteration:
+msg = u'No atom title found in the pubsub item {}'.format(id_)
+raise failure.Failure(exceptions.DataError(msg))
+yield parseElement(title_elt)
+for content_elt in entry_elt.elements(NS_ATOM, 'content'):
+yield parseElement(content_elt)
+# we check that text content is present
+for key in ('title', 'content'):
+if key not in microblog_data and ('{}_xhtml'.format(key)) in microblog_data:
+log.warning(u"item {id_} provide a {key}_xhtml data but not a text one".format(id_, key))
+# ... and do the conversion if it's not
+microblog_data[key] = yield self.host.plugins["TEXT-SYNTAXES"].\
+convert(microblog_data['{}_xhtml'.format(key)],
+self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML,
+self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT,
+False)
+try:
+id_elt = entry_elt.elements(NS_ATOM, 'id').next()
+except StopIteration:
+msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_)
+log.warning(msg)
+microblog_data['atom_id'] = ""
+else:
+microblog_data['atom_id'] = unicode(id_elt)
+try:
+updated_elt = entry_elt.elements(NS_ATOM, 'updated').next()
+except StopIteration:
+msg = u'No atom updated element found in the pubsub item {}'.format(id_)
+raise failure.Failure(exceptions.DataError(msg))
+microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt)))
+if 'content' not in microblog_data:
+# use the atom title data as the microblog body content
 microblog_data['content'] = microblog_data['title']
 del microblog_data['title']
 if 'title_xhtml' in microblog_data:
 microblog_data['content_xhtml'] = microblog_data['title_xhtml']
 del microblog_data['title_xhtml']
-# recommended and optional elements with a fallback value
+try:
-try:
+published_elt = entry_elt.elements(NS_ATOM, 'published').next()
-microblog_data['published'] = date2float(entry_elt, 'published')
+except StopIteration:
-except IndexError:
 microblog_data['published'] = microblog_data['updated']
+else:
-# other recommended and optional elements
+microblog_data['published'] = unicode(date.rfc3339.tf_from_timestamp(unicode(published_elt)))
-try:
-link_elt = xpath(entry_elt, "link")[0]
+for link_elt in entry_elt.elements(NS_ATOM, 'link'):
+if link_elt.getAttribute('rel') == 'replies' and link_elt.getAttribute('title') == 'comments':
+key = check_conflict('comments')
+microblog_data[key] = link_elt['href']
+try:
+service, node = self.parseCommentUrl(microblog_data[key])
+except:
+log.warning(u"Can't parse url {}".format(microblog_data[key]))
+del microblog_data[key]
+else:
+microblog_data['comments_service'] = service.full()
+microblog_data['comments_node'] = node
+else:
+rel = link_elt.getAttribute('rel','')
+title = link_elt.getAttribute('title','')
+href = link_elt.getAttribute('href','')
+log.warning(u"Unmanaged link element: rel={rel} title={title} href={href}".format(rel=rel, title=title, href=href))
+try:
+author_elt = entry_elt.elements(NS_ATOM, 'author').next()
+except StopIteration:
+log.warning("Can't find author element in item {}".format(id_))
+else:
 try:
-assert(link_elt.attrib['title'] == "comments")
+name_elt = author_elt.elements(NS_ATOM, 'name').next()
-microblog_data['comments'] = link_elt.attrib['href']
+except StopIteration:
-service, node = self.parseCommentUrl(microblog_data["comments"])
+log.warning("No name element found in author element of item {}".format(id_))
-microblog_data['comments_service'] = service.full()
+else:
-microblog_data['comments_node'] = node
+microblog_data['author'] = unicode(name_elt)
-except (exceptions.DataError, RuntimeError, KeyError):
-log.warning(_(u"Can't parse the link element of atom entry %s") % microblog_data['id'])
-except:
-pass
-try:
-microblog_data['author'] = xpath(entry_elt, 'author/name')[0].text
-except IndexError:
-try:  # XXX: workaround for Jappix behaviour
-microblog_data['author'] = xpath(entry_elt, 'author/nick')[0].text
-except IndexError:
-log.warning(_(u"Can't find author element in atom entry %s") % microblog_data['id'])
 defer.returnValue(microblog_data)
 @defer.inlineCallbacks
 def data2entry(self, data, profile):
 """Convert a data dict to en entry usable to create an item
 @param data: data dict as given by bridge method.
 @return: deferred which fire domish.Element
 """
+#TODO: rewrite this directly with twisted (i.e. without atom / reparsing)
 _uuid = unicode(uuid.uuid1())
 _entry = atom.Entry()
 _entry.title = ''  # reset the default value which is not empty
 elems = {'title': atom.Title, 'content': atom.Content}
 else:  # clean the XHTML input
 converted = yield synt.clean_xhtml(data[attr])
 elem = elems[key]((u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted)).encode('utf-8'))
 elem.attrs['type'] = 'xhtml'
 if hasattr(_entry, '%s_xhtml' % key):
-raise exceptions.DataError(_("Can't have xhtml and rich content at the same time"))
+raise failure.Failure(exceptions.DataError(_("Can't have xhtml and rich content at the same time")))
 setattr(_entry, '%s_xhtml' % key, elem)
 else:  # raw text only needs to be escaped to get HTML-safe sequence
 elem = elems[key](escape(data[attr]).encode('utf-8'))
 elem.attrs['type'] = 'text'
 setattr(_entry, key, elem)
 """Send XEP-0277's microblog data
 @param data: must include content
 @param profile: profile which send the mood"""
 if 'content' not in data:
-log.error(_("Microblog data must contain at least 'content' key"))
+log.error("Microblog data must contain at least 'content' key")
-raise exceptions.DataError('no "content" key found')
+raise failure.Failure(exceptions.DataError('no "content" key found'))
 content = data['content']
 if not content:
-log.error(_("Microblog data's content value must not be empty"))
+log.error("Microblog data's content value must not be empty")
-raise exceptions.DataError('empty content')
+raise failure.Failure(exceptions.DataError('empty content'))
 item = yield self.data2entry(data, profile)
 ret = yield self.host.plugins["XEP-0060"].publish(None, NS_MICROBLOG, [item], profile_key=profile)
 defer.returnValue(ret)
 ## get ##
+def _getLastMicroblogs(self, pub_jid_s, max_items=10, profile_key=C.PROF_KEY_NONE):
+return self.getLastMicroblogs(jid.JID(pub_jid_s), max_items, profile_key)
+@defer.inlineCallbacks
 def getLastMicroblogs(self, pub_jid, max_items=10, profile_key=C.PROF_KEY_NONE):
 """Get the last published microblogs
-@param pub_jid: jid of the publisher
+@param pub_jid(jid.JID): jid of the publisher
 @param max_items: how many microblogs we want to get
 @param profile_key: profile key
-@return: a deferred couple with the list of items and RSM information.
+@return: a deferred couple with the list of items and metadatas.
 """
-d = self.host.plugins["XEP-0060"].getItems(jid.JID(pub_jid), NS_MICROBLOG, max_items=max_items, profile_key=profile_key)
+items, metadata = yield self.host.plugins["XEP-0060"].getItems(pub_jid, NS_MICROBLOG, max_items=max_items, profile_key=profile_key)
-d.addCallback(lambda res: (defer.DeferredList(map(self.item2mbdata, res[0]), consumeErrors=True), res[1]))
+dlist_result = yield defer.DeferredList(map(self.item2mbdata, items), consumeErrors=True)
-d.addCallback(lambda res: ([value for (success, value) in res[0] if success], res[1]))
+items_data = []
-return d
+for success, value in dlist_result:
+if success:
+items_data.append(value)
+else:
+log.warning(u"Error while parsing microblog data: {}".format(value.value))
+defer.returnValue((items_data, metadata))
 def parseCommentUrl(self, node_url):
-"""Determine the fields comments_service and comments_node of a microblog data
+"""Parse a XMPP URI
+Determine the fields comments_service and comments_node of a microblog data
 from the href attribute of an entry's link element. For example this input:
 xmpp:sat-pubsub.libervia.org?node=urn%3Axmpp%3Acomments%3A_c5c4a142-2279-4b2a-ba4c-1bc33aa87634__urn%3Axmpp%3Agroupblog%3Asouliane%40libervia.org
 will return (JID(u'sat-pubsub.libervia.org'), 'urn:xmpp:comments:_c5c4a142-2279-4b2a-ba4c-1bc33aa87634__urn:xmpp:groupblog:souliane@libervia.org')
 @return: a tuple (JID, str)
 """
 for query in queries:
 parsed_queries.update(urlparse.parse_qs(query))
 node = parsed_queries.get('node', [''])[0]
 if not node:
-raise exceptions.DataError('Invalid comments link')
+raise failure.Failure(exceptions.DataError('Invalid comments link'))
 return (service, node)
 ## configure ##

Mercurial > libervia-backend

comparison src/plugins/plugin_xep_0277.py @ 1446:e8c8e467964b