Mercurial > libervia-backend

diff src/plugins/plugin_xep_0277.py @ 1446:e8c8e467964b
plugins xep-0060, xep-0277: code simplification/cleaning/fix: - plugin xep-0060: moved rsm data to a more general metadata dict, which will contain all data relative to the node/items set. RSM metadata are prefixed with "rsm_" - plugin xep-0060: minor docstring fixes - plugin xep-0060: removed cache to simplify code base - fixed broken getLastMicroblogs - added _getLastMicroblogs as wrapper to getLastMicroblogs, for bridge - removed lxml dependency for this plugin, use native twisted instead - several improvements/fixes in item2mbdata
author: Goffi <goffi@goffi.org>
date: Sat, 15 Aug 2015 22:13:27 +0200
parents: 16b1ba7ccaaa
children: 7797dda847ae
--- a/src/plugins/plugin_xep_0277.py	Wed Jul 22 11:42:37 2015 +0200
+++ b/src/plugins/plugin_xep_0277.py	Sat Aug 15 22:13:27 2015 +0200
@@ -23,20 +23,21 @@
 log = getLogger(__name__)
 from twisted.words.protocols.jabber import jid
 from twisted.internet import defer
+from twisted.python import failure
 from sat.core import exceptions
 from sat.tools.xml_tools import ElementParser
 
 from wokkel import pubsub
 from feed import atom, date
-from lxml import etree
 import uuid
 from time import time
 import urlparse
 from cgi import escape
 
 NS_MICROBLOG = 'urn:xmpp:microblog:0'
+NS_ATOM = 'http://www.w3.org/2005/Atom'
 NS_XHTML = 'http://www.w3.org/1999/xhtml'
-NS_PUBSUB = 'http://jabber.org/protocol/pubsub'
+NS_PUBSUB_EVENT = "{}{}".format(pubsub.NS_PUBSUB, "#event")
 
 PLUGIN_INFO = {
     "name": "Microblogging over XMPP Plugin",
@@ -60,10 +61,10 @@
     def __init__(self, host):
         log.info(_("Microblogging plugin initialization"))
         self.host = host
-        self.host.plugins["XEP-0163"].addPEPEvent("MICROBLOG", NS_MICROBLOG, self.microblogCB, self.sendMicroblog)
+        self.host.plugins["XEP-0163"].addPEPEvent("MICROBLOG", NS_MICROBLOG, self.microblogCB, self.sendMicroblog, notify=False)
         host.bridge.addMethod("getLastMicroblogs", ".plugin",
-                              in_sign='sis', out_sign='aa{ss}',
-                              method=self.getLastMicroblogs,
+                              in_sign='sis', out_sign='(aa{ss}a{ss})',
+                              method=self._getLastMicroblogs,
                               async=True,
                               doc={'summary': 'retrieve items',
                                    'param_0': 'jid: publisher of wanted microblog',
@@ -87,16 +88,6 @@
 
     ## data/item transformation ##
 
-    def _getDomishInnerContent(self, elt):
-        """Return the inner content of a domish.Element."""
-        result = ''
-        for child in elt.children:
-            try:
-                result += child.toXml()  # child id a domish.Element
-            except AttributeError:
-                result += child  # child is unicode
-        return result
-
     def _removeXHTMLMarkups(self, xhtml):
         """Remove XHTML markups from the given string.
 
@@ -109,107 +100,131 @@
                                                           False)
 
     @defer.inlineCallbacks
-    def item2mbdata(self, item):
+    def item2mbdata(self, item_elt):
         """Convert an XML Item to microblog data used in bridge API
 
-        @param item: domish.Element of microblog item
+        @param item_elt: domish.Element of microblog item
         @return: microblog data (dictionary)
         """
-
-        def xpath(elt, path):
-            """Return the XPATH result of an entry element or its descendance."""
-            # XXX: use a wildcard to work with all and even undefined namespaces
-            return elt.xpath('/'.join(["*[local-name() = '%s']" % tag for tag in path.split('/')]))
-
-        def date2float(elt, path):
-            """Convert a date string to float without dealing with the date format."""
-            return unicode(date.rfc3339.tf_from_timestamp(xpath(elt, path)[0].text))
-
-        item_elt = etree.fromstring(item.toXml().encode('utf-8'))
-        item_id = item_elt.get('id', '')
-
-        # XXX: when you raise an exception from inline callbacks, do defer.returnValue(Exception())
-        # to make it catchable by an eventual errback. If you do raise Exception, raise Exception()
-        # or defer.returnValue(Exception), it will explode and then the normal callback is ran.
-
-        if item.uri not in (NS_PUBSUB, NS_PUBSUB + "#event"):
-            log.error(_(u"Unsupported namespace {ns} in pubsub item {id}").format(ns=item.uri, id=item_id))
-            defer.returnValue(exceptions.DataError())
-
-        try:
-            entry_elt = xpath(item_elt, 'entry')[0]
-        except IndexError:
-            log.error(_(u'No atom entry found in the pubsub item %s') % item_id)
-            defer.returnValue(exceptions.DataError())
-
         microblog_data = {}
 
-        for key in ['title', 'content']:  # process the textual elements
-            for attr_elt in xpath(entry_elt, key):
-                # Return the inner content of a lxml.etree.Element. It is not
-                # trivial because the lxml tostring method would return the full
-                # content including elt's tag and attributes, and elt.getchildren()
-                # would skip a text value which is not within an element...
-                attr_content = self._getDomishInnerContent(ElementParser()(etree.tostring(attr_elt)))
-                if not attr_content.strip():
-                    continue  # element with empty value
-                content_type = attr_elt.get('type', 'text').lower()
-                if content_type == 'xhtml':
-                    # Check for XHTML namespace and decapsulate the content so the user
-                    # who wants to modify an entry will see the text that he entered. Also
-                    # this avoids successive encapsulation with a new <div>...</div> at
-                    # each modification (encapsulation is done in self.data2entry)
-                    elt = ElementParser()(attr_content)
-                    if elt.uri != NS_XHTML:
-                        raise exceptions.DataError(_('Content of type XHTML must declare its namespace!'))
-                    text = self._getDomishInnerContent(elt)
-                    microblog_data['%s_xhtml' % key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(text)
-                else:
-                    microblog_data[key] = attr_content
-            if key not in microblog_data and ('%s_xhtml' % key) in microblog_data:
-                microblog_data[key] = yield self._removeXHTMLMarkups(microblog_data['%s_xhtml' % key])
+        def check_conflict(key):  # guard used before each write: returns key unchanged, or fails if microblog_data already holds it
+            if key in microblog_data:
+                raise failure.Failure(exceptions.DataError("key {} is already present for item {}".format(key, item_elt.getAttribute('id', ''))))  # format the message itself (not the exception); id may be absent
+            return key
+
+        @defer.inlineCallbacks
+        def parseElement(elem):
+            """Parse a title/content element and fill microblog_data accordingly (XHTML payloads are stored under "<name>_xhtml")"""
+            type_ = elem.getAttribute('type')
+            if type_ == 'xhtml':
+                data_elt = elem.firstChildElement()  # wrapper element of the XHTML payload; None when elem has no child element
+                if data_elt is None or data_elt.uri != NS_XHTML:
+                    raise failure.Failure(exceptions.DataError(_('Content of type XHTML must declare its namespace!')))
+                key = check_conflict(u'{}_xhtml'.format(elem.name))
+                data = data_elt.toXml()  # serialise the whole subtree: unicode(element) would only give its first text node
+                microblog_data[key] = yield self.host.plugins["TEXT-SYNTAXES"].clean_xhtml(data)
+            else:
+                key = check_conflict(elem.name)
+                microblog_data[key] = unicode(elem)  # plain text: the element's text content is the value
+
+
+        id_ = item_elt.getAttribute('id', '') # there can be no id for transient nodes
+        microblog_data['id'] = id_
+        if item_elt.uri not in (pubsub.NS_PUBSUB, NS_PUBSUB_EVENT):
+            msg = u"Unsupported namespace {ns} in pubsub item {id_}".format(ns=item_elt.uri, id_=id_)
+            log.warning(msg)
+            raise failure.Failure(exceptions.DataError(msg))
+
+        try:
+            entry_elt = item_elt.elements(NS_ATOM, 'entry').next()
+        except StopIteration:
+            msg = u'No atom entry found in the pubsub item {}'.format(id_)
+            raise failure.Failure(exceptions.DataError(msg))
 
-        try:  # check for mandatory elements
-            microblog_data['id'] = xpath(entry_elt, 'id')[0].text
-            microblog_data['updated'] = date2float(entry_elt, 'updated')
-            assert('title' in microblog_data)  # has been processed already
-        except IndexError:
-            log.error(_(u"Atom entry of pubsub item %s misses a required element") % item_id)
-            defer.returnValue(exceptions.DataError())
+        try:
+            title_elt = entry_elt.elements(NS_ATOM, 'title').next()
+        except StopIteration:
+            msg = u'No atom title found in the pubsub item {}'.format(id_)
+            raise failure.Failure(exceptions.DataError(msg))
+
+        yield parseElement(title_elt)
+
+        for content_elt in entry_elt.elements(NS_ATOM, 'content'):
+            yield parseElement(content_elt)
 
-        if 'content' not in microblog_data:  # use the atom title data as the microblog body content
+        # every XHTML title/content must also have a plain text counterpart
+        for key in ('title', 'content'):
+            if key not in microblog_data and ('{}_xhtml'.format(key)) in microblog_data:
+                log.warning(u"item {id_} provide a {key}_xhtml data but not a text one".format(id_=id_, key=key))
+                # only the XHTML variant was received: derive the text one by converting XHTML to text
+                microblog_data[key] = yield self.host.plugins["TEXT-SYNTAXES"].\
+                                            convert(microblog_data['{}_xhtml'.format(key)],
+                                            self.host.plugins["TEXT-SYNTAXES"].SYNTAX_XHTML,
+                                            self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT,
+                                            False)
+
+        try:
+            id_elt = entry_elt.elements(NS_ATOM, 'id').next()
+        except StopIteration:
+            msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_)
+            log.warning(msg)
+            microblog_data['atom_id'] = ""
+        else:
+            microblog_data['atom_id'] = unicode(id_elt)
+
+        try:
+            updated_elt = entry_elt.elements(NS_ATOM, 'updated').next()
+        except StopIteration:
+            msg = u'No atom updated element found in the pubsub item {}'.format(id_)
+            raise failure.Failure(exceptions.DataError(msg))
+        microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt)))
+
+        if 'content' not in microblog_data:
+            # use the atom title data as the microblog body content
             microblog_data['content'] = microblog_data['title']
             del microblog_data['title']
             if 'title_xhtml' in microblog_data:
                 microblog_data['content_xhtml'] = microblog_data['title_xhtml']
                 del microblog_data['title_xhtml']
 
-        # recommended and optional elements with a fallback value
         try:
-            microblog_data['published'] = date2float(entry_elt, 'published')
-        except IndexError:
+            published_elt = entry_elt.elements(NS_ATOM, 'published').next()
+        except StopIteration:
             microblog_data['published'] = microblog_data['updated']
+        else:
+            microblog_data['published'] = unicode(date.rfc3339.tf_from_timestamp(unicode(published_elt)))
 
-        # other recommended and optional elements
+
+        for link_elt in entry_elt.elements(NS_ATOM, 'link'):
+            if link_elt.getAttribute('rel') == 'replies' and link_elt.getAttribute('title') == 'comments':
+                key = check_conflict('comments')
+                microblog_data[key] = link_elt['href']
+                try:
+                    service, node = self.parseCommentUrl(microblog_data[key])
+                except:
+                    log.warning(u"Can't parse url {}".format(microblog_data[key]))
+                    del microblog_data[key]
+                else:
+                    microblog_data['comments_service'] = service.full()
+                    microblog_data['comments_node'] = node
+            else:
+                rel = link_elt.getAttribute('rel','')
+                title = link_elt.getAttribute('title','')
+                href = link_elt.getAttribute('href','')
+                log.warning(u"Unmanaged link element: rel={rel} title={title} href={href}".format(rel=rel, title=title, href=href))
+
         try:
-            link_elt = xpath(entry_elt, "link")[0]
+            author_elt = entry_elt.elements(NS_ATOM, 'author').next()
+        except StopIteration:
+            log.warning("Can't find author element in item {}".format(id_))
+        else:
             try:
-                assert(link_elt.attrib['title'] == "comments")
-                microblog_data['comments'] = link_elt.attrib['href']
-                service, node = self.parseCommentUrl(microblog_data["comments"])
-                microblog_data['comments_service'] = service.full()
-                microblog_data['comments_node'] = node
-            except (exceptions.DataError, RuntimeError, KeyError):
-                log.warning(_(u"Can't parse the link element of atom entry %s") % microblog_data['id'])
-        except:
-            pass
-        try:
-            microblog_data['author'] = xpath(entry_elt, 'author/name')[0].text
-        except IndexError:
-            try:  # XXX: workaround for Jappix behaviour
-                microblog_data['author'] = xpath(entry_elt, 'author/nick')[0].text
-            except IndexError:
-                log.warning(_(u"Can't find author element in atom entry %s") % microblog_data['id'])
+                name_elt = author_elt.elements(NS_ATOM, 'name').next()
+            except StopIteration:
+                log.warning("No name element found in author element of item {}".format(id_))
+            else:
+                microblog_data['author'] = unicode(name_elt)
 
         defer.returnValue(microblog_data)
 
@@ -220,6 +235,7 @@
         @param data: data dict as given by bridge method.
         @return: deferred which fire domish.Element
         """
+        #TODO: rewrite this directly with twisted (i.e. without atom / reparsing)
         _uuid = unicode(uuid.uuid1())
         _entry = atom.Entry()
         _entry.title = ''  # reset the default value which is not empty
@@ -240,7 +256,7 @@
                         elem = elems[key]((u'<div xmlns="%s">%s</div>' % (NS_XHTML, converted)).encode('utf-8'))
                         elem.attrs['type'] = 'xhtml'
                         if hasattr(_entry, '%s_xhtml' % key):
-                            raise exceptions.DataError(_("Can't have xhtml and rich content at the same time"))
+                            raise failure.Failure(exceptions.DataError(_("Can't have xhtml and rich content at the same time")))
                         setattr(_entry, '%s_xhtml' % key, elem)
                     else:  # raw text only needs to be escaped to get HTML-safe sequence
                         elem = elems[key](escape(data[attr]).encode('utf-8'))
@@ -286,34 +302,45 @@
         @param data: must include content
         @param profile: profile which send the mood"""
         if 'content' not in data:
-            log.error(_("Microblog data must contain at least 'content' key"))
-            raise exceptions.DataError('no "content" key found')
+            log.error("Microblog data must contain at least 'content' key")
+            raise failure.Failure(exceptions.DataError('no "content" key found'))
         content = data['content']
         if not content:
-            log.error(_("Microblog data's content value must not be empty"))
-            raise exceptions.DataError('empty content')
+            log.error("Microblog data's content value must not be empty")
+            raise failure.Failure(exceptions.DataError('empty content'))
         item = yield self.data2entry(data, profile)
         ret = yield self.host.plugins["XEP-0060"].publish(None, NS_MICROBLOG, [item], profile_key=profile)
         defer.returnValue(ret)
 
     ## get ##
 
+    def _getLastMicroblogs(self, pub_jid_s, max_items=10, profile_key=C.PROF_KEY_NONE):  # wrapper registered on the bridge; pub_jid_s is the publisher jid as a string
+        return self.getLastMicroblogs(jid.JID(pub_jid_s), max_items, profile_key)  # build the jid.JID expected by getLastMicroblogs and return its deferred
+
+    @defer.inlineCallbacks
     def getLastMicroblogs(self, pub_jid, max_items=10, profile_key=C.PROF_KEY_NONE):
         """Get the last published microblogs
 
-        @param pub_jid: jid of the publisher
+        @param pub_jid(jid.JID): jid of the publisher
         @param max_items: how many microblogs we want to get
         @param profile_key: profile key
 
-        @return: a deferred couple with the list of items and RSM information.
+        @return: a deferred couple with the list of items and metadatas.
         """
-        d = self.host.plugins["XEP-0060"].getItems(jid.JID(pub_jid), NS_MICROBLOG, max_items=max_items, profile_key=profile_key)
-        d.addCallback(lambda res: (defer.DeferredList(map(self.item2mbdata, res[0]), consumeErrors=True), res[1]))
-        d.addCallback(lambda res: ([value for (success, value) in res[0] if success], res[1]))
-        return d
+        items, metadata = yield self.host.plugins["XEP-0060"].getItems(pub_jid, NS_MICROBLOG, max_items=max_items, profile_key=profile_key)
+        dlist_result = yield defer.DeferredList(map(self.item2mbdata, items), consumeErrors=True)
+        items_data = []
+        for success, value in dlist_result:
+            if success:
+                items_data.append(value)
+            else:
+                log.warning(u"Error while parsing microblog data: {}".format(value.value))
+        defer.returnValue((items_data, metadata))
 
     def parseCommentUrl(self, node_url):
-        """Determine the fields comments_service and comments_node of a microblog data
+        """Parse a XMPP URI
+
+        Determine the fields comments_service and comments_node of a microblog data
         from the href attribute of an entry's link element. For example this input:
         xmpp:sat-pubsub.libervia.org?node=urn%3Axmpp%3Acomments%3A_c5c4a142-2279-4b2a-ba4c-1bc33aa87634__urn%3Axmpp%3Agroupblog%3Asouliane%40libervia.org
         will return (JID(u'sat-pubsub.libervia.org'), 'urn:xmpp:comments:_c5c4a142-2279-4b2a-ba4c-1bc33aa87634__urn:xmpp:groupblog:souliane@libervia.org')
@@ -328,7 +355,7 @@
         node = parsed_queries.get('node', [''])[0]
 
         if not node:
-            raise exceptions.DataError('Invalid comments link')
+            raise failure.Failure(exceptions.DataError('Invalid comments link'))
 
         return (service, node)
author	Goffi <goffi@goffi.org>
date	Sat, 15 Aug 2015 22:13:27 +0200
parents	16b1ba7ccaaa
children	7797dda847ae