changeset 1453:d5e72362ee91

plugin XEP-0277: better parsing of atom:author element + item2mbdata minor reorganisation for better readability
author Goffi <goffi@goffi.org>
date Sat, 15 Aug 2015 22:22:36 +0200
parents 5116d70ddd1c
children 4e2fab4de195
files src/plugins/plugin_xep_0277.py
diffstat 1 files changed, 66 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/src/plugins/plugin_xep_0277.py	Sat Aug 15 22:22:34 2015 +0200
+++ b/src/plugins/plugin_xep_0277.py	Sat Aug 15 22:22:36 2015 +0200
@@ -115,9 +115,26 @@
         """
         microblog_data = {}
 
-        def check_conflict(key):
+        def check_conflict(key, increment=False):
+            """Check if key is already in microblog data
+
+            @param key(unicode): key to check
+            @param increment(bool): if True, suffix an already used key with
+                "#<idx>" (first free idx >= 1) instead of raising an exception
+            @raise exceptions.DataError: the key already exists
+                (not raised if increment is True)
+            """
             if key in microblog_data:
-                raise failure.Failure(exceptions.DataError("key {} is already present for item {}").format(key, item_elt['id']))
+                if not increment:
+                    raise failure.Failure(exceptions.DataError(u"key {} is already present for item {}".format(key, item_elt['id'])))
+                else:
+                    idx = 1  # the idx 0 is the key without suffix
+                    fmt = "{}#{}"
+                    new_key = fmt.format(key, idx)
+                    while new_key in microblog_data:
+                        idx += 1
+                        new_key = fmt.format(key, idx)
+                    key = new_key
             return key
 
         @defer.inlineCallbacks
@@ -149,6 +166,17 @@
             msg = u'No atom entry found in the pubsub item {}'.format(id_)
             raise failure.Failure(exceptions.DataError(msg))
 
+        # atom:id
+        try:
+            id_elt = entry_elt.elements(NS_ATOM, 'id').next()
+        except StopIteration:
+            msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_)
+            log.warning(msg)
+            microblog_data['atom_id'] = ""
+        else:
+            microblog_data['atom_id'] = unicode(id_elt)
+
+        # title/content(s)
         try:
             title_elt = entry_elt.elements(NS_ATOM, 'title').next()
         except StopIteration:
@@ -171,22 +199,6 @@
                                             self.host.plugins["TEXT-SYNTAXES"].SYNTAX_TEXT,
                                             False)
 
-        try:
-            id_elt = entry_elt.elements(NS_ATOM, 'id').next()
-        except StopIteration:
-            msg = u'No atom id found in the pubsub item {}, this is not standard !'.format(id_)
-            log.warning(msg)
-            microblog_data['atom_id'] = ""
-        else:
-            microblog_data['atom_id'] = unicode(id_elt)
-
-        try:
-            updated_elt = entry_elt.elements(NS_ATOM, 'updated').next()
-        except StopIteration:
-            msg = u'No atom updated element found in the pubsub item {}'.format(id_)
-            raise failure.Failure(exceptions.DataError(msg))
-        microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt)))
-
         if 'content' not in microblog_data:
             # use the atom title data as the microblog body content
             microblog_data['content'] = microblog_data['title']
@@ -195,6 +207,13 @@
                 microblog_data['content_xhtml'] = microblog_data['title_xhtml']
                 del microblog_data['title_xhtml']
 
+        # published/updated dates
+        try:
+            updated_elt = entry_elt.elements(NS_ATOM, 'updated').next()
+        except StopIteration:
+            msg = u'No atom updated element found in the pubsub item {}'.format(id_)
+            raise failure.Failure(exceptions.DataError(msg))
+        microblog_data['updated'] = unicode(date.rfc3339.tf_from_timestamp(unicode(updated_elt)))
         try:
             published_elt = entry_elt.elements(NS_ATOM, 'published').next()
         except StopIteration:
@@ -202,10 +221,10 @@
         else:
             microblog_data['published'] = unicode(date.rfc3339.tf_from_timestamp(unicode(published_elt)))
 
-
+        # links
         for link_elt in entry_elt.elements(NS_ATOM, 'link'):
             if link_elt.getAttribute('rel') == 'replies' and link_elt.getAttribute('title') == 'comments':
-                key = check_conflict('comments')
+                key = check_conflict('comments', True)
                 microblog_data[key] = link_elt['href']
                 try:
                     service, node = self.parseCommentUrl(microblog_data[key])
@@ -213,25 +232,49 @@
                     log.warning(u"Can't parse url {}".format(microblog_data[key]))
                     del microblog_data[key]
                 else:
-                    microblog_data['comments_service'] = service.full()
-                    microblog_data['comments_node'] = node
+                    microblog_data['{}_service'.format(key)] = service.full()
+                    microblog_data['{}_node'.format(key)] = node
             else:
                 rel = link_elt.getAttribute('rel','')
                 title = link_elt.getAttribute('title','')
                 href = link_elt.getAttribute('href','')
                 log.warning(u"Unmanaged link element: rel={rel} title={title} href={href}".format(rel=rel, title=title, href=href))
 
+        # author
         try:
             author_elt = entry_elt.elements(NS_ATOM, 'author').next()
         except StopIteration:
-            log.warning("Can't find author element in item {}".format(id_))
+            log.debug("Can't find author element in item {}".format(id_))
         else:
+            # name
             try:
                 name_elt = author_elt.elements(NS_ATOM, 'name').next()
             except StopIteration:
                 log.warning("No name element found in author element of item {}".format(id_))
             else:
                 microblog_data['author'] = unicode(name_elt)
+            # uri
+            try:
+                uri_elt = author_elt.elements(NS_ATOM, 'uri').next()
+            except StopIteration:
+                log.debug("No uri element found in author element of item {}".format(id_))
+            else:
+                uri = unicode(uri_elt)
+                if uri.startswith("xmpp:"):
+                    uri = uri[5:]
+                    microblog_data['author_uri'] = uri
+                if item_elt.getAttribute("publisher") == uri:
+                    microblog_data['author_uri_verified'] = C.BOOL_TRUE
+                else:
+                    log.warning("item atom:uri differ from publisher attribute, spoofing attempt ? atom:uri = {} publisher = {}".format(uri, item_elt.getAttribute("publisher")))
+                    microblog_data['author_uri_verified'] = C.BOOL_FALSE
+            # email
+            try:
+                email_elt = author_elt.elements(NS_ATOM, 'email').next()
+            except StopIteration:
+                pass
+            else:
+                microblog_data['author_email'] = unicode(email_elt)
 
         defer.returnValue(microblog_data)