changeset 3492:fa796612adad

plugin XEP-0277: better resilience to broken items: - if the `author` element can't be found, the item's `publisher` attribute, then the `from` attribute of the enclosing `IQ`, is used as a fallback to find the author jid - fix categories (tags) parsing when the `author` element is not found - remove items which have failed parsing from `mbGet` results (instead of returning `None` in their place).
author Goffi <goffi@goffi.org>
date Sat, 27 Mar 2021 14:38:27 +0100
parents 2bd75fc2555d
children b54bdd4ec507
files sat/plugins/plugin_xep_0060.py sat/plugins/plugin_xep_0277.py
diffstat 2 files changed, 18 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/sat/plugins/plugin_xep_0060.py	Sat Mar 27 14:35:07 2021 +0100
+++ b/sat/plugins/plugin_xep_0060.py	Sat Mar 27 14:38:27 2021 +0100
@@ -1181,7 +1181,10 @@
             log.warning(f"Error while parsing item: {failure_.value}")
 
         d = defer.gatherResults([item_cb(item).addErrback(eb) for item in items])
-        d.addCallback(lambda parsed_items: (parsed_items, metadata))
+        d.addCallback(lambda parsed_items: (
+            [i for i in parsed_items if i is not None],
+            metadata
+        ))
         return d
 
     def serDList(self, results, failure_result=None):
--- a/sat/plugins/plugin_xep_0277.py	Sat Mar 27 14:35:07 2021 +0100
+++ b/sat/plugins/plugin_xep_0277.py	Sat Mar 27 14:38:27 2021 +0100
@@ -415,12 +415,19 @@
                 )
 
         # author
+        publisher = item_elt.getAttribute("publisher")
         try:
             author_elt = next(entry_elt.elements(NS_ATOM, "author"))
         except StopIteration:
             log.debug("Can't find author element in item {}".format(id_))
+            if publisher:
+                microblog_data["author_jid"] = publisher
+                microblog_data["author_jid_verified"] = True
+            else:
+                iq_elt = xml_tools.findAncestor(item_elt, "iq", C.NS_CLIENT)
+                microblog_data["author_jid"] = iq_elt["from"]
+                microblog_data["author_jid_verified"] = False
         else:
-            publisher = item_elt.getAttribute("publisher")
             # name
             try:
                 name_elt = next(author_elt.elements(NS_ATOM, "name"))
@@ -470,12 +477,12 @@
             else:
                 microblog_data["author_email"] = str(email_elt)
 
-            # categories
-            categories = [
-                category_elt.getAttribute("term", "")
-                for category_elt in entry_elt.elements(NS_ATOM, "category")
-            ]
-            microblog_data["tags"] = categories
+        # categories
+        categories = [
+            category_elt.getAttribute("term", "")
+            for category_elt in entry_elt.elements(NS_ATOM, "category")
+        ]
+        microblog_data["tags"] = categories
 
         ## the trigger ##
         # if other plugins have things to add or change