changeset 4020:d8a1219e913f

plugin XEP-0277: handle "related" and "enclosure" links: those links are used for attachments, "enclosure" for files, "related" for other kinds of data, such as an external website (in this case the `external` key is set in the attachment).
author Goffi <goffi@goffi.org>
date Thu, 23 Mar 2023 15:32:10 +0100
parents 7bf7677b893d
children 412b99c29d83
files sat/plugins/plugin_xep_0277.py
diffstat 1 files changed, 75 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/sat/plugins/plugin_xep_0277.py	Thu Mar 23 15:24:10 2023 +0100
+++ b/sat/plugins/plugin_xep_0277.py	Thu Mar 23 15:32:10 2023 +0100
@@ -19,8 +19,9 @@
 import time
 import dateutil
 import calendar
+from mimetypes import guess_type
 from secrets import token_urlsafe
-from typing import List, Optional, Dict, Tuple, Union, Any, Dict
+from typing import List, Optional, Dict, Tuple, Any, Dict
 from functools import partial
 
 import shortuuid
@@ -452,9 +453,15 @@
         # links
         comments = microblog_data['comments'] = []
         for link_elt in entry_elt.elements(NS_ATOM, "link"):
+            href = link_elt.getAttribute("href")
+            if not href:
+                log.warning(
+                    f'missing href in <link> element: {link_elt.toXml()}'
+                )
+                continue
             rel = link_elt.getAttribute("rel")
             if (rel == "replies" and link_elt.getAttribute("title") == "comments"):
-                uri = link_elt["href"]
+                uri = href
                 comments_data = {
                     "uri": uri,
                 }
@@ -468,12 +475,6 @@
                     comments_data["node"] = comment_node
                 comments.append(comments_data)
             elif rel == "via":
-                href = link_elt.getAttribute("href")
-                if not href:
-                    log.warning(
-                        f'missing href in "via" <link> element: {link_elt.toXml()}'
-                    )
-                    continue
                 try:
                     repeater_jid = jid.JID(item_elt["publisher"])
                 except (KeyError, RuntimeError):
@@ -496,13 +497,33 @@
                     "by": repeater_jid.full(),
                     "uri": href
                 }
+            elif rel in ("related", "enclosure"):
+                attachment: Dict[str, Any] = {
+                    "sources": [{"url": href}]
+                }
+                if rel == "related":
+                    attachment["external"] = True
+                for attr, key in (
+                    ("type", "media_type"),
+                    ("title", "desc"),
+                ):
+                    value = link_elt.getAttribute(attr)
+                    if value:
+                        attachment[key] = value
+                try:
+                    attachment["size"] = int(link_elt.attributes["lenght"])
+                except (KeyError, ValueError):
+                    pass
+                if "media_type" not in attachment:
+                    media_type = guess_type(href, False)[0]
+                    if media_type is not None:
+                        attachment["media_type"] = media_type
+
+                attachments = extra.setdefault("attachments", [])
+                attachments.append(attachment)
             else:
-                title = link_elt.getAttribute("title", "")
-                href = link_elt.getAttribute("href", "")
                 log.warning(
-                    "Unmanaged link element: rel={rel} title={title} href={href}".format(
-                        rel=rel, title=title, href=href
-                    )
+                    f"Unmanaged link element: {link_elt.toXml()}"
                 )
 
         # author
@@ -606,6 +627,7 @@
         @return: deferred which fire domish.Element
         """
         entry_elt = domish.Element((NS_ATOM, "entry"))
+        extra = mb_data.get("extra", {})
 
         ## language ##
         if "language" in mb_data:
@@ -616,7 +638,7 @@
 
         for elem_name in ("title", "content"):
             for type_ in ["", "_rich", "_xhtml"]:
-                attr = "{}{}".format(elem_name, type_)
+                attr = f"{elem_name}{type_}"
                 if attr in mb_data:
                     elem = entry_elt.addElement(elem_name)
                     if type_:
@@ -624,7 +646,7 @@
                             xml_content = await synt.convert(
                                 mb_data[attr], synt.getCurrentSyntax(client.profile), "XHTML"
                             )
-                            if "{}_xhtml".format(elem_name) in mb_data:
+                            if f"{elem_name}_xhtml" in mb_data:
                                 raise failure.Failure(
                                     exceptions.DataError(
                                         _(
@@ -681,6 +703,44 @@
             for elem in elems:
                 elem.name = "title"
 
+        ## attachments ##
+        attachments = extra.get(C.KEY_ATTACHMENTS)
+        if attachments:
+            for attachment in attachments:
+                try:
+                    url = attachment["url"]
+                except KeyError:
+                    try:
+                        url = next(
+                            s['url'] for s in attachment["sources"] if 'url' in s
+                        )
+                    except (StopIteration, KeyError):
+                        log.warning(
+                            f'"url" missing in attachment, ignoring: {attachment}'
+                        )
+                        continue
+
+                if not url.startswith("http"):
+                    log.warning(f"non HTTP URL in attachment, ignoring: {attachment}")
+                    continue
+                link_elt = entry_elt.addElement("link")
+                # XXX: "uri" is set in self._manageComments if not already existing
+                link_elt["href"] = url
+                if attachment.get("external", False):
+                    # this is a link to external data, such as a website
+                    link_elt["rel"] = "related"
+                else:
+                    # this is an attached file
+                    link_elt["rel"] = "enclosure"
+                for key, attr in (
+                    ("media_type", "type"),
+                    ("desc", "title"),
+                    ("size", "lenght")
+                ):
+                    value = attachment.get(key)
+                    if value:
+                        link_elt[attr]  = str(value)
+
         ## author ##
         author_elt = entry_elt.addElement("author")
         try:
@@ -736,7 +796,6 @@
             link_elt["rel"] = "replies"
             link_elt["title"] = "comments"
 
-        extra = mb_data.get("extra", {})
         if "repeated" in extra:
             try:
                 repeated = extra["repeated"]