# HG changeset patch # User Goffi # Date 1679581930 -3600 # Node ID d8a1219e913f83e6ed9e2985b0d1012eba78c13e # Parent 7bf7677b893df2e54a6053df051d9568b82b7450 plugin XEP-0277: handle "related" and "enclosure" links: those links are used for attachments, "enclosure" for files, "related" when it's another kind of data, such as an external website (in this case the `external` key is set in the attachment). diff -r 7bf7677b893d -r d8a1219e913f sat/plugins/plugin_xep_0277.py --- a/sat/plugins/plugin_xep_0277.py Thu Mar 23 15:24:10 2023 +0100 +++ b/sat/plugins/plugin_xep_0277.py Thu Mar 23 15:32:10 2023 +0100 @@ -19,8 +19,9 @@ import time import dateutil import calendar +from mimetypes import guess_type from secrets import token_urlsafe -from typing import List, Optional, Dict, Tuple, Union, Any, Dict +from typing import List, Optional, Dict, Tuple, Any, Dict from functools import partial import shortuuid @@ -452,9 +453,15 @@ # links comments = microblog_data['comments'] = [] for link_elt in entry_elt.elements(NS_ATOM, "link"): + href = link_elt.getAttribute("href") + if not href: + log.warning( + f'missing href in element: {link_elt.toXml()}' + ) + continue rel = link_elt.getAttribute("rel") if (rel == "replies" and link_elt.getAttribute("title") == "comments"): - uri = link_elt["href"] + uri = href comments_data = { "uri": uri, } @@ -468,12 +475,6 @@ comments_data["node"] = comment_node comments.append(comments_data) elif rel == "via": - href = link_elt.getAttribute("href") - if not href: - log.warning( - f'missing href in "via" element: {link_elt.toXml()}' - ) - continue try: repeater_jid = jid.JID(item_elt["publisher"]) except (KeyError, RuntimeError): @@ -496,13 +497,33 @@ "by": repeater_jid.full(), "uri": href } + elif rel in ("related", "enclosure"): + attachment: Dict[str, Any] = { + "sources": [{"url": href}] + } + if rel == "related": + attachment["external"] = True + for attr, key in ( + ("type", "media_type"), + ("title", "desc"), + ): + value = 
link_elt.getAttribute(attr) + if value: + attachment[key] = value + try: + attachment["size"] = int(link_elt.attributes["lenght"]) + except (KeyError, ValueError): + pass + if "media_type" not in attachment: + media_type = guess_type(href, False)[0] + if media_type is not None: + attachment["media_type"] = media_type + + attachments = extra.setdefault("attachments", []) + attachments.append(attachment) else: - title = link_elt.getAttribute("title", "") - href = link_elt.getAttribute("href", "") log.warning( - "Unmanaged link element: rel={rel} title={title} href={href}".format( - rel=rel, title=title, href=href - ) + f"Unmanaged link element: {link_elt.toXml()}" ) # author @@ -606,6 +627,7 @@ @return: deferred which fire domish.Element """ entry_elt = domish.Element((NS_ATOM, "entry")) + extra = mb_data.get("extra", {}) ## language ## if "language" in mb_data: @@ -616,7 +638,7 @@ for elem_name in ("title", "content"): for type_ in ["", "_rich", "_xhtml"]: - attr = "{}{}".format(elem_name, type_) + attr = f"{elem_name}{type_}" if attr in mb_data: elem = entry_elt.addElement(elem_name) if type_: @@ -624,7 +646,7 @@ xml_content = await synt.convert( mb_data[attr], synt.getCurrentSyntax(client.profile), "XHTML" ) - if "{}_xhtml".format(elem_name) in mb_data: + if f"{elem_name}_xhtml" in mb_data: raise failure.Failure( exceptions.DataError( _( @@ -681,6 +703,44 @@ for elem in elems: elem.name = "title" + ## attachments ## + attachments = extra.get(C.KEY_ATTACHMENTS) + if attachments: + for attachment in attachments: + try: + url = attachment["url"] + except KeyError: + try: + url = next( + s['url'] for s in attachment["sources"] if 'url' in s + ) + except (StopIteration, KeyError): + log.warning( + f'"url" missing in attachment, ignoring: {attachment}' + ) + continue + + if not url.startswith("http"): + log.warning(f"non HTTP URL in attachment, ignoring: {attachment}") + continue + link_elt = entry_elt.addElement("link") + # XXX: "uri" is set in self._manageComments if 
not already existing + link_elt["href"] = url + if attachment.get("external", False): + # this is a link to an external data such as a website + link_elt["rel"] = "related" + else: + # this is an attached file + link_elt["rel"] = "enclosure" + for key, attr in ( + ("media_type", "type"), + ("desc", "title"), + ("size", "lenght") + ): + value = attachment.get(key) + if value: + link_elt[attr] = str(value) + ## author ## author_elt = entry_elt.addElement("author") try: @@ -736,7 +796,6 @@ link_elt["rel"] = "replies" link_elt["title"] = "comments" - extra = mb_data.get("extra", {}) if "repeated" in extra: try: repeated = extra["repeated"]