libervia-backend: sat/plugins/plugin_xep

comparison sat/plugins/plugin_xep_0277.py @ 4020:d8a1219e913f

plugin XEP-0277: handle "related" and "enclosure" links: those links are used for attachments: "enclosure" for files, "related" for other kinds of data, such as an external website (in this case the `external` key is set in the attachment).

author	Goffi <goffi@goffi.org>
date	Thu, 23 Mar 2023 15:32:10 +0100
parents	86efd854dee1
children	78b5f356900c

comparison

equal deleted inserted replaced

-:7bf7677b893d
+:d8a1219e913f
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import time
 import dateutil
 import calendar
+from mimetypes import guess_type
 from secrets import token_urlsafe
-from typing import List, Optional, Dict, Tuple, Union, Any, Dict
+from typing import List, Optional, Dict, Tuple, Any, Dict
 from functools import partial
 import shortuuid
 from twisted.words.protocols.jabber import jid, error
 )
 # links
 comments = microblog_data['comments'] = []
 for link_elt in entry_elt.elements(NS_ATOM, "link"):
+href = link_elt.getAttribute("href")
+if not href:
+log.warning(
+f'missing href in <link> element: {link_elt.toXml()}'
+)
+continue
 rel = link_elt.getAttribute("rel")
 if (rel == "replies" and link_elt.getAttribute("title") == "comments"):
-uri = link_elt["href"]
+uri = href
 comments_data = {
 "uri": uri,
 }
 try:
 comment_service, comment_node = self.parseCommentUrl(uri)
 else:
 comments_data["service"] = comment_service.full()
 comments_data["node"] = comment_node
 comments.append(comments_data)
 elif rel == "via":
-href = link_elt.getAttribute("href")
-if not href:
-log.warning(
-f'missing href in "via" <link> element: {link_elt.toXml()}'
-)
-continue
 try:
 repeater_jid = jid.JID(item_elt["publisher"])
 except (KeyError, RuntimeError):
 try:
 # we look for stanza element which is at the root, meaning that it
 extra["repeated"] = {
 "by": repeater_jid.full(),
 "uri": href
 }
+elif rel in ("related", "enclosure"):
+attachment: Dict[str, Any] = {
+"sources": [{"url": href}]
+}
+if rel == "related":
+attachment["external"] = True
+for attr, key in (
+("type", "media_type"),
+("title", "desc"),
+):
+value = link_elt.getAttribute(attr)
+if value:
+attachment[key] = value
+try:
+attachment["size"] = int(link_elt.attributes["lenght"])
+except (KeyError, ValueError):
+pass
+if "media_type" not in attachment:
+media_type = guess_type(href, False)[0]
+if media_type is not None:
+attachment["media_type"] = media_type
+attachments = extra.setdefault("attachments", [])
+attachments.append(attachment)
 else:
-title = link_elt.getAttribute("title", "")
-href = link_elt.getAttribute("href", "")
 log.warning(
-"Unmanaged link element: rel={rel} title={title} href={href}".format(
+f"Unmanaged link element: {link_elt.toXml()}"
-rel=rel, title=title, href=href
-)
 )
 # author
 publisher = item_elt.getAttribute("publisher")
 try:
 @param node(unicode): pubsub node where the item is sent
 Needed to construct Atom id
 @return: deferred which fire domish.Element
 """
 entry_elt = domish.Element((NS_ATOM, "entry"))
+extra = mb_data.get("extra", {})
 ## language ##
 if "language" in mb_data:
 entry_elt[(C.NS_XML, "lang")] = mb_data["language"].strip()
 ## content and title ##
 synt = self.host.plugins["TEXT_SYNTAXES"]
 for elem_name in ("title", "content"):
 for type_ in ["", "_rich", "_xhtml"]:
-attr = "{}{}".format(elem_name, type_)
+attr = f"{elem_name}{type_}"
 if attr in mb_data:
 elem = entry_elt.addElement(elem_name)
 if type_:
 if type_ == "_rich":  # convert input from current syntax to XHTML
 xml_content = await synt.convert(
 mb_data[attr], synt.getCurrentSyntax(client.profile), "XHTML"
 )
-if "{}_xhtml".format(elem_name) in mb_data:
+if f"{elem_name}_xhtml" in mb_data:
 raise failure.Failure(
 exceptions.DataError(
 _(
 "Can't have xhtml and rich content at the same time"
 )
 "There must be at least one content or title element"
 )
 for elem in elems:
 elem.name = "title"
+## attachments ##
+attachments = extra.get(C.KEY_ATTACHMENTS)
+if attachments:
+for attachment in attachments:
+try:
+url = attachment["url"]
+except KeyError:
+try:
+url = next(
+s['url'] for s in attachment["sources"] if 'url' in s
+)
+except (StopIteration, KeyError):
+log.warning(
+f'"url" missing in attachment, ignoring: {attachment}'
+)
+continue
+if not url.startswith("http"):
+log.warning(f"non HTTP URL in attachment, ignoring: {attachment}")
+continue
+link_elt = entry_elt.addElement("link")
+# XXX: "uri" is set in self._manageComments if not already existing
+link_elt["href"] = url
+if attachment.get("external", False):
+# this is a link to an external data such as a website
+link_elt["rel"] = "related"
+else:
+# this is an attached file
+link_elt["rel"] = "enclosure"
+for key, attr in (
+("media_type", "type"),
+("desc", "title"),
+("size", "lenght")
+):
+value = attachment.get(key)
+if value:
+link_elt[attr]  = str(value)
 ## author ##
 author_elt = entry_elt.addElement("author")
 try:
 author_name = mb_data["author"]
 except KeyError:
 # XXX: "uri" is set in self._manageComments if not already existing
 link_elt["href"] = comments_data["uri"]
 link_elt["rel"] = "replies"
 link_elt["title"] = "comments"
-extra = mb_data.get("extra", {})
 if "repeated" in extra:
 try:
 repeated = extra["repeated"]
 link_elt = entry_elt.addElement("link")
 link_elt["rel"] = "via"

Mercurial > libervia-backend

comparison sat/plugins/plugin_xep_0277.py @ 4020:d8a1219e913f