changeset 3833:381340b9a9ee

component AP gateway: convert XMPP mentions to AP: When a XEP-0372 mention is received, the linked pubsub item is looked after in cache, and if found, it is send to mentioned entity with `mention` tag added. However, this doesn't work in some cases (see incoming doc for details). To work around that, `@user@server.tld` type mention are also scanned in body, and mentions are added when found (this can be disabled with `auto_mentions` setting). Mention are only scanned in "public" messages, i.e. for pubsub items, and not direct messages. rel 369
author Goffi <goffi@goffi.org>
date Sun, 10 Jul 2022 16:15:06 +0200 (2022-07-10)
parents 201a22bfbb74
children 3260401fdb98
files sat/plugins/plugin_comp_ap_gateway/__init__.py sat/plugins/plugin_comp_ap_gateway/constants.py sat/plugins/plugin_comp_ap_gateway/http_server.py sat/plugins/plugin_comp_ap_gateway/regex.py
diffstat 4 files changed, 149 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/sat/plugins/plugin_comp_ap_gateway/__init__.py	Sun Jul 10 15:16:15 2022 +0200
+++ b/sat/plugins/plugin_comp_ap_gateway/__init__.py	Sun Jul 10 16:15:06 2022 +0200
@@ -60,7 +60,6 @@
     ACTIVITY_TARGET_MANDATORY,
     ACTIVITY_TYPES,
     ACTIVITY_TYPES_LOWER,
-    AP_MB_MAP,
     COMMENTS_MAX_PARENTS,
     CONF_SECTION,
     IMPORT_NAME,
@@ -74,6 +73,7 @@
     NS_AP_PUBLIC,
     PUBLIC_TUPLE
 )
+from .regex import RE_MENTION
 from .http_server import HTTPServer
 from .pubsub_service import APPubsubService
 
@@ -131,6 +131,7 @@
         self.pubsub_service = APPubsubService(self)
         host.trigger.add("messageReceived", self._messageReceivedTrigger, priority=-1000)
         host.trigger.add("XEP-0424_retractReceived", self._onMessageRetract)
+        host.trigger.add("XEP-0372_ref_received", self._onReferenceReceived)
 
         host.bridge.addMethod(
             "APSend",
@@ -223,6 +224,13 @@
         self.local_only = C.bool(
             self.host.memory.getConfig(CONF_SECTION, 'local_only', C.BOOL_TRUE)
         )
+        # if True (default), mention will be parsed in non-private content coming from
+        # XMPP. This is necessary as XEP-0372 are coming separately from item where the
+        # mention is done, which is hard to impossible to translate to ActivityPub (where
+        # mention specified inside the item directly). See documentation for details.
+        self.auto_mentions = C.bool(
+            self.host.memory.getConfig(CONF_SECTION, "auto_mentions", C.BOOL_TRUE)
+        )
 
         # HTTP server launch
         self.server = HTTPServer(self)
@@ -1303,31 +1311,26 @@
             log.warning(f'No "id" found in AP item: {ap_object!r}')
             raise exceptions.DataError
         mb_data = {"id": item_id}
-        for ap_key, mb_key in AP_MB_MAP.items():
-            data = ap_object.get(ap_key)
-            if data is None:
-                continue
-            mb_data[mb_key] = data
 
         # content
         try:
             language, content_xhtml = ap_object["contentMap"].popitem()
         except (KeyError, AttributeError):
             try:
-                mb_data["content_xhtml"] = mb_data["content"]
+                mb_data["content_xhtml"] = ap_object["content"]
             except KeyError:
                 log.warning(f"no content found:\n{ap_object!r}")
                 raise exceptions.DataError
         else:
             mb_data["language"] = language
             mb_data["content_xhtml"] = content_xhtml
-            if not mb_data.get("content"):
-                mb_data["content"] = await self._t.convert(
-                    content_xhtml,
-                    self._t.SYNTAX_XHTML,
-                    self._t.SYNTAX_TEXT,
-                    False,
-                )
+
+        mb_data["content"] = await self._t.convert(
+            mb_data["content_xhtml"],
+            self._t.SYNTAX_XHTML,
+            self._t.SYNTAX_TEXT,
+            False,
+        )
 
         # author
         if is_activity:
@@ -1481,6 +1484,26 @@
 
         if public:
             ap_object["to"] = [NS_AP_PUBLIC]
+            if self.auto_mentions:
+                for m in RE_MENTION.finditer(ap_object["content"]):
+                    mention = m.group()
+                    mentioned = mention[1:]
+                    __, m_host = mentioned.split("@", 1)
+                    if m_host in (self.public_url, self.client.jid.host):
+                        # we ignore mention of local users, they should be sent as XMPP
+                        # references
+                        continue
+                    try:
+                        mentioned_id = await self.getAPActorIdFromAccount(mentioned)
+                    except Exception as e:
+                        log.warning(f"Can't add mention to {mentioned!r}: {e}")
+                    else:
+                        ap_object["to"].append(mentioned_id)
+                        ap_object.setdefault("tag", []).append({
+                            "type": TYPE_MENTION,
+                            "href": mentioned_id,
+                            "name": mention,
+                        })
             try:
                 node = mb_data["node"]
                 service = jid.JID(mb_data["service"])
@@ -1693,6 +1716,101 @@
             )
         return False
 
+    async def _onReferenceReceived(
+        self,
+        client: SatXMPPEntity,
+        message_elt: domish.Element,
+        reference_data: Dict[str, Union[str, int]]
+    ) -> bool:
+        parsed_uri: dict = reference_data.get("parsed_uri")
+        if not parsed_uri:
+            log.warning(f"no parsed URI available in reference {reference_data}")
+            return False
+
+        try:
+            mentioned = jid.JID(parsed_uri["path"])
+        except RuntimeError:
+            log.warning(f"invalid target: {reference_data['uri']}")
+            return False
+
+        if mentioned.host != self.client.jid.full() or not mentioned.user:
+            log.warning(
+                f"ignoring mentioned user {mentioned}, it's not a JID mapping an AP "
+                "account"
+            )
+            return False
+
+        ap_account = self._e.unescape(mentioned.user)
+        actor_id = await self.getAPActorIdFromAccount(ap_account)
+
+        parsed_anchor: dict = reference_data.get("parsed_anchor")
+        if not parsed_anchor:
+            log.warning(f"no XMPP anchor, ignoring reference {reference_data!r}")
+            return False
+
+        if parsed_anchor["type"] != "pubsub":
+            log.warning(
+                f"ignoring reference with non pubsub anchor, this is not supported: "
+                "{reference_data!r}"
+            )
+            return False
+
+        try:
+            pubsub_service = jid.JID(parsed_anchor["path"])
+        except RuntimeError:
+            log.warning(f"invalid anchor: {reference_data['anchor']}")
+            return False
+        pubsub_node = parsed_anchor.get("node")
+        if not pubsub_node:
+            log.warning(f"missing pubsub node in anchor: {reference_data['anchor']}")
+            return False
+        pubsub_item = parsed_anchor.get("item")
+        if not pubsub_item:
+            log.warning(f"missing pubsub item in anchor: {reference_data['anchor']}")
+            return False
+
+        cached_node = await self.host.memory.storage.getPubsubNode(
+            client, pubsub_service, pubsub_node
+        )
+        if not cached_node:
+            log.warning(f"Anchored node not found in cache: {reference_data['anchor']}")
+            return False
+
+        cached_items, __ = await self.host.memory.storage.getItems(
+            cached_node, item_ids=[pubsub_item]
+        )
+        if not cached_items:
+            log.warning(
+                f"Anchored pubsub item not found in cache: {reference_data['anchor']}"
+            )
+            return False
+
+        cached_item = cached_items[0]
+
+        mb_data = await self._m.item2mbdata(
+            client, cached_item.data, pubsub_service, pubsub_node
+        )
+        ap_item = await self.mbdata2APitem(client, mb_data)
+        ap_object = ap_item["object"]
+        ap_object["to"] = [actor_id]
+        ap_object.setdefault("tag", []).append({
+            "type": TYPE_MENTION,
+            "href": actor_id,
+            "name": ap_account,
+        })
+
+        inbox = await self.getAPInboxFromId(actor_id)
+
+        resp = await self.signAndPost(inbox, ap_item["actor"], ap_item)
+        if resp.code >= 300:
+            text = await resp.text()
+            log.warning(
+                f"unexpected return code while sending AP item: {resp.code}\n{text}\n"
+                f"{pformat(ap_item)}"
+            )
+
+        return False
+
     async def newReplyToXMPPItem(
         self,
         client: SatXMPPEntity,
--- a/sat/plugins/plugin_comp_ap_gateway/constants.py	Sun Jul 10 15:16:15 2022 +0200
+++ b/sat/plugins/plugin_comp_ap_gateway/constants.py	Sun Jul 10 16:15:06 2022 +0200
@@ -33,11 +33,6 @@
 NS_AP_PUBLIC = "https://www.w3.org/ns/activitystreams#Public"
 # 3 values can be used, see https://www.w3.org/TR/activitypub/#public-addressing
 PUBLIC_TUPLE = (NS_AP_PUBLIC, "as:Public", "Public")
-# mapping from AP metadata to microblog data
-AP_MB_MAP = {
-    "content": "content_xhtml",
-
-}
 AP_REQUEST_TYPES = {
     "GET": {TYPE_ACTOR, TYPE_OUTBOX, TYPE_FOLLOWERS, TYPE_FOLLOWING},
     "POST": {"inbox"},
--- a/sat/plugins/plugin_comp_ap_gateway/http_server.py	Sun Jul 10 15:16:15 2022 +0200
+++ b/sat/plugins/plugin_comp_ap_gateway/http_server.py	Sun Jul 10 16:15:06 2022 +0200
@@ -23,13 +23,11 @@
 from urllib import parse
 from collections import deque
 import unicodedata
-from pathlib import Path
-from pprint import pformat
 
 from twisted.web import http, resource as web_resource, server
 from twisted.web import static
 from twisted.python import failure
-from twisted.internet import reactor, defer
+from twisted.internet import defer
 from twisted.words.protocols.jabber import jid, error
 from wokkel import pubsub, rsm
 
@@ -37,7 +35,6 @@
 from sat.core.constants import Const as C
 from sat.core.i18n import _
 from sat.core.log import getLogger
-from sat.tools import utils
 from sat.tools.common import date_utils
 from sat.memory.sqla_mapping import SubscriptionState
 
@@ -73,7 +70,8 @@
         msg: Optional[str] = None
     ) -> None:
         """Log and set HTTP return code and associated message"""
-        log.warning(msg)
+        if msg is not None:
+            log.warning(msg)
         request.setResponseCode(http_code, None if msg is None else msg.encode())
 
     def _onRequestError(self, failure_: failure.Failure, request: "HTTPRequest") -> None:
@@ -683,7 +681,7 @@
             return
 
         # default response code, may be changed, e.g. in case of exception
-        self.responseCode(request, http.ACCEPTED)
+        request.setResponseCode(http.ACCEPTED)
         try:
             return await self.APRequest(request, signing_actor)
         except Exception as e:
--- a/sat/plugins/plugin_comp_ap_gateway/regex.py	Sun Jul 10 15:16:15 2022 +0200
+++ b/sat/plugins/plugin_comp_ap_gateway/regex.py	Sun Jul 10 16:15:06 2022 +0200
@@ -16,7 +16,11 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Regular Expression to parse "Signature" header"""
+"""Various Regular Expression for AP gateway"""
+
+import re
+
+## "Signature" header parsing
 
 # those expression have been generated with abnf-to-regex
 # (https://github.com/aas-core-works/abnf-to-regexp)
@@ -38,8 +42,6 @@
 # obs-text = %x80-FF
 # ---
 
-import re
-
 ows = '[ \t]*'
 bws = f'{ows}'
 obs_text = '[\\x80-\\xff]'
@@ -52,3 +54,11 @@
     f'(?P<key>{token}{bws})={bws}'
     f'((?P<uq_value>{token})|(?P<quoted_value>{quoted_string}))'
 )
+
+
+## Account/Mention
+
+# FIXME: naive regex, should be approved following webfinger, but popular implementations
+#   such as Mastodon use a very restricted subset
+RE_ACCOUNT = re.compile(r"[a-zA-Z0-9._-]+@[a-zA-Z0-9-]+.[a-zA-Z0-9-]+")
+RE_MENTION = re.compile(rf"(?<!\w)@{RE_ACCOUNT.pattern}\b")