Mercurial > libervia-backend
changeset 3833:381340b9a9ee
component AP gateway: convert XMPP mentions to AP:
When a XEP-0372 mention is received, the linked pubsub item is looked after in cache, and
if found, it is send to mentioned entity with `mention` tag added.
However, this doesn't work in some cases (see incoming doc for details). To work around
that, `@user@server.tld` type mention are also scanned in body, and mentions are added
when found (this can be disabled with `auto_mentions` setting).
Mention are only scanned in "public" messages, i.e. for pubsub items, and not direct
messages.
rel 369
author | Goffi <goffi@goffi.org> |
---|---|
date | Sun, 10 Jul 2022 16:15:06 +0200 (2022-07-10) |
parents | 201a22bfbb74 |
children | 3260401fdb98 |
files | sat/plugins/plugin_comp_ap_gateway/__init__.py sat/plugins/plugin_comp_ap_gateway/constants.py sat/plugins/plugin_comp_ap_gateway/http_server.py sat/plugins/plugin_comp_ap_gateway/regex.py |
diffstat | 4 files changed, 149 insertions(+), 28 deletions(-) [+] |
line wrap: on
line diff
--- a/sat/plugins/plugin_comp_ap_gateway/__init__.py Sun Jul 10 15:16:15 2022 +0200 +++ b/sat/plugins/plugin_comp_ap_gateway/__init__.py Sun Jul 10 16:15:06 2022 +0200 @@ -60,7 +60,6 @@ ACTIVITY_TARGET_MANDATORY, ACTIVITY_TYPES, ACTIVITY_TYPES_LOWER, - AP_MB_MAP, COMMENTS_MAX_PARENTS, CONF_SECTION, IMPORT_NAME, @@ -74,6 +73,7 @@ NS_AP_PUBLIC, PUBLIC_TUPLE ) +from .regex import RE_MENTION from .http_server import HTTPServer from .pubsub_service import APPubsubService @@ -131,6 +131,7 @@ self.pubsub_service = APPubsubService(self) host.trigger.add("messageReceived", self._messageReceivedTrigger, priority=-1000) host.trigger.add("XEP-0424_retractReceived", self._onMessageRetract) + host.trigger.add("XEP-0372_ref_received", self._onReferenceReceived) host.bridge.addMethod( "APSend", @@ -223,6 +224,13 @@ self.local_only = C.bool( self.host.memory.getConfig(CONF_SECTION, 'local_only', C.BOOL_TRUE) ) + # if True (default), mention will be parsed in non-private content coming from + # XMPP. This is necessary as XEP-0372 are coming separately from item where the + # mention is done, which is hard to impossible to translate to ActivityPub (where + # mention specified inside the item directly). See documentation for details. + self.auto_mentions = C.bool( + self.host.memory.getConfig(CONF_SECTION, "auto_mentions", C.BOOL_TRUE) + ) # HTTP server launch self.server = HTTPServer(self) @@ -1303,31 +1311,26 @@ log.warning(f'No "id" found in AP item: {ap_object!r}') raise exceptions.DataError mb_data = {"id": item_id} - for ap_key, mb_key in AP_MB_MAP.items(): - data = ap_object.get(ap_key) - if data is None: - continue - mb_data[mb_key] = data # content try: language, content_xhtml = ap_object["contentMap"].popitem() except (KeyError, AttributeError): try: - mb_data["content_xhtml"] = mb_data["content"] + mb_data["content_xhtml"] = ap_object["content"] except KeyError: log.warning(f"no content found:\n{ap_object!r}") raise exceptions.DataError else: mb_data["language"] = language mb_data["content_xhtml"] = content_xhtml - if not mb_data.get("content"): - mb_data["content"] = await self._t.convert( - content_xhtml, - self._t.SYNTAX_XHTML, - self._t.SYNTAX_TEXT, - False, - ) + + mb_data["content"] = await self._t.convert( + mb_data["content_xhtml"], + self._t.SYNTAX_XHTML, + self._t.SYNTAX_TEXT, + False, + ) # author if is_activity: @@ -1481,6 +1484,26 @@ if public: ap_object["to"] = [NS_AP_PUBLIC] + if self.auto_mentions: + for m in RE_MENTION.finditer(ap_object["content"]): + mention = m.group() + mentioned = mention[1:] + __, m_host = mentioned.split("@", 1) + if m_host in (self.public_url, self.client.jid.host): + # we ignore mention of local users, they should be sent as XMPP + # references + continue + try: + mentioned_id = await self.getAPActorIdFromAccount(mentioned) + except Exception as e: + log.warning(f"Can't add mention to {mentioned!r}: {e}") + else: + ap_object["to"].append(mentioned_id) + ap_object.setdefault("tag", []).append({ + "type": TYPE_MENTION, + "href": mentioned_id, + "name": mention, + }) try: node = mb_data["node"] service = jid.JID(mb_data["service"]) @@ -1693,6 +1716,101 @@ ) return False + async def _onReferenceReceived( + self, + client: SatXMPPEntity, + message_elt: domish.Element, + reference_data: Dict[str, Union[str, int]] + ) -> bool: + parsed_uri: dict = reference_data.get("parsed_uri") + if not parsed_uri: + log.warning(f"no parsed URI available in reference {reference_data}") + return False + + try: + mentioned = jid.JID(parsed_uri["path"]) + except RuntimeError: + log.warning(f"invalid target: {reference_data['uri']}") + return False + + if mentioned.host != self.client.jid.full() or not mentioned.user: + log.warning( + f"ignoring mentioned user {mentioned}, it's not a JID mapping an AP " + "account" + ) + return False + + ap_account = self._e.unescape(mentioned.user) + actor_id = await self.getAPActorIdFromAccount(ap_account) + + parsed_anchor: dict = reference_data.get("parsed_anchor") + if not parsed_anchor: + log.warning(f"no XMPP anchor, ignoring reference {reference_data!r}") + return False + + if parsed_anchor["type"] != "pubsub": + log.warning( + f"ignoring reference with non pubsub anchor, this is not supported: " + "{reference_data!r}" + ) + return False + + try: + pubsub_service = jid.JID(parsed_anchor["path"]) + except RuntimeError: + log.warning(f"invalid anchor: {reference_data['anchor']}") + return False + pubsub_node = parsed_anchor.get("node") + if not pubsub_node: + log.warning(f"missing pubsub node in anchor: {reference_data['anchor']}") + return False + pubsub_item = parsed_anchor.get("item") + if not pubsub_item: + log.warning(f"missing pubsub item in anchor: {reference_data['anchor']}") + return False + + cached_node = await self.host.memory.storage.getPubsubNode( + client, pubsub_service, pubsub_node + ) + if not cached_node: + log.warning(f"Anchored node not found in cache: {reference_data['anchor']}") + return False + + cached_items, __ = await self.host.memory.storage.getItems( + cached_node, item_ids=[pubsub_item] + ) + if not cached_items: + log.warning( + f"Anchored pubsub item not found in cache: {reference_data['anchor']}" + ) + return False + + cached_item = cached_items[0] + + mb_data = await self._m.item2mbdata( + client, cached_item.data, pubsub_service, pubsub_node + ) + ap_item = await self.mbdata2APitem(client, mb_data) + ap_object = ap_item["object"] + ap_object["to"] = [actor_id] + ap_object.setdefault("tag", []).append({ + "type": TYPE_MENTION, + "href": actor_id, + "name": ap_account, + }) + + inbox = await self.getAPInboxFromId(actor_id) + + resp = await self.signAndPost(inbox, ap_item["actor"], ap_item) + if resp.code >= 300: + text = await resp.text() + log.warning( + f"unexpected return code while sending AP item: {resp.code}\n{text}\n" + f"{pformat(ap_item)}" + ) + + return False + async def newReplyToXMPPItem( self, client: SatXMPPEntity,
--- a/sat/plugins/plugin_comp_ap_gateway/constants.py Sun Jul 10 15:16:15 2022 +0200 +++ b/sat/plugins/plugin_comp_ap_gateway/constants.py Sun Jul 10 16:15:06 2022 +0200 @@ -33,11 +33,6 @@ NS_AP_PUBLIC = "https://www.w3.org/ns/activitystreams#Public" # 3 values can be used, see https://www.w3.org/TR/activitypub/#public-addressing PUBLIC_TUPLE = (NS_AP_PUBLIC, "as:Public", "Public") -# mapping from AP metadata to microblog data -AP_MB_MAP = { - "content": "content_xhtml", - -} AP_REQUEST_TYPES = { "GET": {TYPE_ACTOR, TYPE_OUTBOX, TYPE_FOLLOWERS, TYPE_FOLLOWING}, "POST": {"inbox"},
--- a/sat/plugins/plugin_comp_ap_gateway/http_server.py Sun Jul 10 15:16:15 2022 +0200 +++ b/sat/plugins/plugin_comp_ap_gateway/http_server.py Sun Jul 10 16:15:06 2022 +0200 @@ -23,13 +23,11 @@ from urllib import parse from collections import deque import unicodedata -from pathlib import Path -from pprint import pformat from twisted.web import http, resource as web_resource, server from twisted.web import static from twisted.python import failure -from twisted.internet import reactor, defer +from twisted.internet import defer from twisted.words.protocols.jabber import jid, error from wokkel import pubsub, rsm @@ -37,7 +35,6 @@ from sat.core.constants import Const as C from sat.core.i18n import _ from sat.core.log import getLogger -from sat.tools import utils from sat.tools.common import date_utils from sat.memory.sqla_mapping import SubscriptionState @@ -73,7 +70,8 @@ msg: Optional[str] = None ) -> None: """Log and set HTTP return code and associated message""" - log.warning(msg) + if msg is not None: + log.warning(msg) request.setResponseCode(http_code, None if msg is None else msg.encode()) def _onRequestError(self, failure_: failure.Failure, request: "HTTPRequest") -> None: @@ -683,7 +681,7 @@ return # default response code, may be changed, e.g. in case of exception - self.responseCode(request, http.ACCEPTED) + request.setResponseCode(http.ACCEPTED) try: return await self.APRequest(request, signing_actor) except Exception as e:
--- a/sat/plugins/plugin_comp_ap_gateway/regex.py Sun Jul 10 15:16:15 2022 +0200 +++ b/sat/plugins/plugin_comp_ap_gateway/regex.py Sun Jul 10 16:15:06 2022 +0200 @@ -16,7 +16,11 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -"""Regular Expression to parse "Signature" header""" +"""Various Regular Expression for AP gateway""" + +import re + +## "Signature" header parsing # those expression have been generated with abnf-to-regex # (https://github.com/aas-core-works/abnf-to-regexp) @@ -38,8 +42,6 @@ # obs-text = %x80-FF # --- -import re - ows = '[ \t]*' bws = f'{ows}' obs_text = '[\\x80-\\xff]' @@ -52,3 +54,11 @@ f'(?P<key>{token}{bws})={bws}' f'((?P<uq_value>{token})|(?P<quoted_value>{quoted_string}))' ) + + +## Account/Mention + +# FIXME: naive regex, should be approved following webfinger, but popular implementations +# such as Mastodon use a very restricted subset +RE_ACCOUNT = re.compile(r"[a-zA-Z0-9._-]+@[a-zA-Z0-9-]+.[a-zA-Z0-9-]+") +RE_MENTION = re.compile(rf"(?<!\w)@{RE_ACCOUNT.pattern}\b")