libervia-backend: comparison of sat/plugins/plugin_comp_ap_gateway/__init__.py @ 3745:a8c7e5cef0cb
comp AP gateway: signature checking, caching and thread management:
- HTTP signatures are now checked for incoming messages (see the sketch below)
- AP actors can now be followed using a pubsub subscription; when the follow is
accepted, the node is cached
- replies to posts are put in cached pubsub comment nodes, with a `comments_max_depth`
option limiting the number of comment nodes for a root message (documentation
explaining this will follow)
ticket 364
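The HTTP signature scheme used here is draft-cavage HTTP signatures, with PKCS#1 v1.5 padding and SHA-256 for Mastodon compatibility. A minimal standalone sketch of the verification step (assuming a PEM public key taken from the actor's `publicKey` and headers already ordered as listed in the `Signature` header; the gateway's own implementation is in `checkSignature` below):

```python
import base64

from cryptography.exceptions import InvalidSignature
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding


def verify_cavage_signature(
    pub_key_pem: str, signature_b64: str, headers: dict
) -> bool:
    """Check a draft-cavage HTTP signature against ordered (pseudo-)headers"""
    # signing string: "name: value" pairs, names lower-cased, joined with "\n"
    to_sign = "\n".join(f"{k.lower()}: {v}" for k, v in headers.items())
    pub_key = serialization.load_pem_public_key(pub_key_pem.encode())
    try:
        pub_key.verify(
            base64.b64decode(signature_b64),
            to_sign.encode(),
            padding.PKCS1v15(),  # PKCS#1 v1.5, as used by Mastodon
            hashes.SHA256(),
        )
    except InvalidSignature:
        return False
    return True
```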
author | Goffi <goffi@goffi.org>
date | Tue, 22 Mar 2022 17:00:42 +0100
parents | bf0505d41c09
children | 125c7043b277
3744:658ddbabaf36 | 3745:a8c7e5cef0cb
---|---
15 | 15 |
16 # You should have received a copy of the GNU Affero General Public License | 16 # You should have received a copy of the GNU Affero General Public License |
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 17 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 |
19 import base64 | 19 import base64 |
20 import calendar | |
20 import hashlib | 21 import hashlib |
21 import json | 22 import json |
22 from pathlib import Path | 23 from pathlib import Path |
23 from typing import Optional, Dict, Tuple, List, Union | 24 from pprint import pformat |
25 import re | |
26 from typing import Any, Dict, List, Optional, Tuple, Union, overload | |
24 from urllib import parse | 27 from urllib import parse |
25 import calendar | 28 |
26 import re | 29 from cryptography.exceptions import InvalidSignature |
27 | |
28 import dateutil | |
29 from cryptography.hazmat.primitives import serialization | 30 from cryptography.hazmat.primitives import serialization |
30 from cryptography.hazmat.primitives import hashes | 31 from cryptography.hazmat.primitives import hashes |
31 from cryptography.hazmat.primitives.asymmetric import rsa | 32 from cryptography.hazmat.primitives.asymmetric import rsa |
32 from cryptography.hazmat.primitives.asymmetric import padding | 33 from cryptography.hazmat.primitives.asymmetric import padding |
34 import dateutil | |
33 import shortuuid | 35 import shortuuid |
36 from sqlalchemy.exc import IntegrityError | |
34 import treq | 37 import treq |
35 from treq.response import _Response as TReqResponse | 38 from treq.response import _Response as TReqResponse |
36 from twisted.internet import defer, reactor, threads | 39 from twisted.internet import defer, reactor, threads |
37 from twisted.web import http | 40 from twisted.web import http |
38 from twisted.words.protocols.jabber import jid, error | 41 from twisted.words.protocols.jabber import error, jid |
39 from twisted.words.xish import domish | 42 from twisted.words.xish import domish |
40 from wokkel import rsm | 43 from wokkel import rsm |
41 | 44 |
42 from sat.core import exceptions | 45 from sat.core import exceptions |
43 from sat.core.constants import Const as C | 46 from sat.core.constants import Const as C |
44 from sat.core.core_types import SatXMPPEntity | 47 from sat.core.core_types import SatXMPPEntity |
45 from sat.core.i18n import _ | 48 from sat.core.i18n import _ |
46 from sat.core.log import getLogger | 49 from sat.core.log import getLogger |
50 from sat.memory.sqla_mapping import PubsubSub, SubscriptionState | |
47 from sat.tools import utils | 51 from sat.tools import utils |
48 from sat.tools.common import data_format, tls | 52 from sat.tools.common import data_format, tls, uri |
49 from sat.tools.common.async_utils import async_lru | 53 from sat.tools.common.async_utils import async_lru |
50 | 54 |
51 from .constants import (IMPORT_NAME, CONF_SECTION, TYPE_ACTOR, TYPE_ITEM, MEDIA_TYPE_AP, | 55 from .constants import ( |
52 AP_MB_MAP, LRU_MAX_SIZE) | 56 ACTIVITY_OBJECT_MANDATORY, |
57 ACTIVITY_TARGET_MANDATORY, | |
58 ACTIVITY_TYPES, | |
59 ACTIVITY_TYPES_LOWER, | |
60 AP_MB_MAP, | |
61 COMMENTS_MAX_PARENTS, | |
62 CONF_SECTION, | |
63 IMPORT_NAME, | |
64 LRU_MAX_SIZE, | |
65 MEDIA_TYPE_AP, | |
66 TYPE_ACTOR, | |
67 TYPE_ITEM, | |
68 ) | |
53 from .http_server import HTTPServer | 69 from .http_server import HTTPServer |
54 from .pubsub_service import APPubsubService | 70 from .pubsub_service import APPubsubService |
55 | 71 |
56 | 72 |
57 log = getLogger(__name__) | 73 log = getLogger(__name__) |
62 C.PI_NAME: "ActivityPub Gateway component", | 78 C.PI_NAME: "ActivityPub Gateway component", |
63 C.PI_IMPORT_NAME: IMPORT_NAME, | 79 C.PI_IMPORT_NAME: IMPORT_NAME, |
64 C.PI_MODES: [C.PLUG_MODE_COMPONENT], | 80 C.PI_MODES: [C.PLUG_MODE_COMPONENT], |
65 C.PI_TYPE: C.PLUG_TYPE_ENTRY_POINT, | 81 C.PI_TYPE: C.PLUG_TYPE_ENTRY_POINT, |
66 C.PI_PROTOCOLS: [], | 82 C.PI_PROTOCOLS: [], |
67 C.PI_DEPENDENCIES: ["XEP-0106", "XEP-0277", "XEP-0060"], | 83 C.PI_DEPENDENCIES: ["XEP-0106", "XEP-0277", "XEP-0060", "PUBSUB_CACHE"], |
68 C.PI_RECOMMENDATIONS: [], | 84 C.PI_RECOMMENDATIONS: [], |
69 C.PI_MAIN: "APGateway", | 85 C.PI_MAIN: "APGateway", |
70 C.PI_HANDLER: C.BOOL_TRUE, | 86 C.PI_HANDLER: C.BOOL_TRUE, |
71 C.PI_DESCRIPTION: _( | 87 C.PI_DESCRIPTION: _( |
72 "Gateway for bidirectional communication between XMPP and ActivityPub." | 88 "Gateway for bidirectional communication between XMPP and ActivityPub." |
84 def __init__(self, host): | 100 def __init__(self, host): |
85 self.host = host | 101 self.host = host |
86 self.initialised = False | 102 self.initialised = False |
87 self._m = host.plugins["XEP-0277"] | 103 self._m = host.plugins["XEP-0277"] |
88 self._p = host.plugins["XEP-0060"] | 104 self._p = host.plugins["XEP-0060"] |
105 self._e = host.plugins["XEP-0106"] | |
106 self._c = host.plugins["PUBSUB_CACHE"] | |
107 self.pubsub_service = APPubsubService(self) | |
89 | 108 |
90 host.bridge.addMethod( | 109 host.bridge.addMethod( |
91 "APSend", | 110 "APSend", |
92 ".plugin", | 111 ".plugin", |
93 in_sign="sss", | 112 in_sign="sss", |
95 method=self._publishMessage, | 114 method=self._publishMessage, |
96 async_=True, | 115 async_=True, |
97 ) | 116 ) |
98 | 117 |
99 def getHandler(self, __): | 118 def getHandler(self, __): |
100 return APPubsubService(self) | 119 return self.pubsub_service |
101 | 120 |
102 async def init(self, client): | 121 async def init(self, client): |
103 if self.initialised: | 122 if self.initialised: |
104 return | 123 return |
105 | 124 |
163 if connection_type not in ('http', 'https'): | 182 if connection_type not in ('http', 'https'): |
164 raise exceptions.ConfigError( | 183 raise exceptions.ConfigError( |
165 'bad ap-gateway http_connection_type, you must use one of "http" or ' | 184 'bad ap-gateway http_connection_type, you must use one of "http" or ' |
166 '"https"' | 185 '"https"' |
167 ) | 186 ) |
168 self.max_items = self.host.memory.getConfig( | 187 self.max_items = int(self.host.memory.getConfig( |
169 CONF_SECTION, 'new_node_max_items', 50 | 188 CONF_SECTION, 'new_node_max_items', 50 |
170 | 189 |
171 ) | 190 )) |
191 self.comments_max_depth = int(self.host.memory.getConfig( | |
192 CONF_SECTION, 'comments_max_depth', 0 | |
193 )) | |
172 self.ap_path = self.host.memory.getConfig(CONF_SECTION, 'ap_path', '_ap') | 194 self.ap_path = self.host.memory.getConfig(CONF_SECTION, 'ap_path', '_ap') |
173 self.base_ap_url = parse.urljoin(f"https://{self.public_url}", f"{self.ap_path}/") | 195 self.base_ap_url = parse.urljoin(f"https://{self.public_url}", f"{self.ap_path}/") |
174 # True (default) if we provide gateway only to entities/services from our server | 196 # True (default) if we provide gateway only to entities/services from our server |
175 self.local_only = C.bool( | 197 self.local_only = C.bool( |
176 self.host.memory.getConfig(CONF_SECTION, 'local_only', C.BOOL_TRUE) | 198 self.host.memory.getConfig(CONF_SECTION, 'local_only', C.BOOL_TRUE) |
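For reference, the options read above could be set in the component's configuration section along these lines (a hypothetical sketch: the actual section name is CONF_SECTION from constants.py, which is not part of this diff):

```ini
[<CONF_SECTION>]
http_connection_type = https
new_node_max_items = 50
comments_max_depth = 2
ap_path = _ap
local_only = true
```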
188 reactor.listenSSL(self.http_port, self.server, context_factory) | 210 reactor.listenSSL(self.http_port, self.server, context_factory) |
189 | 211 |
190 async def profileConnecting(self, client): | 212 async def profileConnecting(self, client): |
191 self.client = client | 213 self.client = client |
192 await self.init(client) | 214 await self.init(client) |
215 | |
216 async def getVirtualClient(self, actor_id: str) -> SatXMPPEntity: | |
217 """Get client for this component with a specified jid | |
218 | |
219 This is needed to perform operations with the virtual JID corresponding to the AP | |
220 actor instead of the JID of the gateway itself. | |
221 @param actor_id: ID of the actor | |
222 @return: virtual client | |
223 """ | |
224 account = await self.getAPAccountFromId(actor_id) | |
225 local_jid = self.getLocalJIDFromAccount(account) | |
226 return self.client.getVirtualClient(local_jid) | |
227 | |
228 def isActivity(self, data: dict) -> bool: | |
229 """Return True if the data has an activity type""" | |
230 try: | |
231 return (data.get("type") or "").lower() in ACTIVITY_TYPES_LOWER | |
232 except (KeyError, TypeError): | |
233 return False | |
193 | 234 |
194 async def apGet(self, url: str) -> dict: | 235 async def apGet(self, url: str) -> dict: |
195 """Retrieve AP JSON from given URL | 236 """Retrieve AP JSON from given URL |
196 | 237 |
197 @raise error.StanzaError: "service-unavailable" is sent when something went wrong | 238 @raise error.StanzaError: "service-unavailable" is sent when something went wrong |
512 "This gateway is configured to map only local entities and services" | 553 "This gateway is configured to map only local entities and services" |
513 ) | 554 ) |
514 | 555 |
515 return jid_, node | 556 return jid_, node |
516 | 557 |
517 def parseAPURL(self, url: str) -> Tuple[str, str]: | 558 def getLocalJIDFromAccount(self, account: str) -> jid.JID: |
559 """Compute JID linking to an AP account | |
560 | |
561 The local JID is computed by escaping the AP actor handle and using it as the |
562 local part of the JID, with this gateway's own domain as the domain part |
563 """ | |
564 return jid.JID( | |
565 None, | |
566 ( | |
567 self._e.escape(account), | |
568 self.client.jid.host, | |
569 None | |
570 ) | |
571 ) | |
572 | |
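For illustration, a simplified sketch of this mapping which only escapes "@" (the plugin itself delegates to the XEP-0106 plugin, which handles the full escape set):

```python
from twisted.words.protocols.jabber import jid


def local_jid(account: str, gateway_host: str) -> jid.JID:
    # XEP-0106 escapes "@" in the local part as "\40"
    escaped = account.replace("@", "\\40")
    return jid.JID(None, (escaped, gateway_host, None))


print(local_jid("alice@mastodon.example", "ap.gateway.example").full())
# alice\40mastodon.example@ap.gateway.example
```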
573 def parseAPURL(self, url: str) -> Tuple[str, List[str]]: | |
518 """Parse an URL leading to an AP endpoint | 574 """Parse an URL leading to an AP endpoint |
519 | 575 |
520 @param url: URL to parse (schema is not mandatory) | 576 @param url: URL to parse (schema is not mandatory) |
521 @return: endpoint type and AP account | 577 @return: endpoint type and extra arguments |
522 """ | 578 """ |
523 path = parse.urlparse(url).path.lstrip("/") | 579 path = parse.urlparse(url).path.lstrip("/") |
524 type_, account = path[len(self.ap_path):].lstrip("/").split("/", 1) | 580 type_, *extra_args = path[len(self.ap_path):].lstrip("/").split("/", 1) |
525 return type_, parse.unquote(account) | 581 return type_, [parse.unquote(a) for a in extra_args] |
526 | 582 |
527 def buildAPURL(self, type_: str, *args: str) -> str: | 583 def buildAPURL(self, type_: str, *args: str) -> str: |
528 """Build an AP endpoint URL | 584 """Build an AP endpoint URL |
529 | 585 |
530 @param type_: type of AP endpoint | 586 @param type_: type of AP endpoint |
533 return parse.urljoin( | 589 return parse.urljoin( |
534 self.base_ap_url, | 590 self.base_ap_url, |
535 str(Path(type_).joinpath(*(parse.quote_plus(a) for a in args))) | 591 str(Path(type_).joinpath(*(parse.quote_plus(a) for a in args))) |
536 ) | 592 ) |
537 | 593 |
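A quick round trip through these two helpers, assuming the default `ap_path` of "_ap":

```python
from pathlib import Path
from urllib import parse

base_ap_url = "https://example.org/_ap/"

# buildAPURL logic
url = parse.urljoin(
    base_ap_url, str(Path("actor").joinpath(parse.quote_plus("alice@example.org")))
)
print(url)  # https://example.org/_ap/actor/alice%40example.org

# parseAPURL logic
path = parse.urlparse(url).path.lstrip("/")
type_, *extra_args = path[len("_ap"):].lstrip("/").split("/", 1)
print(type_, [parse.unquote(a) for a in extra_args])
# actor ['alice@example.org']
```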
538 async def signAndPost(self, url: str, url_actor: str, doc: dict) -> TReqResponse: | 594 def buildSignatureHeader(self, values: Dict[str, str]) -> str: |
539 """Sign a documentent and post it to AP server | 595 """Build key="<value>" signature header from signature data""" |
540 | 596 fields = [] |
541 @param url: AP server endpoint | 597 for key, value in values.items(): |
542 @param url_actor: URL generated by this gateway for local actor | 598 if key not in ("(created)", "(expired)"): |
543 @param doc: document to send | 599 if '"' in value: |
544 """ | 600 raise NotImplementedError( |
545 p_url = parse.urlparse(url) | 601 "string escaping is not implemented, double-quote can't be used " |
546 date = http.datetimeToString().decode() | 602 f"in {value!r}" |
547 body = json.dumps(doc).encode() | 603 ) |
548 digest_hash = base64.b64encode(hashlib.sha256(body).digest()).decode() | 604 value = f'"{value}"' |
549 digest = f"sha-256={digest_hash}" | 605 fields.append(f"{key}={value}") |
550 to_sign = ( | 606 |
551 f"(request-target): post {p_url.path}\nhost: {p_url.hostname}\n" | 607 return ",".join(fields) |
552 f"date: {date}\ndigest: {digest}" | 608 |
553 ) | 609 def getDigest(self, body: bytes, algo="SHA-256") -> Tuple[str, str]: |
610 """Get digest data to use in header and signature | |
611 | |
612 @param body: body of the request | |
613 @return: hash name and digest | |
614 """ | |
615 if algo != "SHA-256": | |
616 raise NotImplementedError("only SHA-256 is implemented for now") | |
617 return algo, base64.b64encode(hashlib.sha256(body).digest()).decode() | |
618 | |
619 @async_lru(maxsize=LRU_MAX_SIZE) | |
620 async def getActorPubKeyData( | |
621 self, | |
622 actor_id: str | |
623 ) -> Tuple[str, str, rsa.RSAPublicKey]: | |
624 """Retrieve Public Key data from actor ID | |
625 | |
626 @param actor_id: actor ID (url) | |
627 @return: key_id, owner and public_key | |
628 @raise KeyError: publicKey is missing from actor data | |
629 """ | |
630 actor_data = await self.apGet(actor_id) | |
631 pub_key_data = actor_data["publicKey"] | |
632 key_id = pub_key_data["id"] | |
633 owner = pub_key_data["owner"] | |
634 pub_key_pem = pub_key_data["publicKeyPem"] | |
635 pub_key = serialization.load_pem_public_key(pub_key_pem.encode()) | |
636 return key_id, owner, pub_key | |
637 | |
638 def createActivity( | |
639 self, | |
640 activity: str, | |
641 actor_id: str, | |
642 object_: Optional[Union[str, dict]] = None, | |
643 target: Optional[Union[str, dict]] = None, | |
644 **kwargs, | |
645 ) -> Dict[str, Any]: | |
646 """Generate base data for an activity | |
647 | |
648 @param activity: one of ACTIVITY_TYPES | |
649 """ | |
650 if activity not in ACTIVITY_TYPES: | |
651 raise exceptions.InternalError(f"invalid activity: {activity!r}") | |
652 if object_ is None and activity in ACTIVITY_OBJECT_MANDATORY: | |
653 raise exceptions.InternalError( | |
654 f'"object_" is mandatory for activity {activity!r}' | |
655 ) | |
656 if target is None and activity in ACTIVITY_TARGET_MANDATORY: | |
657 raise exceptions.InternalError( | |
658 f'"target" is mandatory for activity {activity!r}' | |
659 ) | |
660 activity_id = f"{actor_id}#{activity.lower()}_{shortuuid.uuid()}" | |
661 data: Dict[str, Any] = { | |
662 "@context": "https://www.w3.org/ns/activitystreams", | |
663 "actor": actor_id, | |
664 "id": activity_id, | |
665 "type": activity, | |
666 } | |
667 data.update(kwargs) | |
668 if object_ is not None: | |
669 data["object"] = object_ | |
670 if target is not None: | |
671 data["target"] = target | |
672 | |
673 return data | |
674 | |
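For instance, a `createActivity("Follow", actor_id, object_=target_actor_id)` call would produce a payload like this (illustrative URLs, with the random shortuuid suffix shown as a placeholder):

```python
{
    "@context": "https://www.w3.org/ns/activitystreams",
    "actor": "https://example.org/_ap/actor/alice%40example.org",
    "id": "https://example.org/_ap/actor/alice%40example.org#follow_<shortuuid>",
    "type": "Follow",
    "object": "https://mastodon.example/users/bob",
}
```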
675 def getKeyId(self, actor_id: str) -> str: | |
676 """Get local key ID from actor ID""" | |
677 return f"{actor_id}#main-key" | |
678 | |
679 async def checkSignature( | |
680 self, | |
681 signature: str, | |
682 key_id: str, | |
683 headers: Dict[str, str] | |
684 ) -> str: | |
685 """Verify that signature matches given headers | |
686 | |
687 see https://datatracker.ietf.org/doc/html/draft-cavage-http-signatures-06#section-3.1.2 | |
688 | |
689 @param signature: Base64 encoded signature | |
690 @param key_id: ID of the key used to sign the data | |
691 @param headers: headers and their values, including pseudo-headers | |
692 @return: id of the signing actor | |
693 | |
694 @raise InvalidSignature: signature doesn't match headers | |
695 """ | |
696 to_sign = "\n".join(f"{k.lower()}: {v}" for k,v in headers.items()) | |
697 if key_id.startswith("acct:"): | |
698 actor = key_id[5:] | |
699 actor_id = await self.getAPActorIdFromAccount(actor) | |
700 else: | |
701 actor_id = key_id.split("#", 1)[0] | |
702 | |
703 pub_key_id, pub_key_owner, pub_key = await self.getActorPubKeyData(actor_id) | |
704 if pub_key_id != key_id or pub_key_owner != actor_id: | |
705 raise exceptions.EncryptionError("Public Key mismatch") | |
706 | |
707 try: | |
708 pub_key.verify( | |
709 base64.b64decode(signature), | |
710 to_sign.encode(), | |
711 # we have to use PKCS1v15 padding to be compatible with Mastodon | |
712 padding.PKCS1v15(), # type: ignore | |
713 hashes.SHA256() # type: ignore | |
714 ) | |
715 except InvalidSignature: | |
716 raise exceptions.EncryptionError("Invalid signature (using PKCS#1 v1.5 and SHA-256)") |
717 | |
718 return actor_id | |
719 | |
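The `Signature` header of an incoming request has to be parsed into its key/value pairs before this method can be called (the gateway does this in http_server.py, not shown in this diff). A hypothetical sketch of that parsing step:

```python
import re

SIGN_PARAM_RE = re.compile(r'(\w+)="([^"]+)"')


def parse_signature_header(value: str) -> dict:
    """Split a Signature header into its key/value parameters"""
    return dict(SIGN_PARAM_RE.findall(value))


sign_data = parse_signature_header(
    'keyId="https://mastodon.example/users/bob#main-key",'
    'algorithm="rsa-sha256",headers="(request-target) host date digest",'
    'signature="Y2Zha2U="'
)
# the headers to verify are then rebuilt in the order given by
# sign_data["headers"] and passed to checkSignature() together with
# sign_data["signature"] and sign_data["keyId"]
```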
720 def getSignatureData( | |
721 self, | |
722 key_id: str, | |
723 headers: Dict[str, str] | |
724 ) -> Tuple[Dict[str, str], Dict[str, str]]: | |
725 """Generate and return signature and corresponding headers | |
726 | |
727 @param key_id: ID of the key (URL linking to the data with public key) |
728 @param headers: headers to sign and their values: |
729     pseudo-headers (e.g. "(request-target)") must be included here; |
730     they are part of the signed data, but are stripped from the |
731     returned headers |
732 |
733 @return: headers and signature data |
734     ``headers`` is an updated copy of the ``headers`` argument, with |
735     pseudo-headers removed and ``Signature`` added. |
736 |
737 """ |
738 to_sign = "\n".join(f"{k.lower()}: {v}" for k,v in headers.items()) | |
554 signature = base64.b64encode(self.private_key.sign( | 739 signature = base64.b64encode(self.private_key.sign( |
555 to_sign.encode(), | 740 to_sign.encode(), |
556 # we have to use PKCS1v15 padding to be compatible with Mastodon | 741 # we have to use PKCS1v15 padding to be compatible with Mastodon |
557 padding.PKCS1v15(), | 742 padding.PKCS1v15(), # type: ignore |
558 hashes.SHA256() | 743 hashes.SHA256() # type: ignore |
559 )).decode() | 744 )).decode() |
560 h_signature = ( | 745 sign_data = { |
561 f'keyId="{url_actor}",headers="(request-target) host date digest",' | 746 "keyId": key_id, |
562 f'signature="{signature}"' | 747 "Algorithm": "rsa-sha256", |
748 "headers": " ".join(headers.keys()), | |
749 "signature": signature | |
750 } | |
751 new_headers = {k: v for k,v in headers.items() if not k.startswith("(")} | |
752 new_headers["Signature"] = self.buildSignatureHeader(sign_data) | |
753 return new_headers, sign_data | |
754 | |
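A hedged usage sketch, assuming `gateway` is an initialised APGateway instance (note that the header set above uses the capitalised "Algorithm" key, as written in the code):

```python
headers = {
    "(request-target)": "post /inbox",
    "Host": "mastodon.example",
    "Date": "Tue, 22 Mar 2022 16:00:42 GMT",
    "Digest": "SHA-256=<Base64 digest of the body>",
}
key_id = gateway.getKeyId("https://example.org/_ap/actor/alice%40example.org")
new_headers, sign_data = gateway.getSignatureData(key_id, headers)
# new_headers no longer contains "(request-target)" and gains a Signature
# header of the form:
#   keyId="<actor id>#main-key",Algorithm="rsa-sha256",
#   headers="(request-target) host date digest",signature="<Base64>"
```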
755 async def signAndPost(self, url: str, actor_id: str, doc: dict) -> TReqResponse: | |
756 """Sign a documentent and post it to AP server | |
757 | |
758 @param url: AP server endpoint | |
759 @param actor_id: originating actor ID (URL) | |
760 @param doc: document to send | |
761 """ | |
762 p_url = parse.urlparse(url) | |
763 body = json.dumps(doc).encode() | |
764 digest_algo, digest_hash = self.getDigest(body) | |
765 digest = f"{digest_algo}={digest_hash}" | |
766 | |
767 headers = { | |
768 "(request-target)": f"post {p_url.path}", | |
769 "Host": p_url.hostname, | |
770 "Date": http.datetimeToString().decode(), | |
771 "Digest": digest | |
772 } | |
773 headers, __ = self.getSignatureData(self.getKeyId(actor_id), headers) | |
774 | |
775 headers["Content-Type"] = ( | |
776 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"' | |
563 ) | 777 ) |
564 return await treq.post( | 778 resp = await treq.post( |
565 url, | 779 url, |
566 body, | 780 body, |
567 headers={ | 781 headers=headers, |
568 "Host": [p_url.hostname], | |
569 "Date": [date], | |
570 "Digest": [digest], | |
571 "Signature": [h_signature], | |
572 } | |
573 ) | 782 ) |
783 if resp.code >= 400: | |
784 text = await resp.text() | |
785 log.warning(f"POST request to {url} failed: {text}") | |
786 return resp | |
574 | 787 |
575 def _publishMessage(self, mess_data_s: str, service_s: str, profile: str): | 788 def _publishMessage(self, mess_data_s: str, service_s: str, profile: str): |
576 mess_data: dict = data_format.deserialise(mess_data_s) # type: ignore | 789 mess_data: dict = data_format.deserialise(mess_data_s) # type: ignore |
577 service = jid.JID(service_s) | 790 service = jid.JID(service_s) |
578 client = self.host.getClient(profile) | 791 client = self.host.getClient(profile) |
579 return defer.ensureDeferred(self.publishMessage(client, mess_data, service)) | 792 return defer.ensureDeferred(self.publishMessage(client, mess_data, service)) |
580 | 793 |
794 @async_lru(maxsize=LRU_MAX_SIZE) | |
581 async def getAPActorIdFromAccount(self, account: str) -> str: | 795 async def getAPActorIdFromAccount(self, account: str) -> str: |
582 """Retrieve account ID from it's handle using WebFinger | 796 """Retrieve account ID from it's handle using WebFinger |
583 | 797 |
584 @param account: AP handle (user@domain.tld) | 798 @param account: AP handle (user@domain.tld) |
585 @return: Actor ID (which is an URL) | 799 @return: Actor ID (which is an URL) |
609 raise ValueError( | 823 raise ValueError( |
610 f"No ActivityPub link found for {account!r}" | 824 f"No ActivityPub link found for {account!r}" |
611 ) | 825 ) |
612 return href | 826 return href |
613 | 827 |
614 async def getAPActorDataFromId(self, account: str) -> dict: | 828 async def getAPActorDataFromAccount(self, account: str) -> dict: |
615 """Retrieve ActivityPub Actor data | 829 """Retrieve ActivityPub Actor data |
616 | 830 |
617 @param account: ActivityPub Actor identifier | 831 @param account: AP account handle (user@domain.tld) |
618 """ | 832 """ |
619 href = await self.getAPActorIdFromAccount(account) | 833 href = await self.getAPActorIdFromAccount(account) |
620 return await self.apGet(href) | 834 return await self.apGet(href) |
621 | 835 |
622 @async_lru(maxsize=LRU_MAX_SIZE) | 836 @async_lru(maxsize=LRU_MAX_SIZE) |
623 async def getAPAccountFromId(self, actor_id: str): | 837 async def getAPInboxFromId(self, actor_id: str) -> str: |
838 """Retrieve inbox of an actor_id""" | |
839 data = await self.apGet(actor_id) | |
840 return data["inbox"] | |
841 | |
842 @async_lru(maxsize=LRU_MAX_SIZE) | |
843 async def getAPAccountFromId(self, actor_id: str) -> str: | |
624 """Retrieve AP account from the ID URL | 844 """Retrieve AP account from the ID URL |
625 | 845 |
626 @param actor_id: AP ID of the actor (URL to the actor data) | 846 @param actor_id: AP ID of the actor (URL to the actor data) |
627 """ | 847 """ |
628 url_parsed = parse.urlparse(actor_id) | 848 url_parsed = parse.urlparse(actor_id) |
646 raise exceptions.DataError(msg) | 866 raise exceptions.DataError(msg) |
647 return account | 867 return account |
648 | 868 |
649 async def getAPItems( | 869 async def getAPItems( |
650 self, | 870 self, |
651 account: str, | 871 collection: dict, |
652 max_items: Optional[int] = None, | 872 max_items: Optional[int] = None, |
653 chronological_pagination: bool = True, | 873 chronological_pagination: bool = True, |
654 after_id: Optional[str] = None, | 874 after_id: Optional[str] = None, |
655 start_index: Optional[int] = None, | 875 start_index: Optional[int] = None, |
656 ) -> Tuple[List[domish.Element], rsm.RSMResponse]: | 876 ) -> Tuple[List[domish.Element], rsm.RSMResponse]: |
673 Due to ActivityStream Collection Paging limitations, this is inefficient and | 893 Due to ActivityStream Collection Paging limitations, this is inefficient and |
674 all pages before the requested index will be retrieved to count items. | 894 all pages before the requested index will be retrieved to count items. |
675 @return: XMPP Pubsub items and corresponding RSM Response | 895 @return: XMPP Pubsub items and corresponding RSM Response |
676 Items are always returned in chronological order in the result | 896 Items are always returned in chronological order in the result |
677 """ | 897 """ |
678 actor_data = await self.getAPActorDataFromId(account) | |
679 outbox = actor_data.get("outbox") | |
680 rsm_resp: Dict[str, Union[bool, int]] = {} | 898 rsm_resp: Dict[str, Union[bool, int]] = {} |
681 if not outbox: | |
682 raise exceptions.DataError(f"No outbox found for actor {account}") | |
683 outbox_data = await self.apGet(outbox) | |
684 try: | 899 try: |
685 count = outbox_data["totalItems"] | 900 count = collection["totalItems"] |
686 except KeyError: | 901 except KeyError: |
687 log.warning( | 902 log.warning( |
688 f'"totalItems" not found in outbox of {account}, defaulting to 20' | 903 f'"totalItems" not found in collection {collection.get("id")}, ' |
904 "defaulting to 20" | |
689 ) | 905 ) |
690 count = 20 | 906 count = 20 |
691 else: | 907 else: |
692 log.info(f"{account}'s outbox has {count} item(s)") | 908 log.info(f"{collection.get('id')} has {count} item(s)") |
909 | |
693 rsm_resp["count"] = count | 910 rsm_resp["count"] = count |
694 | 911 |
695 if start_index is not None: | 912 if start_index is not None: |
696 assert chronological_pagination and after_id is None | 913 assert chronological_pagination and after_id is None |
697 if start_index >= count: | 914 if start_index >= count: |
708 else: | 925 else: |
709 # we'll convert "start_index" to "after_id", thus we need the item just | 926 # we'll convert "start_index" to "after_id", thus we need the item just |
710 # before "start_index" | 927 # before "start_index" |
711 previous_index = start_index - 1 | 928 previous_index = start_index - 1 |
712 retrieved_items = 0 | 929 retrieved_items = 0 |
713 current_page = outbox_data["last"] | 930 current_page = collection["last"] |
714 while retrieved_items < count: | 931 while retrieved_items < count: |
715 page_data, items = await self.parseAPPage(current_page) | 932 page_data, items = await self.parseAPPage(current_page) |
716 if not items: | 933 if not items: |
717 log.warning(f"found an empty AP page at {current_page}") | 934 log.warning(f"found an empty AP page at {current_page}") |
718 return [], rsm_resp | 935 return [], rsm_resp |
732 "Error while retrieving previous page from AP service at " | 949 "Error while retrieving previous page from AP service at " |
733 f"{current_page}" | 950 f"{current_page}" |
734 ) | 951 ) |
735 | 952 |
736 init_page = "last" if chronological_pagination else "first" | 953 init_page = "last" if chronological_pagination else "first" |
737 page = outbox_data.get(init_page) | 954 page = collection.get(init_page) |
738 if not page: | 955 if not page: |
739 raise exceptions.DataError( | 956 raise exceptions.DataError( |
740 f"Initial page {init_page!r} not found for outbox {outbox}" | 957 f"Initial page {init_page!r} not found for collection " |
958 f"{collection.get('id')})" | |
741 ) | 959 ) |
742 items = [] | 960 items = [] |
743 page_items = [] | 961 page_items = [] |
744 retrieved_items = 0 | 962 retrieved_items = 0 |
745 found_after_id = False | 963 found_after_id = False |
746 | 964 |
747 while retrieved_items < count: | 965 while retrieved_items < count: |
748 __, page_items = await self.parseAPPage(page) | 966 __, page_items = await self.parseAPPage(page) |
967 if not page_items: | |
968 break | |
749 retrieved_items += len(page_items) | 969 retrieved_items += len(page_items) |
750 if after_id is not None and not found_after_id: | 970 if after_id is not None and not found_after_id: |
751 # if we have an after_id, we ignore all items until the requested one is | 971 # if we have an after_id, we ignore all items until the requested one is |
752 # found | 972 # found |
753 try: | 973 try: |
754 limit_idx = [i["id"] for i in page_items].index(after_id) | 974 limit_idx = [i["id"] for i in page_items].index(after_id) |
755 except ValueError: | 975 except ValueError: |
756 # if "after_id" is not found, we don't add any item from this page | 976 # if "after_id" is not found, we don't add any item from this page |
757 log.debug(f"{after_id!r} not found at {page}, skipping") | 977 page_id = page.get("id") if isinstance(page, dict) else page |
978 log.debug(f"{after_id!r} not found at {page_id}, skipping") | |
758 else: | 979 else: |
759 found_after_id = True | 980 found_after_id = True |
760 if chronological_pagination: | 981 if chronological_pagination: |
761 start_index = retrieved_items - len(page_items) + limit_idx + 1 | 982 start_index = retrieved_items - len(page_items) + limit_idx + 1 |
762 page_items = page_items[limit_idx+1:] | 983 page_items = page_items[limit_idx+1:] |
771 if chronological_pagination: | 992 if chronological_pagination: |
772 items = items[:max_items] | 993 items = items[:max_items] |
773 else: | 994 else: |
774 items = items[-max_items:] | 995 items = items[-max_items:] |
775 break | 996 break |
776 page = outbox_data.get("prev" if chronological_pagination else "next") | 997 page = collection.get("prev" if chronological_pagination else "next") |
777 if not page: | 998 if not page: |
778 break | 999 break |
779 | 1000 |
780 if after_id is not None and not found_after_id: | 1001 if after_id is not None and not found_after_id: |
781 raise error.StanzaError("item-not-found") | 1002 raise error.StanzaError("item-not-found") |
782 | 1003 |
783 if after_id is None: | |
784 rsm_resp["index"] = 0 if chronological_pagination else count - len(items) | |
785 | |
786 if start_index is not None: | |
787 rsm_resp["index"] = start_index | |
788 elif after_id is not None: | |
789 log.warning("Can't determine index of first element") | |
790 elif chronological_pagination: | |
791 rsm_resp["index"] = 0 | |
792 else: | |
793 rsm_resp["index"] = count - len(items) | |
794 if items: | 1004 if items: |
1005 if after_id is None: | |
1006 rsm_resp["index"] = 0 if chronological_pagination else count - len(items) | |
1007 if start_index is not None: | |
1008 rsm_resp["index"] = start_index | |
1009 elif after_id is not None: | |
1010 log.warning("Can't determine index of first element") | |
1011 elif chronological_pagination: | |
1012 rsm_resp["index"] = 0 | |
1013 else: | |
1014 rsm_resp["index"] = count - len(items) | |
795 rsm_resp.update({ | 1015 rsm_resp.update({ |
796 "first": items[0]["id"], | 1016 "first": items[0]["id"], |
797 "last": items[-1]["id"] | 1017 "last": items[-1]["id"] |
798 }) | 1018 }) |
799 | 1019 |
800 return items, rsm.RSMResponse(**rsm_resp) | 1020 return items, rsm.RSMResponse(**rsm_resp) |
801 | 1021 |
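For example (hypothetical numbers), with a 42-item collection and a chronological request limited to 10 items, the method walks pages starting from the collection's "last" page and returns the oldest items first:

```python
# assuming "gateway" is an initialised APGateway and "collection" is a fetched
# AP OrderedCollection dict with totalItems == 42
items, rsm_resp = await gateway.getAPItems(collection, max_items=10)
# items: the 10 oldest entries as pubsub <item> elements, oldest first
# rsm_resp: RSMResponse(count=42, index=0,
#                       first=<id of items[0]>, last=<id of items[9]>)
```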
802 async def parseAPPage(self, url: str) -> Tuple[dict, List[domish.Element]]: | 1022 async def parseAPPage( |
1023 self, | |
1024 page: Union[str, dict] | |
1025 ) -> Tuple[dict, List[domish.Element]]: | |
803 """Convert AP objects from an AP page to XMPP items | 1026 """Convert AP objects from an AP page to XMPP items |
804 | 1027 |
805 @param url: URL linking to an AP page | 1028 @param page: either a URL linking to an AP page, or the page data directly |
806 @return: page data, pubsub items | 1029 @return: page data, pubsub items |
807 """ | 1030 """ |
808 page_data = await self.apGet(url) | 1031 page_data = await self.apGetObject(page) |
809 ap_items = page_data.get("orderedItems") | 1032 if page_data is None: |
810 if not ap_items: | 1033 log.warning('No data found in collection') |
811 log.warning('No "orderedItems" collection found') | 1034 return {}, [] |
812 return page_data, [] | 1035 ap_items = await self.apGetList(page_data, "orderedItems") |
1036 if ap_items is None: | |
1037 ap_items = await self.apGetList(page_data, "items") | |
1038 if not ap_items: | |
1039 log.warning(f'No item field found in collection: {page_data!r}') | |
1040 return page_data, [] | |
1041 else: | |
1042 log.warning( | |
1043 "Items are not ordered, this is not spec compliant" | |
1044 ) | |
813 items = [] | 1045 items = [] |
814 # AP Collections are in antichronological order, but we expect chronological in | 1046 # AP Collections are in antichronological order, but we expect chronological in |
815 # Pubsub, thus we reverse it | 1047 # Pubsub, thus we reverse it |
816 for ap_item in reversed(ap_items): | 1048 for ap_item in reversed(ap_items): |
817 try: | 1049 try: |
818 ap_object, mb_data = await self.apItem2MBdata(ap_item) | 1050 items.append(await self.apItem2Elt(ap_item)) |
819 except (exceptions.DataError, NotImplementedError, error.StanzaError): | 1051 except (exceptions.DataError, NotImplementedError, error.StanzaError): |
820 continue | 1052 continue |
821 | 1053 |
822 item_elt = await self._m.data2entry( | |
823 self.client, mb_data, ap_object["id"], None, self._m.namespace | |
824 ) | |
825 item_elt["publisher"] = mb_data["author_jid"].full() | |
826 items.append(item_elt) | |
827 | |
828 return page_data, items | 1054 return page_data, items |
829 | 1055 |
830 async def apItem2MBdata(self, ap_item: dict) -> Tuple[dict, dict]: | 1056 async def apItem2MbDataAndElt(self, ap_item: dict) -> Tuple[dict, domish.Element]: |
831 """Convert AP item to microblog data | 1057 """Convert AP item to parsed microblog data and corresponding item element""" |
1058 mb_data = await self.apItem2MBdata(ap_item) | |
1059 item_elt = await self._m.data2entry( | |
1060 self.client, mb_data, mb_data["id"], None, self._m.namespace | |
1061 ) | |
1062 item_elt["publisher"] = mb_data["author_jid"] | |
1063 return mb_data, item_elt | |
1064 | |
1065 async def apItem2Elt(self, ap_item: dict) -> domish.Element: | |
1066 """Convert AP item to XMPP item element""" | |
1067 __, item_elt = await self.apItem2MbDataAndElt(ap_item) | |
1068 return item_elt | |
1069 | |
1070 async def getCommentsNodes( | |
1071 self, | |
1072 item_id: str, | |
1073 parent_id: Optional[str] | |
1074 ) -> Tuple[Optional[str], Optional[str]]: | |
1075 """Get node where this item is and node to use for comments | |
1076 | |
1077 if config option "comments_max_depth" is set, a common node will be used below the | |
1078 given depth | |
1079 @param item_id: ID of the reference item | |
1080 @param parent_id: ID of the parent item if any (the ID set in "inReplyTo") | |
1081 @return: a tuple with parent_node_id, comments_node_id: | |
1082     - parent_node_id is the ID of the node where the reference item must be; |
1083     None is returned when the root node (i.e. not a comments node) must be used |
1084     - comments_node_id is the ID of the node to use for comments; None is |
1085     returned when no comment node must be used (i.e. when "comments_max_depth" |
1086     has been reached) |
1087 """ | |
1088 if parent_id is None or not self.comments_max_depth: | |
1089 return ( | |
1090 self._m.getCommentsNode(parent_id) if parent_id is not None else None, | |
1091 self._m.getCommentsNode(item_id) | |
1092 ) | |
1093 parent_url = parent_id | |
1094 parents = [] | |
1095 for __ in range(COMMENTS_MAX_PARENTS): | |
1096 parent_item = await self.apGet(parent_url) | |
1097 parents.insert(0, parent_item) | |
1098 parent_url = parent_item.get("inReplyTo") | |
1099 if parent_url is None: | |
1100 break | |
1101 parent_limit = self.comments_max_depth-1 | |
1102 if len(parents) <= parent_limit: | |
1103 return ( | |
1104 self._m.getCommentsNode(parents[-1]["id"]), | |
1105 self._m.getCommentsNode(item_id) | |
1106 ) | |
1107 else: | |
1108 last_level_item = parents[parent_limit] | |
1109 return ( | |
1110 self._m.getCommentsNode(last_level_item["id"]), | |
1111 None | |
1112 ) | |
1113 | |
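Since the documentation announced in the commit message is still to come, here is a worked sketch of the depth logic above (hypothetical node naming; real node IDs come from XEP-0277's getCommentsNode). With `comments_max_depth = 2`, a depth-1 reply still gets its own comment node, while depth-2 and deeper replies are all flattened into the depth-1 item's node:

```python
def comments_nodes(parents: list, item_id: str, max_depth: int = 2):
    """Mimic getCommentsNodes' depth logic with placeholder node names"""
    # parents: chain of ancestor item IDs, root first (as built from inReplyTo)
    def node(item):
        return f"comments/{item}"

    parent_limit = max_depth - 1
    if len(parents) <= parent_limit:
        # shallow enough: the item gets its own comment node
        return node(parents[-1]), node(item_id)
    # too deep: further replies are flattened into the last allowed level
    return node(parents[parent_limit]), None


print(comments_nodes(["A"], "B"))       # ('comments/A', 'comments/B')
print(comments_nodes(["A", "B"], "C"))  # ('comments/B', None)
```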
1114 async def apItem2MBdata(self, ap_item: dict) -> dict: | |
1115 """Convert AP activity or object to microblog data | |
832 | 1116 |
833 @return: AP Item's Object and microblog data | 1117 @return: microblog data |
834 @raise exceptions.DataError: something is invalid in the AP item | 1118 @raise exceptions.DataError: something is invalid in the AP item |
835 @raise NotImplementedError: some AP data is not handled yet | 1119 @raise NotImplementedError: some AP data is not handled yet |
836 @raise error.StanzaError: error while contacting the AP server | 1120 @raise error.StanzaError: error while contacting the AP server |
837 """ | 1121 """ |
838 ap_object = ap_item.get("object") | 1122 is_activity = self.isActivity(ap_item) |
839 if not ap_object: | 1123 if is_activity: |
840 log.warning(f'No "object" found in AP item {ap_item!r}') | 1124 ap_object = await self.apGetObject(ap_item, "object") |
1125 if not ap_object: | |
1126 log.warning(f'No "object" found in AP item {ap_item!r}') | |
1127 raise exceptions.DataError | |
1128 else: | |
1129 ap_object = ap_item | |
1130 item_id = ap_object.get("id") | |
1131 if not item_id: | |
1132 log.warning(f'No "id" found in AP item: {ap_object!r}') | |
841 raise exceptions.DataError | 1133 raise exceptions.DataError |
842 if isinstance(ap_object, str): | 1134 mb_data = {"id": item_id} |
843 ap_object = await self.apGet(ap_object) | |
844 obj_id = ap_object.get("id") | |
845 if not obj_id: | |
846 log.warning(f'No "id" found in AP object: {ap_object!r}') | |
847 raise exceptions.DataError | |
848 if ap_object.get("inReplyTo") is not None: | |
849 raise NotImplementedError | |
850 mb_data = {} | |
851 for ap_key, mb_key in AP_MB_MAP.items(): | 1135 for ap_key, mb_key in AP_MB_MAP.items(): |
852 data = ap_object.get(ap_key) | 1136 data = ap_object.get(ap_key) |
853 if data is None: | 1137 if data is None: |
854 continue | 1138 continue |
855 mb_data[mb_key] = data | 1139 mb_data[mb_key] = data |
866 else: | 1150 else: |
867 mb_data["language"] = language | 1151 mb_data["language"] = language |
868 mb_data["content_xhtml"] = content_xhtml | 1152 mb_data["content_xhtml"] = content_xhtml |
869 | 1153 |
870 # author | 1154 # author |
871 actor = ap_item.get("actor") | 1155 if is_activity: |
872 if not actor: | 1156 authors = await self.apGetActors(ap_item, "actor") |
873 log.warning(f"no actor associated to object id {obj_id!r}") | 1157 else: |
874 raise exceptions.DataError | 1158 authors = await self.apGetActors(ap_object, "attributedTo") |
875 elif isinstance(actor, list): | 1159 if len(authors) > 1: |
876 # we only keep first item of list as author | 1160 # we only keep first item as author |
877 # TODO: handle multiple actors | 1161 # TODO: handle multiple actors |
878 if len(actor) > 1: | 1162 log.warning("multiple actors are not managed") |
879 log.warning("multiple actors are not managed") | 1163 |
880 actor = actor[0] | 1164 account = authors[0] |
881 | 1165 author_jid = self.getLocalJIDFromAccount(account).full() |
882 if isinstance(actor, dict): | 1166 |
883 actor = actor.get("id") | 1167 mb_data["author"] = account.split("@", 1)[0] |
884 if not actor: | 1168 mb_data["author_jid"] = author_jid |
885 log.warning(f"no actor id found: {actor!r}") | |
886 raise exceptions.DataError | |
887 | |
888 if isinstance(actor, str): | |
889 account = await self.getAPAccountFromId(actor) | |
890 mb_data["author"] = account.split("@", 1)[0] | |
891 author_jid = mb_data["author_jid"] = jid.JID( | |
892 None, | |
893 ( | |
894 self.host.plugins["XEP-0106"].escape(account), | |
895 self.client.jid.host, | |
896 None | |
897 ) | |
898 ) | |
899 else: | |
900 log.warning(f"unknown actor type found: {actor!r}") | |
901 raise exceptions.DataError | |
902 | 1169 |
903 # published/updated | 1170 # published/updated |
904 for field in ("published", "updated"): | 1171 for field in ("published", "updated"): |
905 value = ap_object.get(field) | 1172 value = ap_object.get(field) |
906 if not value and field == "updated": | 1173 if not value and field == "updated": |
910 mb_data[field] = calendar.timegm( | 1177 mb_data[field] = calendar.timegm( |
911 dateutil.parser.parse(str(value)).utctimetuple() | 1178 dateutil.parser.parse(str(value)).utctimetuple() |
912 ) | 1179 ) |
913 except dateutil.parser.ParserError as e: | 1180 except dateutil.parser.ParserError as e: |
914 log.warning(f"Can't parse {field!r} field: {e}") | 1181 log.warning(f"Can't parse {field!r} field: {e}") |
915 return ap_object, mb_data | 1182 |
1183 # comments | |
1184 in_reply_to = ap_object.get("inReplyTo") | |
1185 __, comments_node = await self.getCommentsNodes(item_id, in_reply_to) | |
1186 if comments_node is not None: | |
1187 comments_data = { | |
1188 "service": author_jid, | |
1189 "node": comments_node, | |
1190 "uri": uri.buildXMPPUri( | |
1191 "pubsub", | |
1192 path=author_jid, | |
1193 node=comments_node | |
1194 ) | |
1195 } | |
1196 mb_data["comments"] = [comments_data] | |
1197 | |
1198 return mb_data | |
916 | 1199 |
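Illustrative input and output for this conversion (hypothetical values; the exact key mapping comes from AP_MB_MAP in constants.py, which is not part of this diff):

```python
ap_object = {
    "id": "https://mastodon.example/users/bob/statuses/1",
    "type": "Note",
    "content": "<p>hello</p>",
    "attributedTo": "https://mastodon.example/users/bob",
    "published": "2022-03-22T17:00:42Z",
}
# apItem2MBdata(ap_object) would yield roughly:
# {
#     "id": "https://mastodon.example/users/bob/statuses/1",
#     "content_xhtml": "<p>hello</p>",
#     "author": "bob",
#     "author_jid": "bob\40mastodon.example@gateway.example",
#     "published": 1647968442,  # Unix time of 2022-03-22T17:00:42Z
#     "comments": [{"service": ..., "node": ..., "uri": ...}],  # if applicable
# }
```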
917 async def mbdata2APitem(self, client: SatXMPPEntity, mb_data: dict) -> dict: | 1200 async def mbdata2APitem(self, client: SatXMPPEntity, mb_data: dict) -> dict: |
918 """Convert Libervia Microblog Data to ActivityPub item""" | 1201 """Convert Libervia Microblog Data to ActivityPub item""" |
919 if not mb_data.get("id"): | 1202 if not mb_data.get("id"): |
920 mb_data["id"] = shortuuid.uuid() | 1203 mb_data["id"] = shortuuid.uuid() |
921 if not mb_data.get("author_jid"): | 1204 if not mb_data.get("author_jid"): |
922 mb_data["author_jid"] = client.jid | 1205 mb_data["author_jid"] = client.jid.full() |
923 ap_account = await self.getAPAccountFromJidAndNode( | 1206 ap_account = await self.getAPAccountFromJidAndNode( |
924 jid.JID(mb_data["author_jid"]), | 1207 jid.JID(mb_data["author_jid"]), |
925 None | 1208 None |
926 ) | 1209 ) |
927 url_actor = self.buildAPURL(TYPE_ACTOR, ap_account) | 1210 url_actor = self.buildAPURL(TYPE_ACTOR, ap_account) |
965 | 1248 |
966 @param service: JID corresponding to the AP actor. | 1249 @param service: JID corresponding to the AP actor. |
967 """ | 1250 """ |
968 if not service.user: | 1251 if not service.user: |
969 raise ValueError("service must have a local part") | 1252 raise ValueError("service must have a local part") |
970 account = self.host.plugins["XEP-0106"].unescape(service.user) | 1253 account = self._e.unescape(service.user) |
971 ap_actor_data = await self.getAPActorDataFromId(account) | 1254 ap_actor_data = await self.getAPActorDataFromAccount(account) |
972 | 1255 |
973 try: | 1256 try: |
974 inbox_url = ap_actor_data["endpoints"]["sharedInbox"] | 1257 inbox_url = ap_actor_data["endpoints"]["sharedInbox"] |
975 except KeyError: | 1258 except KeyError: |
976 raise exceptions.DataError("Can't get ActivityPub actor inbox") | 1259 raise exceptions.DataError("Can't get ActivityPub actor inbox") |
978 item_data = await self.mbdata2APitem(client, mess_data) | 1261 item_data = await self.mbdata2APitem(client, mess_data) |
979 url_actor = item_data["object"]["attributedTo"] | 1262 url_actor = item_data["object"]["attributedTo"] |
980 resp = await self.signAndPost(inbox_url, url_actor, item_data) | 1263 resp = await self.signAndPost(inbox_url, url_actor, item_data) |
981 if resp.code != 202: | 1264 if resp.code != 202: |
982 raise exceptions.NetworkError(f"unexpected return code: {resp.code}") | 1265 raise exceptions.NetworkError(f"unexpected return code: {resp.code}") |
1266 | |
1267 async def newAPItem( | |
1268 self, | |
1269 client: SatXMPPEntity, | |
1270 destinee: Optional[jid.JID], | |
1271 node: str, | |
1272 item: dict, | |
1273 ) -> None: | |
1274 """Analyse, cache and send notification for received AP item | |
1275 | |
1276 @param destinee: jid of the destinee, | |
1277 @param node: XMPP pubsub node | |
1278 @param item: AP object payload | |
1279 """ | |
1280 service = client.jid | |
1281 in_reply_to = item.get("inReplyTo") | |
1282 if in_reply_to and isinstance(in_reply_to, str): | |
1283 # this item is a reply, we use or create a corresponding node for comments | |
1284 parent_node, __ = await self.getCommentsNodes(item["id"], in_reply_to) | |
1285 node = parent_node or node | |
1286 cached_node = await self.host.memory.storage.getPubsubNode( | |
1287 client, service, node, with_subscriptions=True | |
1288 ) | |
1289 if cached_node is None: | |
1290 try: | |
1291 cached_node = await self.host.memory.storage.setPubsubNode( | |
1292 client, | |
1293 service, | |
1294 node, | |
1295 subscribed=True | |
1296 ) | |
1297 except IntegrityError as e: | |
1298 if "unique" in str(e.orig).lower(): | |
1299 # the node may already exist, if it has been created just after | |
1300 # getPubsubNode above | |
1301 log.debug("ignoring UNIQUE constraint error") | |
1302 cached_node = await self.host.memory.storage.getPubsubNode( | |
1303 client, service, node, with_subscriptions=True | |
1304 ) | |
1305 else: | |
1306 raise e | |
1307 | |
1308 else: | |
1309 # it is a root item (i.e. not a reply to an other item) | |
1310 cached_node = await self.host.memory.storage.getPubsubNode( | |
1311 client, service, node, with_subscriptions=True | |
1312 ) | |
1313 if cached_node is None: | |
1314 log.warning( | |
1315 f"Received item in unknown node {node!r} at {service}\n{item}" | |
1316 | |
1317 ) | |
1318 return | |
1319 mb_data, item_elt = await self.apItem2MbDataAndElt(item) | |
1320 await self.host.memory.storage.cachePubsubItems( | |
1321 client, | |
1322 cached_node, | |
1323 [item_elt], | |
1324 [mb_data] | |
1325 ) | |
1326 | |
1327 for subscription in cached_node.subscriptions: | |
1328 if subscription.state != SubscriptionState.SUBSCRIBED: | |
1329 continue | |
1330 self.pubsub_service.notifyPublish( | |
1331 service, | |
1332 node, | |
1333 [(subscription.subscriber, None, [item_elt])] | |
1334 ) |