view libervia/backend/plugins/plugin_xep_0447.py @ 4119:ece304ec7077

plugin XEP-0167: fix key used to store metadata: rel 424
author Goffi <goffi@goffi.org>
date Tue, 03 Oct 2023 15:18:25 +0200
parents 4b842c1fb686
children 0d7bb4df2343
line wrap: on
line source

#!/usr/bin/env python3

# Copyright (C) 2009-2022 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from collections import namedtuple
from functools import partial
import mimetypes
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import treq
from twisted.internet import defer
from twisted.words.xish import domish

from libervia.backend.core import exceptions
from libervia.backend.core.constants import Const as C
from libervia.backend.core.core_types import SatXMPPEntity
from libervia.backend.core.i18n import _
from libervia.backend.core.log import getLogger
from libervia.backend.tools import stream
from libervia.backend.tools.web import treq_client_no_ssl

log = getLogger(__name__)


# Plugin metadata, declared through the ``C.PI_*`` constant keys.
# NOTE(review): presumably consumed by the backend plugin loader — confirm.
PLUGIN_INFO = {
    C.PI_NAME: "Stateless File Sharing",
    C.PI_IMPORT_NAME: "XEP-0447",
    C.PI_TYPE: "XEP",
    C.PI_MODES: C.PLUG_MODE_BOTH,
    C.PI_PROTOCOLS: ["XEP-0447"],
    # these plugins are accessed with ``host.plugins[...]`` in ``XEP_0447.__init__``
    C.PI_DEPENDENCIES: ["XEP-0103", "XEP-0334", "XEP-0446", "ATTACH", "DOWNLOAD"],
    # this one is accessed with ``host.plugins.get(...)`` and may thus be missing
    C.PI_RECOMMENDATIONS: ["XEP-0363"],
    # name of the plugin class defined in this module
    C.PI_MAIN: "XEP_0447",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _("""Implementation of XEP-0447 (Stateless File Sharing)"""),
}

# XML namespace of the <file-sharing/> and <sources/> elements built and parsed by
# this plugin
NS_SFS = "urn:xmpp:sfs:0"
# Entry stored for each registered source handler: the callback parsing the source
# element, and a flag telling if the source is encrypted
SourceHandler = namedtuple("SourceHandler", ["callback", "encrypted"])


class XEP_0447:
    namespace = NS_SFS

    def __init__(self, host):
        """Wire the plugin into the host and register its handlers

        @param host: backend instance. Its ``plugins`` mapping, its ``trigger``
            manager and its ``register_namespace`` method are used here.
        """
        self.host = host
        log.info(_("XEP-0447 (Stateless File Sharing) plugin initialization"))
        host.register_namespace("sfs", NS_SFS)
        # maps a (namespace, element name) tuple to the SourceHandler to use for it
        self._sources_handlers = {}
        plugins = host.plugins
        self._u = plugins["XEP-0103"]
        self._hints = plugins["XEP-0334"]
        self._m = plugins["XEP-0446"]
        # HTTP upload plugin is retrieved with ``get``: it is None when not available
        self._http_upload = plugins.get("XEP-0363")
        self._attach = plugins["ATTACH"]
        self._attach.register(self.can_handle_attachment, self.attach, priority=1000)
        # <url-data/> sources are parsed by the XEP-0103 plugin
        self.register_source_handler(
            namespace=self._u.namespace,
            element_name="url-data",
            callback=self._u.parse_url_data_elt,
        )
        download_plugin = plugins["DOWNLOAD"]
        download_plugin.register_download_handler(self._u.namespace, self.download)
        host.trigger.add("message_received", self._message_received_trigger)

    def register_source_handler(
        self, namespace: str, element_name: str,
        callback: Callable[[domish.Element], Dict[str, Any]],
        encrypted: bool = False
    ) -> None:
        """Associate a parsing callback with a kind of file source element

        @param namespace: namespace of the supported source element
        @param element_name: name of the supported source element
        @param callback: method called to parse the element.
            It receives the matching element as only argument, and must return the
            parsed data.
        @param encrypted: True if the source is encrypted (the transmitting channel
            should then be end-to-end encrypted to avoid leaking decrypting data to
            servers).
        @raise exceptions.ConflictError: a handler is already registered for this
            namespace and element name
        """
        handler_key = namespace, element_name
        if handler_key not in self._sources_handlers:
            self._sources_handlers[handler_key] = SourceHandler(
                callback=callback, encrypted=encrypted
            )
            return
        raise exceptions.ConflictError(
            f"There is already a resource handler for namespace {namespace!r} and "
            f"name {element_name!r}"
        )

    async def download(
        self,
        client: SatXMPPEntity,
        attachment: Dict[str, Any],
        source: Dict[str, Any],
        dest_path: Union[Path, str],
        extra: Optional[Dict[str, Any]] = None
    ) -> Tuple[str, defer.Deferred]:
        """Download a file from the URL of a source

        @param client: client session, given to the destination file object
        @param attachment: attachment data. Its ``size`` key is used as the expected
            file size when it is present and usable.
        @param source: source data, the URL to retrieve is read from its ``url`` key
        @param dest_path: path where the downloaded file is written
        @param extra: download options. If ``ignore_tls_errors`` is set to a true
            value, TLS certificate check is disabled.
        @return: a tuple with the progress ID (the uid of the destination file object)
            and a Deferred following the download
        @raise ValueError: there is no ``url`` key in ``source``
        """
        # TODO: handle url-data headers
        if extra is None:
            extra = {}
        try:
            download_url = source["url"]
        except KeyError:
            raise ValueError(f"{source} has missing URL")

        if extra.get('ignore_tls_errors', False):
            log.warning(
                "TLS certificate check disabled, this is highly insecure"
            )
            treq_client = treq_client_no_ssl
        else:
            treq_client = treq

        file_size: Optional[int]
        try:
            file_size = int(attachment["size"])
        except (KeyError, TypeError, ValueError):
            # size is missing from metadata or is unusable (TypeError is raised when
            # the value is None), we ask the server with a HEAD request
            head_data = await treq_client.head(download_url)
            # getRawHeaders returns None when the header is not set
            content_length = head_data.headers.getRawHeaders('content-length')
            try:
                file_size = int(content_length[0])
            except (TypeError, IndexError, ValueError):
                # the server doesn't announce a usable size: we download anyway,
                # with an unknown size
                log.warning(
                    f"can't get size of file at {download_url}, the size is unknown"
                )
                file_size = None

        file_obj = stream.SatFile(
            self.host,
            client,
            dest_path,
            mode="wb",
            size=file_size,
        )

        progress_id = file_obj.uid

        resp = await treq_client.get(download_url, unbuffered=True)
        if resp.code == 200:
            # body is written to the file chunk by chunk, then the file is closed
            d = treq.collect(resp, file_obj.write)
            d.addCallback(lambda __: file_obj.close())
        else:
            # error handling is delegated to the DOWNLOAD plugin
            d = defer.Deferred()
            self.host.plugins["DOWNLOAD"].errback_download(file_obj, d, resp)
        return progress_id, d

    async def can_handle_attachment(self, client, data):
        """Tell if attachments can be handled by this plugin

        @param client: client session, used to look for the HTTP upload entity
        @param data: message data (not used here)
        @return: True if the HTTP upload plugin is available and an HTTP upload
            entity has been found, False otherwise
        """
        uploader = self._http_upload
        if uploader is None:
            # HTTP upload plugin is not available
            return False
        try:
            await uploader.get_http_upload_entity(client)
        except exceptions.NotFound:
            return False
        return True

    def get_sources_elt(
        self,
        children: Optional[List[domish.Element]] = None
    ) -> domish.Element:
        """Build a <sources/> element

        @param children: elements to add as children of the <sources/> element, if any
        @return: the new <sources/> element
        """
        sources_elt = domish.Element((NS_SFS, "sources"))
        for child_elt in children or ():
            sources_elt.addChild(child_elt)
        return sources_elt

    def get_file_sharing_elt(
        self,
        sources: List[Dict[str, Any]],
        disposition: Optional[str] = None,
        name: Optional[str] = None,
        media_type: Optional[str] = None,
        desc: Optional[str] = None,
        size: Optional[int] = None,
        file_hash: Optional[Tuple[str, str]] = None,
        date: Optional[Union[float, int]] = None,
        width: Optional[int] = None,
        height: Optional[int] = None,
        length: Optional[int] = None,
        thumbnail: Optional[str] = None,
        **kwargs,
    ) -> domish.Element:
        """Build a <file-sharing/> element

        @param sources: data of the sources of the file. Only sources with a ``url``
            key are supported, the whole source data is then used as keyword
            arguments to build the <url-data/> element.
        @param disposition: value of the ``disposition`` attribute, the attribute is
            not set if None
        @param name: name of the file. It is also used to guess ``media_type`` when
            the latter is None.
        @param media_type: media type of the file
        @param desc: file metadata, given as is to the file metadata element builder
        @param size: file metadata, given as is to the file metadata element builder
        @param file_hash: file metadata, given as is to the file metadata element
            builder
        @param date: file metadata, given as is to the file metadata element builder
        @param width: file metadata, given as is to the file metadata element builder
        @param height: file metadata, given as is to the file metadata element builder
        @param length: file metadata, given as is to the file metadata element builder
        @param thumbnail: file metadata, given as is to the file metadata element
            builder
        @param kwargs: extra keyword arguments are accepted but not used
        @return: ``<file-sharing/>`` element
        @raise NotImplementedError: a source has no ``url`` key
        """
        file_sharing_elt = domish.Element((NS_SFS, "file-sharing"))
        if disposition is not None:
            file_sharing_elt["disposition"] = disposition
        if media_type is None and name:
            # no explicit media type, we try to guess it from the file name
            media_type, __ = mimetypes.guess_type(name, strict=False)

        metadata_elt = self._m.get_file_metadata_elt(
            name=name,
            media_type=media_type,
            desc=desc,
            size=size,
            file_hash=file_hash,
            date=date,
            width=width,
            height=height,
            length=length,
            thumbnail=thumbnail,
        )
        file_sharing_elt.addChild(metadata_elt)

        sources_elt = self.get_sources_elt()
        file_sharing_elt.addChild(sources_elt)
        for source_data in sources:
            if "url" not in source_data:
                raise NotImplementedError(
                    f"source data not implemented: {source_data}"
                )
            url_data_elt = self._u.get_url_data_elt(**source_data)
            sources_elt.addChild(url_data_elt)

        return file_sharing_elt

    def parse_sources_elt(
        self,
        sources_elt: domish.Element
    ) -> List[Dict[str, Any]]:
        """Parse <sources/> element

        Each child element is parsed with the handler registered for its namespace
        and name. Children without namespace or without registered handler are
        ignored, with a warning.

        @param sources_elt: <sources/> element, or a direct parent element
        @return: list of found sources data.
            For each source, the encrypted flag is set if the handler has been
            registered as encrypted, and the ``type`` key defaults to the namespace
            of the source element.
        @raise exceptions.NotFound: Can't find <sources/> element
        """
        if sources_elt.name != "sources" or sources_elt.uri != NS_SFS:
            # we got a parent element, <sources/> must be one of its direct children
            try:
                sources_elt = next(sources_elt.elements(NS_SFS, "sources"))
            except StopIteration:
                raise exceptions.NotFound(
                    f"<sources/> element is missing: {sources_elt.toXml()}")
        sources = []
        for elt in sources_elt.elements():
            if not elt.uri:
                log.warning(f"ignoring source element {elt.toXml()}")
                continue
            key = (elt.uri, elt.name)
            try:
                source_handler = self._sources_handlers[key]
            except KeyError:
                log.warning(f"unmanaged file sharing element: {elt.toXml()}")
                continue
            else:
                source_data = source_handler.callback(elt)
                if source_handler.encrypted:
                    source_data[C.MESS_KEY_ENCRYPTED] = True
                if "type" not in source_data:
                    source_data["type"] = elt.uri
                sources.append(source_data)
        return sources

    def parse_file_sharing_elt(
        self,
        file_sharing_elt: domish.Element
    ) -> Dict[str, Any]:
        """Extract file-sharing data from a <file-sharing/> element

        @param file_sharing_elt: <file-sharing/> element, or a direct parent element
        @return: file-sharing data. It is a dict whose keys correspond to
            [get_file_sharing_elt] parameters
        @raise exceptions.NotFound: no <file-sharing/> element has been found
        @raise ValueError: the <sources/> element is missing
        """
        is_file_sharing = (
            file_sharing_elt.name == "file-sharing" and file_sharing_elt.uri == NS_SFS
        )
        if not is_file_sharing:
            # we got a parent element, we use its first <file-sharing/> child
            try:
                file_sharing_elt = next(
                    file_sharing_elt.elements(NS_SFS, "file-sharing")
                )
            except StopIteration:
                raise exceptions.NotFound

        # file metadata are optional
        try:
            parsed = self._m.parse_file_metadata_elt(file_sharing_elt)
        except exceptions.NotFound:
            parsed = {}

        disposition = file_sharing_elt.getAttribute("disposition")
        if disposition is not None:
            parsed["disposition"] = disposition

        try:
            parsed["sources"] = self.parse_sources_elt(file_sharing_elt)
        except exceptions.NotFound as e:
            raise ValueError(str(e))

        return parsed

    def _add_file_sharing_attachments(
            self,
            client: SatXMPPEntity,
            message_elt: domish.Element,
            data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Check <message> for a shared file, and add it as an attachment

        @param client: client session, used to check if the message is encrypted
        @param message_elt: received <message/> element
        @param data: parsed message data. Found attachments are appended to the
            attachments list of its ``extra`` dict.
        @return: the message data (same object as ``data``)
        """
        # XXX: XEP-0447 doesn't support several attachments in a single message, for now
        #   however that should be fixed in future version, and so we accept several
        #   <file-sharing> element in a message.
        for file_sharing_elt in message_elt.elements(NS_SFS, "file-sharing"):
            # the current element must be parsed: with the <message/> element, the
            # first <file-sharing/> child would be returned at each iteration
            attachment = self.parse_file_sharing_elt(file_sharing_elt)

            if any(
                    s.get(C.MESS_KEY_ENCRYPTED, False)
                    for s in attachment["sources"]
            ) and client.encryption.isEncrypted(data):
                # we don't add the encrypted flag if the message itself is not encrypted,
                # because the decryption key is part of the link, so sending it over
                # unencrypted channel is like having no encryption at all.
                attachment[C.MESS_KEY_ENCRYPTED] = True

            attachments = data['extra'].setdefault(C.KEY_ATTACHMENTS, [])
            attachments.append(attachment)

        return data

    async def attach(self, client, data):
        """Send attachments of an outgoing message with <file-sharing/> elements

        At most one attachment is kept in the current message, and only if the
        message has no body. All other attachments are sent in separate messages.

        @param client: client session
        @param data: message data. Its ``extra``, ``message``, ``subject``, ``to``,
            ``type`` and ``xml`` keys are used, and it is modified in place.
        @raise exceptions.InternalError: more than one attachment is left for the
            current message
        @raise exceptions.CancelError: there is nothing left to send in the current
            message
        """
        def has_no_body() -> bool:
            body = data['message']
            return not body or body == {'': ''}

        # XXX: for now, XEP-0447 only allow to send one file per <message/>, thus we need
        #   to send each file in a separate message
        current = data["extra"][C.KEY_ATTACHMENTS]
        if has_no_body():
            # the first attachment stays in this message, the others are postponed
            postponed = current[1:]
            del current[1:]
        else:
            # we have a message, all attachments are sent separately
            postponed = list(current)
            current.clear()
            del data["extra"][C.KEY_ATTACHMENTS]

        if current:
            if len(current) > 1:
                raise exceptions.InternalError(
                    "There should not be more that one attachment at this point"
                )
            await self._attach.upload_files(client, data)
            self._hints.add_hint_elements(data["xml"], [self._hints.HINT_STORE])
            for attachment in current:
                # hash is only specified if both the algorithm and the value are known
                if "hash_algo" in attachment and "hash" in attachment:
                    file_hash = (attachment["hash_algo"], attachment["hash"])
                else:
                    file_hash = None
                sfs_elt = self.get_file_sharing_elt(
                    [{"url": attachment["url"]}],
                    name=attachment.get("name"),
                    size=attachment.get("size"),
                    desc=attachment.get("desc"),
                    media_type=attachment.get("media_type"),
                    file_hash=file_hash
                )
                data["xml"].addChild(sfs_elt)

        for attachment in postponed:
            # each remaining attachment is sent alone, in a message without body
            await client.sendMessage(
                to_jid=data['to'],
                message={'': ''},
                subject=data['subject'],
                mess_type=data['type'],
                extra={C.KEY_ATTACHMENTS: [attachment]},
            )

        if not data['extra'] and has_no_body() and not data['subject']:
            # nothing left to send, we can cancel the message
            raise exceptions.CancelError("Cancelled by XEP_0447 attachment handling")

    def _message_received_trigger(self, client, message_elt, post_treat):
        """Schedule the detection of shared files in a received message

        @param client: client session
        @param message_elt: received <message/> element
        @param post_treat: Deferred to which the attachments handling is added as a
            callback
        @return: always True
        """
        # we use a post_treat callback instead of "message_parse" trigger because we need
        # to check if the "encrypted" flag is set to decide if we add the same flag to the
        # attachment
        add_attachments = partial(
            self._add_file_sharing_attachments, client, message_elt
        )
        post_treat.addCallback(add_attachments)
        return True