diff sat/plugins/plugin_xep_0447.py @ 3922:0ff265725489

plugin XEP-0447: handle attachment and download: - plugin XEP-0447 can now be used in message attachments and to retrieve an attachment - plugin attach: `attachment` being processed is added to `extra` so the handler can inspect it - plugin attach: `size` is added to attachment - plugin download: a whole attachment dict is now used in `download` and `file_download`/`file_download_complete`. `download_uri` can be used as a shortcut when just a URI is used. In addition to URI scheme handlers, whole attachment handlers can now be registered with `register_download_handler` - plugin XEP-0363: `file_http_upload` `XEP-0363_upload_size` triggers have been renamed to `XEP-0363_upload_pre_slot` and are now using a dict with arguments, allowing for the size but also the filename to be modified, which is necessary for encryption (filename may be hidden from URL this way). - plugin XEP-0446: fix wrong element name - plugin XEP-0447: source handlers can now be registered (`url-data` is registered by default) - plugin XEP-0447: source parsing has been put in a separate `parse_sources_elt` method, as it may be useful to do it independently (notably with XEP-0448) - plugin XEP-0447: parse received message and complete attachments when suitable - plugin XEP-0447: can now be used with message attachments - plugin XEP-0447: can now be used with attachment download - renamed `options` arguments to `extra` for consistency - some style changes (progressive move from legacy camelCase to PEP8 snake_case) - some typing rel 379
author Goffi <goffi@goffi.org>
date Thu, 06 Oct 2022 16:02:05 +0200
parents 4b7106eede0c
children 78b5f356900c
line wrap: on
line diff
--- a/sat/plugins/plugin_xep_0447.py	Thu Oct 06 16:02:05 2022 +0200
+++ b/sat/plugins/plugin_xep_0447.py	Thu Oct 06 16:02:05 2022 +0200
@@ -15,14 +15,23 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-from typing import Optional, Dict, List, Tuple, Union, Any
+from collections import namedtuple
+from functools import partial
+import mimetypes
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
+import treq
+from twisted.internet import defer
 from twisted.words.xish import domish
 
+from sat.core import exceptions
 from sat.core.constants import Const as C
+from sat.core.core_types import SatXMPPEntity
 from sat.core.i18n import _
 from sat.core.log import getLogger
-from sat.core import exceptions
+from sat.tools import stream
+from sat.tools.web import treq_client_no_ssl
 
 log = getLogger(__name__)
 
@@ -33,23 +42,130 @@
     C.PI_TYPE: "XEP",
     C.PI_MODES: C.PLUG_MODE_BOTH,
     C.PI_PROTOCOLS: ["XEP-0447"],
-    C.PI_DEPENDENCIES: ["XEP-0103", "XEP-0446"],
+    C.PI_DEPENDENCIES: ["XEP-0103", "XEP-0334", "XEP-0446", "ATTACH", "DOWNLOAD"],
+    C.PI_RECOMMENDATIONS: ["XEP-0363"],
     C.PI_MAIN: "XEP_0447",
     C.PI_HANDLER: "no",
     C.PI_DESCRIPTION: _("""Implementation of XEP-0447 (Stateless File Sharing)"""),
 }
 
 NS_SFS = "urn:xmpp:sfs:0"
+SourceHandler = namedtuple("SourceHandler", ["callback", "encrypted"])
 
 
 class XEP_0447:
     namespace = NS_SFS
 
     def __init__(self, host):
+        self.host = host
         log.info(_("XEP-0447 (Stateless File Sharing) plugin initialization"))
         host.registerNamespace("sfs", NS_SFS)
+        self._sources_handlers = {}  # (namespace, element name) -> SourceHandler
         self._u = host.plugins["XEP-0103"]
+        self._hints = host.plugins["XEP-0334"]
         self._m = host.plugins["XEP-0446"]
+        self._http_upload = host.plugins.get("XEP-0363")  # None if not in plugins
+        self._attach = host.plugins["ATTACH"]
+        self._attach.register(
+            self.can_handle_attachment, self.attach, priority=1000
+        )
+        self.register_source_handler(
+            self._u.namespace, "url-data", self._u.parse_url_data_elt
+        )
+        host.plugins["DOWNLOAD"].register_download_handler(self._u.namespace, self.download)
+        host.trigger.add("messageReceived", self._message_received_trigger)
+
+    def register_source_handler(
+        self, namespace: str, element_name: str,
+        callback: Callable[[domish.Element], Dict[str, Any]],
+        encrypted: bool = False
+    ) -> None:
+        """Register a handler parsing one kind of file source element
+
+        @param namespace: namespace of the supported element
+        @param element_name: name of the supported element
+        @param callback: called with the matching element, must return the parsed data
+        @param encrypted: if True, the source is encrypted (the transmitting channel
+            should then be end2end encrypted to avoid leaking decrypting data to servers)
+        @raise exceptions.ConflictError: a handler is already registered for this element
+        """
+        key = (namespace, element_name)
+        if key in self._sources_handlers:
+            raise exceptions.ConflictError(
+                f"There is already a resource handler for namespace {namespace!r} and "
+                f"name {element_name!r}"
+            )
+        self._sources_handlers[key] = SourceHandler(callback, encrypted)
+
+    async def download(
+        self,
+        client: SatXMPPEntity,
+        attachment: Dict[str, Any],
+        source: Dict[str, Any],
+        dest_path: Union[Path, str],
+        extra: Optional[Dict[str, Any]] = None
+    ) -> Tuple[str, defer.Deferred]:
+        """Download the file described by a source having an ``url`` key
+
+        @param attachment: attachment data, its ``size`` is used when available
+        @param source: parsed source data, must have an ``url`` key
+        @param dest_path: path where the file is written
+        @param extra: options, ``ignore_tls_errors`` disables TLS certificate check
+        @return: uid of the destination file object, and Deferred of the transfer
+        @raise ValueError: ``source`` has no ``url``
+        """
+        # TODO: handle url-data headers
+        if extra is None:
+            extra = {}
+        try:
+            download_url = source["url"]
+        except KeyError:
+            raise ValueError(f"{source} has missing URL")
+        if extra.get('ignore_tls_errors', False):
+            log.warning("TLS certificate check disabled, this is highly insecure")
+            treq_client = treq_client_no_ssl
+        else:
+            treq_client = treq
+
+        try:
+            file_size = int(attachment["size"])
+        except (KeyError, TypeError, ValueError):
+            # no usable size in attachment (missing, None, ...): use a HEAD request
+            head_data = await treq_client.head(download_url)
+            file_size = int(head_data.headers.getRawHeaders('content-length')[0])
+
+        file_obj = stream.SatFile(
+            self.host, client, dest_path, mode="wb", size=file_size
+        )
+        resp = await treq_client.get(download_url, unbuffered=True)
+        if resp.code == 200:
+            d = treq.collect(resp, file_obj.write)
+            d.addCallback(lambda __: file_obj.close())
+        else:
+            d = defer.Deferred()
+            self.host.plugins["DOWNLOAD"].errback_download(file_obj, d, resp)
+        return file_obj.uid, d
+
+    async def can_handle_attachment(self, client, data):
+        """Tell if attachments can be handled, i.e. if an HTTP upload entity is found"""
+        if self._http_upload is None:
+            return False
+        try:
+            await self._http_upload.getHTTPUploadEntity(client)
+        except exceptions.NotFound:
+            return False
+        return True
+
+    def get_sources_elt(
+        self,
+        children: Optional[List[domish.Element]] = None
+    ) -> domish.Element:
+        """Generate <sources> element, with ``children`` added to it if any"""
+        sources_elt = domish.Element((NS_SFS, "sources"))
+        # ``children`` may be None or empty: nothing is added then
+        for child in children or ():
+            sources_elt.addChild(child)
+        return sources_elt
 
     def get_file_sharing_elt(
         self,
@@ -75,6 +191,8 @@
         file_sharing_elt = domish.Element((NS_SFS, "file-sharing"))
         if disposition is not None:
             file_sharing_elt["disposition"] = disposition
+        if media_type is None and name:
+            media_type = mimetypes.guess_type(name, strict=False)[0]
         file_sharing_elt.addChild(
             self._m.get_file_metadata_elt(
                 name=name,
@@ -89,7 +207,8 @@
                 thumbnail=thumbnail,
             )
         )
-        sources_elt = file_sharing_elt.addElement("sources")
+        sources_elt = self.get_sources_elt()
+        file_sharing_elt.addChild(sources_elt)
         for source_data in sources:
             if "url" in source_data:
                 sources_elt.addChild(
@@ -102,6 +221,42 @@
 
         return file_sharing_elt
 
+    def parse_sources_elt(
+        self,
+        sources_elt: domish.Element
+    ) -> List[Dict[str, Any]]:
+        """Parse <sources/> element
+
+        @param sources_elt: <sources/> element, or a direct parent element
+        @return: data of each child element managed by a registered source handler
+        @raise exceptions.NotFound: can't find <sources/> element
+        """
+        if sources_elt.name != "sources" or sources_elt.uri != NS_SFS:
+            try:
+                sources_elt = next(sources_elt.elements(NS_SFS, "sources"))
+            except StopIteration:
+                raise exceptions.NotFound(
+                    f"<sources/> element is missing: {sources_elt.toXml()}")
+        sources = []
+        for elt in sources_elt.elements():
+            if not elt.uri:
+                log.warning(f"ignoring source element {elt.toXml()}")
+                continue
+            key = (elt.uri, elt.name)
+            try:
+                source_handler = self._sources_handlers[key]
+            except KeyError:
+                log.warning(f"unmanaged file sharing element: {elt.toXml()}")
+                continue
+            source_data = source_handler.callback(elt)
+            if source_handler.encrypted:
+                source_data[C.MESS_KEY_ENCRYPTED] = True
+            if "type" not in source_data:
+                # the handler didn't set a type: default to the element namespace
+                source_data["type"] = elt.uri
+            sources.append(source_data)
+        return sources
+
     def parse_file_sharing_elt(
         self,
         file_sharing_elt: domish.Element
@@ -126,17 +281,95 @@
         disposition = file_sharing_elt.getAttribute("disposition")
         if disposition is not None:
             data["disposition"] = disposition
-        sources = data["sources"] = []
         try:
-            sources_elt = next(file_sharing_elt.elements(NS_SFS, "sources"))
-        except StopIteration:
-            raise ValueError(f"<sources/> element is missing: {file_sharing_elt.toXml()}")
-        for elt in sources_elt.elements():
-            if elt.name == "url-data" and elt.uri == self._u.namespace:
-                source_data = self._u.parse_url_data_elt(elt)
-            else:
-                log.warning(f"unmanaged file sharing element: {elt.toXml}")
-                continue
-            sources.append(source_data)
+            data["sources"] = self.parse_sources_elt(file_sharing_elt)
+        except exceptions.NotFound as e:
+            raise ValueError(str(e))
+
+        return data
+
+    def _add_file_sharing_attachments(
+            self, client: SatXMPPEntity, message_elt: domish.Element,
+            data: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Check <message> for a shared file, and add it as an attachment"""
+        # XXX: XEP-0447 doesn't support several attachments in a single message, thus only
+        #   one attachment can be added
+        try:
+            attachment = self.parse_file_sharing_elt(message_elt)
+        except exceptions.NotFound:
+            return data
+        except ValueError as e:
+            # malformed data from a remote entity must not break message reception
+            log.warning(f"invalid <file-sharing/> element ignored: {e}")
+            return data
+
+        if any(
+                s.get(C.MESS_KEY_ENCRYPTED, False)
+                for s in attachment["sources"]
+        ) and client.encryption.isEncrypted(data):
+            # flag is only set for an encrypted message: the decryption key is part of
+            # the link, sending it over an unencrypted channel would void encryption
+            attachment[C.MESS_KEY_ENCRYPTED] = True
+
+        data['extra'].setdefault(C.MESS_KEY_ATTACHMENTS, []).append(attachment)
 
         return data
+
+    async def attach(self, client, data):
+        """Share attachments with XEP-0447, one <file-sharing/> element per message"""
+        # XXX: for now XEP-0447 allows one file per <message/>, others are sent separately
+        attachments = data["extra"][C.MESS_KEY_ATTACHMENTS]
+        if not data['message'] or data['message'] == {'': ''}:
+            extra_attachments = attachments[1:]
+            del attachments[1:]
+        else:
+            # there is a message body: every attachment is sent in its own message
+            extra_attachments = attachments[:]
+            attachments.clear()
+            del data["extra"][C.MESS_KEY_ATTACHMENTS]
+
+        if attachments:
+            if len(attachments) > 1:
+                raise exceptions.InternalError(
+                    "There should not be more that one attachment at this point"
+                )
+            await self._attach.upload_files(client, data)
+            self._hints.addHintElements(data["xml"], [self._hints.HINT_STORE])
+            for attachment in attachments:
+                try:
+                    file_hash = (attachment["hash_algo"], attachment["hash"])
+                except KeyError:
+                    file_hash = None
+                file_sharing_elt = self.get_file_sharing_elt(
+                    [{"url": attachment["url"]}],
+                    name=attachment["name"],
+                    size=attachment["size"],
+                    file_hash=file_hash
+                )
+                data["xml"].addChild(file_sharing_elt)
+
+        for attachment in extra_attachments:
+            # each remaining attachment is sent in its own, otherwise empty, message
+            await client.sendMessage(
+                to_jid=data['to'],
+                message={'': ''},
+                subject=data['subject'],
+                mess_type=data['type'],
+                extra={C.MESS_KEY_ATTACHMENTS: [attachment]},
+            )
+
+        if ((not data['extra']
+             and (not data['message'] or data['message'] == {'': ''})
+             and not data['subject'])):
+            # nothing left to send, we can cancel the message
+            raise exceptions.CancelError("Cancelled by XEP_0447 attachment handling")
+
+    def _message_received_trigger(self, client, message_elt, post_treat):
+        # a post_treat callback is used instead of the "message_parse" trigger because
+        # we need to check if the "encrypted" flag is set on the message data to decide
+        # if the same flag is added to the attachment
+        post_treat.addCallback(
+            partial(self._add_file_sharing_attachments, client, message_elt)
+        )
+        return True