view sat/plugins/plugin_xep_0231.py @ 3922:0ff265725489

plugin XEP-0447: handle attachment and download: - plugin XEP-0447 can now be used in message attachments and to retrieve an attachment - plugin attach: `attachment` being processed is added to `extra` so the handler can inspect it - plugin attach: `size` is added to attachment - plugin download: a whole attachment dict is now used in `download` and `file_download`/`file_download_complete`. `download_uri` can be used as a shortcut when just a URI is used. In addition to URI scheme handler, whole attachment handlers can now be registered with `register_download_handler` - plugin XEP-0363: `file_http_upload` `XEP-0363_upload_size` triggers have been renamed to `XEP-0363_upload_pre_slot` and is now using a dict with arguments, allowing for the size but also the filename to be modified, which is necessary for encryption (filename may be hidden from URL this way). - plugin XEP-0446: fix wrong element name - plugin XEP-0447: source handler can now be registered (`url-data` is registered by default) - plugin XEP-0447: source parsing has been put in a separated `parse_sources_elt` method, as it may be useful to do it independently (notably with XEP-0448) - plugin XEP-0447: parse received message and complete attachments when suitable - plugin XEP-0447: can now be used with message attachments - plugin XEP-0447: can now be used with attachments download - renamed `options` arguments to `extra` for consistency - some style change (progressive move from legacy camelCase to PEP8 snake_case) - some typing rel 379
author Goffi <goffi@goffi.org>
date Thu, 06 Oct 2022 16:02:05 +0200
parents be6d91572633
children 524856bd7b19
line wrap: on
line source

#!/usr/bin/env python3


# SAT plugin for Bit of Binary handling (XEP-0231)
# Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import base64
import time
from pathlib import Path
from functools import partial
from zope.interface import implementer
from twisted.python import failure
from twisted.words.protocols.jabber import xmlstream
from twisted.words.protocols.jabber import jid
from twisted.words.protocols.jabber import error as jabber_error
from twisted.internet import defer
from wokkel import disco, iwokkel
from sat.tools import xml_tools
from sat.core.i18n import _
from sat.core.constants import Const as C
from sat.core import exceptions
from sat.core.log import getLogger

log = getLogger(__name__)


PLUGIN_INFO = {
    C.PI_NAME: "Bits of Binary",
    C.PI_IMPORT_NAME: "XEP-0231",
    C.PI_TYPE: "XEP",
    C.PI_MODES: C.PLUG_MODE_BOTH,
    C.PI_PROTOCOLS: ["XEP-0231"],
    C.PI_MAIN: "XEP_0231",
    C.PI_HANDLER: "yes",
    C.PI_DESCRIPTION: _(
        """Implementation of bits of binary (used for small images/files)"""
    ),
}

NS_BOB = "urn:xmpp:bob"
IQ_BOB_REQUEST = C.IQ_GET + '/data[@xmlns="' + NS_BOB + '"]'


class XEP_0231(object):
    def __init__(self, host):
        log.info(_("plugin Bits of Binary initialization"))
        self.host = host
        host.registerNamespace("bob", NS_BOB)
        host.trigger.add("xhtml_post_treat", self.XHTMLTrigger)
        host.bridge.addMethod(
            "bobGetFile",
            ".plugin",
            in_sign="sss",
            out_sign="s",
            method=self._getFile,
            async_=True,
        )

    def dumpData(self, cache, data_elt, cid):
        """save file encoded in data_elt to cache

        @param cache(memory.cache.Cache): cache to use to store the data
        @param data_elt(domish.Element): <data> as in XEP-0231
        @param cid(unicode): content-id
        @return(unicode): full path to dumped file
        """
        #  FIXME: is it needed to use a separate thread?
        #        probably not with the little data expected with BoB
        try:
            max_age = int(data_elt["max-age"])
            if max_age < 0:
                raise ValueError
        except (KeyError, ValueError):
            log.warning("invalid max-age found")
            max_age = None

        with cache.cacheData(
            PLUGIN_INFO[C.PI_IMPORT_NAME], cid, data_elt.getAttribute("type"), max_age
        ) as f:

            file_path = Path(f.name)
            f.write(base64.b64decode(str(data_elt)))

        return file_path

    def getHandler(self, client):
        return XEP_0231_handler(self)

    def _requestCb(self, iq_elt, cache, cid):
        for data_elt in iq_elt.elements(NS_BOB, "data"):
            if data_elt.getAttribute("cid") == cid:
                file_path = self.dumpData(cache, data_elt, cid)
                return file_path

        log.warning(
            "invalid data stanza received, requested cid was not found:\n{iq_elt}\nrequested cid: {cid}".format(
                iq_elt=iq_elt, cid=cid
            )
        )
        raise failure.Failure(exceptions.DataError("missing data"))

    def _requestEb(self, failure_):
        """Log the error and continue errback chain"""
        log.warning("Can't get requested data:\n{reason}".format(reason=failure_))
        return failure_

    def requestData(self, client, to_jid, cid, cache=None):
        """Request data if we don't have it in cache

        @param to_jid(jid.JID): jid to request the data to
        @param cid(unicode): content id
        @param cache(memory.cache.Cache, None): cache to use
            client.cache will be used if None
        @return D(unicode): path to file with data
        """
        if cache is None:
            cache = client.cache
        iq_elt = client.IQ("get")
        iq_elt["to"] = to_jid.full()
        data_elt = iq_elt.addElement((NS_BOB, "data"))
        data_elt["cid"] = cid
        d = iq_elt.send()
        d.addCallback(self._requestCb, cache, cid)
        d.addErrback(self._requestEb)
        return d

    def _setImgEltSrc(self, path, img_elt):
        img_elt["src"] = "file://{}".format(path)

    def XHTMLTrigger(self, client, message_elt, body_elt, lang, treat_d):
        for img_elt in xml_tools.findAll(body_elt, C.NS_XHTML, "img"):
            source = img_elt.getAttribute("src", "")
            if source.startswith("cid:"):
                cid = source[4:]
                file_path = client.cache.getFilePath(cid)
                if file_path is not None:
                    #  image is in cache, we change the url
                    img_elt["src"] = "file://{}".format(file_path)
                    continue
                else:
                    # image is not in cache, is it given locally?
                    for data_elt in message_elt.elements(NS_BOB, "data"):
                        if data_elt.getAttribute("cid") == cid:
                            file_path = self.dumpData(client.cache, data_elt, cid)
                            img_elt["src"] = "file://{}".format(file_path)
                            break
                    else:
                        # cid not found locally, we need to request it
                        # so we use the deferred
                        d = self.requestData(client, jid.JID(message_elt["from"]), cid)
                        d.addCallback(partial(self._setImgEltSrc, img_elt=img_elt))
                        treat_d.addCallback(lambda __: d)

    def onComponentRequest(self, iq_elt, client):
        """cache data is retrieve from common cache for components"""
        # FIXME: this is a security/privacy issue as no access check is done
        #        but this is mitigated by the fact that the cid must be known.
        #        An access check should be implemented though.

        iq_elt.handled = True
        data_elt = next(iq_elt.elements(NS_BOB, "data"))
        try:
            cid = data_elt["cid"]
        except KeyError:
            error_elt = jabber_error.StanzaError("not-acceptable").toResponse(iq_elt)
            client.send(error_elt)
            return

        metadata = self.host.common_cache.getMetadata(cid)
        if metadata is None:
            error_elt = jabber_error.StanzaError("item-not-found").toResponse(iq_elt)
            client.send(error_elt)
            return

        with open(metadata["path"], 'rb') as f:
            data = f.read()

        result_elt = xmlstream.toResponse(iq_elt, "result")
        data_elt = result_elt.addElement(
            (NS_BOB, "data"), content=base64.b64encode(data).decode())
        data_elt["cid"] = cid
        data_elt["type"] = metadata["mime_type"]
        data_elt["max-age"] = str(int(max(0, metadata["eol"] - time.time())))
        client.send(result_elt)

    def _getFile(self, peer_jid_s, cid, profile):
        peer_jid = jid.JID(peer_jid_s)
        assert cid
        client = self.host.getClient(profile)
        d = self.getFile(client, peer_jid, cid)
        d.addCallback(lambda path: str(path))
        return d

    def getFile(self, client, peer_jid, cid, parent_elt=None):
        """Retrieve a file from it's content-id

        @param peer_jid(jid.JID): jid of the entity offering the data
        @param cid(unicode): content-id of file data
        @param parent_elt(domish.Element, None): if file is not in cache,
            data will be looked after in children of this elements.
            None to ignore
        @return D(Path): path to cached data
        """
        file_path = client.cache.getFilePath(cid)
        if file_path is not None:
            #  file is in cache
            return defer.succeed(file_path)
        else:
            # file not in cache, is it given locally?
            if parent_elt is not None:
                for data_elt in parent_elt.elements(NS_BOB, "data"):
                    if data_elt.getAttribute("cid") == cid:
                        return defer.succeed(self.dumpData(client.cache, data_elt, cid))

            # cid not found locally, we need to request it
            # so we use the deferred
            return self.requestData(client, peer_jid, cid)


@implementer(iwokkel.IDisco)
class XEP_0231_handler(xmlstream.XMPPHandler):

    def __init__(self, plugin_parent):
        self.plugin_parent = plugin_parent
        self.host = plugin_parent.host

    def connectionInitialized(self):
        if self.parent.is_component:
            self.xmlstream.addObserver(
                IQ_BOB_REQUEST, self.plugin_parent.onComponentRequest, client=self.parent
            )

    def getDiscoInfo(self, requestor, target, nodeIdentifier=""):
        return [disco.DiscoFeature(NS_BOB)]

    def getDiscoItems(self, requestor, target, nodeIdentifier=""):
        return []