view sat/plugins/plugin_xep_0231.py @ 3728:b15644cae50d

component AP gateway: JID/node ⟺ AP outbox conversion: - convert a combination of JID and optional pubsub node to AP actor handle (see `getJIDAndNode` for details) and vice versa - the gateway now provides a Pubsub service - retrieve pubsub node and convert it to AP collection, AP pagination is converted to RSM - do the opposite: convert AP collection to pubsub and handle RSM request. Due to ActivityStream collection pagination limitations, some RSM request produce inefficient requests, but caching should be used most of the time in the future and avoid the problem. - set specific name to HTTP Server - new `local_only` setting (`True` by default) to indicate if the gateway can request or not XMPP Pubsub nodes from other servers - disco info now specifies important features such as Pubsub RSM, and nodes metadata ticket 363
author Goffi <goffi@goffi.org>
date Tue, 25 Jan 2022 17:54:06 +0100
parents be6d91572633
children 524856bd7b19
line wrap: on
line source

#!/usr/bin/env python3


# SAT plugin for Bit of Binary handling (XEP-0231)
# Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import base64
import time
from pathlib import Path
from functools import partial
from zope.interface import implementer
from twisted.python import failure
from twisted.words.protocols.jabber import xmlstream
from twisted.words.protocols.jabber import jid
from twisted.words.protocols.jabber import error as jabber_error
from twisted.internet import defer
from wokkel import disco, iwokkel
from sat.tools import xml_tools
from sat.core.i18n import _
from sat.core.constants import Const as C
from sat.core import exceptions
from sat.core.log import getLogger

log = getLogger(__name__)


PLUGIN_INFO = {
    C.PI_NAME: "Bits of Binary",
    C.PI_IMPORT_NAME: "XEP-0231",
    C.PI_TYPE: "XEP",
    C.PI_MODES: C.PLUG_MODE_BOTH,
    C.PI_PROTOCOLS: ["XEP-0231"],
    C.PI_MAIN: "XEP_0231",
    C.PI_HANDLER: "yes",
    C.PI_DESCRIPTION: _(
        """Implementation of bits of binary (used for small images/files)"""
    ),
}

NS_BOB = "urn:xmpp:bob"
IQ_BOB_REQUEST = C.IQ_GET + '/data[@xmlns="' + NS_BOB + '"]'


class XEP_0231(object):
    def __init__(self, host):
        log.info(_("plugin Bits of Binary initialization"))
        self.host = host
        host.registerNamespace("bob", NS_BOB)
        host.trigger.add("xhtml_post_treat", self.XHTMLTrigger)
        host.bridge.addMethod(
            "bobGetFile",
            ".plugin",
            in_sign="sss",
            out_sign="s",
            method=self._getFile,
            async_=True,
        )

    def dumpData(self, cache, data_elt, cid):
        """save file encoded in data_elt to cache

        @param cache(memory.cache.Cache): cache to use to store the data
        @param data_elt(domish.Element): <data> as in XEP-0231
        @param cid(unicode): content-id
        @return(unicode): full path to dumped file
        """
        #  FIXME: is it needed to use a separate thread?
        #        probably not with the little data expected with BoB
        try:
            max_age = int(data_elt["max-age"])
            if max_age < 0:
                raise ValueError
        except (KeyError, ValueError):
            log.warning("invalid max-age found")
            max_age = None

        with cache.cacheData(
            PLUGIN_INFO[C.PI_IMPORT_NAME], cid, data_elt.getAttribute("type"), max_age
        ) as f:

            file_path = Path(f.name)
            f.write(base64.b64decode(str(data_elt)))

        return file_path

    def getHandler(self, client):
        return XEP_0231_handler(self)

    def _requestCb(self, iq_elt, cache, cid):
        for data_elt in iq_elt.elements(NS_BOB, "data"):
            if data_elt.getAttribute("cid") == cid:
                file_path = self.dumpData(cache, data_elt, cid)
                return file_path

        log.warning(
            "invalid data stanza received, requested cid was not found:\n{iq_elt}\nrequested cid: {cid}".format(
                iq_elt=iq_elt, cid=cid
            )
        )
        raise failure.Failure(exceptions.DataError("missing data"))

    def _requestEb(self, failure_):
        """Log the error and continue errback chain"""
        log.warning("Can't get requested data:\n{reason}".format(reason=failure_))
        return failure_

    def requestData(self, client, to_jid, cid, cache=None):
        """Request data if we don't have it in cache

        @param to_jid(jid.JID): jid to request the data to
        @param cid(unicode): content id
        @param cache(memory.cache.Cache, None): cache to use
            client.cache will be used if None
        @return D(unicode): path to file with data
        """
        if cache is None:
            cache = client.cache
        iq_elt = client.IQ("get")
        iq_elt["to"] = to_jid.full()
        data_elt = iq_elt.addElement((NS_BOB, "data"))
        data_elt["cid"] = cid
        d = iq_elt.send()
        d.addCallback(self._requestCb, cache, cid)
        d.addErrback(self._requestEb)
        return d

    def _setImgEltSrc(self, path, img_elt):
        img_elt["src"] = "file://{}".format(path)

    def XHTMLTrigger(self, client, message_elt, body_elt, lang, treat_d):
        for img_elt in xml_tools.findAll(body_elt, C.NS_XHTML, "img"):
            source = img_elt.getAttribute("src", "")
            if source.startswith("cid:"):
                cid = source[4:]
                file_path = client.cache.getFilePath(cid)
                if file_path is not None:
                    #  image is in cache, we change the url
                    img_elt["src"] = "file://{}".format(file_path)
                    continue
                else:
                    # image is not in cache, is it given locally?
                    for data_elt in message_elt.elements(NS_BOB, "data"):
                        if data_elt.getAttribute("cid") == cid:
                            file_path = self.dumpData(client.cache, data_elt, cid)
                            img_elt["src"] = "file://{}".format(file_path)
                            break
                    else:
                        # cid not found locally, we need to request it
                        # so we use the deferred
                        d = self.requestData(client, jid.JID(message_elt["from"]), cid)
                        d.addCallback(partial(self._setImgEltSrc, img_elt=img_elt))
                        treat_d.addCallback(lambda __: d)

    def onComponentRequest(self, iq_elt, client):
        """cache data is retrieve from common cache for components"""
        # FIXME: this is a security/privacy issue as no access check is done
        #        but this is mitigated by the fact that the cid must be known.
        #        An access check should be implemented though.

        iq_elt.handled = True
        data_elt = next(iq_elt.elements(NS_BOB, "data"))
        try:
            cid = data_elt["cid"]
        except KeyError:
            error_elt = jabber_error.StanzaError("not-acceptable").toResponse(iq_elt)
            client.send(error_elt)
            return

        metadata = self.host.common_cache.getMetadata(cid)
        if metadata is None:
            error_elt = jabber_error.StanzaError("item-not-found").toResponse(iq_elt)
            client.send(error_elt)
            return

        with open(metadata["path"], 'rb') as f:
            data = f.read()

        result_elt = xmlstream.toResponse(iq_elt, "result")
        data_elt = result_elt.addElement(
            (NS_BOB, "data"), content=base64.b64encode(data).decode())
        data_elt["cid"] = cid
        data_elt["type"] = metadata["mime_type"]
        data_elt["max-age"] = str(int(max(0, metadata["eol"] - time.time())))
        client.send(result_elt)

    def _getFile(self, peer_jid_s, cid, profile):
        peer_jid = jid.JID(peer_jid_s)
        assert cid
        client = self.host.getClient(profile)
        d = self.getFile(client, peer_jid, cid)
        d.addCallback(lambda path: str(path))
        return d

    def getFile(self, client, peer_jid, cid, parent_elt=None):
        """Retrieve a file from it's content-id

        @param peer_jid(jid.JID): jid of the entity offering the data
        @param cid(unicode): content-id of file data
        @param parent_elt(domish.Element, None): if file is not in cache,
            data will be looked after in children of this elements.
            None to ignore
        @return D(Path): path to cached data
        """
        file_path = client.cache.getFilePath(cid)
        if file_path is not None:
            #  file is in cache
            return defer.succeed(file_path)
        else:
            # file not in cache, is it given locally?
            if parent_elt is not None:
                for data_elt in parent_elt.elements(NS_BOB, "data"):
                    if data_elt.getAttribute("cid") == cid:
                        return defer.succeed(self.dumpData(client.cache, data_elt, cid))

            # cid not found locally, we need to request it
            # so we use the deferred
            return self.requestData(client, peer_jid, cid)


@implementer(iwokkel.IDisco)
class XEP_0231_handler(xmlstream.XMPPHandler):

    def __init__(self, plugin_parent):
        self.plugin_parent = plugin_parent
        self.host = plugin_parent.host

    def connectionInitialized(self):
        if self.parent.is_component:
            self.xmlstream.addObserver(
                IQ_BOB_REQUEST, self.plugin_parent.onComponentRequest, client=self.parent
            )

    def getDiscoInfo(self, requestor, target, nodeIdentifier=""):
        return [disco.DiscoFeature(NS_BOB)]

    def getDiscoItems(self, requestor, target, nodeIdentifier=""):
        return []