view src/plugins/plugin_xep_0060.py @ 1569:44854fb5d3b2

plugin XEP-0065: fixed CHUNK_SIZE to 4096 to avoid wild disconnection by some proxies (Prosody's proxy65 disconnect if it receive bigger chunks)
author Goffi <goffi@goffi.org>
date Sun, 08 Nov 2015 14:44:33 +0100
parents e0bde0d0b321
children c2d96f74105e
line wrap: on
line source

#!/usr/bin/python
# -*- coding: utf-8 -*-

# SAT plugin for Publish-Subscribe (xep-0060)
# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from sat.core.i18n import _
from sat.core.constants import Const as C
from sat.core.log import getLogger
log = getLogger(__name__)
from sat.core import exceptions

from sat.tools import sat_defer

from twisted.words.protocols.jabber import jid
from twisted.internet import defer
from wokkel import disco
# XXX: tmp.pubsub is actually use instead of wokkel version
#      same thing for rsm
from wokkel import pubsub
from wokkel import rsm
from zope.interface import implements
from collections import namedtuple
import uuid

UNSPECIFIED = "unspecified error"


PLUGIN_INFO = {
    "name": "Publish-Subscribe",
    "import_name": "XEP-0060",
    "type": "XEP",
    "protocols": ["XEP-0060"],
    "dependencies": [],
    "recommendations": ["XEP-0059"],
    "main": "XEP_0060",
    "handler": "yes",
    "description": _("""Implementation of PubSub Protocol""")
}


Extra = namedtuple('Extra', ('rsm_request', 'extra'))
# rsm_request is the rsm.RSMRequest build with rsm_ prefixed keys, or None
# extra is a potentially empty dict


class XEP_0060(object):
    OPT_ACCESS_MODEL = 'pubsub#access_model'
    OPT_PERSIST_ITEMS = 'pubsub#persist_items'
    OPT_MAX_ITEMS = 'pubsub#max_items'
    OPT_DELIVER_PAYLOADS = 'pubsub#deliver_payloads'
    OPT_SEND_ITEM_SUBSCRIBE = 'pubsub#send_item_subscribe'
    OPT_NODE_TYPE = 'pubsub#node_type'
    OPT_SUBSCRIPTION_TYPE = 'pubsub#subscription_type'
    OPT_SUBSCRIPTION_DEPTH = 'pubsub#subscription_depth'
    OPT_ROSTER_GROUPS_ALLOWED = 'pubsub#roster_groups_allowed'
    OPT_PUBLISH_MODEL = 'pubsub#publish_model'
    ACCESS_OPEN = 'open'
    ACCESS_PRESENCE = 'presence'
    ACCESS_ROSTER = 'roster'
    ACCESS_AUTHORIZE = 'authorize'
    ACCESS_WHITELIST = 'whitelist'

    def __init__(self, host):
        log.info(_(u"PubSub plugin initialization"))
        self.host = host
        self._node_cb = {} # dictionnary of callbacks for node (key: node, value: list of callbacks)
        self.rt_sessions = sat_defer.RTDeferredSessions()
        host.bridge.addMethod("psDeleteNode", ".plugin", in_sign='sss', out_sign='', method=self._deleteNode, async=True)
        host.bridge.addMethod("psRetractItem", ".plugin", in_sign='sssbs', out_sign='', method=self._retractItem, async=True)
        host.bridge.addMethod("psRetractItems", ".plugin", in_sign='ssasbs', out_sign='', method=self._retractItems, async=True)
        host.bridge.addMethod("psSubscribeToMany", ".plugin", in_sign='a(ss)sa{ss}s', out_sign='s', method=self._subscribeToMany)
        host.bridge.addMethod("psGetSubscribeRTResult", ".plugin", in_sign='ss', out_sign='(ua(sss))', method=self._manySubscribeRTResult, async=True)
        host.bridge.addMethod("psGetFromMany", ".plugin", in_sign='a(ss)ia{ss}s', out_sign='s', method=self._getFromMany)
        host.bridge.addMethod("psGetFromManyRTResult", ".plugin", in_sign='ss', out_sign='(ua(sssasa{ss}))', method=self._getFromManyRTResult, async=True)
        host.bridge.addSignal("psEvent", ".plugin", signature='ssssa{ss}s')  # args: category, service(jid), node, type (C.PS_ITEMS, C.PS_DELETE), data, profile

    def getHandler(self, profile):
        client = self.host.getClient(profile)
        client.pubsub_client = SatPubSubClient(self.host, self)
        return client.pubsub_client

    @defer.inlineCallbacks
    def profileConnected(self, profile):
        client = self.host.getClient(profile)
        pubsub_services = yield self.host.findServiceEntities("pubsub", "service", profile=profile)
        if pubsub_services:
            # we use one of the found services as our default pubsub service
            client.pubsub_service = pubsub_services.pop()
        else:
            client.pubsub_service = None

    def getFeatures(self, profile):
        try:
            client = self.host.getClient(profile)
        except exceptions.ProfileNotSetError:
            return {}
        try:
            return {'service': client.pubsub_service.full() if client.pubsub_service is not None else  ''}
        except AttributeError:
            if self.host.isConnected(profile):
                log.debug("Profile is not connected, service is not checked yet")
            else:
                log.error("Service should be available !")
            return {}

    def parseExtra(self, extra):
        """Parse extra dictionnary

        used bridge's extra dictionnaries
        @param extra(dict): extra data used to configure request
        @return(Extra): filled Extra instance
        """
        if extra is not None:
            rsm_dict = { key[4:]: value for key, value in extra.iteritems() if key.startswith('rsm_') }
            if rsm_dict:
                try:
                    rsm_dict['max_'] = rsm_dict.pop('max')
                except KeyError:
                    pass
                rsm_request = rsm.RSMRequest(**rsm_dict)
            else:
                rsm_request = None
        else:
            rsm_request = None
            extra = {}
        return Extra(rsm_request, extra)

    def addManagedNode(self, node, **kwargs):
        """Add a handler for a node

        @param node(unicode): node to monitor, or None to monitor all
        @param **kwargs: method(s) to call when the node is found
            the methode must be named after PubSub constants in lower case
            and suffixed with "_cb"
            e.g.: "publish_cb" for C.PS_PUBLISH, "delete_cb" for C.PS_DELETE
        """
        assert kwargs
        callbacks = self._node_cb.setdefault(node, {})
        for event, cb in kwargs.iteritems():
            event_name = event[:-3]
            assert event_name in C.PS_EVENTS
            callbacks.setdefault(event_name,[]).append(cb)

    def removeManagedNode(self, node, *args):
        """Add a handler for a node

        @param node(unicode): node to monitor
        @param *args: callback(s) to remove
        """
        assert args
        try:
            registred_cb = self._node_cb[node]
        except KeyError:
            pass
        else:
            for callback in args:
                for event, cb_list in registred_cb.iteritems():
                    try:
                        cb_list.remove(callback)
                    except ValueError:
                        pass
                    else:
                        log.debug(u"removed callback {cb} for event {event} on node {node}".format(
                            cb=callback, event=event, node=node))
                        if not cb_list:
                            del registred_cb[event]
                        if not registred_cb:
                            del self._node_cb[node]
                        return
        log.error(u"Trying to remove inexistant callback {cb} for node {node}".format(cb=callback, node=node))

    # def listNodes(self, service, nodeIdentifier='', profile=C.PROF_KEY_NONE):
    #     """Retrieve the name of the nodes that are accessible on the target service.

    #     @param service (JID): target service
    #     @param nodeIdentifier (str): the parent node name (leave empty to retrieve first-level nodes)
    #     @param profile (str): %(doc_profile)s
    #     @return: deferred which fire a list of nodes
    #     """
    #     d = self.host.getDiscoItems(service, nodeIdentifier, profile_key=profile)
    #     d.addCallback(lambda result: [item.getAttribute('node') for item in result.toElement().children if item.hasAttribute('node')])
    #     return d

    # def listSubscribedNodes(self, service, nodeIdentifier='', filter_='subscribed', profile=C.PROF_KEY_NONE):
    #     """Retrieve the name of the nodes to which the profile is subscribed on the target service.

    #     @param service (JID): target service
    #     @param nodeIdentifier (str): the parent node name (leave empty to retrieve all subscriptions)
    #     @param filter_ (str): filter the result according to the given subscription type:
    #         - None: do not filter
    #         - 'pending': subscription has not been approved yet by the node owner
    #         - 'unconfigured': subscription options have not been configured yet
    #         - 'subscribed': subscription is complete
    #     @param profile (str): %(doc_profile)s
    #     @return: Deferred list[str]
    #     """
    #     d = self.subscriptions(service, nodeIdentifier, profile_key=profile)
    #     d.addCallback(lambda subs: [sub.getAttribute('node') for sub in subs if sub.getAttribute('subscription') == filter_])
    #     return d

    def publish(self, service, nodeIdentifier, items=None, profile_key=C.PROF_KEY_NONE):
        client = self.host.getClient(profile_key)
        return client.pubsub_client.publish(service, nodeIdentifier, items, client.pubsub_client.parent.jid)

    def getItems(self, service, node, max_items=None, item_ids=None, sub_id=None, rsm_request=None, extra=None, profile_key=C.PROF_KEY_NONE):
        """Retrieve pubsub items from a node.

        @param service (JID): pubsub service.
        @param node (str): node id.
        @param max_items (int): optional limit on the number of retrieved items.
        @param item_ids (list[str]): identifiers of the items to be retrieved (can't be used with rsm_request).
        @param sub_id (str): optional subscription identifier.
        @param rsm_request (rsm.RSMRequest): RSM request data
        @param profile_key (unicode): %(doc_profile_key)s
        @return: a deferred couple (list[dict], dict) containing:
            - list of items
            - metadata with the following keys:
                - rsm_first, rsm_last, rsm_count, rsm_index: first, last, count and index value of RSMResponse
        """
        if rsm_request and item_ids:
            raise ValueError("items_id can't be used with rsm")
        if extra is None:
            extra = {}
        client = self.host.getClient(profile_key)
        ext_data = {'id': unicode(uuid.uuid4()), 'rsm': rsm_request} if rsm_request is not None else None
        d = client.pubsub_client.items(service, node, max_items, item_ids, sub_id, client.pubsub_client.parent.jid, ext_data)

        try:
            subscribe = C.bool(extra['subscribe'])
        except KeyError:
            subscribe = False

        def doSubscribe(items):
            self.subscribe(service, node, profile_key=profile_key)
            return items

        if subscribe:
            d.addCallback(doSubscribe)

        def addMetadata(items):
            metadata = {}
            if rsm_request is not None:
                rsm_data = client.pubsub_client.getRSMResponse(ext_data['id'])
                metadata.update({'rsm_{}'.format(key): value for key, value in rsm_data.iteritems()})
            return (items, metadata)

        d.addCallback(addMetadata)
        return d

    # @defer.inlineCallbacks
    # def getItemsFromMany(self, service, data, max_items=None, sub_id=None, rsm=None, profile_key=C.PROF_KEY_NONE):
    #     """Massively retrieve pubsub items from many nodes.

    #     @param service (JID): target service.
    #     @param data (dict): dictionnary binding some arbitrary keys to the node identifiers.
    #     @param max_items (int): optional limit on the number of retrieved items *per node*.
    #     @param sub_id (str): optional subscription identifier.
    #     @param rsm (dict): RSM request data
    #     @param profile_key (str): %(doc_profile_key)s
    #     @return: a deferred dict with:
    #         - key: a value in (a subset of) data.keys()
    #         - couple (list[dict], dict) containing:
    #             - list of items
    #             - RSM response data
    #     """
    #     client = self.host.getClient(profile_key)
    #     found_nodes = yield self.listNodes(service, profile=client.profile)
    #     d_dict = {}
    #     for publisher, node in data.items():
    #         if node not in found_nodes:
    #             log.debug(u"Skip the items retrieval for [{node}]: node doesn't exist".format(node=node))
    #             continue  # avoid pubsub "item-not-found" error
    #         d_dict[publisher] = self.getItems(service, node, max_items, None, sub_id, rsm, client.profile)
    #     defer.returnValue(d_dict)

    def getOptions(self, service, nodeIdentifier, subscriber, subscriptionIdentifier=None, profile_key=C.PROF_KEY_NONE):
        client = self.host.getClient(profile_key)
        return client.pubsub_client.getOptions(service, nodeIdentifier, subscriber, subscriptionIdentifier)

    def setOptions(self, service, nodeIdentifier, subscriber, options, subscriptionIdentifier=None, profile_key=C.PROF_KEY_NONE):
        client = self.host.getClient(profile_key)
        return client.pubsub_client.setOptions(service, nodeIdentifier, subscriber, options, subscriptionIdentifier)

    def createNode(self, service, nodeIdentifier, options, profile_key=C.PROF_KEY_NONE):
        client = self.host.getClient(profile_key)
        return client.pubsub_client.createNode(service, nodeIdentifier, options)

    def _deleteNode(self, service_s, nodeIdentifier, profile_key):
        return self.deleteNode(jid.JID(service_s) if service_s else None, nodeIdentifier, profile_key)

    def deleteNode(self, service, nodeIdentifier, profile_key=C.PROF_KEY_NONE):
        client = self.host.getClient(profile_key)
        return client.pubsub_client.deleteNode(service, nodeIdentifier)

    def _retractItem(self, service_s, nodeIdentifier, itemIdentifier, notify, profile_key):
        return self._retractItems(service_s, nodeIdentifier, (itemIdentifier,), notify, profile_key)

    def _retractItems(self, service_s, nodeIdentifier, itemIdentifiers, notify, profile_key):
        return self.retractItems(jid.JID(service_s) if service_s else None, nodeIdentifier, itemIdentifiers, notify, profile_key)

    def retractItems(self, service, nodeIdentifier, itemIdentifiers, notify=True, profile_key=C.PROF_KEY_NONE):
        client = self.host.getClient(profile_key)
        return client.pubsub_client.retractItems(service, nodeIdentifier, itemIdentifiers, notify=True)

    def subscribe(self, service, nodeIdentifier, sub_jid=None, options=None, profile_key=C.PROF_KEY_NONE):
        # TODO: reimplement a subscribtion cache, checking that we have not subscription before trying to subscribe
        client = self.host.getClient(profile_key)
        return client.pubsub_client.subscribe(service, nodeIdentifier, sub_jid or client.pubsub_client.parent.jid.userhostJID(), options=options)

    def subscriptions(self, service, nodeIdentifier='', profile_key=C.PROF_KEY_NONE):
        client = self.host.getClient(profile_key)
        return client.pubsub_client.subscriptions(service, nodeIdentifier)

    ## methods to manage several stanzas/jids at once ##

    # generic #

    def getRTResults(self, session_id, on_success=None, on_error=None, profile=C.PROF_KEY_NONE):
        return self.rt_sessions.getResults(session_id, on_success, on_error, profile)

    def serItemsData(self, items_data, item_cb=lambda item: item.toXml()):
        """Helper method to serialise result from [getItems]

        the items_data must be a tuple(list[domish.Element], dict[unicode, unicode])
        as returned by [getItems]. metadata values are then casted to unicode and
        each item is passed to items_cb
        @param items_data(tuple): tuple returned by [getItems]
        @param item_cb(callable): method to transform each item
        @return (tuple): a serialised form ready to go throught bridge
        """
        items, metadata = items_data
        return [item_cb(item) for item in items], {key: unicode(value) for key, value in metadata.iteritems()}

    def serItemsDataD(self, items_data, item_cb):
        """Helper method to serialise result from [getItems], deferred version

        the items_data must be a tuple(list[domish.Element], dict[unicode, unicode])
        as returned by [getItems]. metadata values are then casted to unicode and
        each item is passed to items_cb
        An errback is added to item_cb, and when it is fired the value is filtered from final items
        @param items_data(tuple): tuple returned by [getItems]
        @param item_cb(callable): method to transform each item (must return a deferred)
        @return (tuple): a deferred which fire a serialised form ready to go throught bridge
        """
        items, metadata = items_data
        def eb(failure):
            log.warning("Error while serialising/parsing item: {}".format(unicode(failure.value)))
        d = defer.gatherResults([item_cb(item).addErrback(eb) for item in items])
        def finishSerialisation(serialised_items):
            return [item for item in serialised_items if item is not None], {key: unicode(value) for key, value in metadata.iteritems()}
        d.addCallback(finishSerialisation)
        return d

    def serDList(self, results, failure_result=None):
        """Serialise a DeferredList result

        @param results: DeferredList results
        @param failure_result: value to use as value for failed Deferred
            (default: empty tuple)
        @return (list): list with:
            - failure: empty in case of success, else error message
            - result
        """
        if failure_result is None:
            failure_result = ()
        return [('', result) if success else (unicode(result.result) or UNSPECIFIED, failure_result) for success, result in results]

    # subscribe #

    def _manySubscribeRTResult(self, session_id, profile_key=C.PROF_KEY_DEFAULT):
        """Get real-time results for subcribeToManu session

        @param session_id: id of the real-time deferred session
        @param return (tuple): (remaining, results) where:
            - remaining is the number of still expected results
            - results is a list of tuple(unicode, unicode, bool, unicode) with:
                - service: pubsub service
                - and node: pubsub node
                - failure(unicode): empty string in case of success, error message else
        @param profile_key: %(doc_profile_key)s
        """
        profile = self.host.getClient(profile_key).profile
        d = self.rt_sessions.getResults(session_id, on_success=lambda result:'', on_error=lambda failure:unicode(failure.value), profile=profile)
        # we need to convert jid.JID to unicode with full() to serialise it for the bridge
        d.addCallback(lambda ret: (ret[0], [(service.full(), node, '' if success else failure or UNSPECIFIED)
                                            for (service, node), (success, failure) in ret[1].iteritems()]))
        return d

    def _subscribeToMany(self, node_data, subscriber=None, options=None, profile_key=C.PROF_KEY_NONE):
        return self.subscribeToMany([(jid.JID(service), unicode(node)) for service, node in node_data], jid.JID(subscriber), options, profile_key)

    def subscribeToMany(self, node_data, subscriber, options=None, profile_key=C.PROF_KEY_NONE):
        """Subscribe to several nodes at once.

        @param node_data (iterable[tuple]): iterable of tuple (service, node) where:
            - service (jid.JID) is the pubsub service
            - node (unicode) is the node to subscribe to
        @param subscriber (jid.JID): optional subscription identifier.
        @param options (dict): subscription options
        @param profile_key (str): %(doc_profile_key)s
        @return (str): RT Deferred session id
        """
        client = self.host.getClient(profile_key)
        deferreds = {}
        for service, node in node_data:
            deferreds[(service, node)] = client.pubsub_client.subscribe(service, node, subscriber, options=options)
        return self.rt_sessions.newSession(deferreds, client.profile)
        # found_nodes = yield self.listNodes(service, profile=client.profile)
        # subscribed_nodes = yield self.listSubscribedNodes(service, profile=client.profile)
        # d_list = []
        # for nodeIdentifier in (set(nodeIdentifiers) - set(subscribed_nodes)):
        #     if nodeIdentifier not in found_nodes:
        #         log.debug(u"Skip the subscription to [{node}]: node doesn't exist".format(node=nodeIdentifier))
        #         continue  # avoid sat-pubsub "SubscriptionExists" error
        #     d_list.append(client.pubsub_client.subscribe(service, nodeIdentifier, sub_jid or client.pubsub_client.parent.jid.userhostJID(), options=options))
        # defer.returnValue(d_list)

    # get #

    def _getFromManyRTResult(self, session_id, profile_key=C.PROF_KEY_DEFAULT):
        """Get real-time results for getFromMany session

        @param session_id: id of the real-time deferred session
        @param profile_key: %(doc_profile_key)s
        @param return (tuple): (remaining, results) where:
            - remaining is the number of still expected results
            - results is a list of tuple with
                - service (unicode): pubsub service
                - node (unicode): pubsub node
                - failure (unicode): empty string in case of success, error message else
                - items (list[s]): raw XML of items
                - metadata(dict): serialised metadata
        """
        profile = self.host.getClient(profile_key).profile
        d = self.rt_sessions.getResults(session_id,
                                        on_success=lambda result: ('', self.serItemsData(result)),
                                        on_error=lambda failure: (unicode(failure.value) or UNSPECIFIED, ([],{})),
                                        profile=profile)
        d.addCallback(lambda ret: (ret[0],
                                   [(service.full(), node, failure, items, metadata)
                                    for (service, node), (success, (failure, (items, metadata))) in ret[1].iteritems()]))
        return d

    def _getFromMany(self, node_data, max_item=10, extra_dict=None, profile_key=C.PROF_KEY_NONE):
        """
        @param max_item(int): maximum number of item to get, C.NO_LIMIT for no limit
        """
        max_item = None if max_item == C.NO_LIMIT else max_item
        extra = self.parseExtra(extra_dict)
        return self.getFromMany([(jid.JID(service), unicode(node)) for service, node in node_data], max_item, extra.rsm_request, extra.extra, profile_key)

    def getFromMany(self, node_data, max_item=None, rsm_request=None, extra=None, profile_key=C.PROF_KEY_NONE):
        """Get items from many nodes at once
        @param node_data (iterable[tuple]): iterable of tuple (service, node) where:
            - service (jid.JID) is the pubsub service
            - node (unicode) is the node to get items from
        @param max_items (int): optional limit on the number of retrieved items.
        @param rsm_request (RSMRequest): RSM request data
        @param profile_key (unicode): %(doc_profile_key)s
        @return (str): RT Deferred session id
        """
        client = self.host.getClient(profile_key)
        deferreds = {}
        for service, node in node_data:
            deferreds[(service, node)] = self.getItems(service, node, max_item, rsm_request=rsm_request, extra=extra, profile_key=profile_key)
        return self.rt_sessions.newSession(deferreds, client.profile)


class SatPubSubClient(rsm.PubSubClient):
    implements(disco.IDisco)

    def __init__(self, host, parent_plugin):
        self.host = host
        self.parent_plugin = parent_plugin
        rsm.PubSubClient.__init__(self)

    def connectionInitialized(self):
        rsm.PubSubClient.connectionInitialized(self)

    def itemsReceived(self, event):
        log.debug(u"Pubsub items received")
        for node in (event.nodeIdentifier, None):
            try:
                callbacks = self.parent_plugin._node_cb[node][C.PS_ITEMS]
            except KeyError:
                pass
            else:
                for callback in callbacks:
                    callback(event, self.parent.profile)

    def deleteReceived(self, event):
        log.debug((u"Publish node deleted"))
        for node in (event.nodeIdentifier, None):
            try:
                callbacks = self.parent_plugin._node_cb[node][C.PS_DELETE]
            except KeyError:
                pass
            else:
                for callback in callbacks:
                    callback(event, self.parent.profile)

    def subscriptions(self, service, nodeIdentifier, sender=None):
        """Return the list of subscriptions to the given service and node.

        @param service: The publish subscribe service to retrieve the subscriptions from.
        @type service: L{JID<twisted.words.protocols.jabber.jid.JID>}
        @param nodeIdentifier: The identifier of the node (leave empty to retrieve all subscriptions).
        @type nodeIdentifier: C{unicode}
        """
        request = pubsub.PubSubRequest('subscriptions')
        request.recipient = service
        request.nodeIdentifier = nodeIdentifier
        request.sender = sender
        d = request.send(self.xmlstream)

        def cb(iq):
            # FIXME: to be checked
            return [sub for sub in iq.pubsub.subscriptions.elements() if
                    (sub.uri == pubsub.NS_PUBSUB and sub.name == 'subscription')]

        return d.addCallback(cb)

    def getDiscoInfo(self, requestor, service, nodeIdentifier=''):
        disco_info = []
        self.host.trigger.point("PubSub Disco Info", disco_info, self.parent.profile)
        return disco_info

    def getDiscoItems(self, requestor, service, nodeIdentifier=''):
        return []