changeset 985:64826e69f365

pages: cache mechanism, first draft: a cache mechanism has been implemented to retrieve pages with a complex rendering and/or calling expensive methods (e.g. network calls). For now it is done only for Pubsub and with the service profile (i.e. the profile used when the user is not logged in). When a LiberviaPage uses cache, the node is subscribed, and as long as no event is received (an event can be an item update, an item retraction, or a node deletion), the cached page is returned. This is a first draft; it is planned to handle in the future logged users (which can be tricky as we must not leave a node (un)subscribed if the user does not want it), multi-node pages (e.g.: item + comments) and cache for pages not depending on pubsub (e.g. chat).
author Goffi <goffi@goffi.org>
date Sun, 19 Nov 2017 17:18:14 +0100
parents f0fc28b3bd1e
children 7fdd24014aa4
files src/server/constants.py src/server/pages.py src/server/server.py
diffstat 3 files changed, 150 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/src/server/constants.py	Fri Nov 17 12:10:56 2017 +0100
+++ b/src/server/constants.py	Sun Nov 19 17:18:14 2017 +0100
@@ -75,3 +75,6 @@
     HTTP_NOT_FOUND = 404
     HTTP_INTERNAL_ERROR = 500
     HTTP_SERVICE_UNAVAILABLE = 503
+
+    ## Cache ##
+    CACHE_PUBSUB = 0
--- a/src/server/pages.py	Fri Nov 17 12:10:56 2017 +0100
+++ b/src/server/pages.py	Sun Nov 19 17:18:14 2017 +0100
@@ -20,6 +20,7 @@
 from twisted.web import resource as web_resource
 from twisted.web import util as web_util
 from twisted.internet import defer
+from twisted.words.protocols.jabber import jid
 from twisted.python import failure
 
 from sat.core.i18n import _
@@ -34,6 +35,31 @@
 
 import os.path
 import urllib
+import time
+
+
+class Cache(object):
+    """Rendered page kept in memory, with its creation and last access times"""
+
+    def __init__(self, rendered):
+        # a fresh entry counts as accessed at the moment it is created
+        self._created = self._last_access = time.time()
+        self._rendered = rendered
+
+    def _getLastAccess(self):
+        return self._last_access
+
+    def _setLastAccess(self, timestamp):
+        self._last_access = timestamp
+
+    # time of the last access to this entry (read/write)
+    last_access = property(_getLastAccess, _setLastAccess)
+
+    # creation time of this entry (read-only)
+    created = property(lambda self: self._created)
+
+    # rendered page, as given to the constructor (read-only)
+    rendered = property(lambda self: self._rendered)
 
 
 class LiberviaPage(web_resource.Resource):
@@ -41,6 +67,10 @@
     named_pages = {}
     uri_callbacks = {}
     pages_redirects = {}
+    cache = {}
+    # Set of tuples (service/node/sub_id) of nodes subscribed for caching
+    # sub_id can be empty string if not handled by service
+    cache_pubsub_sub = set()
 
     def __init__(self, host, root_dir, url, name=None, redirect=None, access=None, parse_url=None,
                  prepare_render=None, render=None, template=None, on_data_post=None):
@@ -126,6 +156,19 @@
         if parse_url is not None and not callable(parse_url):
             log.error(_(u"parse_url must be a callable"))
 
+        # if not None, next rendering will be cached
+        # it must then contain a list of the keys to use (without the page instance)
+        # e.g. [C.SERVICE_PROFILE, "pubsub", server@example.tld, pubsub_node] 
+        self._do_cache = None
+
+    def __unicode__(self):
+        """Describe the page by its name (or a placeholder if unnamed) and its URL"""
+        name = self.name or u'<anonymous>'
+        return u'LiberviaPage {name} at {url}'.format(name=name, url=self.url)
+
+    def __str__(self):  # byte string (UTF-8) version of __unicode__
+        return self.__unicode__().encode('utf-8')  # the method must be called before encoding
+
     @classmethod
     def importPages(cls, host, parent=None, path=None):
         """Recursively import Libervia pages"""
@@ -324,6 +367,89 @@
         request.prepath.append(pathElement)
         return urllib.unquote(pathElement).decode('utf-8')
 
+    def checkCacheSubscribeCb(self, sub_id, service, node):  # success callback of psSubscribe
+        self.cache_pubsub_sub.add((service, node, sub_id))  # keep track of the subscription
+
+    def checkCacheSubscribeEb(self, failure_, service, node):  # errback of psSubscribe
+        log.warning(_(u"Can't subscribe to node: {msg}").format(msg=failure_))
+        # FIXME: cache must be marked as unusable here
+
+    def psNodeWatchAddEb(self, failure_, service, node):  # errback of psNodeWatchAdd
+        log.warning(_(u"Can't add node watched: {msg}").format(msg=failure_))
+
+    def checkCache(self, request, cache_type, **kwargs):
+        """check if a page is in cache and send the cached version if suitable
+
+        this method may perform extra operations to handle cache (e.g. pubsub subscribe)
+        @param request(server.Request): current HTTP request
+        @param cache_type(int): one of C.CACHE_* const.
+        @param **kwargs: args according to cache_type:
+            C.CACHE_PUBSUB:
+                service: pubsub service
+                node: pubsub node
+                short: short name of feature (needed if node is empty)
+        @raise exceptions.InternalError: cache_type is unknown
+        @raise failure.Failure: wraps exceptions.CancelError once cached page is written
+        """
+        if request.postpath:
+            # we are not on the final page, no need to go further
+            return
+        profile = self.getProfile(request) or C.SERVICE_PROFILE
+
+        if cache_type == C.CACHE_PUBSUB:
+            service, node = kwargs['service'], kwargs['node']
+            if not node:
+                try:
+                    short = kwargs['short']
+                    node = self.host.ns_map[short]
+                except KeyError:
+                    log.warning(_(u"Can't use cache for empty node without namespace set, please ensure to set \"short\" and that it is registered"))
+                    return
+            if profile != C.SERVICE_PROFILE:
+                # only service profile is cached for now
+                return
+            try:
+                cache = self.cache[profile][cache_type][service][node][self]
+            except KeyError:
+                # no cache yet, let's subscribe to the pubsub node
+                d1 = self.host.bridgeCall('psSubscribe', service.full(), node, {}, profile)
+                d1.addCallback(self.checkCacheSubscribeCb, service, node)
+                d1.addErrback(self.checkCacheSubscribeEb, service, node)
+                d2 = self.host.bridgeCall('psNodeWatchAdd', service.full(), node, profile)
+                d2.addErrback(self.psNodeWatchAddEb, service, node)
+                self._do_cache = [profile, cache_type, service, node]
+                # we don't return the Deferreds as it is not needed to wait for
+                # the subscription to continue with page rendering
+                return
+
+        else:
+            raise exceptions.InternalError(u'Unknown cache_type')
+        log.debug(u'using cache for {page}'.format(page=self))
+        cache.last_access = time.time()
+        request.write(cache.rendered)
+        request.finish()
+        raise failure.Failure(exceptions.CancelError(u'cache is used'))
+
+    @classmethod
+    def onNodeEvent(cls, host, service, node, event_type, items, profile):
+        """Invalidate cache for all pages linked to this node, or unsubscribe if none is cached"""
+        try:
+            cache = cls.cache[profile][C.CACHE_PUBSUB][jid.JID(service)][node]
+        except KeyError:
+            log.info(_(u'Removing subscription for {service}/{node}: '
+                       u'the page is not cached').format(service=service, node=node))
+            d1 = host.bridgeCall('psUnsubscribe', service, node, profile)
+            d1.addErrback(lambda failure_:
+                log.warning(_(u"Can't unsubscribe from {service}/{node}: {msg}").format(
+                    service=service, node=node, msg=failure_)))
+            d2 = host.bridgeCall('psNodeWatchRemove', service, node, profile)  # undo psNodeWatchAdd
+            # TODO: check why the page is not in cache, remove subscription?
+            d2.addErrback(lambda failure_:
+                log.warning(_(u"Can't remove watch for {service}/{node}: {msg}").format(
+                    service=service, node=node, msg=failure_)))
+        else:
+            cache.clear()  # every page cached for this node is dropped
+
     def HTTPRedirect(self, request, url):
         """redirect to an URL using HTTP redirection
 
@@ -414,8 +540,16 @@
         """write data to transport and finish the request"""
         if data is None:
             self.pageError(request)
-        request.write(data.encode('utf-8'))
+        data_encoded = data.encode('utf-8')
+        request.write(data_encoded)
         request.finish()
+        if self._do_cache is not None:
+            cache = reduce(lambda d, k: d.setdefault(k, {}), self._do_cache, self.cache)
+            cache[self] = Cache(data_encoded)
+            log.debug(_(u'{page} put in cache for [{profile}]').format(
+                page=self,
+                profile=self._do_cache[0]))
+            self._do_cache = None
 
     def _subpagesHandler(self, dummy, request):
         """render subpage if suitable
--- a/src/server/server.py	Fri Nov 17 12:10:56 2017 +0100
+++ b/src/server/server.py	Sun Nov 19 17:18:14 2017 +0100
@@ -53,6 +53,7 @@
 import libervia
 from libervia.server.pages import LiberviaPage
 from libervia.server.utils import quote
+from functools import partial
 
 try:
     import OpenSSL
@@ -1393,6 +1394,7 @@
         self.signal_handler = SignalHandler(self)
         self.sessions = {}  # key = session value = user
         self.prof_connected = set()  # Profiles connected
+        self.ns_map = {}  # map of short name to namespaces
 
         ## bridge ##
         try:
@@ -1402,11 +1404,20 @@
             sys.exit(1)
         self.bridge.bridgeConnect(callback=self._bridgeCb, errback=self._bridgeEb)
 
+    def _namespacesGetCb(self, ns_map):  # success callback of the namespacesGet bridge call
+        self.ns_map = ns_map  # replaces the map of short names to namespaces
+
+    def _namespacesGetEb(self, failure_):  # errback of the namespacesGet bridge call
+        log.error(_(u"Can't get namespaces map: {msg}").format(msg=failure_))
+
     def backendReady(self, dummy):
         self.root = root = LiberviaRootResource(self.options, self.html_dir)
         _register = Register(self)
         _upload_radiocol = UploadManagerRadioCol(self)
         _upload_avatar = UploadManagerAvatar(self)
+        d = self.bridgeCall('namespacesGet')
+        d.addCallback(self._namespacesGetCb)
+        d.addErrback(self._namespacesGetEb)
         self.signal_handler.plugRegister(_register)
         self.bridge.register_signal("connected", self.signal_handler.connected)
         self.bridge.register_signal("disconnected", self.signal_handler.disconnected)
@@ -1442,6 +1453,7 @@
         self.putChild(C.THEMES_URL, ProtectedFile(self.themes_dir))
 
         LiberviaPage.importPages(self)
+        self.bridge.register_signal("psEventRaw", partial(LiberviaPage.onNodeEvent, self), "plugin")
 
         # media dirs
         # FIXME: get rid of dirname and "/" in C.XXX_DIR