diff libervia/server/resources.py @ 1512:65e063657597

server: move resources to a dedicated module
author Goffi <goffi@goffi.org>
date Mon, 22 May 2023 11:57:49 +0200
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libervia/server/resources.py	Mon May 22 11:57:49 2023 +0200
@@ -0,0 +1,708 @@
+#!/usr/bin/env python3
+# Libervia Web
+# Copyright (C) 2011-2021 Jérôme Poisson <goffi@goffi.org>
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+import os.path
+from pathlib import Path
+import urllib.error
+import urllib.parse
+import urllib.request
+from twisted.internet import defer
+from twisted.web import server
+from twisted.web import static
+from twisted.web import resource as web_resource
+from libervia.server.constants import Const as C
+from libervia.server.utils import quote
+from sat.core import exceptions
+from sat.core.i18n import D_, _
+from sat.core.log import getLogger
+from sat.tools.common import uri as common_uri
+from sat.tools.common import data_format
+from sat.tools.common.utils import OrderedSet, recursive_update
+from . import proxy
+log = getLogger(__name__)
+class ProtectedFile(static.File):
+    """A static.File class which doesn't show directory listing
+    The path may be given as any object convertible with str() (e.g. a pathlib.Path),
+    it is converted before being handed to static.File.
+    """
+    def __init__(self, path, *args, **kwargs):
+        """
+        @param path: file system path to serve, converted with str()
+        @param args: remaining positional arguments of static.File, the first one
+            being defaultType
+        @param kwargs: keyword arguments of static.File
+        """
+        if "defaultType" not in kwargs and not args:
+            # ``path`` is captured by its own parameter, so defaultType is the first
+            # item of ``args``. Twisted uses it positionally in
+            # File.createSimilarFile, so the default is set only if it is missing
+            # from kwargs and it is not given as a positional argument (giving it
+            # both ways would raise a TypeError).
+            kwargs["defaultType"] = "application/octet-stream"
+        super().__init__(str(path), *args, **kwargs)
+    def directoryListing(self):
+        """Return a NoResource instead of a listing of the directory"""
+        return web_resource.NoResource()
+    def getChild(self, path, request):
+        # plain pass-through to the parent implementation
+        return super().getChild(path, request)
+    def getChildWithDefault(self, path, request):
+        # plain pass-through to the parent implementation
+        return super().getChildWithDefault(path, request)
+    def getChildForRequest(self, request):
+        # plain pass-through to the parent implementation
+        return super().getChildForRequest(request)
+class LiberviaRootResource(ProtectedFile):
+    """Specialized resource for Libervia root
+    handle redirections declared in sat.conf
+    """
+    def __init__(self, host, host_name, site_name, site_path, *args, **kwargs):
+        """Set up the root resource of a site
+        @param host: main service instance; its renderer, config_get and
+            get_build_path are used here
+        @param host_name: name of the host, falsy for the main site
+        @param site_name: name of the site, used to retrieve themes and build paths
+        @param site_path: path of the site, stored as a pathlib.Path
+        @param args: positional arguments forwarded to ProtectedFile
+        @param kwargs: keyword arguments forwarded to ProtectedFile
+        """
+        ProtectedFile.__init__(self, *args, **kwargs)
+        self.host = host
+        self.host_name = host_name
+        self.site_name = site_name
+        self.site_path = Path(site_path)
+        configured_theme = self.config_get('theme')
+        if configured_theme is not None:
+            self.default_theme = configured_theme
+        elif host_name:
+            self.default_theme = C.TEMPLATE_THEME_DEFAULT
+        else:
+            # FIXME: we use bulma theme by default for main site for now
+            #   as the development is focusing on this one, and default theme may
+            #   be broken
+            self.default_theme = 'bulma'
+        self.site_themes = set()
+        self.named_pages = {}
+        self.browser_modules = {}
+        # template dynamic data used in all pages
+        self.dyn_data_common = {"scripts": OrderedSet()}
+        themes_data = host.renderer.get_themes_data(site_name)
+        for theme_name, theme_data in themes_data.items():
+            # each theme is registered, and its browser metadata are merged if any
+            self.site_themes.add(theme_name)
+            theme_meta = theme_data.get('browser_meta')
+            if theme_meta is not None:
+                log.debug(
+                    f"merging browser metadata from theme {theme_name}: {theme_meta}")
+                recursive_update(self.browser_modules, theme_meta)
+            theme_browser_path = theme_data.get('browser_path')
+            if theme_browser_path is None:
+                continue
+            known_paths = self.browser_modules.setdefault('themes_browser_paths', set())
+            known_paths.add(theme_browser_path)
+            if next(theme_browser_path.glob("*.py"), None) is None:
+                # no Python script in the browser path of this theme
+                continue
+            log.debug(f"found brython script(s) for theme {theme_name}")
+            brython_modules = self.browser_modules.setdefault('brython', [])
+            brython_modules.append(
+                {
+                    "path": theme_browser_path,
+                    "url_hash": None,
+                    "url_prefix": f"__t_{theme_name}"
+                }
+            )
+        self.uri_callbacks = {}
+        self.pages_redirects = {}
+        self.cached_urls = {}
+        self.main_menu = None
+        # map Libervia application names => data
+        self.libervia_apps = {}
+        # both build directories are created if they don't exist yet
+        self.build_path = host.get_build_path(site_name)
+        self.build_path.mkdir(parents=True, exist_ok=True)
+        self.dev_build_path = host.get_build_path(site_name, dev=True)
+        self.dev_build_path.mkdir(parents=True, exist_ok=True)
+        # only the non-dev build directory is served, as a child of this resource
+        build_dir_name = C.BUILD_DIR.encode()
+        build_resource = ProtectedFile(
+            self.build_path, defaultType="application/octet-stream")
+        self.putChild(build_dir_name, build_resource)
+    def __str__(self):
+        """Describe this resource with its host, site, site path and served path
+        "default host" and "default site" are used when the matching name is falsy
+        """
+        return (
+            f"Root resource for {self.host_name or 'default host'} using "
+            f"{self.site_name or 'default site'} at {self.site_path} and serving "
+            f"files at {self.path}"
+        )
+    def config_get(self, key, default=None, value_type=None):
+        """Get a configuration value for this site
+        This delegates to the host's config_get, with this resource given as first
+        argument; params are the same as for [Libervia.config_get]
+        """
+        host_config_get = self.host.config_get
+        return host_config_get(self, key, default, value_type)
+    def get_front_url(self, theme):
+        """Build the path of the templates of a theme for this site
+        @param theme: name of the theme
+        @return (Path): path starting at "/" and made of the template resource
+            constant, the site name (default site name constant if the site has no
+            name), the templates directory constant and the theme
+        """
+        site = self.site_name or C.SITE_NAME_DEFAULT
+        segments = ('/', C.TPL_RESOURCE, site, C.TEMPLATE_TPL_DIR, theme)
+        return Path(*segments)
+    def add_resource_to_path(self, path: str, resource: web_resource.Resource) -> None:
+        """Add a resource to the given path
+        A "NoResource" will be used for all intermediate segments
+        @param path: "/" separated path where the resource must be attached; empty
+            intermediate segments (leading or doubled "/") are ignored
+        @param resource: resource to attach to the last segment of the path
+        """
+        *parent_segments, last_segment = path.split("/")
+        current = self
+        for segment in parent_segments:
+            if not segment:
+                # produced by a leading or doubled "/", there is nothing to attach
+                continue
+            # a dedicated name is used on purpose: the ``resource`` argument must stay
+            # untouched until it is attached to the last segment
+            intermediate = web_resource.NoResource()
+            # children names must be bytes (self.putChild refuses anything else)
+            current.putChild(segment.encode('utf-8'), intermediate)
+            current = intermediate
+        current.putChild(
+            last_segment.encode('utf-8'),
+            resource
+        )
+    async def _start_app(self, app_name, extra=None) -> dict:
+        """Start a Libervia App
+        @param app_name: canonical application name
+        @param extra: extra parameter to configure app
+            an empty dict is used if None
+        @return: app data
+            app data will not include computed exposed data, as this needs to wait for
+            the app to be started
+        """
+        log.info(_(
+            "starting application {app_name}").format(app_name=app_name))
+        serialised_extra = data_format.serialise({} if extra is None else extra)
+        raw_app_data = await self.host.bridge_call(
+            "application_start", app_name, serialised_extra
+        )
+        app_data = data_format.deserialise(raw_app_data)
+        if not app_data.get("started", False):
+            # the callback is registered for this instance in the host's apps_cb
+            self.host.apps_cb[app_data["instance"]] = self._on_app_started
+            return app_data
+        log.debug(f"application {app_name!r} is already started or starting")
+        # we do not await on purpose, the workflow should not be blocking at this
+        # point
+        defer.ensureDeferred(self._on_app_started(app_name, app_data["instance"]))
+        return app_data
+    async def _on_app_started(
+        self,
+        app_name: str,
+        instance_id: str
+    ) -> None:
+        """Attach a reverse proxy to an application
+        The exposed data of the application are retrieved and stored in
+        self.libervia_apps, then a reverse proxy resource targeting the web port of
+        the application on localhost is added at the URL prefix of the application.
+        @param app_name: canonical application name
+        @param instance_id: identifier of the instance (only used for logging here)
+        @raise exceptions.DataError: web port or URL prefix is missing or invalid
+        """
+        exposed_data = self.libervia_apps[app_name] = data_format.deserialise(
+            await self.host.bridge_call("application_exposed_get", app_name, "", "")
+        )
+        try:
+            # the port is what follows the first ":" of the "web" entry
+            web_port = int(exposed_data['ports']['web'].split(':')[1])
+        except (KeyError, IndexError, ValueError, AttributeError, TypeError) as e:
+            # IndexError: no ":" in the value; AttributeError/TypeError: the "ports"
+            # or "web" value has not the expected type
+            log.warning(_(
+                "no web port found for application {app_name!r}, can't use it "
+                ).format(app_name=app_name))
+            raise exceptions.DataError("no web port found") from e
+        try:
+            url_prefix = exposed_data['url_prefix'].strip().rstrip('/')
+        except (KeyError, AttributeError) as e:
+            log.warning(_(
+                "no URL prefix specified for this application, we can't embed it: {msg}")
+                .format(msg=e))
+            raise exceptions.DataError("no URL prefix") from e
+        if not url_prefix.startswith('/'):
+            raise exceptions.DataError(
+                f"invalid URL prefix, it must start with '/': {url_prefix!r}")
+        res = proxy.SatReverseProxyResource(
+            "localhost",
+            web_port,
+            url_prefix.encode()
+        )
+        self.add_resource_to_path(url_prefix, res)
+        log.info(
+            f"Resource for app {app_name!r} (instance {instance_id!r}) has been added"
+        )
+    async def _init_redirections(self, options):
+        """Set up the URL redirections of this site from the configuration
+        self.redirections (old URL => redirection data) and self.inv_redirections
+        (new URL => old URL) are filled. Redirections to files and reverse proxies are
+        attached as child resources, and Libervia applications are started, instead of
+        using the redirection system.
+        @param options(dict): options, where "url_redirections_dict" maps site names
+            to their redirections ({old URL: new data, or list of new data})
+        @raise ValueError: invalid redirection data (dict keys, file path or proxy
+            host/port)
+        @raise NotImplementedError: unmanaged scheme, or redirection to another host
+        """
+        url_redirections = options["url_redirections_dict"]
+        url_redirections = url_redirections.get(self.site_name, {})
+        ## redirections
+        self.redirections = {}
+        self.inv_redirections = {}  # new URL to old URL map
+        for old, new_data_list in url_redirections.items():
+            # several redirections can be used for one path by using a list.
+            # The redirection will be done using first item of the list, and all items
+            # will be used for inverse redirection.
+            # e.g. if a => [b, c], a will redirect to b, and b and c will both be
+            # equivalent to a
+            if not isinstance(new_data_list, list):
+                new_data_list = [new_data_list]
+            for new_data in new_data_list:
+                # new_data can be a dictionary or a unicode url
+                if isinstance(new_data, dict):
+                    # new_data dict must contain either "url", "page" or "path" key
+                    # (exclusive)
+                    # if "path" is used, a file url is constructed with it
+                    if len({"path", "url", "page"}.intersection(new_data)) != 1:
+                        raise ValueError(
+                            'You must have one and only one of "url", "page" or "path" '
+                            'key in your url_redirections_dict data'
+                        )
+                    if "url" in new_data:
+                        new = new_data["url"]
+                    elif "page" in new_data:
+                        new = new_data
+                        new["type"] = "page"
+                        new.setdefault("path_args", [])
+                        if not isinstance(new["path_args"], list):
+                            # the message is formatted after the translation lookup,
+                            # as for "query_args" below
+                            log.error(
+                                _(
+                                    '"path_args" in redirection of {old} must be a list. '
+                                    'Ignoring the redirection'
+                                ).format(old=old)
+                            )
+                            continue
+                        new.setdefault("query_args", {})
+                        if not isinstance(new["query_args"], dict):
+                            log.error(
+                                _(
+                                    '"query_args" in redirection of {old} must be a '
+                                    'dictionary. Ignoring the redirection'
+                                ).format(old=old)
+                            )
+                            continue
+                        new["path_args"] = [quote(a) for a in new["path_args"]]
+                        # we keep an inversed dict of page redirection
+                        # (page/path_args => redirecting URL)
+                        # so get_url can return the redirecting URL if the same arguments
+                        # are used # making the URL consistent
+                        # the URL is stored as written in the configuration
+                        args_hash = tuple(new["path_args"])
+                        self.pages_redirects.setdefault(new_data["page"], {}).setdefault(
+                            args_hash,
+                            old
+                        )
+                        # we need lists in query_args because it will be used
+                        # as it in request.path_args
+                        for k, v in new["query_args"].items():
+                            if isinstance(v, str):
+                                new["query_args"][k] = [v]
+                    elif "path" in new_data:
+                        new = "file:{}".format(urllib.parse.quote(new_data["path"]))
+                elif isinstance(new_data, str):
+                    new = new_data
+                    new_data = {}
+                else:
+                    log.error(
+                        _("ignoring invalid redirection value: {new_data}").format(
+                            new_data=new_data
+                        )
+                    )
+                    continue
+                # some normalization
+                # the result goes to its own variable: ``old`` must keep the value of
+                # the configuration, otherwise the next items of the list would be
+                # checked against an already normalized URL (without leading "/") and
+                # be wrongly rejected
+                if not old.strip():
+                    # root URL special case
+                    old_url = ""
+                elif not old.startswith("/"):
+                    log.error(
+                        _("redirected url must start with '/', got {value}. Ignoring")
+                        .format(value=old)
+                    )
+                    continue
+                else:
+                    old_url = self._normalize_url(old)
+                if isinstance(new, dict):
+                    # dict are handled differently, they contain data
+                    # which are used dynamically when the request is done
+                    self.redirections.setdefault(old_url, new)
+                    if not old_url:
+                        if new["type"] == "page":
+                            log.info(
+                                _("Root URL redirected to page {name}").format(
+                                    name=new["page"]
+                                )
+                            )
+                    else:
+                        if new["type"] == "page":
+                            page = self.get_page_by_name(new["page"])
+                            url = page.get_url(*new.get("path_args", []))
+                            self.inv_redirections[url] = old_url
+                    continue
+                # at this point we have a redirection URL in new, we can parse it
+                new_url = urllib.parse.urlsplit(new)
+                # we handle the known URL schemes
+                if new_url.scheme == "xmpp":
+                    location = self.get_page_path_from_uri(new)
+                    if location is None:
+                        log.warning(
+                            _("ignoring redirection, no page found to handle this URI: "
+                              "{uri}").format(uri=new))
+                        continue
+                    request_data = self._get_request_data(location)
+                    self.inv_redirections[location] = old_url
+                elif new_url.scheme in ("", "http", "https"):
+                    # direct redirection
+                    if new_url.netloc:
+                        raise NotImplementedError(
+                            "netloc ({netloc}) is not implemented yet for "
+                            "url_redirections_dict, it is not possible to redirect to an "
+                            "external website".format(netloc=new_url.netloc))
+                    location = urllib.parse.urlunsplit(
+                        ("", "", new_url.path, new_url.query, new_url.fragment)
+                    )
+                    request_data = self._get_request_data(location)
+                    self.inv_redirections[location] = old_url
+                elif new_url.scheme == "file":
+                    # file or directory
+                    if new_url.netloc:
+                        raise NotImplementedError(
+                            "netloc ({netloc}) is not implemented for url redirection to "
+                            "file system, it is not possible to redirect to an external "
+                            "host".format(
+                                netloc=new_url.netloc))
+                    path = urllib.parse.unquote(new_url.path)
+                    if not os.path.isabs(path):
+                        raise ValueError(
+                            "file redirection must have an absolute path: e.g. "
+                            "file:/path/to/my/file")
+                    # for file redirection, we directly put child here
+                    resource_class = (
+                        ProtectedFile if new_data.get("protected", True) else static.File
+                    )
+                    res = resource_class(path, defaultType="application/octet-stream")
+                    self.add_resource_to_path(old_url, res)
+                    log.info("[{host_name}] Added redirection from /{old} to file system "
+                             "path {path}".format(host_name=self.host_name,
+                                                   old=old_url,
+                                                   path=path))
+                    # we don't want to use redirection system, so we continue here
+                    continue
+                elif new_url.scheme == "libervia-app":
+                    # a Libervia application
+                    app_name = urllib.parse.unquote(new_url.path).lower().strip()
+                    extra = {"url_prefix": f"/{old_url}"}
+                    try:
+                        await self._start_app(app_name, extra)
+                    except Exception as e:
+                        # a failing application must not prevent the other
+                        # redirections from being set, it is only reported
+                        log.warning(_(
+                            "Can't launch {app_name!r} for path /{old}: {e}").format(
+                            app_name=app_name, old=old_url, e=e))
+                        continue
+                    log.info(
+                        f"[{self.host_name}] Added redirection from /{old_url} to "
+                        f"application {app_name}"
+                    )
+                    # normal redirection system is not used here
+                    continue
+                elif new_url.scheme == "proxy":
+                    # a reverse proxy
+                    host, port = new_url.hostname, new_url.port
+                    if host is None or port is None:
+                        raise ValueError(
+                            "invalid host or port in proxy redirection, please check your "
+                            f"configuration: {new_url.geturl()}"
+                        )
+                    url_prefix = (new_url.path or old_url).rstrip('/')
+                    res = proxy.SatReverseProxyResource(
+                        host,
+                        port,
+                        url_prefix.encode(),
+                    )
+                    self.add_resource_to_path(old_url, res)
+                    log.info(
+                        f"[{self.host_name}] Added redirection from /{old_url} to reverse "
+                        f"proxy {new_url.netloc} with URL prefix {url_prefix}/"
+                    )
+                    # normal redirection system is not used here
+                    continue
+                else:
+                    raise NotImplementedError(
+                        "{scheme}: scheme is not managed for url_redirections_dict".format(
+                            scheme=new_url.scheme
+                        )
+                    )
+                # only the first redirection found for an URL is kept
+                self.redirections.setdefault(old_url, request_data)
+                if not old_url:
+                    log.info(_("[{host_name}] Root URL redirected to {uri}")
+                        .format(host_name=self.host_name,
+                                uri=request_data[1]))
+        # the default root URL, if not redirected
+        if "" not in self.redirections:
+            self.redirections[""] = self._get_request_data(C.LIBERVIA_PAGE_START)
+    async def _set_menu(self, menus):
+        """Build the main menu of this site and store it in self.main_menu
+        @param menus(dict): site name => list of menu items, where an item is either
+            - a list of 2 elements, used as (page name, URL)
+            - a string starting with "libervia-app:": the application is started, and
+              a redirection from its front URL to the "embed_app" page is added to the
+              "url_redirections_dict" option of the host
+            - any other string: name of a page, whose URL is used
+        @raise ValueError: a menu item is empty, or is a list without exactly 2 items
+        @raise exceptions.ConflictError: the front URL of an application is already
+            redirected
+        @raise exceptions.ConfigError: a named page can't be found
+        """
+        app_scheme = "libervia-app:"
+        entries = []
+        for item in menus.get(self.site_name, []):
+            if not item:
+                msg = _("menu item can't be empty")
+                log.error(msg)
+                raise ValueError(msg)
+            if isinstance(item, list):
+                if len(item) != 2:
+                    msg = _(
+                        "menu item as list must be in the form [page_name, absolue URL]"
+                    )
+                    log.error(msg)
+                    raise ValueError(msg)
+                label, target = item
+            elif item.startswith(app_scheme):
+                app_name = item[len(app_scheme):].strip().lower()
+                app_data = await self._start_app(app_name)
+                exposed_data = app_data["expose"]
+                front_url = exposed_data['front_url']
+                site_redirections = self.host.options["url_redirections_dict"].setdefault(
+                    self.site_name, {}
+                )
+                if front_url in site_redirections:
+                    raise exceptions.ConflictError(
+                        f"There is already a redirection from {front_url!r}, can't add "
+                        f"{app_name!r}")
+                site_redirections[front_url] = {
+                    "page": 'embed_app',
+                    "path_args": [app_name]
+                }
+                # the label falls back to the application name
+                label = exposed_data.get('web_label', app_name).title()
+                target = front_url
+                log.debug(
+                    f"Application {app_name} added to menu of {self.site_name}"
+                )
+            else:
+                label = item
+                try:
+                    target = self.get_page_by_name(label).url
+                except KeyError as e:
+                    log_msg = _("Can'find a named page ({msg}), please check "
+                                "menu_json in configuration.").format(msg=e.args[0])
+                    log.error(log_msg)
+                    raise exceptions.ConfigError(log_msg)
+            entries.append((label, target))
+        self.main_menu = entries
+    def _normalize_url(self, url, lower=True):
+        """Normalize an URL to use it as key of self.redirections
+        Empty path segments are removed, so the result has no leading, trailing or
+        doubled "/"
+        @param url(str): URL to normalize
+        @param lower(bool): convert the URL to lower case if True
+        @return (str): normalized URL
+        """
+        candidate = url.lower() if lower else url
+        return "/".join(filter(None, candidate.split("/")))
+    def _get_request_data(self, uri):
+        """Compute the data needed to redirect a request
+        @param uri(str): destination URI
+        @return (tuple(list[str], str, str, dict)): tuple with
+            splitted path as in Request.postpath
+            uri as in Request.uri
+            path as in Request.path
+            args as in Request.args
+        """
+        # XXX: the URI is split in the same way as in twisted.web.http.py
+        #      as we need to have the same behaviour
+        path, has_query, query = uri.partition("?")
+        # blank values are kept when the query is parsed
+        args = urllib.parse.parse_qs(query, True) if has_query else {}
+        # XXX: splitted path case must not be changed, as it may be significant
+        #      (e.g. for blog items)
+        split_path = self._normalize_url(path, lower=False).split("/")
+        return (split_path, uri, path, args)
+    def _redirect(self, request, request_data):
+        """Redirect an URL by rewriting the request
+        this is *NOT* a HTTP redirection, but equivalent to URL rewriting
+        @param request(web.http.request): original request
+        @param request_data(tuple, dict): data returned by self._get_request_data, or
+            dict with a "type" key (only "page" is handled)
+        @return (web_resource.Resource): resource to use
+            a NoResource is returned if the request has already been redirected, if
+            the page is not found, or if the query arguments are invalid
+        @raise exceptions.InternalError: unknown type in a dict request_data
+        """
+        if hasattr(request, "_redirected"):
+            # the request has already been redirected once: this is a recursion
+            try:
+                __, uri, __, __ = request_data
+            except ValueError:
+                uri = ""
+            log.error(D_( "recursive redirection, please fix this URL:\n"
+                          "{old} ==> {new}").format(
+                          old=request.uri.decode("utf-8"), new=uri))
+            return web_resource.NoResource()
+        request._redirected = True  # here to avoid recursive redirections
+        if isinstance(request_data, dict):
+            if request_data["type"] != "page":
+                raise exceptions.InternalError("unknown request_data type")
+            try:
+                page = self.get_page_by_name(request_data["page"])
+            except KeyError:
+                log.error(
+                    _(
+                        'Can\'t find page named "{name}" requested in redirection'
+                    ).format(name=request_data["page"])
+                )
+                return web_resource.NoResource()
+            # the arguments of the redirection are put before the remaining path
+            encoded_args = [arg.encode('utf-8') for arg in request_data["path_args"]]
+            request.postpath = encoded_args + request.postpath
+            try:
+                request.args.update(request_data["query_args"])
+            except (TypeError, ValueError):
+                log.error(
+                    _("Invalid args in redirection: {query_args}").format(
+                        query_args=request_data["query_args"]
+                    )
+                )
+                return web_resource.NoResource()
+            return page
+        segments, uri, path, args = request_data
+        encoded_segments = [segment.encode('utf-8') for segment in segments]
+        log.debug(
+            "Redirecting URL {old} to {new}".format(
+                old=request.uri.decode('utf-8'), new=uri
+            )
+        )
+        # we change the request to reflect the new url
+        request.postpath = encoded_segments[1:] + request.postpath
+        request.args.update(args)
+        # we start again to look for a child with the new url
+        return self.getChildWithDefault(encoded_segments[0], request)
+    def get_page_by_name(self, name):
+        """Get the page registered in self.named_pages with the given name
+        @param name(str): name of the page
+        @return (LiberviaPage): page instance
+        @raise KeyError: the page doesn't exist
+        """
+        pages = self.named_pages
+        return pages[name]
+    def get_page_path_from_uri(self, uri):
+        """Retrieve page URL from xmpp: URI
+        The callback registered for the (type, sub_type) of the URI is tried first,
+        then the one registered for (type, None) if no URL has been found.
+        @param uri(str): URI with a xmpp: scheme
+        @return (str, None): absolute path (starting from root "/") to page handling
+            the URI.
+            None is returned if no page has been registered for this URI
+        """
+        uri_data = common_uri.parse_xmpp_uri(uri)
+        url = None
+        try:
+            page, cb = self.uri_callbacks[uri_data["type"], uri_data["sub_type"]]
+        except KeyError:
+            pass
+        else:
+            url = cb(page, uri_data)
+        if url is not None:
+            return url
+        # no handler found
+        # we try to find a more generic one
+        try:
+            page, cb = self.uri_callbacks[uri_data["type"], None]
+        except KeyError:
+            return None
+        return cb(page, uri_data)
+    def getChildWithDefault(self, name, request):
+        """Handle the root URL with its redirection, before any other child
+        Any other request is handled by the parent implementation
+        """
+        # XXX: this method is overriden only for root url
+        #      which is the only ones who need to be handled before other children
+        is_root_url = name == b"" and not request.postpath
+        if not is_root_url:
+            return super().getChildWithDefault(name, request)
+        return self._redirect(request, self.redirections[""])
+    def getChild(self, name, request):
+        """Look for a child, falling back to redirections if nothing is found
+        The lower-cased request path is tested against self.redirections from the
+        longest prefix to the empty one, the first match is used.
+        """
+        found = super().getChild(name, request)
+        if not isinstance(found, web_resource.NoResource):
+            return found
+        # if nothing was found, we try our luck with redirections
+        # XXX: we want redirections to happen only if everything else failed
+        full_path = request.prepath + request.postpath
+        for split_at in reversed(range(len(full_path) + 1)):
+            candidate = b"/".join(full_path[:split_at]).decode('utf-8').lower()
+            if candidate not in self.redirections:
+                continue
+            # segments after the matched prefix are left for the redirection
+            request.postpath = full_path[split_at:]
+            return self._redirect(request, self.redirections[candidate])
+        return found
+    def putChild(self, path, resource):
+        """Add a child to the root resource
+        The resource is wrapped in an EncodingResourceWrapper using a
+        GzipEncoderFactory, unless it is already an EncodingResourceWrapper
+        @param path(bytes): name of the child
+        @param resource: resource to add
+        @raise ValueError: path is not bytes
+        """
+        if not isinstance(path, bytes):
+            raise ValueError("path must be specified in bytes")
+        if isinstance(resource, web_resource.EncodingResourceWrapper):
+            child = resource
+        else:
+            # FIXME: check that no information is leaked (c.f. https://twistedmatrix.com/documents/current/web/howto/using-twistedweb.html#request-encoders)
+            child = web_resource.EncodingResourceWrapper(
+                resource, [server.GzipEncoderFactory()])
+        super().putChild(path, child)
+    def createSimilarFile(self, path):
+        """Create a resource for path using the parent class of LiberviaRootResource
+        defaultType, ignoredExts and registry are given to the constructor;
+        processors, a copy of indexNames, and childNotFound are then set on the new
+        instance
+        """
+        # XXX: this method need to be overriden to avoid recreating a LiberviaRootResource
+        parent_class = LiberviaRootResource.__base__
+        similar = parent_class(
+            path, self.defaultType, self.ignoredExts, self.registry
+        )
+        # refactoring by steps, here - constructor should almost certainly take these
+        similar.processors = self.processors
+        similar.indexNames = self.indexNames[:]
+        similar.childNotFound = self.childNotFound
+        return similar