view libervia/web/server/resources.py @ 1598:86c7a3a625d5

server: always start a new session on connection: The session was kept when a user was connecting from service profile (but not from other profiles), this was leading to session fixation vulnerability (an attacker on the same machine could get service profile session cookie, and use it when a victim would log-in). This patch fixes it by always starting a new session on connection. fix 443
author Goffi <goffi@goffi.org>
date Fri, 23 Feb 2024 13:35:24 +0100
parents eb00d593801d
children f3305832f3f6
line wrap: on
line source

#!/usr/bin/env python3

# Libervia Web
# Copyright (C) 2011-2021 Jérôme Poisson <goffi@goffi.org>

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import os.path
from pathlib import Path
import urllib.error
import urllib.parse
import urllib.request

from twisted.internet import defer
from twisted.web import server
from twisted.web import static
from twisted.web import resource as web_resource

from libervia.web.server.constants import Const as C
from libervia.web.server.utils import quote
from libervia.backend.core import exceptions
from libervia.backend.core.i18n import D_, _
from libervia.backend.core.log import getLogger
from libervia.backend.tools.common import uri as common_uri
from libervia.backend.tools.common import data_format
from libervia.backend.tools.common.utils import OrderedSet, recursive_update

from . import proxy

log = getLogger(__name__)


class ProtectedFile(static.File):
    """A static.File class which doesn't show directory listing"""

    def __init__(self, path, *args, **kwargs):
        """Create the file resource, defaulting to a binary MIME type

        @param path: path of the file or directory to serve, it is converted with
            ``str()`` before being given to ``static.File``
        @param args: extra positional arguments forwarded to ``static.File``
        @param kwargs: extra keyword arguments forwarded to ``static.File``
        """
        if "defaultType" not in kwargs and not args:
            # "path" is consumed by its own parameter, so "defaultType" (second
            # positional argument of static.File, used this way in
            # File.createSimilarFile) is the FIRST item of "args": the default must
            # only be set when there is no extra positional argument at all,
            # otherwise static.File would receive "defaultType" twice.
            kwargs["defaultType"] = "application/octet-stream"
        super().__init__(str(path), *args, **kwargs)

    def directoryListing(self):
        """Replace the directory listing with a "no resource" page"""
        return web_resource.NoResource()


class LiberviaRootResource(ProtectedFile):
    """Specialized resource for Libervia root

    handle redirections declared in libervia.conf
    """

    def __init__(self, host, host_name, site_name, site_path, *args, **kwargs):
        """Prepare the root resource of a site

        @param host: object giving access to configuration (``config_get``), to the
            template renderer (``renderer``) and to build paths (``get_build_path``)
        @param host_name: name of the host served by this resource, a falsy value is
            used for the main site
        @param site_name: name of the site, used to retrieve themes and build paths
        @param site_path: path of the site, stored as a ``pathlib.Path``
        @param args: positional arguments forwarded to ProtectedFile
        @param kwargs: keyword arguments forwarded to ProtectedFile
        """
        ProtectedFile.__init__(self, *args, **kwargs)
        self.host = host
        self.host_name = host_name
        self.site_name = site_name
        self.site_path = Path(site_path)

        configured_theme = self.config_get('theme')
        if configured_theme is None:
            # FIXME: we use bulma theme by default for main site for now
            #   as the development is focusing on this one, and default theme may
            #   be broken
            configured_theme = C.TEMPLATE_THEME_DEFAULT if host_name else 'bulma'
        self.default_theme = configured_theme

        self.site_themes = set()
        self.named_pages = {}
        self.browser_modules = {}
        # dynamic template data shared by all pages
        self.dyn_data_common = {"scripts": OrderedSet()}

        themes_data = host.renderer.get_themes_data(site_name)
        for theme, data in themes_data.items():
            # browser metadata declared by themes are merged together
            self.site_themes.add(theme)
            browser_meta = data.get('browser_meta')
            if browser_meta is not None:
                log.debug(f"merging browser metadata from theme {theme}: {browser_meta}")
                recursive_update(self.browser_modules, browser_meta)

            browser_path = data.get('browser_path')
            if browser_path is None:
                continue
            self.browser_modules.setdefault('themes_browser_paths', set()).add(
                browser_path)
            # a brython entry is only declared when at least one Python script
            # is found in the browser path of the theme
            if next(browser_path.glob("*.py"), None) is None:
                continue
            log.debug(f"found brython script(s) for theme {theme}")
            brython_entry = {
                "path": browser_path,
                "url_hash": None,
                "url_prefix": f"__t_{theme}"
            }
            self.browser_modules.setdefault('brython', []).append(brython_entry)

        self.uri_callbacks = {}
        self.pages_redirects = {}
        self.cached_urls = {}
        self.main_menu = None
        # Libervia application name => application data
        self.libervia_apps = {}

        self.build_path = host.get_build_path(site_name)
        self.build_path.mkdir(parents=True, exist_ok=True)
        self.dev_build_path = host.get_build_path(site_name, dev=True)
        self.dev_build_path.mkdir(parents=True, exist_ok=True)

        # built files are served from the build directory
        build_resource = ProtectedFile(
            self.build_path,
            defaultType="application/octet-stream",
        )
        self.putChild(C.BUILD_DIR.encode(), build_resource)

    def __str__(self):
        return (
            f"Root resource for {self.host_name or 'default host'} using "
            f"{self.site_name or 'default site'} at {self.site_path} and deserving "
            f"files at {self.path}"
        )

    def config_get(self, key, default=None, value_type=None):
        """Retrieve configuration for this site

        params are the same as for [Libervia.config_get]
        """
        return self.host.config_get(self, key, default, value_type)

    def get_front_url(self, theme):
        """Build the absolute path of the templates directory of a theme

        @param theme: name of the theme
        @return (Path): path starting at "/" made of the templates resource name, the
            site name (or the default site name), the templates directory and the
            theme
        """
        site_segment = self.site_name or C.SITE_NAME_DEFAULT
        return Path('/', C.TPL_RESOURCE, site_segment, C.TEMPLATE_TPL_DIR, theme)

    def add_resource_to_path(self, path: str, resource: web_resource.Resource) -> None:
        """Add a resource to the given path

        A "NoResource" will be used for all intermediate segments

        @param path: "/" separated path where the resource must be available, empty
            segments (e.g. from a leading "/") are ignored
        @param resource: resource to attach to the last segment of the path
        """
        parent_path, __, last_segment = path.rpartition("/")
        current = self
        for segment in parent_path.split("/"):
            if not segment:
                # a leading or doubled "/" must not create an empty child
                continue
            # a dedicated name is used here: the "resource" argument must be kept
            # untouched as it is the one to attach to the last segment
            # NOTE: a child already registered for this segment is replaced
            intermediate = web_resource.NoResource()
            # children names are bytes (self.putChild refuses anything else)
            current.putChild(segment.encode('utf-8'), intermediate)
            current = intermediate

        current.putChild(
            last_segment.encode('utf-8'),
            resource
        )

    async def _start_app(self, app_name, extra=None) -> dict:
        """Ask the backend to start a Libervia application

        @param app_name: canonical application name
        @param extra: extra parameters to configure the application
        @return: application data as returned by the backend
            computed exposed data are not included, as they are only available once
            the application is started
        """
        extra = {} if extra is None else extra
        log.info(_(
            "starting application {app_name}").format(app_name=app_name))
        serialised_app_data = await self.host.bridge_call(
            "application_start", app_name, data_format.serialise(extra)
        )
        app_data = data_format.deserialise(serialised_app_data)

        if not app_data.get("started", False):
            # the callback is registered for this instance in the host
            self.host.apps_cb[app_data["instance"]] = self._on_app_started
            return app_data

        log.debug(f"application {app_name!r} is already started or starting")
        # not awaited on purpose: the workflow should not be blocking at this point
        defer.ensureDeferred(self._on_app_started(app_name, app_data["instance"]))
        return app_data

    async def _on_app_started(
        self,
        app_name: str,
        instance_id: str
    ) -> None:
        """Add a reverse proxy resource for a started application

        Exposed data of the application are retrieved from the backend and stored in
        ``self.libervia_apps``, then a reverse proxy to the web port on localhost is
        added at the URL prefix of the application.

        @param app_name: canonical application name
        @param instance_id: ID of the application instance (only used in logs)
        @raise exceptions.DataError: web port or URL prefix is missing or invalid
        """
        exposed_data = self.libervia_apps[app_name] = data_format.deserialise(
            await self.host.bridge_call("application_exposed_get", app_name, "", "")
        )

        try:
            # the port is the part following the first ":" of the exposed value
            web_port = int(exposed_data['ports']['web'].split(':')[1])
        except (KeyError, IndexError, ValueError, AttributeError, TypeError) as e:
            # IndexError: there is no ":" in the value
            # AttributeError/TypeError: the exposed data have an unexpected type
            log.warning(_(
                "no web port found for application {app_name!r}, can't use it "
                ).format(app_name=app_name))
            raise exceptions.DataError("no web port found") from e

        try:
            url_prefix = exposed_data['url_prefix'].strip().rstrip('/')
        except (KeyError, AttributeError) as e:
            log.warning(_(
                "no URL prefix specified for this application, we can't embed it: {msg}")
                .format(msg=e))
            raise exceptions.DataError("no URL prefix") from e

        if not url_prefix.startswith('/'):
            raise exceptions.DataError(
                f"invalid URL prefix, it must start with '/': {url_prefix!r}")

        res = proxy.SatReverseProxyResource(
            "localhost",
            web_port,
            url_prefix.encode()
        )
        self.add_resource_to_path(url_prefix, res)
        log.info(
            f"Resource for app {app_name!r} (instance {instance_id!r}) has been added"
        )

    async def _init_redirections(self, options):
        """Build the redirections of this site from the configuration

        ``self.redirections`` (normalized old URL => redirection data) and
        ``self.inv_redirections`` (new URL => old URL) are (re)created, and
        ``self.pages_redirects`` is filled for page redirections. File, proxy and
        application redirections are directly added as children resources.

        @param options(dict): options with a "url_redirections_dict" key, mapping
            site names to their redirections (old URL => new data or list of new data)
        @raise ValueError: a redirection is badly specified
        @raise NotImplementedError: the URL scheme or a netloc is not managed
        """
        url_redirections = options["url_redirections_dict"]

        url_redirections = url_redirections.get(self.site_name, {})

        ## redirections
        self.redirections = {}
        self.inv_redirections = {}  # new URL to old URL map

        for conf_old, new_data_list in url_redirections.items():
            # several redirections can be used for one path by using a list.
            # The redirection will be done using first item of the list, and all items
            # will be used for inverse redirection.
            # e.g. if a => [b, c], a will redirect to b, and b and c will both be
            # equivalent to a
            if not isinstance(new_data_list, list):
                new_data_list = [new_data_list]
            for new_data in new_data_list:
                # "old" is normalized below (leading "/" is removed): we restart from
                # the configured value for each item, otherwise every item following
                # the first one would be rejected by the "must start with '/'" check
                old = conf_old
                # new_data can be a dictionary or a unicode url
                if isinstance(new_data, dict):
                    # new_data dict must contain either "url", "page" or "path" key
                    # (exclusive)
                    # if "path" is used, a file url is constructed with it
                    if len({"path", "url", "page"}.intersection(new_data)) != 1:
                        raise ValueError(
                            'You must have one and only one of "url", "page" or "path" '
                            'key in your url_redirections_dict data'
                        )
                    if "url" in new_data:
                        new = new_data["url"]
                    elif "page" in new_data:
                        new = new_data
                        new["type"] = "page"
                        new.setdefault("path_args", [])
                        if not isinstance(new["path_args"], list):
                            # the message is formatted AFTER translation, so the
                            # untranslated string can be found in the catalog
                            log.error(
                                _(
                                    '"path_args" in redirection of {old} must be a list. '
                                    'Ignoring the redirection'
                                ).format(old=old)
                            )
                            continue
                        new.setdefault("query_args", {})
                        if not isinstance(new["query_args"], dict):
                            log.error(
                                _(
                                    '"query_args" in redirection of {old} must be a '
                                    'dictionary. Ignoring the redirection'
                                ).format(old=old)
                            )
                            continue
                        new["path_args"] = [quote(a) for a in new["path_args"]]
                        # we keep an inversed dict of page redirection
                        # (page/path_args => redirecting URL)
                        # so get_url can return the redirecting URL if the same arguments
                        # are used # making the URL consistent
                        args_hash = tuple(new["path_args"])
                        self.pages_redirects.setdefault(new_data["page"], {}).setdefault(
                            args_hash,
                            old
                        )

                        # we need lists in query_args because it will be used
                        # as it in request.path_args
                        for k, v in new["query_args"].items():
                            if isinstance(v, str):
                                new["query_args"][k] = [v]
                    elif "path" in new_data:
                        new = "file:{}".format(urllib.parse.quote(new_data["path"]))
                elif isinstance(new_data, str):
                    new = new_data
                    new_data = {}
                else:
                    log.error(
                        _("ignoring invalid redirection value: {new_data}").format(
                            new_data=new_data
                        )
                    )
                    continue

                # some normalization
                if not old.strip():
                    # root URL special case
                    old = ""
                elif not old.startswith("/"):
                    log.error(
                        _("redirected url must start with '/', got {value}. Ignoring")
                        .format(value=old)
                    )
                    continue
                else:
                    old = self._normalize_url(old)

                if isinstance(new, dict):
                    # dict are handled differently, they contain data
                    # which are used dynamically when the request is done
                    self.redirections.setdefault(old, new)
                    if not old:
                        if new["type"] == "page":
                            log.info(
                                _("Root URL redirected to page {name}").format(
                                    name=new["page"]
                                )
                            )
                    else:
                        if new["type"] == "page":
                            page = self.get_page_by_name(new["page"])
                            url = page.get_url(*new.get("path_args", []))
                            self.inv_redirections[url] = old
                    continue

                # at this point we have a redirection URL in new, we can parse it
                new_url = urllib.parse.urlsplit(new)

                # we handle the known URL schemes
                if new_url.scheme == "xmpp":
                    location = self.get_page_path_from_uri(new)
                    if location is None:
                        log.warning(
                            _("ignoring redirection, no page found to handle this URI: "
                              "{uri}").format(uri=new))
                        continue
                    request_data = self._get_request_data(location)
                    self.inv_redirections[location] = old

                elif new_url.scheme in ("", "http", "https"):
                    # direct redirection
                    if new_url.netloc:
                        raise NotImplementedError(
                            "netloc ({netloc}) is not implemented yet for "
                            "url_redirections_dict, it is not possible to redirect to an "
                            "external website".format(netloc=new_url.netloc))
                    location = urllib.parse.urlunsplit(
                        ("", "", new_url.path, new_url.query, new_url.fragment)
                    )
                    request_data = self._get_request_data(location)
                    self.inv_redirections[location] = old

                elif new_url.scheme == "file":
                    # file or directory
                    if new_url.netloc:
                        raise NotImplementedError(
                            "netloc ({netloc}) is not implemented for url redirection to "
                            "file system, it is not possible to redirect to an external "
                            "host".format(
                                netloc=new_url.netloc))
                    path = urllib.parse.unquote(new_url.path)
                    if not os.path.isabs(path):
                        raise ValueError(
                            "file redirection must have an absolute path: e.g. "
                            "file:/path/to/my/file")
                    # for file redirection, we directly put child here
                    resource_class = (
                        ProtectedFile if new_data.get("protected", True) else static.File
                    )
                    res = resource_class(path, defaultType="application/octet-stream")
                    self.add_resource_to_path(old, res)
                    log.info("[{host_name}] Added redirection from /{old} to file system "
                             "path {path}".format(host_name=self.host_name,
                                                   old=old,
                                                   path=path))

                    # we don't want to use redirection system, so we continue here
                    continue

                elif new_url.scheme == "libervia-app":
                    # a Libervia application

                    app_name = urllib.parse.unquote(new_url.path).lower().strip()
                    extra = {"url_prefix": f"/{old}"}
                    try:
                        await self._start_app(app_name, extra)
                    except Exception as e:
                        # best effort: a failing application must not prevent the
                        # other redirections from being set
                        log.warning(_(
                            "Can't launch {app_name!r} for path /{old}: {e}").format(
                            app_name=app_name, old=old, e=e))
                        continue

                    log.info(
                        f"[{self.host_name}] Added redirection from /{old} to "
                        f"application {app_name}"
                    )
                    # normal redirection system is not used here
                    continue
                elif new_url.scheme == "proxy":
                    # a reverse proxy
                    host, port = new_url.hostname, new_url.port
                    if host is None or port is None:
                        # the second part must be a f-string, otherwise the
                        # placeholder is shown instead of the faulty URL
                        raise ValueError(
                            "invalid host or port in proxy redirection, please check your "
                            f"configuration: {new_url.geturl()}"
                        )
                    url_prefix = (new_url.path or old).rstrip('/')
                    res = proxy.SatReverseProxyResource(
                        host,
                        port,
                        url_prefix.encode(),
                    )
                    self.add_resource_to_path(old, res)
                    log.info(
                        f"[{self.host_name}] Added redirection from /{old} to reverse proxy "
                        f"{new_url.netloc} with URL prefix {url_prefix}/"
                    )

                    # normal redirection system is not used here
                    continue
                else:
                    raise NotImplementedError(
                        "{scheme}: scheme is not managed for url_redirections_dict".format(
                            scheme=new_url.scheme
                        )
                    )

                self.redirections.setdefault(old, request_data)
                if not old:
                    log.info(_("[{host_name}] Root URL redirected to {uri}")
                        .format(host_name=self.host_name,
                                uri=request_data[1]))

        # the default root URL, if not redirected
        if "" not in self.redirections:
            self.redirections[""] = self._get_request_data(C.LIBERVIA_PAGE_START)

    async def _set_menu(self, menus):
        """Build the main menu of this site

        Each menu item can be a ``[page_name, URL]`` list, a "libervia-app:" prefixed
        application name (the application is then started and a redirection to the
        "embed_app" page is declared in the host options), or the name of a page.
        The result is stored in ``self.main_menu`` as a list of (page_name, url).

        @param menus(dict): site name => list of menu items
        @raise ValueError: a menu item is empty or is a list of invalid size
        @raise exceptions.ConflictError: the front URL of an application is already
            redirected
        @raise exceptions.ConfigError: a page name is unknown
        """
        app_prefix = "libervia-app:"
        entries = []
        for item in menus.get(self.site_name, []):
            if not item:
                msg = _("menu item can't be empty")
                log.error(msg)
                raise ValueError(msg)

            if isinstance(item, list):
                if len(item) != 2:
                    msg = _(
                        "menu item as list must be in the form [page_name, absolue URL]"
                    )
                    log.error(msg)
                    raise ValueError(msg)
                label, target = item
            elif item.startswith(app_prefix):
                app_name = item[len(app_prefix):].strip().lower()
                app_data = await self._start_app(app_name)
                exposed = app_data["expose"]
                front_url = exposed['front_url']
                site_redirections = self.host.options[
                    "url_redirections_dict"
                ].setdefault(self.site_name, {})
                if front_url in site_redirections:
                    raise exceptions.ConflictError(
                        f"There is already a redirection from {front_url!r}, can't add "
                        f"{app_name!r}")

                # the front URL of the application is redirected to the embedding page
                site_redirections[front_url] = {
                    "page": 'embed_app',
                    "path_args": [app_name]
                }

                label = exposed.get('web_label', app_name).title()
                target = front_url

                log.debug(
                    f"Application {app_name} added to menu of {self.site_name}"
                )
            else:
                label = item
                try:
                    target = self.get_page_by_name(label).url
                except KeyError as e:
                    log_msg = _("Can'find a named page ({msg}), please check "
                                "menu_json in configuration.").format(msg=e.args[0])
                    log.error(log_msg)
                    raise exceptions.ConfigError(log_msg)

            entries.append((label, target))

        self.main_menu = entries

    def _normalize_url(self, url, lower=True):
        """Return URL normalized for self.redirections dict

        @param url(unicode): URL to normalize
        @param lower(bool): lower case of url if True
        @return (str): normalized URL
        """
        if lower:
            url = url.lower()
        return "/".join((p for p in url.split("/") if p))

    def _get_request_data(self, uri):
        """Return data needed to redirect request

        @param url(unicode): destination url
        @return (tuple(list[str], str, str, dict): tuple with
            splitted path as in Request.postpath
            uri as in Request.uri
            path as in Request.path
            args as in Request.args
        """
        uri = uri
        # XXX: we reuse code from twisted.web.http.py here
        #      as we need to have the same behaviour
        x = uri.split("?", 1)

        if len(x) == 1:
            path = uri
            args = {}
        else:
            path, argstring = x
            args = urllib.parse.parse_qs(argstring, True)

        # XXX: splitted path case must not be changed, as it may be significant
        #      (e.g. for blog items)
        return (
            self._normalize_url(path, lower=False).split("/"),
            uri,
            path,
            args,
        )

    def _redirect(self, request, request_data):
        """Rewrite a request to follow a redirection

        this is *NOT* a HTTP redirection, the request is modified in place as with
        URL rewriting
        @param request(web.http.request): original request
        @param request_data(tuple, dict): data returned by self._get_request_data, or
            dict with data of a page redirection
        @return (web_resource.Resource): resource to use
        @raise exceptions.InternalError: type of a dict request_data is unknown
        """
        # a request can only be redirected once
        if hasattr(request, "_redirected"):
            try:
                __, uri, __, __ = request_data
            except ValueError:
                uri = ""
            log.error(D_( "recursive redirection, please fix this URL:\n"
                          "{old} ==> {new}").format(
                          old=request.uri.decode("utf-8"), new=uri))
            return web_resource.NoResource()

        # the request is marked to detect recursive redirections
        request._redirected = True

        if not isinstance(request_data, dict):
            path_list, uri, path, args = request_data
            path_list = [p.encode('utf-8') for p in path_list]
            log.debug(
                "Redirecting URL {old} to {new}".format(
                    old=request.uri.decode('utf-8'), new=uri
                )
            )
            # the request is updated to reflect the new URL
            request.postpath = path_list[1:] + request.postpath
            request.args.update(args)
            # and the child is looked for again with this new URL
            return self.getChildWithDefault(path_list[0], request)

        if request_data["type"] != "page":
            raise exceptions.InternalError("unknown request_data type")

        try:
            page = self.get_page_by_name(request_data["page"])
        except KeyError:
            log.error(
                _(
                    'Can\'t find page named "{name}" requested in redirection'
                ).format(name=request_data["page"])
            )
            return web_resource.NoResource()

        # path arguments of the redirection are put before the remaining path
        encoded_path_args = [pa.encode('utf-8') for pa in request_data["path_args"]]
        request.postpath = encoded_path_args + request.postpath

        try:
            request.args.update(request_data["query_args"])
        except (TypeError, ValueError):
            log.error(
                _("Invalid args in redirection: {query_args}").format(
                    query_args=request_data["query_args"]
                )
            )
            return web_resource.NoResource()
        return page

    def get_page_by_name(self, name):
        """Retrieve page instance from its name

        @param name(unicode): name of the page
        @return (LiberviaPage): page instance
        @raise KeyError: the page doesn't exist
        """
        return self.named_pages[name]

    def get_page_path_from_uri(self, uri):
        """Find the URL of the page handling a xmpp: URI

        The callback registered for the (type, sub_type) of the URI is tried first,
        then the more generic one registered for (type, None).

        @param uri(unicode): URI with a xmpp: scheme
        @return (unicode,None): absolute path (starting from root "/") to page handling
            the URI.
            None is returned if no page has been registered for this URI
        """
        uri_data = common_uri.parse_xmpp_uri(uri)
        for generic in (False, True):
            try:
                callback_key = (
                    uri_data["type"],
                    None if generic else uri_data["sub_type"],
                )
                page, cb = self.uri_callbacks[callback_key]
            except KeyError:
                # no handler for this key
                continue
            url = cb(page, uri_data)
            if url is not None:
                return url
        return None

    def getChildWithDefault(self, name, request):
        """Get the child for a path segment, redirecting the root URL first"""
        # XXX: this method is overridden only for the root URL, which is the only
        #      one which needs to be handled before other children
        if name != b"" or request.postpath:
            return super().getChildWithDefault(name, request)
        return self._redirect(request, self.redirections[""])

    def getChild(self, name, request):
        """Get a child resource, using redirections when nothing else is found"""
        resource = super().getChild(name, request)
        if not isinstance(resource, web_resource.NoResource):
            return resource

        # XXX: redirections must only happen if everything else failed
        path_elt = request.prepath + request.postpath
        # the longest path is tested first, then it is shortened segment by segment
        for idx in reversed(range(len(path_elt) + 1)):
            candidate = b"/".join(path_elt[:idx]).decode('utf-8').lower()
            if candidate not in self.redirections:
                continue
            # segments which are not part of the redirected URL are kept as postpath
            request.postpath = path_elt[idx:]
            return self._redirect(request, self.redirections[candidate])

        return resource

    def putChild(self, path, resource):
        """Add a child to the root resource

        The resource is wrapped to be served with gzip encoding, unless it is already
        an EncodingResourceWrapper.

        @param path(bytes): name of the child
        @param resource: resource to add
        @raise ValueError: path is not specified in bytes
        """
        if not isinstance(path, bytes):
            raise ValueError("path must be specified in bytes")

        already_wrapped = isinstance(resource, web_resource.EncodingResourceWrapper)
        if not already_wrapped:
            # FIXME: check that no information is leaked (c.f. https://twistedmatrix.com/documents/current/web/howto/using-twistedweb.html#request-encoders)
            encoders = [server.GzipEncoderFactory()]
            resource = web_resource.EncodingResourceWrapper(resource, encoders)

        super().putChild(path, resource)

    def createSimilarFile(self, path):
        """Create a file resource for path, sharing the settings of this one

        @param path: path of the file to serve
        @return: instance of the base class of LiberviaRootResource
        """
        # XXX: this method needs to be overridden to avoid recreating a
        #      LiberviaRootResource
        base_cls = LiberviaRootResource.__base__
        similar = base_cls(path, self.defaultType, self.ignoredExts, self.registry)
        # refactoring by steps, here - constructor should almost certainly take these
        similar.processors = self.processors
        similar.indexNames = self.indexNames[:]
        similar.childNotFound = self.childNotFound
        return similar