Mercurial > libervia-web
view libervia/web/server/resources.py @ 1572:7006b55001a4
browser (jid): add a simple check for JID validation.
author | Goffi <goffi@goffi.org> |
---|---|
date | Wed, 22 Nov 2023 16:31:36 +0100 |
parents | eb00d593801d |
children | f3305832f3f6 |
line wrap: on
line source
#!/usr/bin/env python3

# Libervia Web
# Copyright (C) 2011-2021 Jérôme Poisson <goffi@goffi.org>

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import os.path
from pathlib import Path
import urllib.error
import urllib.parse
import urllib.request

from twisted.internet import defer
from twisted.web import server
from twisted.web import static
from twisted.web import resource as web_resource

from libervia.web.server.constants import Const as C
from libervia.web.server.utils import quote
from libervia.backend.core import exceptions
from libervia.backend.core.i18n import D_, _
from libervia.backend.core.log import getLogger
from libervia.backend.tools.common import uri as common_uri
from libervia.backend.tools.common import data_format
from libervia.backend.tools.common.utils import OrderedSet, recursive_update

from . import proxy

log = getLogger(__name__)


class ProtectedFile(static.File):
    """A static.File class which doesn't show directory listing"""

    def __init__(self, path, *args, **kwargs):
        """Create the file resource, defaulting to a binary content type

        @param path: path to serve, it is converted with ``str`` (so a ``Path`` can be
            used)
        @param args: remaining positional arguments of ``static.File``
        @param kwargs: keyword arguments of ``static.File``
        """
        if "defaultType" not in kwargs and not args:
            # ``path`` is already bound, so ``defaultType`` is the first item of
            # ``args`` when given positionally (Twisted does this in
            # File.createSimilarFile). We set the keyword only if it is missing
            # in kwargs and it is not in a positional argument, otherwise
            # static.File would get the argument twice.
            kwargs["defaultType"] = "application/octet-stream"
        super(ProtectedFile, self).__init__(str(path), *args, **kwargs)

    def directoryListing(self):
        """Return a NoResource instead of a listing of the directory"""
        return web_resource.NoResource()


class LiberviaRootResource(ProtectedFile):
    """Specialized resource for Libervia root

    handle redirections declared in libervia.conf
    """

    def __init__(self, host, host_name, site_name, site_path, *args, **kwargs):
        """Initialise the root resource of a site

        @param host: object giving access to configuration (``config_get``), to the
            template renderer (``renderer``) and to build paths (``get_build_path``)
        @param host_name: name of the host, a falsy value is used for the main site
        @param site_name: name of the site, used to retrieve themes and build paths
        @param site_path: path of the site, stored as a ``Path``
        @param args: positional arguments for ProtectedFile (served path comes first)
        @param kwargs: keyword arguments for ProtectedFile
        """
        ProtectedFile.__init__(self, *args, **kwargs)

        self.host = host
        self.host_name = host_name
        self.site_name = site_name
        self.site_path = Path(site_path)
        self.default_theme = self.config_get('theme')
        if self.default_theme is None:
            if not host_name:
                # FIXME: we use bulma theme by default for main site for now
                #   as the development is focusing on this one, and default theme may
                #   be broken
                self.default_theme = 'bulma'
            else:
                self.default_theme = C.TEMPLATE_THEME_DEFAULT
        self.site_themes = set()
        self.named_pages = {}
        self.browser_modules = {}
        # template dynamic data used in all pages
        self.dyn_data_common = {"scripts": OrderedSet()}
        for theme, data in host.renderer.get_themes_data(site_name).items():
            # we check themes for browser metadata, and merge them here if found
            self.site_themes.add(theme)
            browser_meta = data.get('browser_meta')
            if browser_meta is not None:
                log.debug(f"merging browser metadata from theme {theme}: {browser_meta}")
                recursive_update(self.browser_modules, browser_meta)
            browser_path = data.get('browser_path')
            if browser_path is not None:
                self.browser_modules.setdefault('themes_browser_paths', set()).add(
                    browser_path)
                try:
                    # we only need to know if there is at least one Python file
                    next(browser_path.glob("*.py"))
                except StopIteration:
                    pass
                else:
                    log.debug(f"found brython script(s) for theme {theme}")
                    self.browser_modules.setdefault('brython', []).append(
                        {
                            "path": browser_path,
                            "url_hash": None,
                            "url_prefix": f"__t_{theme}"
                        }
                    )

        self.uri_callbacks = {}
        self.pages_redirects = {}
        self.cached_urls = {}
        self.main_menu = None
        # map Libervia application names => data
        self.libervia_apps = {}
        self.build_path = host.get_build_path(site_name)
        self.build_path.mkdir(parents=True, exist_ok=True)
        self.dev_build_path = host.get_build_path(site_name, dev=True)
        self.dev_build_path.mkdir(parents=True, exist_ok=True)
        self.putChild(
            C.BUILD_DIR.encode(),
            ProtectedFile(
                self.build_path,
                defaultType="application/octet-stream"),
        )

    def __str__(self):
        return (
            f"Root resource for {self.host_name or 'default host'} using "
            f"{self.site_name or 'default site'} at {self.site_path} and deserving "
            f"files at {self.path}"
        )

    def config_get(self, key, default=None, value_type=None):
        """Retrieve configuration for this site

        params are the same as for [Libervia.config_get]
        """
        return self.host.config_get(self, key, default, value_type)

    def get_front_url(self, theme):
        """Build the path of the templates directory of a theme for this site

        @param theme: name of the theme
        @return (Path): absolute path made of the templates resource, the site name (or
            the default site name) and the theme
        """
        return Path(
            '/',
            C.TPL_RESOURCE,
            self.site_name or C.SITE_NAME_DEFAULT,
            C.TEMPLATE_TPL_DIR,
            theme)

    def add_resource_to_path(self, path: str, resource: web_resource.Resource) -> None:
        """Add a resource to the given path

        A "NoResource" will be used for all intermediate segments

        @param path: "/" separated path where the resource must be available, a leading
            "/" is ignored
        @param resource: resource to put at the last segment of the path
        """
        segments, __, last_segment = path.rpartition("/")
        current = self
        for segment in segments.split("/"):
            if not segment:
                # empty segment (no intermediate segment at all, or leading "/")
                continue
            # we must not rebind ``resource`` here, it is the one to put at the end
            intermediate = web_resource.NoResource()
            # children names are bytes (root's putChild rejects anything else)
            current.putChild(segment.encode('utf-8'), intermediate)
            current = intermediate

        current.putChild(
            last_segment.encode('utf-8'),
            resource
        )

    async def _start_app(self, app_name, extra=None) -> dict:
        """Start a Libervia App

        @param app_name: canonical application name
        @param extra: extra parameter to configure app
        @return: app data
            app data will not include computed exposed data, at this needs to wait for the
            app to be started
        """
        if extra is None:
            extra = {}
        log.info(_(
            "starting application {app_name}").format(app_name=app_name))
        app_data = data_format.deserialise(
            await self.host.bridge_call(
                "application_start", app_name, data_format.serialise(extra)
            )
        )
        if app_data.get("started", False):
            log.debug(f"application {app_name!r} is already started or starting")
            # we do not await on purpose, the workflow should not be blocking at this
            # point
            defer.ensureDeferred(self._on_app_started(app_name, app_data["instance"]))
        else:
            # the callback is registered for this instance in host.apps_cb
            self.host.apps_cb[app_data["instance"]] = self._on_app_started
        return app_data

    async def _on_app_started(
        self,
        app_name: str,
        instance_id: str
    ) -> None:
        """Add a reverse proxy resource for an application

        Exposed data are retrieved with the "application_exposed_get" bridge method and
        stored in ``self.libervia_apps``. The proxy targets ``localhost`` on the port
        found in the "web" entry of the exposed "ports", and it is added at the exposed
        "url_prefix".

        @param app_name: canonical application name
        @param instance_id: ID of the application instance, only used in logs here
        @raise exceptions.DataError: web port or URL prefix is missing or invalid
        """
        exposed_data = self.libervia_apps[app_name] = data_format.deserialise(
            await self.host.bridge_call("application_exposed_get", app_name, "", "")
        )

        try:
            # the port is the part following the first ":"
            web_port = int(exposed_data['ports']['web'].split(':')[1])
        except (KeyError, ValueError, IndexError):
            # IndexError happens when there is no ":" at all in the value
            log.warning(_(
                "no web port found for application {app_name!r}, can't use it "
                ).format(app_name=app_name))
            raise exceptions.DataError("no web port found")

        try:
            url_prefix = exposed_data['url_prefix'].strip().rstrip('/')
        except (KeyError, AttributeError) as e:
            log.warning(_(
                "no URL prefix specified for this application, we can't embed it: {msg}")
                .format(msg=e))
            raise exceptions.DataError("no URL prefix")

        if not url_prefix.startswith('/'):
            raise exceptions.DataError(
                f"invalid URL prefix, it must start with '/': {url_prefix!r}")

        res = proxy.SatReverseProxyResource(
            "localhost",
            web_port,
            url_prefix.encode()
        )
        self.add_resource_to_path(url_prefix, res)
        log.info(
            f"Resource for app {app_name!r} (instance {instance_id!r}) has been added"
        )

    async def _init_redirections(self, options):
        """Parse the redirections of this site and register them

        ``self.redirections`` (normalized old URL => redirection data) and
        ``self.inv_redirections`` (new URL => normalized old URL) are reset then filled.
        A redirection value can be a string URL or a dict with exactly one of the "url",
        "page" or "path" keys. Handled URL schemes are "xmpp", "" / "http" / "https"
        (without netloc), "file", "libervia-app" and "proxy". The three last ones add a
        resource (or start an application) directly instead of using
        ``self.redirections``.

        If the root URL is not redirected, it is redirected to C.LIBERVIA_PAGE_START.

        @param options(dict): options with the "url_redirections_dict" key, mapping
            site names to their redirections
        @raise ValueError: invalid redirection dict, relative file path, or invalid
            host/port in a proxy redirection
        @raise NotImplementedError: redirection to an external host, or unknown scheme
        """
        url_redirections = options["url_redirections_dict"]

        url_redirections = url_redirections.get(self.site_name, {})

        ## redirections
        self.redirections = {}
        self.inv_redirections = {}  # new URL to old URL map

        for old, new_data_list in url_redirections.items():
            # several redirections can be used for one path by using a list.
            # The redirection will be done using first item of the list, and all items
            # will be used for inverse redirection.
            # e.g. if a => [b, c], a will redirect to b, and b and c will both be
            # equivalent to a
            if not isinstance(new_data_list, list):
                new_data_list = [new_data_list]
            for new_data in new_data_list:
                # new_data can be a dictionary or a unicode url
                if isinstance(new_data, dict):
                    # new_data dict must contain either "url", "page" or "path" key
                    # (exclusive)
                    # if "path" is used, a file url is constructed with it
                    if len({"path", "url", "page"}.intersection(new_data)) != 1:
                        raise ValueError(
                            'You must have one and only one of "url", "page" or "path" '
                            'key in your url_redirections_dict data'
                        )
                    if "url" in new_data:
                        new = new_data["url"]
                    elif "page" in new_data:
                        new = new_data
                        new["type"] = "page"
                        new.setdefault("path_args", [])
                        if not isinstance(new["path_args"], list):
                            # the message is translated before being formatted
                            log.error(
                                _('"path_args" in redirection of {old} must be a list. '
                                  'Ignoring the redirection').format(old=old))
                            continue
                        new.setdefault("query_args", {})
                        if not isinstance(new["query_args"], dict):
                            log.error(
                                _(
                                    '"query_args" in redirection of {old} must be a '
                                    'dictionary. Ignoring the redirection'
                                ).format(old=old)
                            )
                            continue
                        new["path_args"] = [quote(a) for a in new["path_args"]]
                        # we keep an inversed dict of page redirection
                        # (page/path_args => redirecting URL)
                        # so get_url can return the redirecting URL if the same arguments
                        # are used
                        # making the URL consistent
                        args_hash = tuple(new["path_args"])
                        self.pages_redirects.setdefault(new_data["page"], {}).setdefault(
                            args_hash, old
                        )

                        # we need lists in query_args because it will be used
                        # as it in request.path_args
                        for k, v in new["query_args"].items():
                            if isinstance(v, str):
                                new["query_args"][k] = [v]
                    elif "path" in new_data:
                        new = "file:{}".format(urllib.parse.quote(new_data["path"]))
                elif isinstance(new_data, str):
                    new = new_data
                    new_data = {}
                else:
                    log.error(
                        _("ignoring invalid redirection value: {new_data}").format(
                            new_data=new_data
                        )
                    )
                    continue

                # some normalization
                # the normalized URL is kept in its own variable: "old" must stay
                # untouched, as it is checked again for next items of new_data_list
                if not old.strip():
                    # root URL special case
                    old_url = ""
                elif not old.startswith("/"):
                    log.error(
                        _("redirected url must start with '/', got {value}. Ignoring")
                        .format(value=old)
                    )
                    continue
                else:
                    old_url = self._normalize_url(old)

                if isinstance(new, dict):
                    # dict are handled differently, they contain data
                    # which are used dynamically when the request is done
                    self.redirections.setdefault(old_url, new)
                    if not old_url:
                        if new["type"] == "page":
                            log.info(
                                _("Root URL redirected to page {name}").format(
                                    name=new["page"]
                                )
                            )
                    else:
                        if new["type"] == "page":
                            page = self.get_page_by_name(new["page"])
                            url = page.get_url(*new.get("path_args", []))
                            self.inv_redirections[url] = old_url
                    continue

                # at this point we have a redirection URL in new, we can parse it
                new_url = urllib.parse.urlsplit(new)

                # we handle the known URL schemes
                if new_url.scheme == "xmpp":
                    location = self.get_page_path_from_uri(new)
                    if location is None:
                        log.warning(
                            _("ignoring redirection, no page found to handle this URI: "
                              "{uri}").format(uri=new))
                        continue
                    request_data = self._get_request_data(location)
                    self.inv_redirections[location] = old_url

                elif new_url.scheme in ("", "http", "https"):
                    # direct redirection
                    if new_url.netloc:
                        raise NotImplementedError(
                            "netloc ({netloc}) is not implemented yet for "
                            "url_redirections_dict, it is not possible to redirect to an "
                            "external website".format(netloc=new_url.netloc))
                    location = urllib.parse.urlunsplit(
                        ("", "", new_url.path, new_url.query, new_url.fragment)
                    )
                    request_data = self._get_request_data(location)
                    self.inv_redirections[location] = old_url

                elif new_url.scheme == "file":
                    # file or directory
                    if new_url.netloc:
                        raise NotImplementedError(
                            "netloc ({netloc}) is not implemented for url redirection to "
                            "file system, it is not possible to redirect to an external "
                            "host".format(
                                netloc=new_url.netloc))
                    path = urllib.parse.unquote(new_url.path)
                    if not os.path.isabs(path):
                        raise ValueError(
                            "file redirection must have an absolute path: e.g. "
                            "file:/path/to/my/file")
                    # for file redirection, we directly put child here
                    resource_class = (
                        ProtectedFile if new_data.get("protected", True) else static.File
                    )
                    res = resource_class(path, defaultType="application/octet-stream")
                    self.add_resource_to_path(old_url, res)
                    log.info("[{host_name}] Added redirection from /{old} to file system "
                             "path {path}".format(host_name=self.host_name,
                                                   old=old_url,
                                                   path=path))

                    # we don't want to use redirection system, so we continue here
                    continue

                elif new_url.scheme == "libervia-app":
                    # a Libervia application

                    app_name = urllib.parse.unquote(new_url.path).lower().strip()
                    extra = {"url_prefix": f"/{old_url}"}
                    try:
                        await self._start_app(app_name, extra)
                    except Exception as e:
                        log.warning(_(
                            "Can't launch {app_name!r} for path /{old}: {e}").format(
                            app_name=app_name, old=old_url, e=e))
                        continue

                    log.info(
                        f"[{self.host_name}] Added redirection from /{old_url} to "
                        f"application {app_name}"
                    )
                    # normal redirection system is not used here
                    continue
                elif new_url.scheme == "proxy":
                    # a reverse proxy
                    host, port = new_url.hostname, new_url.port
                    if host is None or port is None:
                        raise ValueError(
                            "invalid host or port in proxy redirection, please check your "
                            f"configuration: {new_url.geturl()}"
                        )
                    url_prefix = (new_url.path or old_url).rstrip('/')
                    res = proxy.SatReverseProxyResource(
                        host,
                        port,
                        url_prefix.encode(),
                    )
                    self.add_resource_to_path(old_url, res)
                    log.info(
                        f"[{self.host_name}] Added redirection from /{old_url} to reverse "
                        f"proxy {new_url.netloc} with URL prefix {url_prefix}/"
                    )

                    # normal redirection system is not used here
                    continue
                else:
                    raise NotImplementedError(
                        "{scheme}: scheme is not managed for url_redirections_dict".format(
                            scheme=new_url.scheme
                        )
                    )

                self.redirections.setdefault(old_url, request_data)
                if not old_url:
                    log.info(_("[{host_name}] Root URL redirected to {uri}")
                        .format(host_name=self.host_name,
                                uri=request_data[1]))

        # the default root URL, if not redirected
        if "" not in self.redirections:
            self.redirections[""] = self._get_request_data(C.LIBERVIA_PAGE_START)

    async def _set_menu(self, menus):
        """Build the main menu of this site

        Each menu item can be a ``[page_name, URL]`` list, a "libervia-app:<app name>"
        string (the application is then started and a redirection from its front URL to
        the "embed_app" page is added to the "url_redirections_dict" host option), or
        the name of a named page.

        The result is stored in ``self.main_menu`` as a list of (page_name, url) tuples.

        @param menus(dict): map of site name to list of menu items
        @raise ValueError: a menu item is empty, or is a list without exactly 2 items
        @raise exceptions.ConflictError: a redirection already exists for the front URL
            of an application
        @raise exceptions.ConfigError: a named page can't be found
        """
        menus = menus.get(self.site_name, [])
        main_menu = []
        for menu in menus:
            if not menu:
                msg = _("menu item can't be empty")
                log.error(msg)
                raise ValueError(msg)
            elif isinstance(menu, list):
                if len(menu) != 2:
                    msg = _(
                        "menu item as list must be in the form [page_name, absolue URL]"
                    )
                    log.error(msg)
                    raise ValueError(msg)
                page_name, url = menu
            elif menu.startswith("libervia-app:"):
                # 13 is the length of the "libervia-app:" prefix
                app_name = menu[13:].strip().lower()
                app_data = await self._start_app(app_name)
                exposed_data = app_data["expose"]
                front_url = exposed_data['front_url']
                options = self.host.options
                url_redirections = options["url_redirections_dict"].setdefault(
                    self.site_name, {}
                )
                if front_url in url_redirections:
                    raise exceptions.ConflictError(
                        f"There is already a redirection from {front_url!r}, can't add "
                        f"{app_name!r}")

                url_redirections[front_url] = {
                    "page": 'embed_app',
                    "path_args": [app_name]
                }

                page_name = exposed_data.get('web_label', app_name).title()
                url = front_url

                log.debug(
                    f"Application {app_name} added to menu of {self.site_name}"
                )
            else:
                page_name = menu
                try:
                    url = self.get_page_by_name(page_name).url
                except KeyError as e:
                    log_msg = _("Can'find a named page ({msg}), please check "
                                "menu_json in configuration.").format(msg=e.args[0])
                    log.error(log_msg)
                    raise exceptions.ConfigError(log_msg)
            main_menu.append((page_name, url))
        self.main_menu = main_menu

    def _normalize_url(self, url, lower=True):
        """Return URL normalized for self.redirections dict

        Empty path segments are removed, so the result has neither leading nor trailing
        "/".

        @param url(unicode): URL to normalize
        @param lower(bool): lower case of url if True
        @return (str): normalized URL
        """
        if lower:
            url = url.lower()
        return "/".join((p for p in url.split("/") if p))

    def _get_request_data(self, uri):
        """Return data needed to redirect request

        @param uri(unicode): destination uri
        @return (tuple(list[str], str, str, dict): tuple with
            splitted path as in Request.postpath
            uri as in Request.uri
            path as in Request.path
            args as in Request.args
        """
        # XXX: we reuse code from twisted.web.http.py here
        #      as we need to have the same behaviour
        x = uri.split("?", 1)

        if len(x) == 1:
            path = uri
            args = {}
        else:
            path, argstring = x
            args = urllib.parse.parse_qs(argstring, True)

        # XXX: splitted path case must not be changed, as it may be significant
        #      (e.g. for blog items)
        return (
            self._normalize_url(path, lower=False).split("/"),
            uri,
            path,
            args,
        )

    def _redirect(self, request, request_data):
        """Redirect an URL by rewriting request

        this is *NOT* a HTTP redirection, but equivalent to URL rewriting
        @param request(web.http.request): original request
        @param request_data(tuple, dict): data returned by self._get_request_data, or
            redirection dict (only the "page" type is handled)
        @return (web_resource.Resource): resource to use
        @raise exceptions.InternalError: request_data is a dict of unknown type
        """
        # recursion check
        try:
            request._redirected
        except AttributeError:
            pass
        else:
            try:
                __, uri, __, __ = request_data
            except ValueError:
                uri = ""
            # request.uri is only decoded for the log, invalid bytes must not make
            # the request fail
            log.error(D_(
                "recursive redirection, please fix this URL:\n"
                "{old} ==> {new}").format(
                    old=request.uri.decode("utf-8", errors="replace"),
                    new=uri))
            return web_resource.NoResource()

        request._redirected = True  # here to avoid recursive redirections

        if isinstance(request_data, dict):
            if request_data["type"] == "page":
                try:
                    page = self.get_page_by_name(request_data["page"])
                except KeyError:
                    log.error(
                        _(
                            'Can\'t find page named "{name}" requested in redirection'
                        ).format(name=request_data["page"])
                    )
                    return web_resource.NoResource()
                path_args = [pa.encode('utf-8') for pa in request_data["path_args"]]
                request.postpath = path_args + request.postpath

                try:
                    request.args.update(request_data["query_args"])
                except (TypeError, ValueError):
                    log.error(
                        _("Invalid args in redirection: {query_args}").format(
                            query_args=request_data["query_args"]
                        )
                    )
                    return web_resource.NoResource()
                return page
            else:
                raise exceptions.InternalError("unknown request_data type")
        else:
            path_list, uri, path, args = request_data
            path_list = [p.encode('utf-8') for p in path_list]
            log.debug(
                "Redirecting URL {old} to {new}".format(
                    old=request.uri.decode('utf-8', errors="replace"), new=uri
                )
            )
            # we change the request to reflect the new url
            request.postpath = path_list[1:] + request.postpath
            request.args.update(args)

            # we start again to look for a child with the new url
            return self.getChildWithDefault(path_list[0], request)

    def get_page_by_name(self, name):
        """Retrieve page instance from its name

        @param name(unicode): name of the page
        @return (LiberviaPage): page instance
        @raise KeyError: the page doesn't exist
        """
        return self.named_pages[name]

    def get_page_path_from_uri(self, uri):
        """Retrieve page URL from xmpp: URI

        @param uri(unicode): URI with a xmpp: scheme
        @return (unicode,None): absolute path (starting from root "/") to page handling
            the URI.
            None is returned if no page has been registered for this URI
        """
        uri_data = common_uri.parse_xmpp_uri(uri)
        try:
            page, cb = self.uri_callbacks[uri_data["type"], uri_data["sub_type"]]
        except KeyError:
            url = None
        else:
            url = cb(page, uri_data)
        if url is None:
            # no handler found
            # we try to find a more generic one
            try:
                page, cb = self.uri_callbacks[uri_data["type"], None]
            except KeyError:
                pass
            else:
                url = cb(page, uri_data)
        return url

    def getChildWithDefault(self, name, request):
        """Retrieve a child, handling the root URL with redirections first"""
        # XXX: this method is overridden only for root url
        #      which is the only ones who need to be handled before other children
        if name == b"" and not request.postpath:
            return self._redirect(request, self.redirections[""])
        return super(LiberviaRootResource, self).getChildWithDefault(name, request)

    def getChild(self, name, request):
        """Retrieve a child, falling back to redirections if nothing is found

        The requested path is tested against ``self.redirections``, from the longest
        prefix to the shortest one, and the first match is used.
        """
        resource = super(LiberviaRootResource, self).getChild(name, request)

        if isinstance(resource, web_resource.NoResource):
            # if nothing was found, we try our luck with redirections
            # XXX: we want redirections to happen only if everything else failed
            path_elt = request.prepath + request.postpath
            for idx in range(len(path_elt), -1, -1):
                try:
                    test_url = b"/".join(path_elt[:idx]).decode('utf-8').lower()
                except UnicodeDecodeError:
                    # the path comes from the client and may be invalid UTF-8, in
                    # which case it can't match any redirection
                    continue
                if test_url in self.redirections:
                    request_data = self.redirections[test_url]
                    request.postpath = path_elt[idx:]
                    return self._redirect(request, request_data)

        return resource

    def putChild(self, path, resource):
        """Add a child to the root resource

        The resource is wrapped to be gzip encoded, unless it is already an
        EncodingResourceWrapper.

        @param path(bytes): name of the child
        @param resource(web_resource.Resource): resource to add
        @raise ValueError: path is not bytes
        """
        if not isinstance(path, bytes):
            raise ValueError("path must be specified in bytes")
        if not isinstance(resource, web_resource.EncodingResourceWrapper):
            # FIXME: check that no information is leaked (c.f. https://twistedmatrix.com/documents/current/web/howto/using-twistedweb.html#request-encoders)
            resource = web_resource.EncodingResourceWrapper(
                resource, [server.GzipEncoderFactory()])

        super(LiberviaRootResource, self).putChild(path, resource)

    def createSimilarFile(self, path):
        """Create a file resource for path, sharing settings of this resource

        @param path: path of the file to serve
        @return: instance of the direct base class of LiberviaRootResource
        """
        # XXX: this method need to be overridden to avoid recreating a
        #      LiberviaRootResource

        f = LiberviaRootResource.__base__(
            path, self.defaultType, self.ignoredExts, self.registry
        )
        # refactoring by steps, here - constructor should almost certainly take these
        f.processors = self.processors
        f.indexNames = self.indexNames[:]
        f.childNotFound = self.childNotFound
        return f