comparison libervia/server/resources.py @ 1512:65e063657597

server: move resources to a dedicated module
author Goffi <goffi@goffi.org>
date Mon, 22 May 2023 11:57:49 +0200
parents
children
comparison
equal deleted inserted replaced
1511:f68839a4caba 1512:65e063657597
1 #!/usr/bin/env python3
2
3 # Libervia Web
4 # Copyright (C) 2011-2021 Jérôme Poisson <goffi@goffi.org>
5
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Affero General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Affero General Public License for more details.
15
16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19
20 import os.path
21 from pathlib import Path
22 import urllib.error
23 import urllib.parse
24 import urllib.request
25
26 from twisted.internet import defer
27 from twisted.web import server
28 from twisted.web import static
29 from twisted.web import resource as web_resource
30
31 from libervia.server.constants import Const as C
32 from libervia.server.utils import quote
33 from sat.core import exceptions
34 from sat.core.i18n import D_, _
35 from sat.core.log import getLogger
36 from sat.tools.common import uri as common_uri
37 from sat.tools.common import data_format
38 from sat.tools.common.utils import OrderedSet, recursive_update
39
40 from . import proxy
41
42 log = getLogger(__name__)
43
44
class ProtectedFile(static.File):
    """A static.File class which doesn't show directory listing"""

    def __init__(self, path, *args, **kwargs):
        """Create the resource, defaulting the MIME type to a binary one.

        @param path: filesystem path to serve, converted to ``str``
        @param args: extra positional arguments forwarded to static.File
        @param kwargs: keyword arguments forwarded to static.File
        """
        # defaultType is the first argument following ``path`` in static.File,
        # and Twisted passes it positionally from File.createSimilarFile.
        # The default is therefore injected only when no extra positional
        # argument is given: injecting it alongside a positional value would
        # make static.File receive "defaultType" twice and raise a TypeError.
        if not args:
            kwargs.setdefault("defaultType", "application/octet-stream")
        super().__init__(str(path), *args, **kwargs)

    def directoryListing(self):
        """Return a "not found" resource instead of a directory listing."""
        return web_resource.NoResource()

    def getChild(self, path, request):
        """Plain pass-through to the parent implementation."""
        return super().getChild(path, request)

    def getChildWithDefault(self, path, request):
        """Plain pass-through to the parent implementation."""
        return super().getChildWithDefault(path, request)

    def getChildForRequest(self, request):
        """Plain pass-through to the parent implementation.

        NOTE(review): twisted.web.resource exposes getChildForRequest as a
        module-level function; confirm the parent class really defines this
        method before relying on it.
        """
        return super().getChildForRequest(request)
68
69
70 class LiberviaRootResource(ProtectedFile):
71 """Specialized resource for Libervia root
72
73 handle redirections declared in sat.conf
74 """
75
def __init__(self, host, host_name, site_name, site_path, *args, **kwargs):
    """Initialise the root resource of a site.

    @param host: server object giving access to configuration, renderer and
        build paths
    @param host_name: name of the host served, falsy for the main site
    @param site_name: name of the site, used to retrieve themes data and
        build paths
    @param site_path: path of the site, stored as a Path
    @param args: positional arguments forwarded to ProtectedFile
    @param kwargs: keyword arguments forwarded to ProtectedFile
    """
    ProtectedFile.__init__(self, *args, **kwargs)
    self.host = host
    self.host_name = host_name
    self.site_name = site_name
    self.site_path = Path(site_path)

    configured_theme = self.config_get('theme')
    if configured_theme is not None:
        self.default_theme = configured_theme
    elif host_name:
        self.default_theme = C.TEMPLATE_THEME_DEFAULT
    else:
        # FIXME: we use bulma theme by default for main site for now
        #   as the development is focusing on this one, and default theme may
        #   be broken
        self.default_theme = 'bulma'

    self.site_themes = set()
    self.named_pages = {}
    self.browser_modules = {}
    # template dynamic data used in all pages
    self.dyn_data_common = {"scripts": OrderedSet()}

    themes_data = host.renderer.get_themes_data(site_name)
    for theme, data in themes_data.items():
        # themes are inspected for browser metadata, which is merged when found
        self.site_themes.add(theme)
        browser_meta = data.get('browser_meta')
        if browser_meta is not None:
            log.debug(f"merging browser metadata from theme {theme}: {browser_meta}")
            recursive_update(self.browser_modules, browser_meta)

        browser_path = data.get('browser_path')
        if browser_path is None:
            continue
        known_paths = self.browser_modules.setdefault('themes_browser_paths', set())
        known_paths.add(browser_path)
        # only themes shipping at least one Python file get a brython entry
        if any(True for __ in browser_path.glob("*.py")):
            log.debug(f"found brython script(s) for theme {theme}")
            brython_entry = {
                "path": browser_path,
                "url_hash": None,
                "url_prefix": f"__t_{theme}"
            }
            self.browser_modules.setdefault('brython', []).append(brython_entry)

    self.uri_callbacks = {}
    self.pages_redirects = {}
    self.cached_urls = {}
    self.main_menu = None
    # map Libervia application names => data
    self.libervia_apps = {}

    self.build_path = host.get_build_path(site_name)
    self.build_path.mkdir(parents=True, exist_ok=True)
    self.dev_build_path = host.get_build_path(site_name, dev=True)
    self.dev_build_path.mkdir(parents=True, exist_ok=True)

    # the production build directory is served without directory listing
    build_resource = ProtectedFile(
        self.build_path,
        defaultType="application/octet-stream")
    self.putChild(C.BUILD_DIR.encode(), build_resource)
137
138 def __str__(self):
139 return (
140 f"Root resource for {self.host_name or 'default host'} using "
141 f"{self.site_name or 'default site'} at {self.site_path} and deserving "
142 f"files at {self.path}"
143 )
144
def config_get(self, key, default=None, value_type=None):
    """Retrieve a configuration value for this site.

    The lookup is delegated to the host, with this resource given as first
    argument; parameters are the same as for [Libervia.config_get].

    @param key: name of the option to retrieve
    @param default: value forwarded as default
    @param value_type: value type forwarded to the host
    @return: whatever the host returns for this option
    """
    host = self.host
    return host.config_get(self, key, default, value_type)
151
def get_front_url(self, theme):
    """Build the absolute path under which the files of a theme are exposed.

    @param theme: name of the theme
    @return (Path): path made of the templates resource name, the site name
        (or the default site name when this site has none), the templates
        directory name and the theme
    """
    site_name = self.site_name or C.SITE_NAME_DEFAULT
    return Path('/', C.TPL_RESOURCE, site_name, C.TEMPLATE_TPL_DIR, theme)
159
def add_resource_to_path(self, path: str, resource: web_resource.Resource) -> None:
    """Add a resource to the given path

    A "NoResource" will be used for all intermediate segments

    @param path: "/"-separated URL path where the resource must be attached;
        empty intermediate segments (e.g. coming from a leading "/") are
        skipped
    @param resource: resource to attach at the last segment of the path
    """
    segments, __, last_segment = path.rpartition("/")
    current = self
    for segment in segments.split("/"):
        if not segment:
            # nothing before the last segment, or a leading/double "/"
            continue
        # a dedicated name is used so the ``resource`` argument is not
        # replaced by the placeholder before being attached below
        intermediate = web_resource.NoResource()
        # children names must be bytes (putChild of the root enforces it)
        current.putChild(segment.encode('utf-8'), intermediate)
        current = intermediate

    current.putChild(
        last_segment.encode('utf-8'),
        resource
    )
177
async def _start_app(self, app_name, extra=None) -> dict:
    """Ask the backend to start a Libervia App

    @param app_name: canonical application name
    @param extra: extra parameters to configure the app, an empty dict is used
        when None
    @return: app data
        app data will not include computed exposed data, as this needs to wait
        for the app to be started
    """
    start_options = {} if extra is None else extra
    log.info(_(
        "starting application {app_name}").format(app_name=app_name))
    serialised_app_data = await self.host.bridge_call(
        "application_start", app_name, data_format.serialise(start_options)
    )
    app_data = data_format.deserialise(serialised_app_data)

    if not app_data.get("started", False):
        # the callback is registered for this instance on the host
        self.host.apps_cb[app_data["instance"]] = self._on_app_started
        return app_data

    log.debug(f"application {app_name!r} is already started or starting")
    # we do not await on purpose, the workflow should not be blocking at this
    # point
    defer.ensureDeferred(self._on_app_started(app_name, app_data["instance"]))
    return app_data
204
async def _on_app_started(
    self,
    app_name: str,
    instance_id: str
) -> None:
    """Attach a reverse proxy resource for an application.

    Exposed data are requested from the backend and stored in
    ``self.libervia_apps``, then a reverse proxy to the web port of the
    application on localhost is added at the URL prefix it declares.

    @param app_name: canonical application name
    @param instance_id: identifier of the application instance, only used for
        logging
    @raise exceptions.DataError: the web port or the URL prefix is missing or
        invalid
    """
    exposed_data = self.libervia_apps[app_name] = data_format.deserialise(
        await self.host.bridge_call("application_exposed_get", app_name, "", "")
    )

    try:
        web_port = int(exposed_data['ports']['web'].split(':')[1])
    except (KeyError, ValueError, IndexError):
        # KeyError: no port exposed, ValueError: port is not a number,
        # IndexError: the exposed value has no ":" separator
        log.warning(_(
            "no web port found for application {app_name!r}, can't use it "
            ).format(app_name=app_name))
        raise exceptions.DataError("no web port found")

    try:
        url_prefix = exposed_data['url_prefix'].strip().rstrip('/')
    except (KeyError, AttributeError) as e:
        log.warning(_(
            "no URL prefix specified for this application, we can't embed it: {msg}")
            .format(msg=e))
        raise exceptions.DataError("no URL prefix")

    if not url_prefix.startswith('/'):
        raise exceptions.DataError(
            f"invalid URL prefix, it must start with '/': {url_prefix!r}")

    res = proxy.SatReverseProxyResource(
        "localhost",
        web_port,
        url_prefix.encode()
    )
    self.add_resource_to_path(url_prefix, res)
    log.info(
        f"Resource for app {app_name!r} (instance {instance_id!r}) has been added"
    )
243
async def _init_redirections(self, options):
    """Build the redirections of this site from the configuration.

    ``self.redirections`` (redirected URL => redirection data) and
    ``self.inv_redirections`` (new URL => redirected URL) are (re)created.
    File, application and reverse proxy redirections are attached directly as
    child resources and do not use the redirection maps.

    @param options: options holding the "url_redirections_dict" mapping,
        keyed by site name
    @raise ValueError: a redirection dict doesn't contain exactly one of "url",
        "page" or "path", a file path is not absolute, or a proxy redirection
        has no host or port
    @raise NotImplementedError: the URL scheme is not managed, or a netloc is
        used where it is not supported
    """
    url_redirections = options["url_redirections_dict"]

    url_redirections = url_redirections.get(self.site_name, {})

    ## redirections
    self.redirections = {}
    self.inv_redirections = {}  # new URL to old URL map

    for old, new_data_list in url_redirections.items():
        # several redirections can be used for one path by using a list.
        # The redirection will be done using first item of the list, and all items
        # will be used for inverse redirection.
        # e.g. if a => [b, c], a will redirect to b, and b and c will both be
        # equivalent to a
        if not isinstance(new_data_list, list):
            new_data_list = [new_data_list]
        for new_data in new_data_list:
            # new_data can be a dictionary or a unicode url
            if isinstance(new_data, dict):
                # new_data dict must contain either "url", "page" or "path" key
                # (exclusive)
                # if "path" is used, a file url is constructed with it
                if len({"path", "url", "page"}.intersection(new_data)) != 1:
                    raise ValueError(
                        'You must have one and only one of "url", "page" or "path" '
                        'key in your url_redirections_dict data'
                    )
                if "url" in new_data:
                    new = new_data["url"]
                elif "page" in new_data:
                    new = new_data
                    new["type"] = "page"
                    new.setdefault("path_args", [])
                    if not isinstance(new["path_args"], list):
                        # the message is translated first, then formatted
                        log.error(
                            _('"path_args" in redirection of {old} must be a list. '
                              'Ignoring the redirection').format(old=old))
                        continue
                    new.setdefault("query_args", {})
                    if not isinstance(new["query_args"], dict):
                        log.error(
                            _(
                                '"query_args" in redirection of {old} must be a '
                                'dictionary. Ignoring the redirection'
                            ).format(old=old)
                        )
                        continue
                    new["path_args"] = [quote(a) for a in new["path_args"]]
                    # we keep an inversed dict of page redirection
                    # (page/path_args => redirecting URL)
                    # so get_url can return the redirecting URL if the same arguments
                    # are used, making the URL consistent
                    args_hash = tuple(new["path_args"])
                    self.pages_redirects.setdefault(new_data["page"], {}).setdefault(
                        args_hash,
                        old
                    )

                    # we need lists in query_args because it will be used
                    # as is in request.path_args
                    for k, v in new["query_args"].items():
                        if isinstance(v, str):
                            new["query_args"][k] = [v]
                elif "path" in new_data:
                    new = "file:{}".format(urllib.parse.quote(new_data["path"]))
            elif isinstance(new_data, str):
                new = new_data
                new_data = {}
            else:
                log.error(
                    _("ignoring invalid redirection value: {new_data}").format(
                        new_data=new_data
                    )
                )
                continue

            # some normalization
            # the result goes in ``old_url`` and ``old`` is left untouched:
            # otherwise the following items of the list would be checked
            # against the already normalized value (without leading "/") and
            # be wrongly rejected
            if not old.strip():
                # root URL special case
                old_url = ""
            elif not old.startswith("/"):
                log.error(
                    _("redirected url must start with '/', got {value}. Ignoring")
                    .format(value=old)
                )
                continue
            else:
                old_url = self._normalize_url(old)

            if isinstance(new, dict):
                # dict are handled differently, they contain data
                # which are used dynamically when the request is done
                self.redirections.setdefault(old_url, new)
                if not old_url:
                    if new["type"] == "page":
                        log.info(
                            _("Root URL redirected to page {name}").format(
                                name=new["page"]
                            )
                        )
                else:
                    if new["type"] == "page":
                        page = self.get_page_by_name(new["page"])
                        url = page.get_url(*new.get("path_args", []))
                        self.inv_redirections[url] = old_url
                continue

            # at this point we have a redirection URL in new, we can parse it
            new_url = urllib.parse.urlsplit(new)

            # we handle the known URL schemes
            if new_url.scheme == "xmpp":
                location = self.get_page_path_from_uri(new)
                if location is None:
                    log.warning(
                        _("ignoring redirection, no page found to handle this URI: "
                          "{uri}").format(uri=new))
                    continue
                request_data = self._get_request_data(location)
                self.inv_redirections[location] = old_url

            elif new_url.scheme in ("", "http", "https"):
                # direct redirection
                if new_url.netloc:
                    raise NotImplementedError(
                        "netloc ({netloc}) is not implemented yet for "
                        "url_redirections_dict, it is not possible to redirect to an "
                        "external website".format(netloc=new_url.netloc))
                location = urllib.parse.urlunsplit(
                    ("", "", new_url.path, new_url.query, new_url.fragment)
                )
                request_data = self._get_request_data(location)
                self.inv_redirections[location] = old_url

            elif new_url.scheme == "file":
                # file or directory
                if new_url.netloc:
                    raise NotImplementedError(
                        "netloc ({netloc}) is not implemented for url redirection to "
                        "file system, it is not possible to redirect to an external "
                        "host".format(
                            netloc=new_url.netloc))
                path = urllib.parse.unquote(new_url.path)
                if not os.path.isabs(path):
                    raise ValueError(
                        "file redirection must have an absolute path: e.g. "
                        "file:/path/to/my/file")
                # for file redirection, we directly put child here
                resource_class = (
                    ProtectedFile if new_data.get("protected", True) else static.File
                )
                res = resource_class(path, defaultType="application/octet-stream")
                self.add_resource_to_path(old_url, res)
                log.info("[{host_name}] Added redirection from /{old} to file system "
                         "path {path}".format(host_name=self.host_name,
                                               old=old_url,
                                               path=path))

                # we don't want to use redirection system, so we continue here
                continue

            elif new_url.scheme == "libervia-app":
                # a Libervia application

                app_name = urllib.parse.unquote(new_url.path).lower().strip()
                extra = {"url_prefix": f"/{old_url}"}
                try:
                    await self._start_app(app_name, extra)
                except Exception as e:
                    log.warning(_(
                        "Can't launch {app_name!r} for path /{old}: {e}").format(
                        app_name=app_name, old=old_url, e=e))
                    continue

                log.info(
                    f"[{self.host_name}] Added redirection from /{old_url} to "
                    f"application {app_name}"
                )
                # normal redirection system is not used here
                continue
            elif new_url.scheme == "proxy":
                # a reverse proxy
                host, port = new_url.hostname, new_url.port
                if host is None or port is None:
                    # f-string needed here so the faulty URL is really shown
                    raise ValueError(
                        "invalid host or port in proxy redirection, please check your "
                        f"configuration: {new_url.geturl()}"
                    )
                url_prefix = (new_url.path or old_url).rstrip('/')
                res = proxy.SatReverseProxyResource(
                    host,
                    port,
                    url_prefix.encode(),
                )
                self.add_resource_to_path(old_url, res)
                log.info(
                    f"[{self.host_name}] Added redirection from /{old_url} to reverse "
                    f"proxy {new_url.netloc} with URL prefix {url_prefix}/"
                )

                # normal redirection system is not used here
                continue
            else:
                raise NotImplementedError(
                    "{scheme}: scheme is not managed for url_redirections_dict".format(
                        scheme=new_url.scheme
                    )
                )

            self.redirections.setdefault(old_url, request_data)
            if not old_url:
                log.info(_("[{host_name}] Root URL redirected to {uri}")
                    .format(host_name=self.host_name,
                            uri=request_data[1]))

    # the default root URL, if not redirected
    if "" not in self.redirections:
        self.redirections[""] = self._get_request_data(C.LIBERVIA_PAGE_START)
467
async def _set_menu(self, menus):
    """Build the main menu of this site and store it in ``self.main_menu``.

    Each menu item may be:
        - a 2 items list, unpacked as (page name, URL)
        - a string starting with "libervia-app:", the application is then
          started and a redirection from its front URL to the "embed_app"
          page is registered in the host options
        - the name of a page, whose URL is then used

    @param menus: mapping of site name to list of menu items
    @raise ValueError: a menu item is empty, or is a list without exactly 2
        items
    @raise exceptions.ConflictError: the front URL of an application is
        already redirected
    @raise exceptions.ConfigError: a named page can't be found
    """
    app_prefix = "libervia-app:"
    main_menu = []
    for menu in menus.get(self.site_name, []):
        if not menu:
            msg = _("menu item can't be empty")
            log.error(msg)
            raise ValueError(msg)

        if isinstance(menu, list):
            if len(menu) != 2:
                msg = _(
                    "menu item as list must be in the form [page_name, absolue URL]"
                )
                log.error(msg)
                raise ValueError(msg)
            page_name, url = menu
        elif menu.startswith(app_prefix):
            app_name = menu[len(app_prefix):].strip().lower()
            app_data = await self._start_app(app_name)
            exposed_data = app_data["expose"]
            front_url = exposed_data['front_url']
            site_redirections = self.host.options["url_redirections_dict"].setdefault(
                self.site_name, {}
            )
            if front_url in site_redirections:
                raise exceptions.ConflictError(
                    f"There is already a redirection from {front_url!r}, can't add "
                    f"{app_name!r}")

            site_redirections[front_url] = {
                "page": 'embed_app',
                "path_args": [app_name]
            }

            page_name = exposed_data.get('web_label', app_name).title()
            url = front_url

            log.debug(
                f"Application {app_name} added to menu of {self.site_name}"
            )
        else:
            page_name = menu
            try:
                url = self.get_page_by_name(page_name).url
            except KeyError as e:
                log_msg = _("Can'find a named page ({msg}), please check "
                            "menu_json in configuration.").format(msg=e.args[0])
                log.error(log_msg)
                raise exceptions.ConfigError(log_msg)

        main_menu.append((page_name, url))

    self.main_menu = main_menu
520
521 def _normalize_url(self, url, lower=True):
522 """Return URL normalized for self.redirections dict
523
524 @param url(unicode): URL to normalize
525 @param lower(bool): lower case of url if True
526 @return (str): normalized URL
527 """
528 if lower:
529 url = url.lower()
530 return "/".join((p for p in url.split("/") if p))
531
def _get_request_data(self, uri):
    """Return data needed to redirect request

    @param uri(str): destination URI
    @return (tuple(list[str], str, str, dict)): tuple with
        splitted path as in Request.postpath
        uri as in Request.uri
        path as in Request.path
        args as in Request.args
    """
    # XXX: this mirrors what twisted.web.http.py does, as we need to have the
    #   same behaviour
    path, separator, argstring = uri.partition("?")
    if separator:
        args = urllib.parse.parse_qs(argstring, True)
    else:
        args = {}

    # XXX: splitted path case must not be changed, as it may be significant
    #      (e.g. for blog items)
    splitted_path = self._normalize_url(path, lower=False).split("/")
    return (splitted_path, uri, path, args)
562
def _redirect(self, request, request_data):
    """Redirect an URL by rewritting request

    this is *NOT* a HTTP redirection, but equivalent to URL rewritting
    @param request(web.http.request): original request
    @param request_data(tuple, dict): data returned by self._get_request_data,
        or a dict with a "type" key for page redirections
    @return (web_resource.Resource): resource to use
    @raise exceptions.InternalError: request_data is a dict of unknown type
    """
    # recursion check
    if hasattr(request, "_redirected"):
        if isinstance(request_data, dict):
            # page redirections are dicts: unpacking one would iterate over
            # its keys and log a meaningless value, the page name is used
            new_location = request_data.get("page", "")
        else:
            try:
                __, new_location, __, __ = request_data
            except ValueError:
                new_location = ""
        log.error(D_("recursive redirection, please fix this URL:\n"
                     "{old} ==> {new}").format(
                     old=request.uri.decode("utf-8"), new=new_location))
        return web_resource.NoResource()

    request._redirected = True  # here to avoid recursive redirections

    if isinstance(request_data, dict):
        if request_data["type"] == "page":
            try:
                page = self.get_page_by_name(request_data["page"])
            except KeyError:
                log.error(
                    _(
                        'Can\'t find page named "{name}" requested in redirection'
                    ).format(name=request_data["page"])
                )
                return web_resource.NoResource()
            # path arguments of the redirection come before the ones of the
            # original request
            path_args = [pa.encode('utf-8') for pa in request_data["path_args"]]
            request.postpath = path_args + request.postpath

            try:
                request.args.update(request_data["query_args"])
            except (TypeError, ValueError):
                log.error(
                    _("Invalid args in redirection: {query_args}").format(
                        query_args=request_data["query_args"]
                    )
                )
                return web_resource.NoResource()
            return page
        else:
            raise exceptions.InternalError("unknown request_data type")
    else:
        path_list, uri, path, args = request_data
        path_list = [p.encode('utf-8') for p in path_list]
        log.debug(
            "Redirecting URL {old} to {new}".format(
                old=request.uri.decode('utf-8'), new=uri
            )
        )
        # we change the request to reflect the new url
        request.postpath = path_list[1:] + request.postpath
        request.args.update(args)

        # we start again to look for a child with the new url
        return self.getChildWithDefault(path_list[0], request)
628
def get_page_by_name(self, name):
    """Retrieve page instance from its name

    @param name(str): name of the page
    @return (LiberviaPage): page instance registered under this name
    @raise KeyError: the page doesn't exist
    """
    pages = self.named_pages
    return pages[name]
637
def get_page_path_from_uri(self, uri):
    """Retrieve page URL from xmpp: URI

    The callback registered for the (type, sub_type) of the URI is tried
    first, then the generic one registered for (type, None).

    @param uri(str): URI with a xmpp: scheme
    @return (str, None): absolute path (starting from root "/") to page handling
        the URI.
        None is returned if no page has been registered for this URI
    """
    uri_data = common_uri.parse_xmpp_uri(uri)
    for generic in (False, True):
        try:
            sub_type = None if generic else uri_data["sub_type"]
            page, cb = self.uri_callbacks[uri_data["type"], sub_type]
        except KeyError:
            # no handler registered at this level, a more generic one is tried
            continue
        url = cb(page, uri_data)
        if url is not None:
            return url
    return None
663
def getChildWithDefault(self, name, request):
    """Return the child resource, redirecting the root URL first.

    @param name(bytes): path segment to look up
    @param request: request being handled
    @return: resource to use
    """
    # XXX: this method is overriden only for root url
    #      which is the only one which needs to be handled before other children
    is_root_request = name == b"" and not request.postpath
    if not is_root_request:
        return super().getChildWithDefault(name, request)
    return self._redirect(request, self.redirections[""])
670
def getChild(self, name, request):
    """Return the child resource, falling back to redirections.

    Redirections are only tried when the parent lookup gives a NoResource,
    starting with the longest part of the requested path.

    @param name(bytes): path segment to look up
    @param request: request being handled
    @return: resource to use
    """
    resource = super().getChild(name, request)
    if not isinstance(resource, web_resource.NoResource):
        return resource

    # if nothing was found, we try our luck with redirections
    # XXX: we want redirections to happen only if everything else failed
    path_elt = request.prepath + request.postpath
    for idx in reversed(range(len(path_elt) + 1)):
        candidate = b"/".join(path_elt[:idx]).decode('utf-8').lower()
        if candidate not in self.redirections:
            continue
        request_data = self.redirections[candidate]
        # segments after the redirected part stay in the post path
        request.postpath = path_elt[idx:]
        return self._redirect(request, request_data)

    return resource
686
def putChild(self, path, resource):
    """Add a child to the root resource

    The resource is wrapped with a gzip encoder unless it is already an
    EncodingResourceWrapper.

    @param path(bytes): name of the child
    @param resource: resource to add
    @raise ValueError: path is not bytes
    """
    if not isinstance(path, bytes):
        raise ValueError("path must be specified in bytes")

    if isinstance(resource, web_resource.EncodingResourceWrapper):
        wrapped = resource
    else:
        # FIXME: check that no information is leaked (c.f. https://twistedmatrix.com/documents/current/web/howto/using-twistedweb.html#request-encoders)
        encoders = [server.GzipEncoderFactory()]
        wrapped = web_resource.EncodingResourceWrapper(resource, encoders)

    super().putChild(path, wrapped)
697
def createSimilarFile(self, path):
    """Create a file resource for path sharing the settings of this one.

    @param path: path of the file to serve
    @return (ProtectedFile): new resource
    """
    # XXX: this method needs to be overriden to avoid recreating a
    #   LiberviaRootResource: the direct parent class is used instead
    similar = ProtectedFile(
        path, self.defaultType, self.ignoredExts, self.registry
    )
    # refactoring by steps, here - constructor should almost certainly take these
    similar.processors = self.processors
    similar.indexNames = self.indexNames[:]
    similar.childNotFound = self.childNotFound
    return similar