Mercurial > libervia-backend
diff libervia/backend/memory/cache.py @ 4071:4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 02 Jun 2023 11:49:51 +0200 |
parents | sat/memory/cache.py@524856bd7b19 |
children | 5f2d496c633f |
line wrap: on
line diff
#!/usr/bin/env python3


# SAT: a jabber client
# Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from io import BufferedIOBase
import mimetypes
from pathlib import Path
import pickle
import time
from typing import Any, Dict, Optional

from libervia.backend.core import exceptions
from libervia.backend.core.constants import Const as C
from libervia.backend.core.i18n import _
from libervia.backend.core.log import getLogger
from libervia.backend.tools.common import regex


log = getLogger(__name__)

# extension used when no better one can be guessed from the MIME type
DEFAULT_EXT = ".raw"


class Cache(object):
    """generic file caching

    Cached data is stored as pairs of files in ``cache_dir``: a pickled
    metadata file named after the cache uid, and the actual data file whose
    name is ``uid + extension``.
    """

    def __init__(self, host, profile):
        """
        @param host: SàT/Libervia instance, used to retrieve the configuration
        @param profile(unicode, None): name of the profile to set the cache for
            if None, the cache will be common for all profiles
        """
        self.profile = profile
        path_elts = [host.memory.config_get("", "local_dir"), C.CACHE_DIR]
        if profile:
            path_elts.extend(["profiles", regex.path_escape(profile)])
        else:
            path_elts.append("common")
        self.cache_dir = Path(*path_elts)

        # 0o700: cache may contain private data
        self.cache_dir.mkdir(0o700, parents=True, exist_ok=True)
        self.purge()

    def purge(self):
        """remove expired and orphaned files from cache

        TODO: this should not be called only on startup, but at regular interval
            (e.g. once a day)
        """
        purged = set()
        # we sort files to have metadata files first
        for cache_file in sorted(self.cache_dir.iterdir()):
            if cache_file in purged:
                continue
            try:
                with cache_file.open('rb') as f:
                    cache_data = pickle.load(f)
            except IOError:
                log.warning(
                    _("Can't read metadata file at {path}")
                    .format(path=cache_file))
                continue
            except (pickle.UnpicklingError, EOFError):
                # not a metadata file: it's a data file, it will be handled
                # (or purged) through its own metadata file
                log.debug(f"File at {cache_file} is not a metadata file")
                continue
            try:
                eol = cache_data['eol']
                filename = cache_data['filename']
            except KeyError:
                log.warning(
                    _("Invalid cache metadata at {path}")
                    .format(path=cache_file))
                continue

            filepath = self.getPath(filename)

            if not filepath.exists():
                # orphaned metadata: the data file it points to is gone
                log.warning(_(
                    "cache {cache_file!r} references an inexisting file: {filepath!r}"
                ).format(cache_file=str(cache_file), filepath=str(filepath)))
                log.debug("purging cache with missing file")
                cache_file.unlink()
            elif eol < time.time():
                log.debug(
                    "purging expired cache {filepath!r} (expired for {time}s)"
                    .format(filepath=str(filepath), time=int(time.time() - eol))
                )
                cache_file.unlink()
                try:
                    filepath.unlink()
                except FileNotFoundError:
                    log.warning(
                        _("following file is missing while purging cache: {path}")
                        .format(path=filepath)
                    )
                # remember both files so the data file is not re-examined
                purged.add(cache_file)
                purged.add(filepath)

    def getPath(self, filename: str) -> Path:
        """return cached file URL

        @param filename: cached file name (cache data or actual file)
        @return: path to the cached file
        @raise exceptions.DataError: filename is empty or could escape cache_dir
        """
        # FIX: also reject "." and "..": a uid of ".." would otherwise resolve
        # outside of cache_dir (path traversal)
        if not filename or "/" in filename or filename in (".", ".."):
            log.error(
                "invalid char found in file name, hack attempt? name:{}".format(filename)
            )
            raise exceptions.DataError("Invalid char found")
        return self.cache_dir / filename

    def get_metadata(self, uid: str, update_eol: bool = True) -> Optional[Dict[str, Any]]:
        """Retrieve metadata for cached data

        @param uid(unicode): unique identifier of file
        @param update_eol(bool): True if eol must extended
            if True, max_age will be added to eol (only if it is not already expired)
        @return (dict, None): metadata with following keys:
            see [cache_data] for data details, an additional "path" key is the full path to
            cached file.
            None if file is not in cache (or cache is invalid)
        """

        uid = uid.strip()
        if not uid:
            raise exceptions.InternalError("uid must not be empty")
        cache_url = self.getPath(uid)
        if not cache_url.exists():
            return None

        try:
            with cache_url.open("rb") as f:
                cache_data = pickle.load(f)
        except (IOError, EOFError) as e:
            log.warning(f"can't read cache at {cache_url}: {e}")
            return None
        except pickle.UnpicklingError:
            log.warning(f"invalid cache found at {cache_url}")
            return None

        try:
            eol = cache_data["eol"]
        except KeyError:
            log.warning("no End Of Life found for cached file {}".format(uid))
            eol = 0
        if eol < time.time():
            log.debug(
                "removing expired cache (expired for {}s)".format(time.time() - eol)
            )
            # FIX: the log claims removal but nothing was deleted, leaving
            # expired entries on disk until the next purge() (and
            # remove_from_cache() could then never delete them, as it relies on
            # this method). Best-effort delete of both metadata and data file.
            try:
                cache_url.unlink()
            except FileNotFoundError:
                pass
            filename = cache_data.get("filename")
            if filename:
                try:
                    self.getPath(filename).unlink()
                except (FileNotFoundError, exceptions.DataError):
                    pass
            return None

        if update_eol:
            try:
                max_age = cache_data["max_age"]
            except KeyError:
                log.warning(f"no max_age found for cache at {cache_url}, using default")
                max_age = cache_data["max_age"] = C.DEFAULT_MAX_AGE
            now = int(time.time())
            cache_data["last_access"] = now
            cache_data["eol"] = now + max_age
            with cache_url.open("wb") as f:
                pickle.dump(cache_data, f, protocol=2)

        cache_data["path"] = self.getPath(cache_data["filename"])
        return cache_data

    def get_file_path(self, uid: str) -> Optional[Path]:
        """Retrieve absolute path to file

        @param uid(unicode): unique identifier of file
        @return (Path, None): absolute path to cached file
            None if file is not in cache (or cache is invalid)
        """
        metadata = self.get_metadata(uid)
        if metadata is not None:
            return metadata["path"]
        # explicit for readers: not cached (or invalid/expired cache)
        return None

    def remove_from_cache(self, uid, metadata=None):
        """Remove data from cache

        @param uid(unicode): unique identifier cache file
        @param metadata: unused, kept for backward compatibility
        """
        cache_data = self.get_metadata(uid, update_eol=False)
        if cache_data is None:
            log.debug(f"cache with uid {uid!r} has already expired or been removed")
            return

        try:
            filename = cache_data['filename']
        except KeyError:
            log.warning(_("missing filename for cache {uid!r}").format(uid=uid))
        else:
            filepath = self.getPath(filename)
            try:
                filepath.unlink()
            except FileNotFoundError:
                # FIX: the template did not use the "filename" argument it was
                # given; restore the placeholder so the message is useful
                log.warning(
                    _("missing file referenced in cache {uid!r}: {filename!r}")
                    .format(uid=uid, filename=filename)
                )

        cache_file = self.getPath(uid)
        cache_file.unlink()
        log.debug(f"cache with uid {uid!r} has been removed")

    def cache_data(
        self,
        source: str,
        uid: str,
        mime_type: Optional[str] = None,
        max_age: Optional[int] = None,
        original_filename: Optional[str] = None
    ) -> BufferedIOBase:
        """create cache metadata and file object to use for actual data

        @param source: source of the cache (should be plugin's import_name)
        @param uid: an identifier of the file which must be unique
        @param mime_type: MIME type of the file to cache
            it will be used notably to guess file extension
            It may be autogenerated if filename is specified
        @param max_age: maximum age in seconds
            the cache metadata will have an "eol" (end of life)
            None to use default value
            0 to ignore cache (file will be re-downloaded on each access)
        @param original_filename: if not None, will be used to retrieve file extension and
            guess mime type, and stored in "original_filename"
        @return: file object opened in write mode
            you have to close it yourself (hint: use ``with`` statement)
        """
        if max_age is None:
            max_age = C.DEFAULT_MAX_AGE
        cache_data = {
            "source": source,
            # we also store max_age for updating eol
            "max_age": max_age,
        }
        cache_url = self.getPath(uid)
        if original_filename is not None:
            cache_data["original_filename"] = original_filename
            if mime_type is None:
                # we have original_filename but not MIME type, we try to guess the later
                mime_type = mimetypes.guess_type(original_filename, strict=False)[0]
        if mime_type:
            ext = mimetypes.guess_extension(mime_type, strict=False)
            if ext is None:
                log.warning(
                    "can't find extension for MIME type {}".format(mime_type)
                )
                ext = DEFAULT_EXT
            elif ext == ".jpe":
                # ".jpg" is the conventional JPEG extension
                ext = ".jpg"
        else:
            ext = DEFAULT_EXT
            # normalise empty string to None before storing
            mime_type = None
        filename = uid + ext
        now = int(time.time())
        cache_data.update({
            "filename": filename,
            "creation": now,
            "eol": now + max_age,
            "mime_type": mime_type,
        })
        file_path = self.getPath(filename)

        # metadata is written first so get_metadata() can find the entry as
        # soon as the data file exists
        with cache_url.open("wb") as f:
            pickle.dump(cache_data, f, protocol=2)

        return file_path.open("wb")