diff libervia/backend/memory/cache.py @ 4071:4b842c1fb686

refactoring: renamed `sat` package to `libervia.backend`
author Goffi <goffi@goffi.org>
date Fri, 02 Jun 2023 11:49:51 +0200
parents sat/memory/cache.py@524856bd7b19
children 5f2d496c633f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libervia/backend/memory/cache.py	Fri Jun 02 11:49:51 2023 +0200
@@ -0,0 +1,281 @@
+#!/usr/bin/env python3
+
+
+# SAT: a jabber client
+# Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from io import BufferedIOBase
+import mimetypes
+from pathlib import Path
+import pickle as pickle
+import time
+from typing import Any, Dict, Optional
+
+from libervia.backend.core import exceptions
+from libervia.backend.core.constants import Const as C
+from libervia.backend.core.i18n import _
+from libervia.backend.core.log import getLogger
+from libervia.backend.tools.common import regex
+
+
+log = getLogger(__name__)
+
+DEFAULT_EXT = ".raw"
+
+
class Cache(object):
    """Generic file caching.

    Each cache entry is stored as two files inside ``cache_dir``: a metadata
    file (a pickle, named after the entry's ``uid``) and the data file itself
    (named ``uid`` + extension). Metadata holds at least "filename", "eol"
    (end of life, a Unix timestamp) and "max_age".
    """

    def __init__(self, host, profile):
        """
        @param host: host instance, used to read ``local_dir`` from configuration
        @param profile(unicode, None): name of the profile to set the cache for
            if None, the cache will be common for all profiles
        """
        self.profile = profile
        path_elts = [host.memory.config_get("", "local_dir"), C.CACHE_DIR]
        if profile:
            path_elts.extend(["profiles", regex.path_escape(profile)])
        else:
            path_elts.append("common")
        self.cache_dir = Path(*path_elts)

        # cache may contain private data (e.g. avatars), so restrict to owner
        self.cache_dir.mkdir(0o700, parents=True, exist_ok=True)
        self.purge()

    def purge(self):
        """Remove expired and orphaned entries from the cache directory.

        Metadata files are visited first: the listing is sorted and a metadata
        file name (the bare uid) sorts before its data file name (uid +
        extension), so data files already purged alongside their metadata are
        skipped when reached later in the iteration.
        """
        # TODO: this should not be called only on startup, but at regular interval
        #   (e.g. once a day)
        purged = set()
        # we sort files to have metadata files first
        for cache_file in sorted(self.cache_dir.iterdir()):
            if cache_file in purged:
                continue
            try:
                with cache_file.open('rb') as f:
                    cache_data = pickle.load(f)
            except IOError:
                log.warning(
                    _("Can't read metadata file at {path}")
                    .format(path=cache_file))
                continue
            except (pickle.UnpicklingError, EOFError):
                # not a metadata file (most likely a data file), nothing to do
                log.debug(f"File at {cache_file} is not a metadata file")
                continue
            try:
                eol = cache_data['eol']
                filename = cache_data['filename']
            except KeyError:
                log.warning(
                    _("Invalid cache metadata at {path}")
                    .format(path=cache_file))
                continue

            filepath = self.getPath(filename)

            if not filepath.exists():
                # orphan metadata: the data file it references is gone
                log.warning(_(
                    "cache {cache_file!r} references an inexisting file: {filepath!r}"
                ).format(cache_file=str(cache_file), filepath=str(filepath)))
                log.debug("purging cache with missing file")
                cache_file.unlink()
            elif eol < time.time():
                log.debug(
                    "purging expired cache {filepath!r} (expired for {time}s)"
                    .format(filepath=str(filepath), time=int(time.time() - eol))
                )
                cache_file.unlink()
                try:
                    filepath.unlink()
                except FileNotFoundError:
                    log.warning(
                        _("following file is missing while purging cache: {path}")
                        .format(path=filepath)
                    )
                purged.add(cache_file)
                purged.add(filepath)

    def getPath(self, filename: str) -> Path:
        """return cached file URL

        @param filename: cached file name (cache data or actual file)
        @return: path to the cached file
        @raise exceptions.DataError: if filename is empty or could resolve
            outside of the cache directory
        """
        # reject anything which could escape cache_dir: "/" and "\\" are path
        # separators ("\\" on Windows), "." and ".." are relative components
        if (
            not filename
            or "/" in filename
            or "\\" in filename
            or filename in (".", "..")
        ):
            log.error(
                "invalid char found in file name, hack attempt? name:{}".format(filename)
            )
            raise exceptions.DataError("Invalid char found")
        return self.cache_dir / filename

    def get_metadata(self, uid: str, update_eol: bool = True) -> Optional[Dict[str, Any]]:
        """Retrieve metadata for cached data

        @param uid(unicode): unique identifier of file
        @param update_eol(bool): True if eol must be extended
            if True, max_age will be added to eol (only if it is not already expired)
        @return (dict, None): metadata with following keys:
            see [cache_data] for data details, an additional "path" key is the full path to
            cached file.
            None if file is not in cache (or cache is invalid)
        @raise exceptions.InternalError: if uid is empty (after stripping)
        """

        uid = uid.strip()
        if not uid:
            raise exceptions.InternalError("uid must not be empty")
        cache_url = self.getPath(uid)
        if not cache_url.exists():
            return None

        try:
            with cache_url.open("rb") as f:
                cache_data = pickle.load(f)
        except (IOError, EOFError) as e:
            log.warning(f"can't read cache at {cache_url}: {e}")
            return None
        except pickle.UnpicklingError:
            log.warning(f"invalid cache found at {cache_url}")
            return None

        try:
            eol = cache_data["eol"]
        except KeyError:
            log.warning("no End Of Life found for cached file {}".format(uid))
            eol = 0
        if eol < time.time():
            log.debug(
                "removing expired cache (expired for {}s)".format(time.time() - eol)
            )
            # FIX: the log always claimed removal but nothing was removed,
            # leaving stale files until the next startup purge(); delete the
            # entry (metadata + data file) now, tolerating missing files as
            # purge() does.
            try:
                cache_url.unlink()
            except FileNotFoundError:
                pass
            filename = cache_data.get("filename")
            if filename:
                try:
                    self.getPath(filename).unlink()
                except FileNotFoundError:
                    pass
            return None

        if update_eol:
            try:
                max_age = cache_data["max_age"]
            except KeyError:
                log.warning(f"no max_age found for cache at {cache_url}, using default")
                max_age = cache_data["max_age"] = C.DEFAULT_MAX_AGE
            now = int(time.time())
            cache_data["last_access"] = now
            cache_data["eol"] = now + max_age
            with cache_url.open("wb") as f:
                pickle.dump(cache_data, f, protocol=2)

        cache_data["path"] = self.getPath(cache_data["filename"])
        return cache_data

    def get_file_path(self, uid: str) -> Optional[Path]:
        """Retrieve absolute path to file

        @param uid(unicode): unique identifier of file
        @return (unicode, None): absolute path to cached file
            None if file is not in cache (or cache is invalid)
        """
        metadata = self.get_metadata(uid)
        if metadata is not None:
            return metadata["path"]
        # explicit for readability; annotation is Optional[Path] accordingly
        return None

    def remove_from_cache(self, uid, metadata=None):
        """Remove data from cache

        @param uid(unicode): unique identifier cache file
        @param metadata: unused, kept for backward compatibility
        """
        cache_data = self.get_metadata(uid, update_eol=False)
        if cache_data is None:
            log.debug(f"cache with uid {uid!r} has already expired or been removed")
            return

        try:
            filename = cache_data['filename']
        except KeyError:
            log.warning(_("missing filename for cache {uid!r}") .format(uid=uid))
        else:
            filepath = self.getPath(filename)
            try:
                filepath.unlink()
            except FileNotFoundError:
                # FIX: the message previously hard-coded "(unknown)" although
                # the filename is known and passed to .format()
                log.warning(
                    _("missing file referenced in cache {uid!r}: {filename}")
                    .format(uid=uid, filename=filename)
                )

        cache_file = self.getPath(uid)
        cache_file.unlink()
        log.debug(f"cache with uid {uid!r} has been removed")

    def cache_data(
        self,
        source: str,
        uid: str,
        mime_type: Optional[str] = None,
        max_age: Optional[int] = None,
        original_filename: Optional[str] = None
    ) -> BufferedIOBase:
        """create cache metadata and file object to use for actual data

        @param source: source of the cache (should be plugin's import_name)
        @param uid: an identifier of the file which must be unique
        @param mime_type: MIME type of the file to cache
            it will be used notably to guess file extension
            It may be autogenerated if filename is specified
        @param max_age: maximum age in seconds
            the cache metadata will have an "eol" (end of life)
            None to use default value
            0 to ignore cache (file will be re-downloaded on each access)
        @param original_filename: if not None, will be used to retrieve file extension and
            guess
            mime type, and stored in "original_filename"
        @return: file object opened in write mode
            you have to close it yourself (hint: use ``with`` statement)
        """
        if max_age is None:
            max_age = C.DEFAULT_MAX_AGE
        # local renamed from "cache_data" to avoid shadowing the method name
        metadata = {
            "source": source,
            # we also store max_age for updating eol
            "max_age": max_age,
        }
        cache_url = self.getPath(uid)
        if original_filename is not None:
            metadata["original_filename"] = original_filename
            if mime_type is None:
                # we have original_filename but not MIME type, we try to guess the later
                mime_type = mimetypes.guess_type(original_filename, strict=False)[0]
        if mime_type:
            ext = mimetypes.guess_extension(mime_type, strict=False)
            if ext is None:
                log.warning(
                    "can't find extension for MIME type {}".format(mime_type)
                )
                ext = DEFAULT_EXT
            elif ext == ".jpe":
                # prefer the conventional JPEG extension
                ext = ".jpg"
        else:
            ext = DEFAULT_EXT
            mime_type = None
        filename = uid + ext
        now = int(time.time())
        metadata.update({
            "filename": filename,
            "creation": now,
            "eol": now + max_age,
            "mime_type": mime_type,
        })
        file_path = self.getPath(filename)

        # write metadata first, so the entry is discoverable as soon as the
        # returned file object is filled and closed by the caller
        with cache_url.open("wb") as f:
            pickle.dump(metadata, f, protocol=2)

        return file_path.open("wb")