Mercurial > libervia-backend
view sat/memory/cache.py @ 3185:554b3b632378
memory (cache): purge + pathlib:
- cache is now purged on backend startup (every file which has passed EOL will be deleted)
- use of pathlib, getPath now returns a Path
author | Goffi <goffi@goffi.org> |
---|---|
date | Wed, 26 Feb 2020 15:54:43 +0100 |
parents | 559a625a236b |
children | a15773c6c273 |
line wrap: on
line source
#!/usr/bin/env python3 # SAT: a jabber client # Copyright (C) 2009-2020 Jérôme Poisson (goffi@goffi.org) # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import pickle as pickle import mimetypes import time from pathlib import Path from sat.core.i18n import _ from sat.core.log import getLogger from sat.core.constants import Const as C from sat.core import exceptions from sat.tools.common import regex log = getLogger(__name__) DEFAULT_EXT = ".raw" class Cache(object): """generic file caching""" def __init__(self, host, profile): """ @param profile(unicode, None): ame of the profile to set the cache for if None, the cache will be common for all profiles """ self.profile = profile path_elts = [host.memory.getConfig("", "local_dir"), C.CACHE_DIR] if profile: path_elts.extend(["profiles", regex.pathEscape(profile)]) else: path_elts.append("common") self.cache_dir = Path(*path_elts) self.cache_dir.mkdir(0o700, parents=True, exist_ok=True) self.purge() def purge(self): # remove expired files from cache # TODO: this should not be called only on startup, but at regular interval # (e.g. once a day) purged = set() # we sort files to have metadata files first for cache_file in sorted(self.cache_dir.iterdir()): if cache_file in purged: continue try: with cache_file.open('rb') as f: cache_data = pickle.load(f) except IOError: log.warning( _("Can't read metadata file at {path}") .format(path=cache_file)) continue except (pickle.UnpicklingError, EOFError): log.debug(f"File at {cache_file} is not a metadata file") continue try: eol = cache_data['eol'] filename = cache_data['filename'] except KeyError: log.warning( _("Invalid cache metadata at {path}") .format(path=cache_file)) continue if eol < time.time(): filepath = self.getPath(filename) log.debug( "purging expired cache {filepath!r} (expired for {time}s)" .format(filepath=str(filepath), time=int(time.time() - eol)) ) cache_file.unlink() try: filepath.unlink() except FileNotFoundError: log.warning( _("following file is missing while purging cache: {path}") .format(path=filepath) ) purged.add(cache_file) purged.add(filepath) def getPath(self, filename): """return cached file URL @param filename(str): cached file name (cache data or actual file) @return (Path): path to the cached file """ if not filename or "/" in filename: log.error( "invalid char found in file name, hack attempt? name:{}".format(filename) ) raise exceptions.DataError("Invalid char found") return self.cache_dir / filename def getMetadata(self, uid): """Retrieve metadata for cached data @param uid(unicode): unique identifier of file @return (dict, None): metadata with following keys: see [cacheData] for data details, an additional "path" key is the full path to cached file. None if file is not in cache (or cache is invalid) """ uid = uid.strip() if not uid: raise exceptions.InternalError("uid must not be empty") cache_url = self.getPath(uid) if not cache_url.exists: return None try: with cache_url.open("rb") as f: cache_data = pickle.load(f) except IOError: log.warning("can't read cache at {}".format(cache_url)) return None except pickle.UnpicklingError: log.warning("invalid cache found at {}".format(cache_url)) return None try: eol = cache_data["eol"] except KeyError: log.warning("no End Of Life found for cached file {}".format(uid)) eol = 0 if eol < time.time(): log.debug( "removing expired cache (expired for {}s)".format(time.time() - eol) ) return None cache_data["path"] = self.getPath(cache_data["filename"]) return cache_data def getFilePath(self, uid): """Retrieve absolute path to file @param uid(unicode): unique identifier of file @return (unicode, None): absolute path to cached file None if file is not in cache (or cache is invalid) """ metadata = self.getMetadata(uid) if metadata is not None: return metadata["path"] def cacheData(self, source, uid, mime_type=None, max_age=None, filename=None): """create cache metadata and file object to use for actual data @param source(unicode): source of the cache (should be plugin's import_name) @param uid(unicode): an identifier of the file which must be unique @param mime_type(unicode): MIME type of the file to cache it will be used notably to guess file extension @param max_age(int, None): maximum age in seconds the cache metadata will have an "eol" (end of life) None to use default value 0 to ignore cache (file will be re-downloaded on each access) @param filename: if not None, will be used as filename else one will be generated from uid and guessed extension @return(file): file object opened in write mode you have to close it yourself (hint: use with statement) """ cache_url = self.getPath(uid) if filename is None: if mime_type: ext = mimetypes.guess_extension(mime_type, strict=False) if ext is None: log.warning( "can't find extension for MIME type {}".format(mime_type) ) ext = DEFAULT_EXT elif ext == ".jpe": ext = ".jpg" else: ext = DEFAULT_EXT mime_type = None filename = uid + ext if max_age is None: max_age = C.DEFAULT_MAX_AGE cache_data = { "source": source, "filename": filename, "eol": int(time.time()) + max_age, "mime_type": mime_type, } file_path = self.getPath(filename) with open(cache_url, "wb") as f: pickle.dump(cache_data, f, protocol=2) return file_path.open("wb")