# HG changeset patch # User Goffi # Date 1582728883 -3600 # Node ID 554b3b63237872285944ba35ca3ab33b397da341 # Parent e8ce30798d15ba91f4a69ea60800c4b78e1ebd84 memory (cache): purge + pathlib: - cache is now purged on backend startup (every file which has passed EOL will be deleted) - use of pathlib, getPath now returns a Path diff -r e8ce30798d15 -r 554b3b632378 sat/memory/cache.py --- a/sat/memory/cache.py Wed Feb 26 15:54:34 2020 +0100 +++ b/sat/memory/cache.py Wed Feb 26 15:54:43 2020 +0100 @@ -17,16 +17,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import pickle as pickle +import mimetypes +import time +from pathlib import Path +from sat.core.i18n import _ from sat.core.log import getLogger +from sat.core.constants import Const as C +from sat.core import exceptions +from sat.tools.common import regex + log = getLogger(__name__) -from sat.tools.common import regex -from sat.core import exceptions -from sat.core.constants import Const as C -import pickle as pickle -import mimetypes -import os.path -import time DEFAULT_EXT = ".raw" @@ -45,25 +47,72 @@ path_elts.extend(["profiles", regex.pathEscape(profile)]) else: path_elts.append("common") - self.cache_dir = os.path.join(*path_elts) + self.cache_dir = Path(*path_elts) + + self.cache_dir.mkdir(0o700, parents=True, exist_ok=True) + self.purge() - if not os.path.exists(self.cache_dir): - os.makedirs(self.cache_dir) + def purge(self): + # remove expired files from cache + # TODO: this should not be called only on startup, but at regular interval + # (e.g. once a day) + purged = set() + # we sort files to have metadata files first + for cache_file in sorted(self.cache_dir.iterdir()): + if cache_file in purged: + continue + try: + with cache_file.open('rb') as f: + cache_data = pickle.load(f) + except IOError: + log.warning( + _("Can't read metadata file at {path}") + .format(path=cache_file)) + continue + except (pickle.UnpicklingError, EOFError): + log.debug(f"File at {cache_file} is not a metadata file") + continue + try: + eol = cache_data['eol'] + filename = cache_data['filename'] + except KeyError: + log.warning( + _("Invalid cache metadata at {path}") + .format(path=cache_file)) + continue + + if eol < time.time(): + filepath = self.getPath(filename) + log.debug( + "purging expired cache {filepath!r} (expired for {time}s)" + .format(filepath=str(filepath), time=int(time.time() - eol)) + ) + cache_file.unlink() + try: + filepath.unlink() + except FileNotFoundError: + log.warning( + _("following file is missing while purging cache: {path}") + .format(path=filepath) + ) + purged.add(cache_file) + purged.add(filepath) def getPath(self, filename): """return cached file URL - @param filename(unicode): cached file name (cache data or actual file) + @param filename(str): cached file name (cache data or actual file) + @return (Path): path to the cached file """ if not filename or "/" in filename: log.error( "invalid char found in file name, hack attempt? name:{}".format(filename) ) raise exceptions.DataError("Invalid char found") - return os.path.join(self.cache_dir, filename) + return self.cache_dir / filename def getMetadata(self, uid): - """retrieve metadata for cached data + """Retrieve metadata for cached data @param uid(unicode): unique identifier of file @return (dict, None): metadata with following keys: @@ -75,11 +124,11 @@ if not uid: raise exceptions.InternalError("uid must not be empty") cache_url = self.getPath(uid) - if not os.path.exists(cache_url): + if not cache_url.exists: return None try: - with open(cache_url, "rb") as f: + with cache_url.open("rb") as f: cache_data = pickle.load(f) except IOError: log.warning("can't read cache at {}".format(cache_url)) @@ -103,7 +152,7 @@ return cache_data def getFilePath(self, uid): - """retrieve absolute path to file + """Retrieve absolute path to file @param uid(unicode): unique identifier of file @return (unicode, None): absolute path to cached file @@ -157,4 +206,4 @@ with open(cache_url, "wb") as f: pickle.dump(cache_data, f, protocol=2) - return open(file_path, "wb") + return file_path.open("wb") diff -r e8ce30798d15 -r 554b3b632378 sat/plugins/plugin_xep_0054.py --- a/sat/plugins/plugin_xep_0054.py Wed Feb 26 15:54:34 2020 +0100 +++ b/sat/plugins/plugin_xep_0054.py Wed Feb 26 15:54:43 2020 +0100 @@ -18,25 +18,23 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -from sat.core.i18n import _ -from sat.core.constants import Const as C -from sat.core.log import getLogger - -log = getLogger(__name__) +import mimetypes +from base64 import b64decode, b64encode +from hashlib import sha1 +from pathlib import Path +from zope.interface import implementer from twisted.internet import threads, defer from twisted.words.protocols.jabber import jid, error from twisted.words.xish import domish from twisted.python.failure import Failure - -from zope.interface import implementer - from wokkel import disco, iwokkel +from sat.core import exceptions +from sat.core.i18n import _ +from sat.core.constants import Const as C +from sat.core.log import getLogger +from sat.memory import persistent -from base64 import b64decode, b64encode -from hashlib import sha1 -from sat.core import exceptions -from sat.memory import persistent -import mimetypes +log = getLogger(__name__) try: from PIL import Image @@ -406,8 +404,9 @@ def _getAvatar(self, entity, cache_only, hash_only, profile): client = self.host.getClient(profile) d = self.getAvatar(client, jid.JID(entity), cache_only, hash_only) + # we need to convert the Path to string + d.addCallback(str) d.addErrback(lambda __: "") - return d def getAvatar(self, client, entity, cache_only=True, hash_only=False): @@ -436,7 +435,7 @@ raise KeyError else: # avatar has already been checked but it is not set - full_path = "" + full_path = Path("") except KeyError: # avatar is not in cache if cache_only: