diff sat/memory/cache.py @ 3185:554b3b632378

memory (cache): purge + pathlib: - cache is now purged on backend startup (every file which has passed EOL will be deleted) - use of pathlib, getPath now returns a Path
author Goffi <goffi@goffi.org>
date Wed, 26 Feb 2020 15:54:43 +0100
parents 559a625a236b
children a15773c6c273
line wrap: on
line diff
--- a/sat/memory/cache.py	Wed Feb 26 15:54:34 2020 +0100
+++ b/sat/memory/cache.py	Wed Feb 26 15:54:43 2020 +0100
@@ -17,16 +17,18 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+import pickle as pickle
+import mimetypes
+import time
+from pathlib import Path
+from sat.core.i18n import _
 from sat.core.log import getLogger
+from sat.core.constants import Const as C
+from sat.core import exceptions
+from sat.tools.common import regex
+
 
 log = getLogger(__name__)
-from sat.tools.common import regex
-from sat.core import exceptions
-from sat.core.constants import Const as C
-import pickle as pickle
-import mimetypes
-import os.path
-import time
 
 DEFAULT_EXT = ".raw"
 
@@ -45,25 +47,72 @@
             path_elts.extend(["profiles", regex.pathEscape(profile)])
         else:
             path_elts.append("common")
-        self.cache_dir = os.path.join(*path_elts)
+        self.cache_dir = Path(*path_elts)
+
+        self.cache_dir.mkdir(0o700, parents=True, exist_ok=True)
+        self.purge()
 
-        if not os.path.exists(self.cache_dir):
-            os.makedirs(self.cache_dir)
+    def purge(self):
+        """Remove expired files (metadata and cached data) from cache"""
+        # TODO: this should not be called only on startup, but at regular intervals
+        #   (e.g. once a day)
+        # FIXME: every file is unpickled, cached payloads included: unsafe if untrusted
+        purged = set()
+        # we sort files to have metadata files first
+        for cache_file in sorted(self.cache_dir.iterdir()):
+            if cache_file in purged:
+                continue
+            try:
+                with cache_file.open('rb') as f:
+                    cache_data = pickle.load(f)
+            except IOError:
+                log.warning(
+                    _("Can't read metadata file at {path}")
+                    .format(path=cache_file))
+                continue
+            except Exception:  # payloads are not pickles: load may raise anything
+                log.debug(f"File at {cache_file} is not a metadata file")
+                continue
+            try:
+                eol = cache_data['eol']
+                filename = cache_data['filename']
+            except (KeyError, TypeError):  # TypeError: unpickled data is not a dict
+                log.warning(
+                    _("Invalid cache metadata at {path}")
+                    .format(path=cache_file))
+                continue
+
+            if eol < time.time():
+                filepath = self.getPath(filename)
+                log.debug(
+                    "purging expired cache {filepath!r} (expired for {time}s)"
+                    .format(filepath=str(filepath), time=int(time.time() - eol))
+                )
+                cache_file.unlink()
+                try:
+                    filepath.unlink()
+                except FileNotFoundError:
+                    log.warning(
+                        _("following file is missing while purging cache: {path}")
+                        .format(path=filepath)
+                    )
+                purged.update((cache_file, filepath))
 
     def getPath(self, filename):
         """return cached file URL
 
-        @param filename(unicode): cached file name (cache data or actual file)
+        @param filename(str): cached file name (cache data or actual file)
+        @return (Path): path to the cached file
         """
         if not filename or "/" in filename:
             log.error(
                 "invalid char found in file name, hack attempt? name:{}".format(filename)
             )
             raise exceptions.DataError("Invalid char found")
-        return os.path.join(self.cache_dir, filename)
+        return self.cache_dir / filename
 
     def getMetadata(self, uid):
-        """retrieve metadata for cached data
+        """Retrieve metadata for cached data
 
         @param uid(unicode): unique identifier of file
         @return (dict, None): metadata with following keys:
@@ -75,11 +124,11 @@
         if not uid:
             raise exceptions.InternalError("uid must not be empty")
         cache_url = self.getPath(uid)
-        if not os.path.exists(cache_url):
+        if not cache_url.exists():
             return None
 
         try:
-            with open(cache_url, "rb") as f:
+            with cache_url.open("rb") as f:
                 cache_data = pickle.load(f)
         except IOError:
             log.warning("can't read cache at {}".format(cache_url))
@@ -103,7 +152,7 @@
         return cache_data
 
     def getFilePath(self, uid):
-        """retrieve absolute path to file
+        """Retrieve absolute path to file
 
         @param uid(unicode): unique identifier of file
         @return (unicode, None): absolute path to cached file
@@ -157,4 +206,4 @@
         with open(cache_url, "wb") as f:
             pickle.dump(cache_data, f, protocol=2)
 
-        return open(file_path, "wb")
+        return file_path.open("wb")