comparison sat/memory/cache.py @ 3185:554b3b632378

memory (cache): purge + pathlib: - cache is now purged on backend startup (every file which has passed EOL will be deleted) - use of pathlib, getPath now returns a Path
author Goffi <goffi@goffi.org>
date Wed, 26 Feb 2020 15:54:43 +0100
parents 559a625a236b
children a15773c6c273
comparison
equal deleted inserted replaced
3184:e8ce30798d15 3185:554b3b632378
15 # GNU Affero General Public License for more details. 15 # GNU Affero General Public License for more details.
16 16
17 # You should have received a copy of the GNU Affero General Public License 17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
20 from sat.core.log import getLogger
21
22 log = getLogger(__name__)
23 from sat.tools.common import regex
24 from sat.core import exceptions
25 from sat.core.constants import Const as C
26 import pickle as pickle 20 import pickle as pickle
27 import mimetypes 21 import mimetypes
28 import os.path
29 import time 22 import time
23 from pathlib import Path
24 from sat.core.i18n import _
25 from sat.core.log import getLogger
26 from sat.core.constants import Const as C
27 from sat.core import exceptions
28 from sat.tools.common import regex
29
30
31 log = getLogger(__name__)
30 32
31 DEFAULT_EXT = ".raw" 33 DEFAULT_EXT = ".raw"
32 34
33 35
34 class Cache(object): 36 class Cache(object):
43 path_elts = [host.memory.getConfig("", "local_dir"), C.CACHE_DIR] 45 path_elts = [host.memory.getConfig("", "local_dir"), C.CACHE_DIR]
44 if profile: 46 if profile:
45 path_elts.extend(["profiles", regex.pathEscape(profile)]) 47 path_elts.extend(["profiles", regex.pathEscape(profile)])
46 else: 48 else:
47 path_elts.append("common") 49 path_elts.append("common")
48 self.cache_dir = os.path.join(*path_elts) 50 self.cache_dir = Path(*path_elts)
49 51
50 if not os.path.exists(self.cache_dir): 52 self.cache_dir.mkdir(0o700, parents=True, exist_ok=True)
51 os.makedirs(self.cache_dir) 53 self.purge()
54
55 def purge(self):
56 # remove expired files from cache
57 # TODO: this should be called not only on startup, but also at regular intervals
58 # (e.g. once a day)
59 purged = set()
60 # we sort files to have metadata files first
61 for cache_file in sorted(self.cache_dir.iterdir()):
62 if cache_file in purged:
63 continue
64 try:
65 with cache_file.open('rb') as f:
66 cache_data = pickle.load(f)
67 except IOError:
68 log.warning(
69 _("Can't read metadata file at {path}")
70 .format(path=cache_file))
71 continue
72 except (pickle.UnpicklingError, EOFError):
73 log.debug(f"File at {cache_file} is not a metadata file")
74 continue
75 try:
76 eol = cache_data['eol']
77 filename = cache_data['filename']
78 except KeyError:
79 log.warning(
80 _("Invalid cache metadata at {path}")
81 .format(path=cache_file))
82 continue
83
84 if eol < time.time():
85 filepath = self.getPath(filename)
86 log.debug(
87 "purging expired cache {filepath!r} (expired for {time}s)"
88 .format(filepath=str(filepath), time=int(time.time() - eol))
89 )
90 cache_file.unlink()
91 try:
92 filepath.unlink()
93 except FileNotFoundError:
94 log.warning(
95 _("following file is missing while purging cache: {path}")
96 .format(path=filepath)
97 )
98 purged.add(cache_file)
99 purged.add(filepath)
52 100
53 def getPath(self, filename): 101 def getPath(self, filename):
54 """return cached file URL 102 """return cached file URL
55 103
56 @param filename(unicode): cached file name (cache data or actual file) 104 @param filename(str): cached file name (cache data or actual file)
105 @return (Path): path to the cached file
57 """ 106 """
58 if not filename or "/" in filename: 107 if not filename or "/" in filename:
59 log.error( 108 log.error(
60 "invalid char found in file name, hack attempt? name:{}".format(filename) 109 "invalid char found in file name, hack attempt? name:{}".format(filename)
61 ) 110 )
62 raise exceptions.DataError("Invalid char found") 111 raise exceptions.DataError("Invalid char found")
63 return os.path.join(self.cache_dir, filename) 112 return self.cache_dir / filename
64 113
65 def getMetadata(self, uid): 114 def getMetadata(self, uid):
66 """retrieve metadata for cached data 115 """Retrieve metadata for cached data
67 116
68 @param uid(unicode): unique identifier of file 117 @param uid(unicode): unique identifier of file
69 @return (dict, None): metadata with following keys: 118 @return (dict, None): metadata with following keys:
70 see [cacheData] for data details, an additional "path" key is the full path to cached file. 119 see [cacheData] for data details, an additional "path" key is the full path to cached file.
71 None if file is not in cache (or cache is invalid) 120 None if file is not in cache (or cache is invalid)
73 122
74 uid = uid.strip() 123 uid = uid.strip()
75 if not uid: 124 if not uid:
76 raise exceptions.InternalError("uid must not be empty") 125 raise exceptions.InternalError("uid must not be empty")
77 cache_url = self.getPath(uid) 126 cache_url = self.getPath(uid)
78 if not os.path.exists(cache_url): 127 if not cache_url.exists():
79 return None 128 return None
80 129
81 try: 130 try:
82 with open(cache_url, "rb") as f: 131 with cache_url.open("rb") as f:
83 cache_data = pickle.load(f) 132 cache_data = pickle.load(f)
84 except IOError: 133 except IOError:
85 log.warning("can't read cache at {}".format(cache_url)) 134 log.warning("can't read cache at {}".format(cache_url))
86 return None 135 return None
87 except pickle.UnpicklingError: 136 except pickle.UnpicklingError:
101 150
102 cache_data["path"] = self.getPath(cache_data["filename"]) 151 cache_data["path"] = self.getPath(cache_data["filename"])
103 return cache_data 152 return cache_data
104 153
105 def getFilePath(self, uid): 154 def getFilePath(self, uid):
106 """retrieve absolute path to file 155 """Retrieve absolute path to file
107 156
108 @param uid(unicode): unique identifier of file 157 @param uid(unicode): unique identifier of file
109 @return (unicode, None): absolute path to cached file 158 @return (unicode, None): absolute path to cached file
110 None if file is not in cache (or cache is invalid) 159 None if file is not in cache (or cache is invalid)
111 """ 160 """
155 file_path = self.getPath(filename) 204 file_path = self.getPath(filename)
156 205
157 with open(cache_url, "wb") as f: 206 with open(cache_url, "wb") as f:
158 pickle.dump(cache_data, f, protocol=2) 207 pickle.dump(cache_data, f, protocol=2)
159 208
160 return open(file_path, "wb") 209 return file_path.open("wb")