libervia-backend: comparison of libervia/backend/memory/cache.py @ 4071:4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
author: Goffi <goffi@goffi.org>
date: Fri, 02 Jun 2023 11:49:51 +0200
parents: sat/memory/cache.py@524856bd7b19
children: 5f2d496c633f
compared revisions: 4070:d10748475025 and 4071:4b842c1fb686
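As context for the rename described in the commit message, a caller that imported this module from the old `sat` package now uses the `libervia.backend` path instead. A minimal sketch (the call sites themselves are not part of this changeset):

# old import path, before this refactoring
# from sat.memory.cache import Cache

# new import path, after the rename to libervia.backend
from libervia.backend.memory.cache import Cache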
#!/usr/bin/env python3


# SAT: a jabber client
# Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from io import BufferedIOBase
import mimetypes
from pathlib import Path
import pickle as pickle
import time
from typing import Any, Dict, Optional

from libervia.backend.core import exceptions
from libervia.backend.core.constants import Const as C
from libervia.backend.core.i18n import _
from libervia.backend.core.log import getLogger
from libervia.backend.tools.common import regex


log = getLogger(__name__)

DEFAULT_EXT = ".raw"


class Cache(object):
40 """generic file caching""" | |
41 | |
42 def __init__(self, host, profile): | |
43 """ | |
44 @param profile(unicode, None): name of the profile to set the cache for | |
45 if None, the cache will be common for all profiles | |
46 """ | |
47 self.profile = profile | |
48 path_elts = [host.memory.config_get("", "local_dir"), C.CACHE_DIR] | |
49 if profile: | |
50 path_elts.extend(["profiles", regex.path_escape(profile)]) | |
51 else: | |
52 path_elts.append("common") | |
53 self.cache_dir = Path(*path_elts) | |
54 | |
55 self.cache_dir.mkdir(0o700, parents=True, exist_ok=True) | |
56 self.purge() | |
57 | |
58 def purge(self): | |
59 # remove expired files from cache | |
60 # TODO: this should not be called only on startup, but at regular interval | |
61 # (e.g. once a day) | |
62 purged = set() | |
63 # we sort files to have metadata files first | |
64 for cache_file in sorted(self.cache_dir.iterdir()): | |
65 if cache_file in purged: | |
66 continue | |
67 try: | |
68 with cache_file.open('rb') as f: | |
69 cache_data = pickle.load(f) | |
70 except IOError: | |
71 log.warning( | |
72 _("Can't read metadata file at {path}") | |
73 .format(path=cache_file)) | |
74 continue | |
75 except (pickle.UnpicklingError, EOFError): | |
76 log.debug(f"File at {cache_file} is not a metadata file") | |
77 continue | |
78 try: | |
79 eol = cache_data['eol'] | |
80 filename = cache_data['filename'] | |
81 except KeyError: | |
82 log.warning( | |
83 _("Invalid cache metadata at {path}") | |
84 .format(path=cache_file)) | |
85 continue | |
86 | |
87 filepath = self.getPath(filename) | |
88 | |
89 if not filepath.exists(): | |
90 log.warning(_( | |
91 "cache {cache_file!r} references an inexisting file: {filepath!r}" | |
92 ).format(cache_file=str(cache_file), filepath=str(filepath))) | |
93 log.debug("purging cache with missing file") | |
94 cache_file.unlink() | |
95 elif eol < time.time(): | |
96 log.debug( | |
97 "purging expired cache {filepath!r} (expired for {time}s)" | |
98 .format(filepath=str(filepath), time=int(time.time() - eol)) | |
99 ) | |
100 cache_file.unlink() | |
101 try: | |
102 filepath.unlink() | |
103 except FileNotFoundError: | |
104 log.warning( | |
105 _("following file is missing while purging cache: {path}") | |
106 .format(path=filepath) | |
107 ) | |
108 purged.add(cache_file) | |
109 purged.add(filepath) | |
110 | |
111 def getPath(self, filename: str) -> Path: | |
112 """return cached file URL | |
113 | |
114 @param filename: cached file name (cache data or actual file) | |
115 @return: path to the cached file | |
116 """ | |
117 if not filename or "/" in filename: | |
118 log.error( | |
119 "invalid char found in file name, hack attempt? name:{}".format(filename) | |
120 ) | |
121 raise exceptions.DataError("Invalid char found") | |
122 return self.cache_dir / filename | |

    def get_metadata(self, uid: str, update_eol: bool = True) -> Optional[Dict[str, Any]]:
        """Retrieve metadata for cached data

        @param uid(unicode): unique identifier of file
        @param update_eol(bool): True if eol must be extended
            if True, max_age will be added to eol (only if it is not already expired)
        @return (dict, None): metadata with following keys:
            see [cache_data] for data details; an additional "path" key is the full path
            to the cached file.
            None if file is not in cache (or cache is invalid)
        """

        uid = uid.strip()
        if not uid:
            raise exceptions.InternalError("uid must not be empty")
        cache_url = self.getPath(uid)
        if not cache_url.exists():
            return None

        try:
            with cache_url.open("rb") as f:
                cache_data = pickle.load(f)
        except (IOError, EOFError) as e:
            log.warning(f"can't read cache at {cache_url}: {e}")
            return None
        except pickle.UnpicklingError:
            log.warning(f"invalid cache found at {cache_url}")
            return None

        try:
            eol = cache_data["eol"]
        except KeyError:
            log.warning("no End Of Life found for cached file {}".format(uid))
            eol = 0
        if eol < time.time():
            log.debug(
                "removing expired cache (expired for {}s)".format(time.time() - eol)
            )
            return None

        if update_eol:
            try:
                max_age = cache_data["max_age"]
            except KeyError:
                log.warning(f"no max_age found for cache at {cache_url}, using default")
                max_age = cache_data["max_age"] = C.DEFAULT_MAX_AGE
            now = int(time.time())
            cache_data["last_access"] = now
            cache_data["eol"] = now + max_age
            with cache_url.open("wb") as f:
                pickle.dump(cache_data, f, protocol=2)

        cache_data["path"] = self.getPath(cache_data["filename"])
        return cache_data

    def get_file_path(self, uid: str) -> Optional[Path]:
        """Retrieve absolute path to file

        @param uid(unicode): unique identifier of file
        @return (Path, None): absolute path to cached file
            None if file is not in cache (or cache is invalid)
        """
        metadata = self.get_metadata(uid)
        if metadata is not None:
            return metadata["path"]

    def remove_from_cache(self, uid, metadata=None):
        """Remove data from cache

        @param uid(unicode): unique identifier of cache file
        """
        cache_data = self.get_metadata(uid, update_eol=False)
        if cache_data is None:
            log.debug(f"cache with uid {uid!r} has already expired or been removed")
            return

        try:
            filename = cache_data['filename']
        except KeyError:
            log.warning(_("missing filename for cache {uid!r}").format(uid=uid))
        else:
            filepath = self.getPath(filename)
            try:
                filepath.unlink()
            except FileNotFoundError:
                log.warning(
                    _("missing file referenced in cache {uid!r}: {filename}")
                    .format(uid=uid, filename=filename)
                )

        cache_file = self.getPath(uid)
        cache_file.unlink()
        log.debug(f"cache with uid {uid!r} has been removed")

    def cache_data(
        self,
        source: str,
        uid: str,
        mime_type: Optional[str] = None,
        max_age: Optional[int] = None,
        original_filename: Optional[str] = None
    ) -> BufferedIOBase:
        """create cache metadata and file object to use for actual data

        @param source: source of the cache (should be plugin's import_name)
        @param uid: an identifier of the file which must be unique
        @param mime_type: MIME type of the file to cache
            it will be used notably to guess file extension
            It may be autogenerated if filename is specified
        @param max_age: maximum age in seconds
            the cache metadata will have an "eol" (end of life)
            None to use default value
            0 to ignore cache (file will be re-downloaded on each access)
        @param original_filename: if not None, will be used to retrieve file extension
            and guess MIME type, and stored in "original_filename"
        @return: file object opened in write mode
            you have to close it yourself (hint: use ``with`` statement)
        """
        if max_age is None:
            max_age = C.DEFAULT_MAX_AGE
        cache_data = {
            "source": source,
            # we also store max_age for updating eol
            "max_age": max_age,
        }
        cache_url = self.getPath(uid)
        if original_filename is not None:
            cache_data["original_filename"] = original_filename
            if mime_type is None:
                # we have original_filename but not MIME type, we try to guess the latter
                mime_type = mimetypes.guess_type(original_filename, strict=False)[0]
        if mime_type:
            ext = mimetypes.guess_extension(mime_type, strict=False)
            if ext is None:
                log.warning(
                    "can't find extension for MIME type {}".format(mime_type)
                )
                ext = DEFAULT_EXT
            elif ext == ".jpe":
                ext = ".jpg"
        else:
            ext = DEFAULT_EXT
            mime_type = None
        filename = uid + ext
        now = int(time.time())
        cache_data.update({
            "filename": filename,
            "creation": now,
            "eol": now + max_age,
            "mime_type": mime_type,
        })
        file_path = self.getPath(filename)

        with open(cache_url, "wb") as f:
            pickle.dump(cache_data, f, protocol=2)

        return file_path.open("wb")
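For reference, a minimal usage sketch of the class above; `host`, `profile`, `image_bytes` and the uid value are hypothetical placeholders, and only the `Cache` API shown in this file is assumed:

# instantiate a per-profile cache (host must provide memory.config_get())
cache = Cache(host, profile)

# store data: cache_data() writes the pickled metadata file and returns a
# file object opened in write mode for the actual content
with cache.cache_data(
    source="some_plugin", uid="avatar_123", mime_type="image/jpeg"
) as f:
    f.write(image_bytes)

# retrieve it later: get_file_path() returns the cached file's Path,
# or None if the cache entry has expired or is missing
path = cache.get_file_path("avatar_123")
if path is not None:
    data = path.read_bytes()

The metadata (a pickled dict stored under the bare uid) and the data file (uid plus a guessed extension) sit side by side in cache_dir, which is why purge() sorts the directory listing: the metadata file comes before its data file and both can be purged together.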