comparison libervia/backend/memory/cache.py @ 4071:4b842c1fb686

refactoring: renamed `sat` package to `libervia.backend`
author Goffi <goffi@goffi.org>
date Fri, 02 Jun 2023 11:49:51 +0200
parents sat/memory/cache.py@524856bd7b19
children 5f2d496c633f
comparison
equal deleted inserted replaced
4070:d10748475025 4071:4b842c1fb686
1 #!/usr/bin/env python3
2
3
4 # SAT: a jabber client
5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
16
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 from io import BufferedIOBase
21 import mimetypes
22 from pathlib import Path
23 import pickle as pickle
24 import time
25 from typing import Any, Dict, Optional
26
27 from libervia.backend.core import exceptions
28 from libervia.backend.core.constants import Const as C
29 from libervia.backend.core.i18n import _
30 from libervia.backend.core.log import getLogger
31 from libervia.backend.tools.common import regex
32
33
# Module-level logger, named after this module.
log = getLogger(__name__)

# Extension given to cached data files when no extension can be derived from
# the MIME type (unknown MIME type, or MIME type with no known extension).
DEFAULT_EXT = ".raw"
37
38
class Cache(object):
    """Generic file caching.

    Every cached item is stored as two files inside ``cache_dir``:
        - a pickled metadata file, named after the item's ``uid``
        - the data file itself, named ``uid`` + an extension
    """

    def __init__(self, host, profile):
        """
        @param host: object giving access to ``host.memory.config_get``, used to
            locate the local directory
        @param profile(unicode, None): name of the profile to set the cache for
            if None, the cache will be common for all profiles
        """
        self.profile = profile
        path_elts = [host.memory.config_get("", "local_dir"), C.CACHE_DIR]
        if profile:
            path_elts.extend(["profiles", regex.path_escape(profile)])
        else:
            path_elts.append("common")
        self.cache_dir = Path(*path_elts)

        self.cache_dir.mkdir(0o700, parents=True, exist_ok=True)
        self.purge()

    def purge(self):
        """Remove expired or dangling entries from the cache directory.

        Every file of the cache directory is tentatively read as a metadata file;
        files which can't be read as such (typically the cached data files
        themselves) are skipped. This is a best-effort cleanup: an unreadable or
        invalid file never aborts the purge.
        """
        # TODO: this should not be called only on startup, but at regular interval
        #   (e.g. once a day)
        purged = set()
        # we sort files to have metadata files first
        for cache_file in sorted(self.cache_dir.iterdir()):
            if cache_file in purged:
                # already deleted while handling its metadata file
                continue
            try:
                with cache_file.open('rb') as f:
                    # NOTE(security): pickle must only be used on trusted data, this
                    #   assumes that nothing else can write in the cache directory
                    cache_data = pickle.load(f)
            except IOError:
                log.warning(
                    _("Can't read metadata file at {path}")
                    .format(path=cache_file))
                continue
            except Exception:
                # data files are stored next to metadata files, and unpickling
                # arbitrary bytes may raise far more than UnpicklingError/EOFError
                # (AttributeError, ImportError, IndexError, ValueError…)
                log.debug(f"File at {cache_file} is not a metadata file")
                continue
            try:
                eol = cache_data['eol']
                filename = cache_data['filename']
            except (KeyError, TypeError):
                # TypeError: the file was a valid pickle, but not of a mapping
                log.warning(
                    _("Invalid cache metadata at {path}")
                    .format(path=cache_file))
                continue

            try:
                filepath = self.getPath(filename)
            except exceptions.DataError:
                log.warning(
                    _("Invalid cache metadata at {path}")
                    .format(path=cache_file))
                continue

            if not filepath.exists():
                log.warning(_(
                    "cache {cache_file!r} references an inexisting file: {filepath!r}"
                ).format(cache_file=str(cache_file), filepath=str(filepath)))
                log.debug("purging cache with missing file")
                cache_file.unlink()
            elif eol < time.time():
                log.debug(
                    "purging expired cache {filepath!r} (expired for {time}s)"
                    .format(filepath=str(filepath), time=int(time.time() - eol))
                )
                cache_file.unlink()
                try:
                    filepath.unlink()
                except FileNotFoundError:
                    log.warning(
                        _("following file is missing while purging cache: {path}")
                        .format(path=filepath)
                    )
                # the directory listing has been done before the loop, deleted
                # files must be remembered to be skipped when they are reached
                purged.add(cache_file)
                purged.add(filepath)

    def getPath(self, filename: str) -> Path:
        """Return the path of a file located in the cache directory

        @param filename: cached file name (cache data or actual file)
            it must be a plain file name, without any directory part
        @return: path to the cached file
        @raise exceptions.DataError: the name is empty or is not a plain file name
        """
        if (
            not isinstance(filename, str)
            or not filename
            or "/" in filename
            # "." and ".." would resolve outside of the expected files
            or filename in (".", "..")
            # catches platform specific separators or drive prefixes
            or Path(filename).name != filename
        ):
            log.error(
                "invalid char found in file name, hack attempt? name:{}".format(filename)
            )
            raise exceptions.DataError("Invalid char found")
        return self.cache_dir / filename

    def get_metadata(self, uid: str, update_eol: bool = True) -> Optional[Dict[str, Any]]:
        """Retrieve metadata for cached data

        @param uid(unicode): unique identifier of file
        @param update_eol(bool): True if eol must be extended
            if True, max_age will be added to eol (only if it is not already expired)
        @return (dict, None): metadata with following keys:
            see [cache_data] for data details, an additional "path" key is the full
            path to cached file.
            None if file is not in cache (or cache is invalid)
        @raise exceptions.InternalError: uid is empty
        """
        uid = uid.strip()
        if not uid:
            raise exceptions.InternalError("uid must not be empty")
        cache_url = self.getPath(uid)
        if not cache_url.exists():
            return None

        try:
            with cache_url.open("rb") as f:
                # NOTE(security): pickle must only be used on trusted data, this
                #   assumes that nothing else can write in the cache directory
                cache_data = pickle.load(f)
        except (IOError, EOFError) as e:
            log.warning(f"can't read cache at {cache_url}: {e}")
            return None
        except Exception:
            # unpickling invalid data may raise many kinds of exceptions, not only
            # pickle.UnpicklingError
            log.warning(f"invalid cache found at {cache_url}")
            return None
        if not isinstance(cache_data, dict):
            log.warning(f"invalid cache found at {cache_url}")
            return None

        try:
            eol = cache_data["eol"]
        except KeyError:
            log.warning("no End Of Life found for cached file {}".format(uid))
            eol = 0
        if eol < time.time():
            # NOTE: nothing is deleted here, expired files are left on disk until
            #   the next call to [purge]
            log.debug(
                "removing expired cache (expired for {}s)".format(time.time() - eol)
            )
            return None

        # metadata are validated before being rewritten below
        try:
            path = self.getPath(cache_data["filename"])
        except KeyError:
            log.warning(f"no filename found for cache at {cache_url}")
            return None

        if update_eol:
            try:
                max_age = cache_data["max_age"]
            except KeyError:
                log.warning(f"no max_age found for cache at {cache_url}, using default")
                max_age = cache_data["max_age"] = C.DEFAULT_MAX_AGE
            now = int(time.time())
            cache_data["last_access"] = now
            cache_data["eol"] = now + max_age
            with cache_url.open("wb") as f:
                pickle.dump(cache_data, f, protocol=2)

        # "path" is only added to the returned dict, it is not stored on disk
        cache_data["path"] = path
        return cache_data

    def get_file_path(self, uid: str) -> Optional[Path]:
        """Retrieve absolute path to file

        @param uid(unicode): unique identifier of file
        @return (Path, None): path to cached file
            None if file is not in cache (or cache is invalid)
        """
        metadata = self.get_metadata(uid)
        if metadata is None:
            return None
        return metadata["path"]

    def remove_from_cache(self, uid, metadata=None):
        """Remove data from cache

        Both the data file and the metadata file are deleted.

        @param uid(unicode): unique identifier cache file
        @param metadata: unused, kept for backward compatibility
        """
        # get_metadata works with the stripped uid, the same one must be used to
        # locate the metadata file below
        uid = uid.strip()
        cache_data = self.get_metadata(uid, update_eol=False)
        if cache_data is None:
            log.debug(f"cache with uid {uid!r} has already expired or been removed")
            return

        try:
            filename = cache_data['filename']
        except KeyError:
            log.warning(_("missing filename for cache {uid!r}") .format(uid=uid))
        else:
            filepath = self.getPath(filename)
            try:
                filepath.unlink()
            except FileNotFoundError:
                log.warning(
                    _("missing file referenced in cache {uid!r}: {filename}")
                    .format(uid=uid, filename=filename)
                )

        cache_file = self.getPath(uid)
        cache_file.unlink()
        log.debug(f"cache with uid {uid!r} has been removed")

    def cache_data(
        self,
        source: str,
        uid: str,
        mime_type: Optional[str] = None,
        max_age: Optional[int] = None,
        original_filename: Optional[str] = None
    ) -> BufferedIOBase:
        """create cache metadata and file object to use for actual data

        @param source: source of the cache (should be plugin's import_name)
        @param uid: an identifier of the file which must be unique
            surrounding whitespaces are ignored, as in [get_metadata]
        @param mime_type: MIME type of the file to cache
            it will be used notably to guess file extension
            It may be autogenerated if filename is specified
        @param max_age: maximum age in seconds
            the cache metadata will have an "eol" (end of life)
            None to use default value
            0 to ignore cache (file will be re-downloaded on each access)
        @param original_filename: if not None, will be used to guess mime type when
            it is not specified, and stored in "original_filename"
        @return: file object opened in write mode
            you have to close it yourself (hint: use ``with`` statement)
        @raise exceptions.InternalError: uid is empty
        """
        # the uid is normalised like in get_metadata, otherwise data cached with a
        # padded uid could never be retrieved
        uid = uid.strip()
        if not uid:
            raise exceptions.InternalError("uid must not be empty")
        if max_age is None:
            max_age = C.DEFAULT_MAX_AGE
        metadata = {
            "source": source,
            # we also store max_age for updating eol
            "max_age": max_age,
        }
        cache_url = self.getPath(uid)
        if original_filename is not None:
            metadata["original_filename"] = original_filename
            if mime_type is None:
                # we have original_filename but not MIME type, we try to guess the
                # latter
                mime_type = mimetypes.guess_type(original_filename, strict=False)[0]
        if mime_type:
            ext = mimetypes.guess_extension(mime_type, strict=False)
            if ext is None:
                log.warning(
                    "can't find extension for MIME type {}".format(mime_type)
                )
                ext = DEFAULT_EXT
            elif ext == ".jpe":
                # ".jpg" is preferred to the uncommon ".jpe"
                ext = ".jpg"
        else:
            ext = DEFAULT_EXT
            # normalise empty values
            mime_type = None
        filename = uid + ext
        now = int(time.time())
        metadata.update({
            "filename": filename,
            "creation": now,
            "eol": now + max_age,
            "mime_type": mime_type,
        })
        file_path = self.getPath(filename)

        with cache_url.open("wb") as f:
            pickle.dump(metadata, f, protocol=2)

        return file_path.open("wb")