comparison sat/memory/cache.py @ 3818:2863345c9bbb

core (memory/cache): type hints + filename fix: the cached file's filename is now always UID + extension to avoid collisions. The `filename` argument has been renamed to `original_filename`; it is stored if present, and used to guess the media type when necessary.
author Goffi <goffi@goffi.org>
date Wed, 29 Jun 2022 11:54:53 +0200
parents b3fa179417e7
children 524856bd7b19
comparison
equal deleted inserted replaced
3817:998c5318230f 3818:2863345c9bbb
15 # GNU Affero General Public License for more details. 15 # GNU Affero General Public License for more details.
16 16
17 # You should have received a copy of the GNU Affero General Public License 17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
20 from io import BufferedIOBase
21 import mimetypes
22 from pathlib import Path
20 import pickle as pickle 23 import pickle as pickle
21 import mimetypes
22 import time 24 import time
23 from pathlib import Path 25 from typing import Any, Dict, Optional
26
27 from sat.core import exceptions
28 from sat.core.constants import Const as C
24 from sat.core.i18n import _ 29 from sat.core.i18n import _
25 from sat.core.log import getLogger 30 from sat.core.log import getLogger
26 from sat.core.constants import Const as C
27 from sat.core import exceptions
28 from sat.tools.common import regex 31 from sat.tools.common import regex
29 32
30 33
31 log = getLogger(__name__) 34 log = getLogger(__name__)
32 35
36 class Cache(object): 39 class Cache(object):
37 """generic file caching""" 40 """generic file caching"""
38 41
39 def __init__(self, host, profile): 42 def __init__(self, host, profile):
40 """ 43 """
41 @param profile(unicode, None): ame of the profile to set the cache for 44 @param profile(unicode, None): name of the profile to set the cache for
42 if None, the cache will be common for all profiles 45 if None, the cache will be common for all profiles
43 """ 46 """
44 self.profile = profile 47 self.profile = profile
45 path_elts = [host.memory.getConfig("", "local_dir"), C.CACHE_DIR] 48 path_elts = [host.memory.getConfig("", "local_dir"), C.CACHE_DIR]
46 if profile: 49 if profile:
103 .format(path=filepath) 106 .format(path=filepath)
104 ) 107 )
105 purged.add(cache_file) 108 purged.add(cache_file)
106 purged.add(filepath) 109 purged.add(filepath)
107 110
108 def getPath(self, filename): 111 def getPath(self, filename: str) -> Path:
109 """return cached file URL 112 """return cached file URL
110 113
111 @param filename(str): cached file name (cache data or actual file) 114 @param filename: cached file name (cache data or actual file)
112 @return (Path): path to the cached file 115 @return: path to the cached file
113 """ 116 """
114 if not filename or "/" in filename: 117 if not filename or "/" in filename:
115 log.error( 118 log.error(
116 "invalid char found in file name, hack attempt? name:{}".format(filename) 119 "invalid char found in file name, hack attempt? name:{}".format(filename)
117 ) 120 )
118 raise exceptions.DataError("Invalid char found") 121 raise exceptions.DataError("Invalid char found")
119 return self.cache_dir / filename 122 return self.cache_dir / filename
120 123
121 def getMetadata(self, uid, update_eol=True): 124 def getMetadata(self, uid: str, update_eol: bool = True) -> Optional[Dict[str, Any]]:
122 """Retrieve metadata for cached data 125 """Retrieve metadata for cached data
123 126
124 @param uid(unicode): unique identifier of file 127 @param uid(unicode): unique identifier of file
125 @param update_eol(bool): True if eol must extended 128 @param update_eol(bool): True if eol must extended
126 if True, max_age will be added to eol (only if it is not already expired) 129 if True, max_age will be added to eol (only if it is not already expired)
171 pickle.dump(cache_data, f, protocol=2) 174 pickle.dump(cache_data, f, protocol=2)
172 175
173 cache_data["path"] = self.getPath(cache_data["filename"]) 176 cache_data["path"] = self.getPath(cache_data["filename"])
174 return cache_data 177 return cache_data
175 178
176 def getFilePath(self, uid): 179 def getFilePath(self, uid: str) -> Path:
177 """Retrieve absolute path to file 180 """Retrieve absolute path to file
178 181
179 @param uid(unicode): unique identifier of file 182 @param uid(unicode): unique identifier of file
180 @return (unicode, None): absolute path to cached file 183 @return (unicode, None): absolute path to cached file
181 None if file is not in cache (or cache is invalid) 184 None if file is not in cache (or cache is invalid)
210 213
211 cache_file = self.getPath(uid) 214 cache_file = self.getPath(uid)
212 cache_file.unlink() 215 cache_file.unlink()
213 log.debug(f"cache with uid {uid!r} has been removed") 216 log.debug(f"cache with uid {uid!r} has been removed")
214 217
215 def cacheData(self, source, uid, mime_type=None, max_age=None, filename=None): 218 def cacheData(
219 self,
220 source: str,
221 uid: str,
222 mime_type: Optional[str] = None,
223 max_age: Optional[int] = None,
224 original_filename: Optional[str] = None
225 ) -> BufferedIOBase:
216 """create cache metadata and file object to use for actual data 226 """create cache metadata and file object to use for actual data
217 227
218 @param source(unicode): source of the cache (should be plugin's import_name) 228 @param source: source of the cache (should be plugin's import_name)
219 @param uid(unicode): an identifier of the file which must be unique 229 @param uid: an identifier of the file which must be unique
220 @param mime_type(unicode): MIME type of the file to cache 230 @param mime_type: MIME type of the file to cache
221 it will be used notably to guess file extension 231 it will be used notably to guess file extension
222 It may be autogenerated if filename is specified 232 It may be autogenerated if filename is specified
223 @param max_age(int, None): maximum age in seconds 233 @param max_age: maximum age in seconds
224 the cache metadata will have an "eol" (end of life) 234 the cache metadata will have an "eol" (end of life)
225 None to use default value 235 None to use default value
226 0 to ignore cache (file will be re-downloaded on each access) 236 0 to ignore cache (file will be re-downloaded on each access)
227 @param filename: if not None, will be used as filename 237 @param original_filename: if not None, will be used to retrieve file extension and
228 else one will be generated from uid and guessed extension 238 guess
229 @return(file): file object opened in write mode 239 mime type, and stored in "original_filename"
230 you have to close it yourself (hint: use with statement) 240 @return: file object opened in write mode
231 """ 241 you have to close it yourself (hint: use ``with`` statement)
232 cache_url = self.getPath(uid) 242 """
233 if filename is None:
234 if mime_type:
235 ext = mimetypes.guess_extension(mime_type, strict=False)
236 if ext is None:
237 log.warning(
238 "can't find extension for MIME type {}".format(mime_type)
239 )
240 ext = DEFAULT_EXT
241 elif ext == ".jpe":
242 ext = ".jpg"
243 else:
244 ext = DEFAULT_EXT
245 mime_type = None
246 filename = uid + ext
247 elif mime_type is None:
248 # we have filename but not MIME type, we try to guess the later
249 mime_type = mimetypes.guess_type(filename, strict=False)[0]
250 if max_age is None: 243 if max_age is None:
251 max_age = C.DEFAULT_MAX_AGE 244 max_age = C.DEFAULT_MAX_AGE
252 now = int(time.time())
253 cache_data = { 245 cache_data = {
254 "source": source, 246 "source": source,
247 # we also store max_age for updating eol
248 "max_age": max_age,
249 }
250 cache_url = self.getPath(uid)
251 if original_filename is not None:
252 cache_data["original_filename"] = original_filename
253 if mime_type is None:
254 # we have original_filename but not MIME type, we try to guess the later
255 mime_type = mimetypes.guess_type(original_filename, strict=False)[0]
256 if mime_type:
257 ext = mimetypes.guess_extension(mime_type, strict=False)
258 if ext is None:
259 log.warning(
260 "can't find extension for MIME type {}".format(mime_type)
261 )
262 ext = DEFAULT_EXT
263 elif ext == ".jpe":
264 ext = ".jpg"
265 else:
266 ext = DEFAULT_EXT
267 mime_type = None
268 filename = uid + ext
269 now = int(time.time())
270 cache_data.update({
255 "filename": filename, 271 "filename": filename,
256 "creation": now, 272 "creation": now,
257 "eol": now + max_age, 273 "eol": now + max_age,
258 # we also store max_age for updating eol
259 "max_age": max_age,
260 "mime_type": mime_type, 274 "mime_type": mime_type,
261 } 275 })
262 file_path = self.getPath(filename) 276 file_path = self.getPath(filename)
263 277
264 with open(cache_url, "wb") as f: 278 with open(cache_url, "wb") as f:
265 pickle.dump(cache_data, f, protocol=2) 279 pickle.dump(cache_data, f, protocol=2)
266 280