annotate libervia/backend/memory/cache.py @ 4310:d27228b3c704

test (unit): add test for email gateway: rel 450
author Goffi <goffi@goffi.org>
date Thu, 26 Sep 2024 16:12:01 +0200
parents e11b13418ba6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
1 #!/usr/bin/env python3
3137
559a625a236b fixed shebangs
Goffi <goffi@goffi.org>
parents: 3136
diff changeset
2
4212
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
3 # Libervia: an XMPP client
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
4 # Copyright (C) 2009-2024 Jérôme Poisson (goffi@goffi.org)
2109
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
5
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
6 # This program is free software: you can redistribute it and/or modify
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # it under the terms of the GNU Affero General Public License as published by
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # the Free Software Foundation, either version 3 of the License, or
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # (at your option) any later version.
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
10
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
11 # This program is distributed in the hope that it will be useful,
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # GNU Affero General Public License for more details.
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
15
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
16 # You should have received a copy of the GNU Affero General Public License
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
18
3818
2863345c9bbb core (memory/cache): type hints + filename fix:
Goffi <goffi@goffi.org>
parents: 3572
diff changeset
19 from io import BufferedIOBase
3185
554b3b632378 memory (cache): purge + pathlib:
Goffi <goffi@goffi.org>
parents: 3137
diff changeset
20 import mimetypes
3818
2863345c9bbb core (memory/cache): type hints + filename fix:
Goffi <goffi@goffi.org>
parents: 3572
diff changeset
21 from pathlib import Path
3185
554b3b632378 memory (cache): purge + pathlib:
Goffi <goffi@goffi.org>
parents: 3137
diff changeset
22 import time
4212
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
23 from typing import Any
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
24
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
25 from pydantic import BaseModel, ValidationError
3818
2863345c9bbb core (memory/cache): type hints + filename fix:
Goffi <goffi@goffi.org>
parents: 3572
diff changeset
26
4071
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
27 from libervia.backend.core import exceptions
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
28 from libervia.backend.core.constants import Const as C
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
29 from libervia.backend.core.i18n import _
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
30 from libervia.backend.core.log import getLogger
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
31 from libervia.backend.tools.common import regex
3185
554b3b632378 memory (cache): purge + pathlib:
Goffi <goffi@goffi.org>
parents: 3137
diff changeset
32
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
33
2109
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
# Logger used by every function and method of this module.
log = getLogger(__name__)

# Suffix appended to a cache uid to build the name of its JSON metadata file.
CACHE_METADATA_EXT = ".cache.json"
# NOTE(review): not referenced in the visible part of this file; presumably the
# extension given to a cached file when none can be guessed — confirm.
DEFAULT_EXT = ".raw"
2509
d485e9416493 core (memory/cache): common cache:
Goffi <goffi@goffi.org>
parents: 2506
diff changeset
38
2109
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
39
4212
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
class CacheMetadata(BaseModel):
    # Description of one cache entry. It is serialised to JSON in a
    # "<uid>.cache.json" file located in the cache directory.

    # --- mandatory fields ---
    # Origin of the cached data.
    source: str
    # Unique identifier of the entry; also used to name the metadata file.
    uid: str
    # Name (single path component) of the data file inside the cache directory.
    filename: str
    # NOTE(review): presumably the Unix timestamp of creation — confirm in writer.
    creation: int
    # End of life, compared against time.time(): the entry is expired once this
    # timestamp is in the past.
    eol: int

    # --- optional fields ---
    # Number of seconds added to "now" whenever the end of life is refreshed.
    max_age: int = C.DEFAULT_MAX_AGE
    original_filename: str | None = None
    mime_type: str | None = None
    # Set to the current time when metadata is retrieved with EOL update enabled.
    last_access: int | None = None
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
50
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
51
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
52 class Cache:
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
53 """Generic file caching."""
2109
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
54
2509
d485e9416493 core (memory/cache): common cache:
Goffi <goffi@goffi.org>
parents: 2506
diff changeset
def __init__(self, host, profile):
    """Locate (and create if needed) the cache directory, then purge it.

    @param host: object whose ``memory.config_get`` is used to read the
        "local_dir" setting
    @param profile(str, None): name of the profile to set the cache for
        if None (or empty), the cache will be common for all profiles
    """
    self.profile = profile
    local_dir = host.memory.config_get("", "local_dir")
    # Per-profile caches live under "profiles/<escaped name>", the shared one
    # under "common".
    if profile:
        sub_dirs = ("profiles", regex.path_escape(profile))
    else:
        sub_dirs = ("common",)
    self.cache_dir = Path(local_dir, C.CACHE_DIR, *sub_dirs)

    self.cache_dir.mkdir(0o700, parents=True, exist_ok=True)
    self.purge()
2509
d485e9416493 core (memory/cache): common cache:
Goffi <goffi@goffi.org>
parents: 2506
diff changeset
70
3185
554b3b632378 memory (cache): purge + pathlib:
Goffi <goffi@goffi.org>
parents: 3137
diff changeset
def purge(self):
    """Remove expired, unreadable, and unrelated files from the cache directory.

    Three kinds of files are deleted:
        - metadata files which can't be read or validated;
        - metadata files whose data file is missing, and expired entries
          (metadata file and its data file);
        - any other file of the cache directory which is not the metadata or
          the data file of a live entry.
    A file which can't be deleted during the final sweep is reported with a
    warning instead of aborting the purge.
    """
    # TODO: this should not be called only on startup, but at regular interval
    #   (e.g. once a day)
    to_delete = set()
    # Data files of expired entries, kept apart until we know that no live
    # entry still references them.
    expired_data = set()
    seen = set()
    now = time.time()
    for cache_data_file in self.cache_dir.glob(f"*{CACHE_METADATA_EXT}"):
        # A metadata file is either kept or queued in ``to_delete``: in both
        # cases the final sweep must not handle it as an unrelated file.
        seen.add(cache_data_file)
        try:
            with cache_data_file.open("r") as f:
                cache_data = CacheMetadata.model_validate_json(f.read())
        except (IOError, UnicodeDecodeError, ValidationError):
            # UnicodeDecodeError: the file is not even valid text, it must be
            # discarded like any other unreadable metadata.
            log.warning(
                _("Can't read metadata file at {path}, deleting it.").format(
                    path=cache_data_file
                )
            )
            to_delete.add(cache_data_file)
            continue

        cached_file = self.get_path(cache_data.filename)
        if not cached_file.exists():
            log.warning(
                f"Cache file {cache_data_file!r} references a non-existent file "
                f"and will be deleted: {cached_file!r}."
            )
            to_delete.add(cache_data_file)
        elif cache_data.eol < now:
            log.debug(
                f"Purging expired cache file {cache_data_file} (expired for "
                f"{int(now - cache_data.eol)}s)"
            )
            to_delete.add(cache_data_file)
            expired_data.add(cached_file)
        else:
            seen.add(cached_file)

    # An expired data file is removed together with its metadata, unless a
    # live entry uses the very same file.
    to_delete.update(expired_data - seen)

    for file in to_delete:
        log.debug(f"Deleting cache file: {file}")
        file.unlink(missing_ok=True)

    for file in self.cache_dir.iterdir():
        if file in seen:
            continue
        log.debug(f"Deleting irrelevant file in cache dir: {file}")
        try:
            file.unlink()
        except OSError as e:
            # e.g. a sub-directory: it must not prevent the cache from being used
            log.warning(f"Can't delete {file} from cache dir: {e}")
2109
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
115
4212
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
def get_path(self, filename: str) -> Path:
    """Return the path of a file located in the cache directory.

    @param filename: cached file name (cache data or actual file)
        it must be a single path component: no directory separator, and
        neither "." nor ".."
    @return: path to the cached file
    @raise exceptions.DataError: the name is empty, or it could designate
        something else than a file directly inside the cache directory
    """
    if (
        not filename
        or "/" in filename
        # "." and ".." hold no separator but resolve to the cache directory
        # itself and to its parent
        or filename in (".", "..")
        # catches platform specific separators and drive prefixes
        or Path(filename).name != filename
    ):
        log.error(
            "invalid char found in file name, hack attempt? name:{}".format(filename)
        )
        raise exceptions.DataError("Invalid char found")
    return self.cache_dir / filename
2109
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
128
4212
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
def get_metadata(self, uid: str, update_eol: bool = True) -> dict[str, Any] | None:
    """Retrieve metadata for cached data.

    @param uid: unique identifier of cache metadata.
        Surrounding whitespace is stripped.
    @param update_eol: True if eol must be extended
        if True, last_access is set to the current time, eol to the current
        time + max_age, and the metadata file is rewritten (only if the entry
        is not already expired)
    @return: metadata, see [CacheMetadata] for data details, an additional "path"
        key is the full path to cached file.
        None if file is not in cache (or cache is invalid or expired).
    @raise exceptions.InternalError: uid is empty
    """
    uid = uid.strip()
    if not uid:
        raise exceptions.InternalError("uid must not be empty")
    cache_url = self.get_path(f"{uid}{CACHE_METADATA_EXT}")
    if not cache_url.exists():
        return None

    try:
        with cache_url.open("r") as f:
            cache_data = CacheMetadata.model_validate_json(f.read())
    except (IOError, EOFError) as e:
        log.warning(f"Can't read cache at {cache_url}: {e}")
        return None
    except ValidationError:
        log.warning(f"Invalid cache found at {cache_url}")
        return None
    except UnicodeDecodeError:
        log.warning(
            f"Invalid encoding, this is not a cache metadata file: {cache_url}"
        )
        return None

    # The clock is read once so that the expiration test, the log message and
    # the refreshed values all agree.
    now = time.time()
    if cache_data.eol < now:
        # Nothing is deleted here: expired files are left on disk for purge().
        log.debug(
            "ignoring expired cache (expired for {}s)".format(now - cache_data.eol)
        )
        return None

    if update_eol:
        timestamp = int(now)
        cache_data.last_access = timestamp
        cache_data.eol = timestamp + cache_data.max_age
        with cache_url.open("w") as f:
            f.write(cache_data.model_dump_json(exclude_none=True))

    # FIXME: we convert to dict to be compatible with former method (pre Pydantic).
    #   All call to get_metadata should use directly the Pydantic model in the future.
    cache_data_dict = cache_data.model_dump()
    cache_data_dict["path"] = self.get_path(cache_data.filename)
    return cache_data_dict
2517
cd7a53c31eb6 core (memory/cache): new getMetadata method to retrieve metadata without opening the file
Goffi <goffi@goffi.org>
parents: 2509
diff changeset
179
4212
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
180 def get_file_path(self, uid: str) -> Path | None:
3185
554b3b632378 memory (cache): purge + pathlib:
Goffi <goffi@goffi.org>
parents: 3137
diff changeset
181 """Retrieve absolute path to file
2517
cd7a53c31eb6 core (memory/cache): new getMetadata method to retrieve metadata without opening the file
Goffi <goffi@goffi.org>
parents: 2509
diff changeset
182
cd7a53c31eb6 core (memory/cache): new getMetadata method to retrieve metadata without opening the file
Goffi <goffi@goffi.org>
parents: 2509
diff changeset
183 @param uid(unicode): unique identifier of file
cd7a53c31eb6 core (memory/cache): new getMetadata method to retrieve metadata without opening the file
Goffi <goffi@goffi.org>
parents: 2509
diff changeset
184 @return (unicode, None): absolute path to cached file
cd7a53c31eb6 core (memory/cache): new getMetadata method to retrieve metadata without opening the file
Goffi <goffi@goffi.org>
parents: 2509
diff changeset
185 None if file is not in cache (or cache is invalid)
cd7a53c31eb6 core (memory/cache): new getMetadata method to retrieve metadata without opening the file
Goffi <goffi@goffi.org>
parents: 2509
diff changeset
186 """
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3818
diff changeset
187 metadata = self.get_metadata(uid)
2517
cd7a53c31eb6 core (memory/cache): new getMetadata method to retrieve metadata without opening the file
Goffi <goffi@goffi.org>
parents: 2509
diff changeset
188 if metadata is not None:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
189 return metadata["path"]
2109
85f3e12e984d core (memory/cache): file caching handling, first draft:
Goffi <goffi@goffi.org>
parents:
diff changeset
190
4212
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents: 4071
diff changeset
def remove_from_cache(self, uid: str, metadata=None) -> None:
    """Delete a cache entry: its data file, then its metadata file.

    Nothing is done when get_metadata() finds no valid, non expired entry for
    this uid. The end of life of the entry is not extended by the lookup.

    @param uid: unique identifier of the cache entry
    @param metadata: currently unused
    """
    entry = self.get_metadata(uid, update_eol=False)
    if entry is None:
        log.debug(f"cache with uid {uid!r} has already expired or been removed")
        return

    if "filename" not in entry:
        log.warning(_("missing filename for cache {uid!r}").format(uid=uid))
    else:
        data_name = entry["filename"]
        data_path = self.get_path(data_name)
        try:
            data_path.unlink()
        except FileNotFoundError:
            # the metadata is removed anyway below
            log.warning(
                _("missing file referenced in cache {uid!r}: {filename}").format(
                    uid=uid, filename=data_name
                )
            )

    metadata_path = self.get_path(f"{uid}{CACHE_METADATA_EXT}")
    metadata_path.unlink()
    log.debug(f"Cache with uid {uid!r} has been removed.")
3210
fedec192a83f core (memory/cache): new removeFromCache method
Goffi <goffi@goffi.org>
parents: 3209
diff changeset
219
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3818
diff changeset
def cache_data(
    self,
    source: str,
    uid: str,
    mime_type: str | None = None,
    max_age: int | None = C.DEFAULT_MAX_AGE,
    original_filename: str | None = None,
) -> BufferedIOBase:
    """Create cache metadata and file object to use for actual data.

    The metadata file is written right away; the data file is only opened, filling
    and closing it is left to the caller.

    @param source: source of the cache (should be plugin's import_name)
    @param uid: an identifier of the file which must be unique
    @param mime_type: MIME type of the file to cache
        it will be used notably to guess file extension
        It may be autogenerated if original_filename is specified
    @param max_age: maximum age in seconds
        the cache metadata will have an "eol" (end of life)
        None to use default value
        0 to ignore cache (file will be re-downloaded on each access)
    @param original_filename: if not None, will be used to guess the MIME type when
        it is not given (the file extension is then derived from the MIME type),
        and stored in "original_filename"
    @return: file object opened in write mode
        you have to close it yourself (hint: use ``with`` statement)
    """
    if max_age is None:
        # The documented contract accepts None as "use the default"; without this
        # fallback the "eol" computation below would fail on ``now + None``.
        max_age = C.DEFAULT_MAX_AGE

    if original_filename is not None and mime_type is None:
        # we have original_filename but not MIME type, we try to guess the latter
        mime_type = mimetypes.guess_type(original_filename, strict=False)[0]

    if mime_type:
        ext = mimetypes.guess_extension(mime_type, strict=False)
        if ext is None:
            log.warning("can't find extension for MIME type {}".format(mime_type))
            ext = DEFAULT_EXT
        elif ext == ".jpe":
            # prefer the more widespread ".jpg" over the ".jpe" variant
            ext = ".jpg"
    else:
        ext = DEFAULT_EXT
        # normalise any falsy value (e.g. empty string) to None so that it is
        # left out of the serialised metadata
        mime_type = None

    filename = uid + ext
    now = int(time.time())
    metadata = CacheMetadata(
        source=source,
        uid=uid,
        mime_type=mime_type,
        max_age=max_age,
        original_filename=original_filename,
        filename=filename,
        creation=now,
        eol=now + max_age,
    )

    cache_metadata_file = self.get_path(f"{uid}{CACHE_METADATA_EXT}")
    file_path = self.get_path(filename)

    # Metadata is stored as JSON next to the data file; None fields are omitted.
    with open(cache_metadata_file, "w") as f:
        f.write(metadata.model_dump_json(exclude_none=True))

    # Ownership of the returned file object is transferred to the caller.
    return open(file_path, "wb")