libervia-backend: comparison sat/memory/cache.py @ 3185:554b3b632378
memory (cache): purge + pathlib:
- cache is now purged on backend startup (every file whose EOL has passed is deleted)
- use of pathlib; getPath now returns a Path
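
Below is a minimal, self-contained sketch of the expiry logic that the new purge() (see the diff further down) applies, run against a throwaway directory. The file names here are invented for illustration; the pickled metadata keys ("eol", "filename") match the ones the method reads back.

```python
import pickle
import tempfile
import time
from pathlib import Path

cache_dir = Path(tempfile.mkdtemp())

# fake cache entry: a payload file plus a pickled metadata file whose EOL is in the past
(cache_dir / "abcd1234.raw").write_bytes(b"cached payload")
with (cache_dir / "abcd1234").open("wb") as f:
    pickle.dump({"eol": time.time() - 3600, "filename": "abcd1234.raw"}, f, protocol=2)

# same pattern as purge(): skim every file, keep only metadata files,
# and delete both metadata and payload once the EOL has passed
purged = set()
for cache_file in sorted(cache_dir.iterdir()):
    if cache_file in purged:
        continue
    try:
        with cache_file.open("rb") as f:
            cache_data = pickle.load(f)
    except (pickle.UnpicklingError, EOFError):
        continue  # raw payload, not a metadata file
    if cache_data["eol"] < time.time():
        filepath = cache_dir / cache_data["filename"]
        filepath.unlink()
        cache_file.unlink()
        purged.add(cache_file)
        purged.add(filepath)

print([p.name for p in cache_dir.iterdir()])  # [] -- both files were removed
```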
author   | Goffi <goffi@goffi.org>
date     | Wed, 26 Feb 2020 15:54:43 +0100
parents  | 559a625a236b
children | a15773c6c273
comparison
3184:e8ce30798d15 | 3185:554b3b632378 |
---|---|
15 # GNU Affero General Public License for more details. | 15 # GNU Affero General Public License for more details. |
16 | 16 |
17 # You should have received a copy of the GNU Affero General Public License | 17 # You should have received a copy of the GNU Affero General Public License |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 |
20 from sat.core.log import getLogger | |
21 | |
22 log = getLogger(__name__) | |
23 from sat.tools.common import regex | |
24 from sat.core import exceptions | |
25 from sat.core.constants import Const as C | |
26 import pickle as pickle | 20 import pickle as pickle |
27 import mimetypes | 21 import mimetypes |
28 import os.path | |
29 import time | 22 import time |
23 from pathlib import Path | |
24 from sat.core.i18n import _ | |
25 from sat.core.log import getLogger | |
26 from sat.core.constants import Const as C | |
27 from sat.core import exceptions | |
28 from sat.tools.common import regex | |
29 | |
30 | |
31 log = getLogger(__name__) | |
30 | 32 |
31 DEFAULT_EXT = ".raw" | 33 DEFAULT_EXT = ".raw" |
32 | 34 |
33 | 35 |
34 class Cache(object): | 36 class Cache(object): |
43 path_elts = [host.memory.getConfig("", "local_dir"), C.CACHE_DIR] | 45 path_elts = [host.memory.getConfig("", "local_dir"), C.CACHE_DIR] |
44 if profile: | 46 if profile: |
45 path_elts.extend(["profiles", regex.pathEscape(profile)]) | 47 path_elts.extend(["profiles", regex.pathEscape(profile)]) |
46 else: | 48 else: |
47 path_elts.append("common") | 49 path_elts.append("common") |
48 self.cache_dir = os.path.join(*path_elts) | 50 self.cache_dir = Path(*path_elts) |
49 | 51 |
50 if not os.path.exists(self.cache_dir): | 52 self.cache_dir.mkdir(0o700, parents=True, exist_ok=True) |
51 os.makedirs(self.cache_dir) | 53 self.purge() |
54 | |
55 def purge(self): | |
56 # remove expired files from cache | |
57 # TODO: this should not be called only on startup, but at regular interval | |
58 # (e.g. once a day) | |
59 purged = set() | |
60 # we sort files to have metadata files first | |
61 for cache_file in sorted(self.cache_dir.iterdir()): | |
62 if cache_file in purged: | |
63 continue | |
64 try: | |
65 with cache_file.open('rb') as f: | |
66 cache_data = pickle.load(f) | |
67 except IOError: | |
68 log.warning( | |
69 _("Can't read metadata file at {path}") | |
70 .format(path=cache_file)) | |
71 continue | |
72 except (pickle.UnpicklingError, EOFError): | |
73 log.debug(f"File at {cache_file} is not a metadata file") | |
74 continue | |
75 try: | |
76 eol = cache_data['eol'] | |
77 filename = cache_data['filename'] | |
78 except KeyError: | |
79 log.warning( | |
80 _("Invalid cache metadata at {path}") | |
81 .format(path=cache_file)) | |
82 continue | |
83 | |
84 if eol < time.time(): | |
85 filepath = self.getPath(filename) | |
86 log.debug( | |
87 "purging expired cache {filepath!r} (expired for {time}s)" | |
88 .format(filepath=str(filepath), time=int(time.time() - eol)) | |
89 ) | |
90 cache_file.unlink() | |
91 try: | |
92 filepath.unlink() | |
93 except FileNotFoundError: | |
94 log.warning( | |
95 _("following file is missing while purging cache: {path}") | |
96 .format(path=filepath) | |
97 ) | |
98 purged.add(cache_file) | |
99 purged.add(filepath) | |
52 | 100 |
53 def getPath(self, filename): | 101 def getPath(self, filename): |
54 """return cached file URL | 102 """return cached file URL |
55 | 103 |
56 @param filename(unicode): cached file name (cache data or actual file) | 104 @param filename(str): cached file name (cache data or actual file) |
105 @return (Path): path to the cached file | |
57 """ | 106 """ |
58 if not filename or "/" in filename: | 107 if not filename or "/" in filename: |
59 log.error( | 108 log.error( |
60 "invalid char found in file name, hack attempt? name:{}".format(filename) | 109 "invalid char found in file name, hack attempt? name:{}".format(filename) |
61 ) | 110 ) |
62 raise exceptions.DataError("Invalid char found") | 111 raise exceptions.DataError("Invalid char found") |
63 return os.path.join(self.cache_dir, filename) | 112 return self.cache_dir / filename |
64 | 113 |
65 def getMetadata(self, uid): | 114 def getMetadata(self, uid): |
66 """retrieve metadata for cached data | 115 """Retrieve metadata for cached data |
67 | 116 |
68 @param uid(unicode): unique identifier of file | 117 @param uid(unicode): unique identifier of file |
69 @return (dict, None): metadata with following keys: | 118 @return (dict, None): metadata with following keys: |
70 see [cacheData] for data details, an additional "path" key is the full path to cached file. | 119 see [cacheData] for data details, an additional "path" key is the full path to cached file. |
71 None if file is not in cache (or cache is invalid) | 120 None if file is not in cache (or cache is invalid) |
73 | 122 |
74 uid = uid.strip() | 123 uid = uid.strip() |
75 if not uid: | 124 if not uid: |
76 raise exceptions.InternalError("uid must not be empty") | 125 raise exceptions.InternalError("uid must not be empty") |
77 cache_url = self.getPath(uid) | 126 cache_url = self.getPath(uid) |
78 if not os.path.exists(cache_url): | 127 if not cache_url.exists(): |
79 return None | 128 return None |
80 | 129 |
81 try: | 130 try: |
82 with open(cache_url, "rb") as f: | 131 with cache_url.open("rb") as f: |
83 cache_data = pickle.load(f) | 132 cache_data = pickle.load(f) |
84 except IOError: | 133 except IOError: |
85 log.warning("can't read cache at {}".format(cache_url)) | 134 log.warning("can't read cache at {}".format(cache_url)) |
86 return None | 135 return None |
87 except pickle.UnpicklingError: | 136 except pickle.UnpicklingError: |
101 | 150 |
102 cache_data["path"] = self.getPath(cache_data["filename"]) | 151 cache_data["path"] = self.getPath(cache_data["filename"]) |
103 return cache_data | 152 return cache_data |
104 | 153 |
105 def getFilePath(self, uid): | 154 def getFilePath(self, uid): |
106 """retrieve absolute path to file | 155 """Retrieve absolute path to file |
107 | 156 |
108 @param uid(unicode): unique identifier of file | 157 @param uid(unicode): unique identifier of file |
109 @return (unicode, None): absolute path to cached file | 158 @return (unicode, None): absolute path to cached file |
110 None if file is not in cache (or cache is invalid) | 159 None if file is not in cache (or cache is invalid) |
111 """ | 160 """ |
155 file_path = self.getPath(filename) | 204 file_path = self.getPath(filename) |
156 | 205 |
157 with open(cache_url, "wb") as f: | 206 with open(cache_url, "wb") as f: |
158 pickle.dump(cache_data, f, protocol=2) | 207 pickle.dump(cache_data, f, protocol=2) |
159 | 208 |
160 return open(file_path, "wb") | 209 return file_path.open("wb") |
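
For callers, the visible change is that getPath() and the "path" entry returned by getMetadata() are now pathlib.Path objects instead of plain strings. A hypothetical consumer might look like the sketch below; the `cache` parameter stands in for an already-initialised Cache instance, which needs a backend host object to construct and is out of scope here.

```python
from pathlib import Path

def show_cached_file(cache, uid: str) -> None:
    """Hypothetical consumer of the new Path-based API (illustration only)."""
    metadata = cache.getMetadata(uid)
    if metadata is None:
        print(f"{uid} is not in cache (or the cache entry is invalid)")
        return
    path: Path = metadata["path"]          # now a pathlib.Path, no longer a str
    print(f"cached as {metadata['filename']} at {path}")
    with path.open("rb") as f:             # Path methods replace os.path/open calls
        print(f"{len(f.read())} bytes cached")
```

Where a plain string is still required, str(path) gives back the old value.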