comparison sat/plugins/plugin_misc_download.py @ 3211:4252176ad993

plugin download: clean unfinished files and re-raise exception in case of download error
author Goffi <goffi@goffi.org>
date Fri, 06 Mar 2020 18:19:03 +0100
parents 2c0628f3927e
children be6d91572633
comparison
legend: equal | deleted | inserted | replaced
3210:fedec192a83f 3211:4252176ad993
142 options = {} 142 options = {}
143 143
144 uri_parsed = urlparse(uri, 'http') 144 uri_parsed = urlparse(uri, 'http')
145 if dest_path: 145 if dest_path:
146 dest_path = Path(dest_path) 146 dest_path = Path(dest_path)
147 cache_uid = None
147 else: 148 else:
148 filename = Path(unquote(uri_parsed.path)).name.strip() or C.FILE_DEFAULT_NAME 149 filename = Path(unquote(uri_parsed.path)).name.strip() or C.FILE_DEFAULT_NAME
149 # we don't use Path.suffixes because we don't want to have more than 2 150 # we don't use Path.suffixes because we don't want to have more than 2
150 # suffixes, but we still want to handle suffixes like "tar.gz". 151 # suffixes, but we still want to handle suffixes like "tar.gz".
151 stem, *suffixes = filename.rsplit('.', 2) 152 stem, *suffixes = filename.rsplit('.', 2)
152 # we hash the URL to have an unique identifier, and avoid double download 153 # we hash the URL to have an unique identifier, and avoid double download
153 url_hash = hashlib.sha256(uri_parsed.geturl().encode()).hexdigest() 154 url_hash = hashlib.sha256(uri_parsed.geturl().encode()).hexdigest()
154 uid = f"{stem}_{url_hash}" 155 cache_uid = f"{stem}_{url_hash}"
155 cache_data = client.cache.getMetadata(uid) 156 cache_data = client.cache.getMetadata(cache_uid)
156 if cache_data is not None: 157 if cache_data is not None:
157 # file is already in cache, we return it 158 # file is already in cache, we return it
158 download_d = defer.succeed(cache_data['path']) 159 download_d = defer.succeed(cache_data['path'])
159 return '', download_d 160 return '', download_d
160 else: 161 else:
161 # the file is not in cache 162 # the file is not in cache
162 unique_name = '.'.join([uid] + suffixes) 163 unique_name = '.'.join([cache_uid] + suffixes)
163 with client.cache.cacheData("DOWNLOAD", uid, filename=unique_name) as f: 164 with client.cache.cacheData(
165 "DOWNLOAD", cache_uid, filename=unique_name) as f:
164 # we close the file and only use its name, the file will be opened 166 # we close the file and only use its name, the file will be opened
165 # by the registered callback 167 # by the registered callback
166 dest_path = f.name 168 dest_path = Path(f.name)
167 169
168 # should we check certificates? 170 # should we check certificates?
169 check_certificate = self.host.memory.getParamA( 171 check_certificate = self.host.memory.getParamA(
170 "check_certificate", "Connection", profile_key=client.profile) 172 "check_certificate", "Connection", profile_key=client.profile)
171 if not check_certificate: 173 if not check_certificate:
176 try: 178 try:
177 callback = self._download_callbacks[uri_parsed.scheme] 179 callback = self._download_callbacks[uri_parsed.scheme]
178 except KeyError: 180 except KeyError:
179 raise exceptions.NotFound(f"Can't find any handler for uri {uri}") 181 raise exceptions.NotFound(f"Can't find any handler for uri {uri}")
180 else: 182 else:
181 progress_id, download_d = await callback( 183 try:
182 client, uri_parsed, dest_path, options) 184 progress_id, download_d = await callback(
185 client, uri_parsed, dest_path, options)
186 except Exception as e:
187 log.warning(_(
188 "Can't download URI {uri}: {reason}").format(
189 uri=uri, reason=e))
190 if cache_uid is not None:
191 client.cache.removeFromCache(cache_uid)
192 elif dest_path.exists():
193 dest_path.unlink()
194 raise e
183 download_d.addCallback(lambda __: dest_path) 195 download_d.addCallback(lambda __: dest_path)
184 return progress_id, download_d 196 return progress_id, download_d
185 197
186 def registerScheme(self, scheme, download_cb): 198 def registerScheme(self, scheme, download_cb):
187 """Register an URI scheme handler 199 """Register an URI scheme handler
228 ) 240 )
229 treq_client = treq_client_no_ssl 241 treq_client = treq_client_no_ssl
230 else: 242 else:
231 treq_client = treq 243 treq_client = treq
232 244
233 head_data = await treq_.head(url) 245 head_data = await treq_client.head(url)
234 try: 246 try:
235 content_length = int(head_data.headers.getRawHeaders('content-length')[0]) 247 content_length = int(head_data.headers.getRawHeaders('content-length')[0])
236 except (KeyError, TypeError, IndexError): 248 except (KeyError, TypeError, IndexError):
237 content_length = None 249 content_length = None
238 log.debug(f"No content lenght found at {url}") 250 log.debug(f"No content lenght found at {url}")