# HG changeset patch # User Goffi # Date 1582750991 -3600 # Node ID d92a144f35895aa3669a7fe6936a70d8b1e127d2 # Parent 84b0c8b4dee049b3be620d3544e62157fadf3622 plugin download: use cache if dest_path is empty: if dest_path is not set, the file will be downloaded in cache. A new name will be generated using a hash of the uri. If the file is already downloaded in cache, it won't be downloaded again (in this case returned progress_id is an empty string). diff -r 84b0c8b4dee0 -r d92a144f3589 sat/core/constants.py --- a/sat/core/constants.py Wed Feb 26 15:54:43 2020 +0100 +++ b/sat/core/constants.py Wed Feb 26 22:03:11 2020 +0100 @@ -346,6 +346,8 @@ ## Files ## FILE_TYPE_DIRECTORY = "directory" FILE_TYPE_FILE = "file" + # when filename can't be found automatically, this one will be used + FILE_DEFAULT_NAME = "unnamed" ## Permissions management ## ACCESS_PERM_READ = "read" @@ -378,10 +380,13 @@ ## Misc ## SAVEFILE_DATABASE = APP_NAME_FILE + ".db" IQ_SET = '/iq[@type="set"]' - ENV_PREFIX = "SAT_" # Prefix used for environment variables + # Prefix used for environment variables + ENV_PREFIX = "SAT_" IGNORE = "ignore" - NO_LIMIT = -1 # used in bridge when a integer value is expected - DEFAULT_MAX_AGE = 1209600 # default max age of cached files, in seconds + # used in bridge when a integer value is expected + NO_LIMIT = -1 + # default max age of cached files, in seconds + DEFAULT_MAX_AGE = 3600 * 24 * 14 HASH_SHA1_EMPTY = "da39a3ee5e6b4b0d3255bfef95601890afd80709" STANZA_NAMES = ("iq", "message", "presence") diff -r 84b0c8b4dee0 -r d92a144f3589 sat/plugins/plugin_misc_download.py --- a/sat/plugins/plugin_misc_download.py Wed Feb 26 15:54:43 2020 +0100 +++ b/sat/plugins/plugin_misc_download.py Wed Feb 26 22:03:11 2020 +0100 @@ -17,7 +17,8 @@ # along with this program. If not, see . 
from pathlib import Path -from urllib.parse import urlparse +from urllib.parse import urlparse, unquote +import hashlib import treq from twisted.internet import defer from twisted.words.protocols.jabber import error as jabber_error @@ -76,7 +77,7 @@ )) async def fileDownload(self, client, uri, dest_path, options=None): - """Send a file using best available method + """Download a file using best available method parameters are the same as for [download] @return (dict): action dictionary, with progress id in case of success, else xmlui @@ -105,7 +106,7 @@ options = data_format.deserialise(options_s) d = defer.ensureDeferred(self.fileDownloadComplete( - client, uri, Path(dest_path), options + client, uri, dest_path, options )) d.addCallback(lambda path: str(path)) return d @@ -115,9 +116,10 @@ parameters are the same as for [download] @return (str): path to the downloaded file + use empty string to store the file in cache """ __, download_d = await self.download(client, uri, dest_path, options) - await download_d + dest_path = await download_d return dest_path async def download(self, client, uri, dest_path, options=None): @@ -125,24 +127,51 @@ @param uri(str): URI to the file to download @param dest_path(str, Path): where the file must be downloaded + if empty string, the file will be stored in local path @param options(dict, None): options depending on scheme handler Some common options: - ignore_tls_errors(bool): True to ignore SSL/TLS certificate verification used only if HTTPS transport is needed @return (tuple[unicode,D(unicode)]): progress_id and a Deferred which fire download URL when download is finished + progress_id can be empty string if the file already exist and is not + downloaded again (can happen if cache is used with empty dest_path) """ if options is None: options = {} - dest_path = Path(dest_path) uri_parsed = urlparse(uri, 'http') + if dest_path: + dest_path = Path(dest_path) + else: + filename = Path(unquote(uri_parsed.path)).name.strip() or 
C.FILE_DEFAULT_NAME + # we don't use Path.suffixes because we don't want to have more than 2 + # suffixes, but we still want to handle suffixes like "tar.gz". + stem, *suffixes = filename.rsplit('.', 2) + # we hash the URL to have an unique identifier, and avoid double download + url_hash = hashlib.sha256(uri_parsed.geturl().encode()).hexdigest() + uid = f"{stem}_{url_hash}" + cache_data = client.cache.getMetadata(uid) + if cache_data is not None: + # file is already in cache, we return it + download_d = defer.succeed(cache_data['path']) + return '', download_d + else: + # the file is not in cache + unique_name = '.'.join([uid] + suffixes) + with client.cache.cacheData("DOWNLOAD", uid, filename=unique_name) as f: + # we close the file and only use its name, the file will be opened + # by the registered callback + dest_path = f.name try: callback = self._download_callbacks[uri_parsed.scheme] except KeyError: raise exceptions.NotFound(f"Can't find any handler for uri {uri}") else: - return await callback(client, uri_parsed, dest_path, options) + progress_id, download_d = await callback( + client, uri_parsed, dest_path, options) + download_d.addCallback(lambda __: dest_path) + return progress_id, download_d def registerScheme(self, scheme, download_cb): """Register an URI scheme handler