changeset 3187:d92a144f3589

plugin download: use cache if dest_path is empty: if dest_path is not set, the file will be downloaded in cache. A new name will be generated using a hash of the uri. If the file is already downloaded in cache, it won't be downloaded again (in this case returned progress_id is an empty string).
author Goffi <goffi@goffi.org>
date Wed, 26 Feb 2020 22:03:11 +0100
parents 84b0c8b4dee0
children a15773c6c273
files sat/core/constants.py sat/plugins/plugin_misc_download.py
diffstat 2 files changed, 43 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/sat/core/constants.py	Wed Feb 26 15:54:43 2020 +0100
+++ b/sat/core/constants.py	Wed Feb 26 22:03:11 2020 +0100
@@ -346,6 +346,8 @@
     ## Files ##
     FILE_TYPE_DIRECTORY = "directory"
     FILE_TYPE_FILE = "file"
+    # when filename can't be found automatically, this one will be used
+    FILE_DEFAULT_NAME = "unnamed"
 
     ## Permissions management ##
     ACCESS_PERM_READ = "read"
@@ -378,10 +380,13 @@
     ## Misc ##
     SAVEFILE_DATABASE = APP_NAME_FILE + ".db"
     IQ_SET = '/iq[@type="set"]'
-    ENV_PREFIX = "SAT_"  # Prefix used for environment variables
+    # Prefix used for environment variables
+    ENV_PREFIX = "SAT_"
     IGNORE = "ignore"
-    NO_LIMIT = -1  # used in bridge when a integer value is expected
-    DEFAULT_MAX_AGE = 1209600  # default max age of cached files, in seconds
+    # used in bridge when an integer value is expected
+    NO_LIMIT = -1
+    # default max age of cached files, in seconds
+    DEFAULT_MAX_AGE = 3600 * 24 * 14
     HASH_SHA1_EMPTY = "da39a3ee5e6b4b0d3255bfef95601890afd80709"
     STANZA_NAMES = ("iq", "message", "presence")
 
--- a/sat/plugins/plugin_misc_download.py	Wed Feb 26 15:54:43 2020 +0100
+++ b/sat/plugins/plugin_misc_download.py	Wed Feb 26 22:03:11 2020 +0100
@@ -17,7 +17,8 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 from pathlib import Path
-from urllib.parse import urlparse
+from urllib.parse import urlparse, unquote
+import hashlib
 import treq
 from twisted.internet import defer
 from twisted.words.protocols.jabber import error as jabber_error
@@ -76,7 +77,7 @@
         ))
 
     async def fileDownload(self, client, uri, dest_path, options=None):
-        """Send a file using best available method
+        """Download a file using best available method
 
         parameters are the same as for [download]
         @return (dict): action dictionary, with progress id in case of success, else xmlui
@@ -105,7 +106,7 @@
         options = data_format.deserialise(options_s)
 
         d = defer.ensureDeferred(self.fileDownloadComplete(
-            client, uri, Path(dest_path), options
+            client, uri, dest_path, options
         ))
         d.addCallback(lambda path: str(path))
         return d
@@ -115,9 +116,10 @@
 
         parameters are the same as for [download]
         @return (str): path to the downloaded file
+            if dest_path is an empty string, this is the path of the file in cache
         """
         __, download_d = await self.download(client, uri, dest_path, options)
-        await download_d
+        dest_path = await download_d
         return dest_path
 
     async def download(self, client, uri, dest_path, options=None):
@@ -125,24 +127,51 @@
 
         @param uri(str): URI to the file to download
         @param dest_path(str, Path): where the file must be downloaded
+            if empty string, the file will be stored in cache
         @param options(dict, None): options depending on scheme handler
             Some common options:
                 - ignore_tls_errors(bool): True to ignore SSL/TLS certificate verification
                   used only if HTTPS transport is needed
         @return (tuple[unicode,D(unicode)]): progress_id and a Deferred which fire
             download URL when download is finished
+            progress_id can be an empty string if the file already exists and is not
+            downloaded again (can happen if cache is used with empty dest_path)
         """
         if options is None:
             options = {}
 
-        dest_path = Path(dest_path)
         uri_parsed = urlparse(uri, 'http')
+        if dest_path:
+            dest_path = Path(dest_path)
+        else:
+            filename = Path(unquote(uri_parsed.path)).name.strip() or C.FILE_DEFAULT_NAME
+            # we don't use Path.suffixes because we don't want to have more than 2
+            # suffixes, but we still want to handle suffixes like "tar.gz".
+            stem, *suffixes = filename.rsplit('.', 2)
+            # we hash the URL to have a unique identifier, and avoid double download
+            url_hash = hashlib.sha256(uri_parsed.geturl().encode()).hexdigest()
+            uid = f"{stem}_{url_hash}"
+            cache_data = client.cache.getMetadata(uid)
+            if cache_data is not None:
+                # file is already in cache, we return it
+                download_d = defer.succeed(cache_data['path'])
+                return '', download_d
+            else:
+                # the file is not in cache
+                unique_name = '.'.join([uid] + suffixes)
+                with client.cache.cacheData("DOWNLOAD", uid, filename=unique_name) as f:
+                    # we close the file and only use its name, the file will be opened
+                    # by the registered callback
+                    dest_path = f.name
         try:
             callback = self._download_callbacks[uri_parsed.scheme]
         except KeyError:
             raise exceptions.NotFound(f"Can't find any handler for uri {uri}")
         else:
-            return await callback(client, uri_parsed, dest_path, options)
+            progress_id, download_d = await callback(
+                client, uri_parsed, dest_path, options)
+            download_d.addCallback(lambda __: dest_path)
+            return progress_id, download_d
 
     def registerScheme(self, scheme, download_cb):
         """Register an URI scheme handler