comparison sat/plugins/plugin_misc_download.py @ 3187:d92a144f3589

plugin download: use cache if dest_path is empty: if dest_path is not set, the file will be downloaded in cache. A new name will be generated using a hash of the uri. If the file is already downloaded in cache, it won't be downloaded again (in this case returned progress_id is an empty string).
author Goffi <goffi@goffi.org>
date Wed, 26 Feb 2020 22:03:11 +0100
parents 84b0c8b4dee0
children 2c0628f3927e
comparison
equal deleted inserted replaced
3186:84b0c8b4dee0 3187:d92a144f3589
15 15
16 # You should have received a copy of the GNU Affero General Public License 16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. 17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18
19 from pathlib import Path 19 from pathlib import Path
20 from urllib.parse import urlparse 20 from urllib.parse import urlparse, unquote
21 import hashlib
21 import treq 22 import treq
22 from twisted.internet import defer 23 from twisted.internet import defer
23 from twisted.words.protocols.jabber import error as jabber_error 24 from twisted.words.protocols.jabber import error as jabber_error
24 from sat.core.i18n import _, D_ 25 from sat.core.i18n import _, D_
25 from sat.core.constants import Const as C 26 from sat.core.constants import Const as C
74 return defer.ensureDeferred(self.fileDownload( 75 return defer.ensureDeferred(self.fileDownload(
75 client, uri, Path(dest_path), options 76 client, uri, Path(dest_path), options
76 )) 77 ))
77 78
78 async def fileDownload(self, client, uri, dest_path, options=None): 79 async def fileDownload(self, client, uri, dest_path, options=None):
79 """Send a file using best available method 80 """Download a file using best available method
80 81
81 parameters are the same as for [download] 82 parameters are the same as for [download]
82 @return (dict): action dictionary, with progress id in case of success, else xmlui 83 @return (dict): action dictionary, with progress id in case of success, else xmlui
83 message 84 message
84 """ 85 """
103 def _fileDownloadComplete(self, uri, dest_path, options_s, profile): 104 def _fileDownloadComplete(self, uri, dest_path, options_s, profile):
104 client = self.host.getClient(profile) 105 client = self.host.getClient(profile)
105 options = data_format.deserialise(options_s) 106 options = data_format.deserialise(options_s)
106 107
107 d = defer.ensureDeferred(self.fileDownloadComplete( 108 d = defer.ensureDeferred(self.fileDownloadComplete(
108 client, uri, Path(dest_path), options 109 client, uri, dest_path, options
109 )) 110 ))
110 d.addCallback(lambda path: str(path)) 111 d.addCallback(lambda path: str(path))
111 return d 112 return d
112 113
113 async def fileDownloadComplete(self, client, uri, dest_path, options=None): 114 async def fileDownloadComplete(self, client, uri, dest_path, options=None):
114 """Helper method to fully download a file and return its path 115 """Helper method to fully download a file and return its path
115 116
116 parameters are the same as for [download] 117 parameters are the same as for [download]
117 @return (str): path to the downloaded file 118 @return (str): path to the downloaded file
119 use empty string to store the file in cache
118 """ 120 """
119 __, download_d = await self.download(client, uri, dest_path, options) 121 __, download_d = await self.download(client, uri, dest_path, options)
120 await download_d 122 dest_path = await download_d
121 return dest_path 123 return dest_path
122 124
123 async def download(self, client, uri, dest_path, options=None): 125 async def download(self, client, uri, dest_path, options=None):
124 """Send a file using best available method 126 """Send a file using best available method
125 127
126 @param uri(str): URI to the file to download 128 @param uri(str): URI to the file to download
127 @param dest_path(str, Path): where the file must be downloaded 129 @param dest_path(str, Path): where the file must be downloaded
130 if empty string, the file will be stored in local path
128 @param options(dict, None): options depending on scheme handler 131 @param options(dict, None): options depending on scheme handler
129 Some common options: 132 Some common options:
130 - ignore_tls_errors(bool): True to ignore SSL/TLS certificate verification 133 - ignore_tls_errors(bool): True to ignore SSL/TLS certificate verification
131 used only if HTTPS transport is needed 134 used only if HTTPS transport is needed
132 @return (tuple[unicode,D(unicode)]): progress_id and a Deferred which fire 135 @return (tuple[unicode,D(unicode)]): progress_id and a Deferred which fire
133 download URL when download is finished 136 download URL when download is finished
137 progress_id can be empty string if the file already exist and is not
138 downloaded again (can happen if cache is used with empty dest_path)
134 """ 139 """
135 if options is None: 140 if options is None:
136 options = {} 141 options = {}
137 142
138 dest_path = Path(dest_path)
139 uri_parsed = urlparse(uri, 'http') 143 uri_parsed = urlparse(uri, 'http')
144 if dest_path:
145 dest_path = Path(dest_path)
146 else:
147 filename = Path(unquote(uri_parsed.path)).name.strip() or C.FILE_DEFAULT_NAME
148 # we don't use Path.suffixes because we don't want to have more than 2
149 # suffixes, but we still want to handle suffixes like "tar.gz".
150 stem, *suffixes = filename.rsplit('.', 2)
151 # we hash the URL to have an unique identifier, and avoid double download
152 url_hash = hashlib.sha256(uri_parsed.geturl().encode()).hexdigest()
153 uid = f"{stem}_{url_hash}"
154 cache_data = client.cache.getMetadata(uid)
155 if cache_data is not None:
156 # file is already in cache, we return it
157 download_d = defer.succeed(cache_data['path'])
158 return '', download_d
159 else:
160 # the file is not in cache
161 unique_name = '.'.join([uid] + suffixes)
162 with client.cache.cacheData("DOWNLOAD", uid, filename=unique_name) as f:
163 # we close the file and only use its name, the file will be opened
164 # by the registered callback
165 dest_path = f.name
140 try: 166 try:
141 callback = self._download_callbacks[uri_parsed.scheme] 167 callback = self._download_callbacks[uri_parsed.scheme]
142 except KeyError: 168 except KeyError:
143 raise exceptions.NotFound(f"Can't find any handler for uri {uri}") 169 raise exceptions.NotFound(f"Can't find any handler for uri {uri}")
144 else: 170 else:
145 return await callback(client, uri_parsed, dest_path, options) 171 progress_id, download_d = await callback(
172 client, uri_parsed, dest_path, options)
173 download_d.addCallback(lambda __: dest_path)
174 return progress_id, download_d
146 175
147 def registerScheme(self, scheme, download_cb): 176 def registerScheme(self, scheme, download_cb):
148 """Register an URI scheme handler 177 """Register an URI scheme handler
149 178
150 @param scheme(unicode): URI scheme this callback is handling 179 @param scheme(unicode): URI scheme this callback is handling