comparison sat/tools/web.py @ 3822:65bac82e4049

core (tools/web): helper method to download files: this method is for internal file download, progression mechanism is not used. rel 368
author Goffi <goffi@goffi.org>
date Wed, 29 Jun 2022 12:07:45 +0200
parents 7550ae9cfbac
children 524856bd7b19
comparison
equal deleted inserted replaced
3821:0b1c30ff2cbb 3822:65bac82e4049
14 # GNU Affero General Public License for more details. 14 # GNU Affero General Public License for more details.
15 15
16 # You should have received a copy of the GNU Affero General Public License 16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. 17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18
19 from typing import Optional, Union
20 from pathlib import Path
21 from io import BufferedIOBase
22
19 from OpenSSL import SSL 23 from OpenSSL import SSL
20 from zope.interface import implementer 24 import treq
21 from treq.client import HTTPClient 25 from treq.client import HTTPClient
26 from twisted.internet import reactor, ssl
22 from twisted.internet.interfaces import IOpenSSLClientConnectionCreator 27 from twisted.internet.interfaces import IOpenSSLClientConnectionCreator
23 from twisted.internet import reactor, ssl
24 from twisted.web import iweb 28 from twisted.web import iweb
25 from twisted.web import client as http_client 29 from twisted.web import client as http_client
30 from zope.interface import implementer
31
32 from sat.core import exceptions
26 from sat.core.log import getLogger 33 from sat.core.log import getLogger
27 34
28 35
29 log = getLogger(__name__) 36 log = getLogger(__name__)
30 37
63 70
64 71
65 #: the following treq client doesn't check TLS; it is obviously insecure and should not be used 72 #: the following treq client doesn't check TLS; it is obviously insecure and should not be used
66 #: without an explicit warning 73 #: without an explicit warning
67 treq_client_no_ssl = HTTPClient(http_client.Agent(reactor, NoCheckContextFactory())) 74 treq_client_no_ssl = HTTPClient(http_client.Agent(reactor, NoCheckContextFactory()))
75
76
async def downloadFile(
    url: str,
    dest: Union[str, Path, BufferedIOBase],
    max_size: Optional[int] = None
) -> None:
    """Helper method to download a file

    This is for internal download, for high level download with progression, use
    ``plugin_misc_download``.

    Inspired from
    https://treq.readthedocs.io/en/latest/howto.html#handling-streaming-responses

    @param url: URL of the file to download
    @param dest: destination filename or file-like object
        if it's a file-like object, you'll have to close it yourself
    @param max_size: if set, an exceptions.DataError will be raised if the downloaded file
        is bigger than given value (in bytes).
    """
    if isinstance(dest, BufferedIOBase):
        # the caller owns the file object: write to it but never close it
        f = dest
        must_close = False
    else:
        dest = Path(dest)
        f = dest.open("wb")
        must_close = True
    written = 0

    def write(data: bytes) -> None:
        """Write a received chunk to ``f``, enforcing ``max_size`` when it is set"""
        nonlocal written
        if max_size is not None:
            written += len(data)
            if written > max_size:
                raise exceptions.DataError(
                    f"downloaded file is bigger than expected ({max_size})"
                )
        f.write(data)

    try:
        # the request is started inside the ``try`` so that the file we may have opened
        # above is closed even if the request can't be created
        d = treq.get(url, unbuffered=True)
        # ``write`` (and not ``f.write``) must be the collector, otherwise ``max_size``
        # is never checked
        # NOTE(review): this relies on treq/Twisted propagating the exception raised by
        #   the collector to the awaited Deferred — to be confirmed
        d.addCallback(treq.collect, write)
        await d
    except exceptions.DataError:
        log.warning("download cancelled due to file oversized")
        raise
    except Exception as e:
        log.error(f"Can't write file {dest}: {e}")
        raise
    finally:
        if must_close:
            f.close()