Mercurial > libervia-backend
comparison sat/plugins/plugin_comp_file_sharing.py @ 3289:9057713ab124
plugin comp file sharing: files can now be uploaded/downloaded via HTTP:
plugin XEP-0363 can now be used by components, and file sharing uses it.
The new `public_id` file metadata is used to serve files.
Files uploaded are put in the `/uploads` path.
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 29 May 2020 21:55:45 +0200 |
parents | 9d0df638c8b4 |
children | 449dfbfcdbcc |
comparison
equal
deleted
inserted
replaced
3288:780fb8dd07ef | 3289:9057713ab124 |
---|---|
14 # GNU Affero General Public License for more details. | 14 # GNU Affero General Public License for more details. |
15 | 15 |
16 # You should have received a copy of the GNU Affero General Public License | 16 # You should have received a copy of the GNU Affero General Public License |
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 17 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 |
19 import os | |
20 import os.path | |
21 import mimetypes | |
22 from functools import partial | |
23 import shortuuid | |
24 import unicodedata | |
25 from urllib.parse import urljoin, urlparse, quote, unquote | |
26 from dataclasses import dataclass | |
27 from pathlib import Path | |
19 from sat.core.i18n import _ | 28 from sat.core.i18n import _ |
20 from sat.core.constants import Const as C | 29 from sat.core.constants import Const as C |
21 from sat.core import exceptions | 30 from sat.core import exceptions |
22 from sat.core.log import getLogger | 31 from sat.core.log import getLogger |
23 from sat.tools.common import regex | 32 from sat.tools.common import regex |
24 from sat.tools.common import uri | 33 from sat.tools.common import uri |
34 from sat.tools.common import files_utils | |
35 from sat.tools.common import tls | |
25 from sat.tools import stream | 36 from sat.tools import stream |
26 from twisted.internet import defer | 37 from twisted.internet import defer, reactor |
27 from twisted.words.protocols.jabber import error | 38 from twisted.words.protocols.jabber import error |
39 from twisted.web import server, resource, static | |
28 from wokkel import pubsub | 40 from wokkel import pubsub |
29 from wokkel import generic | 41 from wokkel import generic |
30 from functools import partial | |
31 import os | |
32 import os.path | |
33 import mimetypes | |
34 | 42 |
35 | 43 |
36 log = getLogger(__name__) | 44 log = getLogger(__name__) |
37 | 45 |
38 | 46 |
49 "XEP-0234", | 57 "XEP-0234", |
50 "XEP-0260", | 58 "XEP-0260", |
51 "XEP-0261", | 59 "XEP-0261", |
52 "XEP-0264", | 60 "XEP-0264", |
53 "XEP-0329", | 61 "XEP-0329", |
62 "XEP-0363", | |
54 ], | 63 ], |
55 C.PI_RECOMMENDATIONS: [], | 64 C.PI_RECOMMENDATIONS: [], |
56 C.PI_MAIN: "FileSharing", | 65 C.PI_MAIN: "FileSharing", |
57 C.PI_HANDLER: C.BOOL_TRUE, | 66 C.PI_HANDLER: C.BOOL_TRUE, |
58 C.PI_DESCRIPTION: _("""Component hosting and sharing files"""), | 67 C.PI_DESCRIPTION: _("""Component hosting and sharing files"""), |
59 } | 68 } |
60 | 69 |
61 HASH_ALGO = "sha-256" | 70 HASH_ALGO = "sha-256" |
62 NS_COMMENTS = "org.salut-a-toi.comments" | 71 NS_COMMENTS = "org.salut-a-toi.comments" |
63 COMMENT_NODE_PREFIX = "org.salut-a-toi.file_comments/" | 72 COMMENT_NODE_PREFIX = "org.salut-a-toi.file_comments/" |
64 | 73 # Directory used to buffer request bodies (i.e. the file, in case of a PUT). We use more than one @ |
65 | 74 # there, to be sure that it does not conflict with a JID |
66 class FileSharing(object): | 75 TMP_BUFFER_DIR = "@@tmp@@" |
76 | |
# Replace the version string of twisted.web's ``server`` module with the
# application name and version. The value must be ASCII bytes, so the text is
# NFKD-normalised first and any character which is still non-ASCII is dropped.
server.version = unicodedata.normalize(
    'NFKD', f"{C.APP_NAME} file sharing {C.APP_VERSION}"
).encode('ascii', 'ignore')
81 | |
82 | |
class HTTPFileServer(resource.Resource):
    """Leaf resource serving shared files on GET and storing uploads on PUT

    Requests are expected to be ``FileSharingRequest`` instances: the handlers
    rely on their ``upload_data``, ``upload_request_data`` and ``file_sharing``
    attributes.
    """
    isLeaf = True

    def errorPage(self, request, code):
        """Set the response status, log the failure and render an error page

        @param request: request being answered
        @param code: HTTP status code; 400, 403 and 404 have a dedicated
            message, any other value is logged as unexpected and gets a
            generic message
        @return: rendered error page
        """
        known_errors = {
            400: ('Bad Request', "Your request is invalid"),
            403: ('Forbidden', "You're not allowed to use this resource"),
            404: ('Not Found', "No resource found at this URL"),
        }
        request.setResponseCode(code)
        page = known_errors.get(code)
        if page is None:
            brief, details = 'Error', "This resource can't be used"
            log.error(f"Unexpected return code used: {code}")
        else:
            brief, details = page
        log.warning(
            f'Error returned while trying to access url {request.uri.decode()}: '
            f'"{brief}" ({code}): {details}'
        )

        return resource.ErrorPage(code, brief, details).render(request)

    def getDispositionType(self, media_type, media_subtype):
        """Select the Content-Disposition type to use for a media type

        @return: 'inline' for images, videos and PDF, 'attachment' otherwise
        """
        is_inline = media_type in ('image', 'video') or (
            media_type == 'application' and media_subtype == 'pdf'
        )
        return 'inline' if is_inline else 'attachment'

    def _finishWithError(self, request, code):
        """Write the error page for ``code`` and terminate the request"""
        request.write(self.errorPage(request, code))
        request.finish()

    def render_GET(self, request):
        """Validate the requested path, then serve the file asynchronously"""
        try:
            # only accessed to check that the path can be parsed
            request.upload_data
        except exceptions.DataError:
            return self.errorPage(request, 404)
        else:
            defer.ensureDeferred(self.renderGet(request))
            return server.NOT_DONE_YET

    async def renderGet(self, request):
        """Look up the file matching the public ID of the URL and send it"""
        try:
            upload_id, filename = request.upload_data
        except exceptions.DataError:
            self._finishWithError(request, 403)
            return

        found_files = await request.file_sharing.host.memory.getFiles(
            client=None, peer_jid=None, perms_to_check=None, public_id=upload_id)
        if not found_files:
            self._finishWithError(request, 404)
            return
        if len(found_files) > 1:
            log.error(f"more that one files found for public id {upload_id!r}")

        # the first match is served, even if several entries were found
        found_file = found_files[0]
        # files are stored under their hash in the files directory
        file_path = request.file_sharing.files_path/found_file['file_hash']
        file_res = static.File(file_path)
        file_res.type = f'{found_file["media_type"]}/{found_file["media_subtype"]}'
        name_suffix = Path(found_file['name']).suffix
        file_res.encoding = file_res.contentEncodings.get(name_suffix)
        disp_type = self.getDispositionType(
            found_file['media_type'], found_file['media_subtype'])
        # the file name in the URL is percent-encoded and some browsers/tools
        # don't unquote it, so the real name is given in Content-Disposition
        disposition = f"{disp_type}; filename*=UTF-8''{quote(found_file['name'])}"
        request.setHeader('Content-Disposition', disposition)
        ret = file_res.render(request)
        if ret != server.NOT_DONE_YET:
            # the result has been returned directly (this is the case for
            # HEAD), so we have to write it and finish the request ourselves
            request.write(ret)
            request.finish()

    def render_PUT(self, request):
        """Store the uploaded file asynchronously"""
        defer.ensureDeferred(self.renderPut(request))
        return server.NOT_DONE_YET

    async def renderPut(self, request):
        """Register the buffered request body as a received file

        A 400 error page is returned if the request is missing the upload
        data which is set while the request is received; 201 is returned
        once the file is registered.
        """
        try:
            client, upload_request = request.upload_request_data
            upload_id, filename = request.upload_data
        except AttributeError:
            self._finishWithError(request, 400)
            return

        # the request has been checked and the body is fully buffered at this
        # point: the buffer file is closed before being registered
        request.content.close()
        tmp_file_path = Path(request.content.name)
        request.content = None

        file_data = {
            "name": unquote(upload_request.filename),
            "mime_type": upload_request.content_type,
            "size": upload_request.size,
            "path": "/uploads"
        }

        await request.file_sharing.registerReceivedFile(
            client, upload_request.from_, file_data, tmp_file_path,
            public_id=upload_id,
        )

        request.setResponseCode(201)
        request.finish()
194 | |
class FileSharingRequest(server.Request):
    """Request which checks uploads early and buffers PUT bodies in a file

    PUT requests are validated in ``gotLength``, before the body is received,
    so that a file which would be refused is not buffered.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # cache for the ``upload_data`` property, set on first successful parsing
        self._upload_data = None

    @property
    def upload_data(self):
        """A tuple with upload_id and filename retrieved from the requested path

        @raise exceptions.DataError: the path is not made of exactly an upload
            ID and a filename, or the upload ID is shorter than 10 characters
        """
        if self._upload_data is not None:
            return self._upload_data

        # self.path is not available if we are early in the request (e.g. when
        # gotLength is called), in which case channel._path must be used. On the
        # other hand, when render_[VERB] is called, only self.path is available
        path = self.channel._path if self.path is None else self.path
        # we normalise the path
        path = urlparse(path.decode()).path
        try:
            # the path starts with "/", thus the first element is empty
            __, upload_id, filename = path.split('/')
        except ValueError:
            raise exceptions.DataError("no enought path elements")
        if len(upload_id) < 10:
            raise exceptions.DataError(f"invalid upload ID received for a PUT: {upload_id!r}")

        self._upload_data = (upload_id, filename)
        return self._upload_data

    @property
    def file_sharing(self):
        """File sharing plugin instance attached to the site"""
        return self.channel.site.file_sharing

    @property
    def file_tmp_dir(self):
        """Directory of the site where request bodies are buffered"""
        return self.channel.site.file_tmp_dir

    def refuseRequest(self):
        """Reject the request and disconnect the client

        The request content is replaced by ``os.devnull`` (presumably so that
        nothing sent by the client is kept -- TODO confirm).
        """
        if self.content is not None:
            self.content.close()
        self.content = open(os.devnull, 'w+b')
        self.channel._respondToBadRequestAndDisconnect()

    def gotLength(self, length):
        """Check a PUT request early and prepare the file buffering its body

        For a PUT, the upload ID must match an expected (and not expired)
        upload slot, and the filename must be the one of the slot request,
        otherwise the request is refused. On success ``upload_request_data``
        is set and the body is buffered in a file of ``file_tmp_dir``.
        Other methods use the default behaviour.
        """
        if self.channel._command.decode().upper() == 'PUT':
            # for PUT we check early if upload_id is fine, to avoid buffering a
            # file we'll refuse. We buffer the file in component's
            # TMP_BUFFER_DIR, so we just have to rename it at the end
            try:
                upload_id, filename = self.upload_data
            except exceptions.DataError as e:
                # NOTE: the "f" prefix was missing, "{e}" was logged literally
                log.warning(f"Invalid PUT request, we stop here: {e}")
                return self.refuseRequest()
            try:
                # the slot is consumed here: an upload ID can be used only once
                client, upload_request, timer = self.file_sharing.expected_uploads.pop(upload_id)
            except KeyError:
                log.warning(f"unknown (expired?) upload ID received for a PUT: {upload_id!r}")
                return self.refuseRequest()

            # "active" is a method of delayed calls: it must be called, the
            # bound method itself is always true
            if not timer.active():
                log.warning(f"upload id {upload_id!r} used for a PUT, but it is expired")
                return self.refuseRequest()

            timer.cancel()

            if upload_request.filename != filename:
                log.warning(
                    f"invalid filename for PUT (upload id: {upload_id!r}, URL: {self.channel._path.decode()}). Original "
                    f"{upload_request.filename!r} doesn't match {filename!r}"
                )
                return self.refuseRequest()

            # used by the resource to register the file once fully received
            self.upload_request_data = (client, upload_request)

            file_tmp_path = files_utils.get_unique_name(
                self.file_tmp_dir/upload_id)

            self.content = open(file_tmp_path, 'w+b')
        else:
            return super().gotLength(length)
273 | |
274 | |
class FileSharingSite(server.Site):
    """Site serving shared files, with request bodies buffered in a dedicated directory"""
    requestFactory = FileSharingRequest

    def __init__(self, file_sharing):
        """
        @param file_sharing: file sharing plugin instance, made available to
            requests through the site
        """
        self.file_sharing = file_sharing
        tmp_dir = file_sharing.host.getLocalPath(
            None, C.FILES_TMP_DIR, TMP_BUFFER_DIR, component=True, profile=False
        )
        self.file_tmp_dir = tmp_dir
        # buffer files still there at this point are leftovers, we remove them
        for stale_file in tmp_dir.iterdir():
            log.debug(f"purging old buffer file at {stale_file}")
            stale_file.unlink()
        super().__init__(HTTPFileServer())

    def getContentFile(self, length):
        """Open a new buffer file, with a random name, in the temporary directory

        @param length: unused
        @return: file object opened in binary read/write mode
        """
        return open(self.file_tmp_dir/shortuuid.uuid(), 'w+b')
291 | |
292 | |
293 class FileSharing: | |
294 | |
def __init__(self, host):
    """Register handlers and triggers, then start the HTTP(S) file server

    @param host: backend instance, giving access to plugins, triggers and
        configuration
    @raise exceptions.ConfigError: ``http_upload_connection_type`` is set to
        something else than "http" or "https"
    """
    log.info(_("File Sharing initialization"))
    self.host = host
    plugins = host.plugins
    self._f = plugins["FILE"]
    self._jf = plugins["XEP-0234"]
    self._h = plugins["XEP-0300"]
    self._t = plugins["XEP-0264"]
    # HTTP upload slot requests are handled by this component
    plugins["XEP-0363"].registerHandler(self._onHTTPUpload)
    host.trigger.add("FILE_getDestDir", self._getDestDirTrigger)
    host.trigger.add(
        "XEP-0234_fileSendingRequest", self._fileSendingRequestTrigger, priority=1000
    )
    host.trigger.add("XEP-0234_buildFileElement", self._addFileComments)
    host.trigger.add("XEP-0234_parseFileElement", self._getFileComments)
    host.trigger.add("XEP-0329_compGetFilesFromNode", self._addCommentsData)
    self.files_path = host.getLocalPath(None, C.FILES_DIR, profile=False)

    config_section = 'component file_sharing'
    self.http_port = host.memory.getConfig(
        config_section, 'http_upload_port', 8888)
    connection_type = host.memory.getConfig(
        config_section, 'http_upload_connection_type', 'https')
    if connection_type not in ('http', 'https'):
        raise exceptions.ConfigError(
            'bad http_upload_connection_type, you must use one of "http" or "https"'
        )

    self.server = FileSharingSite(self)
    # upload slots waiting for a PUT request, with upload IDs as keys
    self.expected_uploads = {}
    if connection_type == 'https':
        options = tls.getOptionsFromConfig(host.memory.config, "component file_sharing")
        tls.TLSOptionsCheck(options)
        context_factory = tls.getTLSContextFactory(options)
        reactor.listenSSL(self.http_port, self.server, context_factory)
    else:
        reactor.listenTCP(self.http_port, self.server)
328 | |
82 | 329 |
def getHandler(self, client):
    """Return a new comments handler bound to this plugin

    @param client: unused
    """
    handler = Comments_handler(self)
    return handler
85 | 332 |
def profileConnecting(self, client):
    """Set HTTP upload settings on the client and create its temporary directory

    The base URL defaults to ``https://<client host>:<http port>`` when
    ``http_upload_public_facing_url`` is not configured, and only the client
    host is allowed when ``http_upload_allowed_hosts_list`` is not set.
    """
    get_config = self.host.memory.getConfig
    section = 'component file_sharing'

    public_base_url = get_config(section, 'http_upload_public_facing_url')
    client._file_sharing_base_url = (
        f"https://{client.host}:{self.http_port}"
        if public_base_url is None
        else public_base_url
    )

    allowed_hosts = get_config(section, 'http_upload_allowed_hosts_list')
    client._file_sharing_allowed_hosts = allowed_hosts or [client.host]

    # per-profile directory used for temporary files
    local_dir = get_config("", "local_dir")
    escaped_profile = regex.pathEscape(client.profile)
    path = client.file_tmp_dir = os.path.join(
        local_dir, C.FILES_TMP_DIR, escaped_profile
    )
    if not os.path.exists(path):
        os.makedirs(path)
94 | 349 |
95 @defer.inlineCallbacks | 350 async def registerReceivedFile( |
96 def _fileTransferedCb(self, __, client, peer_jid, file_data, file_path): | 351 self, client, peer_jid, file_data, file_path, public_id=None, extra=None): |
97 """post file reception tasks | 352 """Post file reception tasks |
98 | 353 |
99 on file is received, this method create hash/thumbnails if necessary | 354 once file is received, this method create hash/thumbnails if necessary |
100 move the file to the right location, and create metadata entry in database | 355 move the file to the right location, and create metadata entry in database |
101 """ | 356 """ |
102 name = file_data["name"] | 357 name = file_data["name"] |
103 extra = {} | 358 if extra is None: |
104 | 359 extra = {} |
105 if file_data["hash_algo"] == HASH_ALGO: | 360 |
361 if file_data.get("hash_algo") == HASH_ALGO: | |
106 log.debug(_("Reusing already generated hash")) | 362 log.debug(_("Reusing already generated hash")) |
107 file_hash = file_data["hash_hasher"].hexdigest() | 363 file_hash = file_data["hash_hasher"].hexdigest() |
108 else: | 364 else: |
109 hasher = self._h.getHasher(HASH_ALGO) | 365 hasher = self._h.getHasher(HASH_ALGO) |
110 with open("file_path") as f: | 366 with file_path.open('rb') as f: |
111 file_hash = yield self._h.calculateHash(f, hasher) | 367 file_hash = await self._h.calculateHash(f, hasher) |
112 final_path = os.path.join(self.files_path, file_hash) | 368 final_path = self.files_path/file_hash |
113 | 369 |
114 if os.path.isfile(final_path): | 370 if final_path.is_file(): |
115 log.debug( | 371 log.debug( |
116 "file [{file_hash}] already exists, we can remove temporary one".format( | 372 "file [{file_hash}] already exists, we can remove temporary one".format( |
117 file_hash=file_hash | 373 file_hash=file_hash |
118 ) | 374 ) |
119 ) | 375 ) |
120 os.unlink(file_path) | 376 file_path.unlink() |
121 else: | 377 else: |
122 os.rename(file_path, final_path) | 378 file_path.rename(final_path) |
123 log.debug( | 379 log.debug( |
124 "file [{file_hash}] moved to {files_path}".format( | 380 "file [{file_hash}] moved to {files_path}".format( |
125 file_hash=file_hash, files_path=self.files_path | 381 file_hash=file_hash, files_path=self.files_path |
126 ) | 382 ) |
127 ) | 383 ) |
132 | 388 |
133 if mime_type is not None and mime_type.startswith("image"): | 389 if mime_type is not None and mime_type.startswith("image"): |
134 thumbnails = extra.setdefault(C.KEY_THUMBNAILS, []) | 390 thumbnails = extra.setdefault(C.KEY_THUMBNAILS, []) |
135 for max_thumb_size in (self._t.SIZE_SMALL, self._t.SIZE_MEDIUM): | 391 for max_thumb_size in (self._t.SIZE_SMALL, self._t.SIZE_MEDIUM): |
136 try: | 392 try: |
137 thumb_size, thumb_id = yield self._t.generateThumbnail( | 393 thumb_size, thumb_id = await self._t.generateThumbnail( |
138 final_path, | 394 final_path, |
139 max_thumb_size, | 395 max_thumb_size, |
140 # we keep thumbnails for 6 months | 396 # we keep thumbnails for 6 months |
141 60 * 60 * 24 * 31 * 6, | 397 60 * 60 * 24 * 31 * 6, |
142 ) | 398 ) |
153 hash_algo=HASH_ALGO, | 409 hash_algo=HASH_ALGO, |
154 size=file_data["size"], | 410 size=file_data["size"], |
155 path=file_data.get("path"), | 411 path=file_data.get("path"), |
156 namespace=file_data.get("namespace"), | 412 namespace=file_data.get("namespace"), |
157 mime_type=mime_type, | 413 mime_type=mime_type, |
414 public_id=public_id, | |
158 owner=peer_jid, | 415 owner=peer_jid, |
159 extra=extra, | 416 extra=extra, |
160 ) | 417 ) |
161 | 418 |
162 def _getDestDirTrigger( | 419 def _getDestDirTrigger( |
169 assert "stream_object" not in transfer_data | 426 assert "stream_object" not in transfer_data |
170 assert C.KEY_PROGRESS_ID in file_data | 427 assert C.KEY_PROGRESS_ID in file_data |
171 filename = file_data["name"] | 428 filename = file_data["name"] |
172 assert filename and not "/" in filename | 429 assert filename and not "/" in filename |
173 file_tmp_dir = self.host.getLocalPath( | 430 file_tmp_dir = self.host.getLocalPath( |
174 client, C.FILES_TMP_DIR, peer_jid.userhost(), component=True, profile=False | 431 None, C.FILES_TMP_DIR, peer_jid.userhost(), component=True, profile=False |
175 ) | 432 ) |
176 file_tmp_path = file_data["file_path"] = os.path.join( | 433 file_tmp_path = file_data['file_path'] = files_utils.get_unique_name( |
177 file_tmp_dir, file_data["name"] | 434 file_tmp_dir/filename) |
178 ) | |
179 | 435 |
180 transfer_data["finished_d"].addCallback( | 436 transfer_data["finished_d"].addCallback( |
181 self._fileTransferedCb, client, peer_jid, file_data, file_tmp_path | 437 lambda __: defer.ensureDeferred( |
438 self.registerReceivedFile(client, peer_jid, file_data, file_tmp_path) | |
439 ) | |
182 ) | 440 ) |
183 | 441 |
184 self._f.openFileWrite( | 442 self._f.openFileWrite( |
185 client, file_tmp_path, transfer_data, file_data, stream_object | 443 client, file_tmp_path, transfer_data, file_data, stream_object |
186 ) | 444 ) |
254 False, | 512 False, |
255 self._retrieveFiles( | 513 self._retrieveFiles( |
256 client, session, content_data, content_name, file_data, file_elt | 514 client, session, content_data, content_name, file_data, file_elt |
257 ), | 515 ), |
258 ) | 516 ) |
517 | |
518 ## HTTP Upload ## | |
519 | |
def _purge_slot(self, upload_id):
    """Forget an expected upload; an error is logged if the slot is unknown"""
    slots = self.expected_uploads
    if upload_id in slots:
        del slots[upload_id]
    else:
        log.error(f"trying to purge an inexisting upload slot ({upload_id})")
525 | |
def _onHTTPUpload(self, client, request):
    """Handle an HTTP upload request by creating a slot for it

    The slot is stored in ``expected_uploads`` under a random upload ID, and
    a delayed call (30) is scheduled to purge it.

    @param request: upload request; its ``filename`` and ``from_`` are used
    @return: slot using the same URL for PUT and GET
    @raise error.StanzaError: host of the requester is not allowed
    """
    # filename should be already cleaned, but it's better to double check
    filename = request.filename
    assert '/' not in filename
    allowed_hosts = client._file_sharing_allowed_hosts
    if request.from_.host not in allowed_hosts:
        raise error.StanzaError("forbidden")

    upload_id = shortuuid.ShortUUID().random(length=30)
    assert '/' not in upload_id
    purge_timer = reactor.callLater(30, self._purge_slot, upload_id)
    self.expected_uploads[upload_id] = (client, request, purge_timer)
    url = urljoin(client._file_sharing_base_url, f"{upload_id}/{request.filename}")
    return self.host.plugins["XEP-0363"].Slot(
        put=url,
        get=url,
        headers=[],
    )
259 | 543 |
260 ## comments triggers ## | 544 ## comments triggers ## |
261 | 545 |
262 def _addFileComments(self, file_elt, extra_args): | 546 def _addFileComments(self, file_elt, extra_args): |
263 try: | 547 try: |