comparison sat/plugins/plugin_comp_file_sharing.py @ 3289:9057713ab124

plugin comp file sharing: files can now be uploaded/downloaded via HTTP: plugin XEP-0363 can now be used by components, and file sharing uses it. The new `public_id` file metadata is used to serve files. Files uploaded are put in the `/uploads` path.
author Goffi <goffi@goffi.org>
date Fri, 29 May 2020 21:55:45 +0200
parents 9d0df638c8b4
children 449dfbfcdbcc
comparison
equal deleted inserted replaced
3288:780fb8dd07ef 3289:9057713ab124
14 # GNU Affero General Public License for more details. 14 # GNU Affero General Public License for more details.
15 15
16 # You should have received a copy of the GNU Affero General Public License 16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. 17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18
19 import os
20 import os.path
21 import mimetypes
22 from functools import partial
23 import shortuuid
24 import unicodedata
25 from urllib.parse import urljoin, urlparse, quote, unquote
26 from dataclasses import dataclass
27 from pathlib import Path
19 from sat.core.i18n import _ 28 from sat.core.i18n import _
20 from sat.core.constants import Const as C 29 from sat.core.constants import Const as C
21 from sat.core import exceptions 30 from sat.core import exceptions
22 from sat.core.log import getLogger 31 from sat.core.log import getLogger
23 from sat.tools.common import regex 32 from sat.tools.common import regex
24 from sat.tools.common import uri 33 from sat.tools.common import uri
34 from sat.tools.common import files_utils
35 from sat.tools.common import tls
25 from sat.tools import stream 36 from sat.tools import stream
26 from twisted.internet import defer 37 from twisted.internet import defer, reactor
27 from twisted.words.protocols.jabber import error 38 from twisted.words.protocols.jabber import error
39 from twisted.web import server, resource, static
28 from wokkel import pubsub 40 from wokkel import pubsub
29 from wokkel import generic 41 from wokkel import generic
30 from functools import partial
31 import os
32 import os.path
33 import mimetypes
34 42
35 43
36 log = getLogger(__name__) 44 log = getLogger(__name__)
37 45
38 46
49 "XEP-0234", 57 "XEP-0234",
50 "XEP-0260", 58 "XEP-0260",
51 "XEP-0261", 59 "XEP-0261",
52 "XEP-0264", 60 "XEP-0264",
53 "XEP-0329", 61 "XEP-0329",
62 "XEP-0363",
54 ], 63 ],
55 C.PI_RECOMMENDATIONS: [], 64 C.PI_RECOMMENDATIONS: [],
56 C.PI_MAIN: "FileSharing", 65 C.PI_MAIN: "FileSharing",
57 C.PI_HANDLER: C.BOOL_TRUE, 66 C.PI_HANDLER: C.BOOL_TRUE,
58 C.PI_DESCRIPTION: _("""Component hosting and sharing files"""), 67 C.PI_DESCRIPTION: _("""Component hosting and sharing files"""),
59 } 68 }
60 69
61 HASH_ALGO = "sha-256" 70 HASH_ALGO = "sha-256"
62 NS_COMMENTS = "org.salut-a-toi.comments" 71 NS_COMMENTS = "org.salut-a-toi.comments"
63 COMMENT_NODE_PREFIX = "org.salut-a-toi.file_comments/" 72 COMMENT_NODE_PREFIX = "org.salut-a-toi.file_comments/"
64 73 # Directory used to buffer the request body (i.e. the file in case of PUT); we use more than one @
65 74 # there, to be sure that it doesn't conflict with a JID
66 class FileSharing(object): 75 TMP_BUFFER_DIR = "@@tmp@@"
76
# Replace the version string exposed by twisted.web's ``server`` module with our own
# banner. The banner must be bytes: NFKD normalisation splits accented characters
# into base character + combining mark, then the non-ASCII marks are dropped
# (e.g. "à" becomes "a").
server.version = unicodedata.normalize(
    "NFKD", f"{C.APP_NAME} file sharing {C.APP_VERSION}"
).encode("ascii", "ignore")
81
82
class HTTPFileServer(resource.Resource):
    """HTTP resource serving shared files (GET/HEAD) and receiving uploads (PUT)

    The resource is a leaf: every URL is handled here. The requested path is parsed
    by ``request.upload_data`` into an upload ID and a filename.
    """
    isLeaf = True

    # status code => (brief, details) used to build error pages
    _ERROR_MESSAGES = {
        400: ('Bad Request', "Your request is invalid"),
        403: ('Forbidden', "You're not allowed to use this resource"),
        404: ('Not Found', "No resource found at this URL"),
        500: ('Internal Server Error', "The request could not be processed"),
    }

    def errorPage(self, request, code):
        """Set the response code and build the matching error page

        @param request: request to answer
        @param code(int): HTTP status code; codes without a dedicated message get a
            generic one, and an error is logged
        @return (bytes): rendered error page
        """
        request.setResponseCode(code)
        try:
            brief, details = self._ERROR_MESSAGES[code]
        except KeyError:
            brief = 'Error'
            details = "This resource can't be used"
            log.error(f"Unexpected return code used: {code}")
        log.warning(
            f'Error returned while trying to access url {request.uri.decode()}: '
            f'"{brief}" ({code}): {details}'
        )

        return resource.ErrorPage(code, brief, details).render(request)

    def getDispositionType(self, media_type, media_subtype):
        """Select the Content-Disposition type to use for a media type

        @param media_type(str): main type (e.g. "image")
        @param media_subtype(str): sub type (e.g. "png")
        @return (str): "inline" for images, videos and PDF, "attachment" otherwise
        """
        if media_type in ('image', 'video'):
            return 'inline'
        elif media_type == 'application' and media_subtype == 'pdf':
            return 'inline'
        else:
            return 'attachment'

    def _runRender(self, request, coroutine):
        """Run a render coroutine, making sure that the request is always answered

        Without the errback, an exception raised in the coroutine would leave the
        request unfinished, and the HTTP client would wait for an answer forever.
        """
        d = defer.ensureDeferred(coroutine)
        d.addErrback(self._renderFailed, request)

    def _renderFailed(self, failure_, request):
        """Log an unexpected render failure and answer with a 500 when still possible"""
        log.error(
            f"Unexpected error while handling {request.method.decode()} "
            f"{request.uri.decode()}: {failure_}"
        )
        if request.finished:
            return
        if not request.startedWriting:
            # nothing has been sent yet, we can still send a proper error page
            request.write(self.errorPage(request, 500))
        try:
            request.finish()
        except RuntimeError:
            # finish() raises RuntimeError when the connection is already lost
            log.debug("connection lost before the error could be sent")

    def render_GET(self, request):
        try:
            # the path is validated early, so an invalid URL gets an immediate 404
            request.upload_data
        except exceptions.DataError:
            return self.errorPage(request, 404)

        self._runRender(request, self.renderGet(request))
        return server.NOT_DONE_YET

    async def renderGet(self, request):
        """Serve the file registered with the public ID found in the requested path

        Answers 404 if no file is registered with this public ID.
        """
        try:
            upload_id, filename = request.upload_data
        except exceptions.DataError:
            request.write(self.errorPage(request, 403))
            request.finish()
            return
        found_files = await request.file_sharing.host.memory.getFiles(
            client=None, peer_jid=None, perms_to_check=None, public_id=upload_id)
        if not found_files:
            request.write(self.errorPage(request, 404))
            request.finish()
            return
        if len(found_files) > 1:
            log.error(f"more that one files found for public id {upload_id!r}")

        found_file = found_files[0]
        # files are stored under their hash, metadata come from the database
        file_path = request.file_sharing.files_path/found_file['file_hash']
        file_res = static.File(file_path)
        file_res.type = f'{found_file["media_type"]}/{found_file["media_subtype"]}'
        file_res.encoding = file_res.contentEncodings.get(Path(found_file['name']).suffix)
        disp_type = self.getDispositionType(
            found_file['media_type'], found_file['media_subtype'])
        # the URL is percent encoded, and not all browsers/tools unquote the file name,
        # thus we add a content disposition header
        request.setHeader(
            'Content-Disposition',
            f"{disp_type}; filename*=UTF-8''{quote(found_file['name'])}"
        )
        ret = file_res.render(request)
        if ret != server.NOT_DONE_YET:
            # HEAD directly returns the result (while GET uses a producer)
            request.write(ret)
            request.finish()

    def render_PUT(self, request):
        self._runRender(request, self.renderPut(request))
        return server.NOT_DONE_YET

    async def renderPut(self, request):
        """Register the file which has been buffered for a validated upload slot

        Answers 400 if the request has not been associated with an upload slot, and
        201 once the file is registered.
        """
        try:
            client, upload_request = request.upload_request_data
            upload_id, filename = request.upload_data
        except (AttributeError, exceptions.DataError):
            # upload_request_data is missing when the slot has not been validated,
            # and upload_data raises DataError on an invalid path
            request.write(self.errorPage(request, 400))
            request.finish()
            return

        # at this point request is checked and file is buffered, we can store it
        # we close the content here, before registering the file
        request.content.close()
        tmp_file_path = Path(request.content.name)
        request.content = None

        file_data = {
            "name": unquote(upload_request.filename),
            "mime_type": upload_request.content_type,
            "size": upload_request.size,
            "path": "/uploads"
        }

        await request.file_sharing.registerReceivedFile(
            client, upload_request.from_, file_data, tmp_file_path,
            public_id=upload_id,
        )

        request.setResponseCode(201)
        request.finish()
193
194
class FileSharingRequest(server.Request):
    """HTTP request which validates upload slots before buffering a PUT body

    For PUT requests the upload ID and filename are checked in ``gotLength``, and
    the body is then written to a file in the site's temporary directory.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # cache for the ``upload_data`` property
        self._upload_data = None
        # NOTE: ``upload_request_data`` is only set by ``gotLength`` once a PUT has
        # been validated, the attribute is missing for any other request

    @property
    def upload_data(self):
        """A tuple with upload_id and filename retrieved from the requested path

        The path must be made of exactly 2 elements (``/<upload_id>/<filename>``).
        The filename is returned as found in the URL (it is not percent-decoded).
        The result is cached.

        @return (tuple[str, str]): upload_id and filename
        @raise exceptions.DataError: the path doesn't have the expected number of
            elements, or the upload ID is shorter than 10 characters
        """
        if self._upload_data is not None:
            return self._upload_data

        # self.path is not available if we are early in the request (e.g. when
        # gotLength is called), in which case channel._path must be used. On the other
        # hand, when render_[VERB] is called, only self.path is available
        path = self.channel._path if self.path is None else self.path
        # we normalise the path
        path = urlparse(path.decode()).path
        try:
            __, upload_id, filename = path.split('/')
        except ValueError:
            raise exceptions.DataError("no enought path elements")
        if len(upload_id) < 10:
            raise exceptions.DataError(f"invalid upload ID received for a PUT: {upload_id!r}")

        self._upload_data = (upload_id, filename)
        return self._upload_data

    @property
    def file_sharing(self):
        """File sharing plugin instance attached to the site"""
        return self.channel.site.file_sharing

    @property
    def file_tmp_dir(self):
        """Directory of the site where request bodies are buffered"""
        return self.channel.site.file_tmp_dir

    def refuseRequest(self):
        """Refuse the request: discard its body and ask the channel to disconnect"""
        if self.content is not None:
            self.content.close()
        # anything still written to the request content is thrown away
        self.content = open(os.devnull, 'w+b')
        self.channel._respondToBadRequestAndDisconnect()

    def gotLength(self, length):
        """Prepare the buffer which will receive the request body

        For PUT we check early if upload_id is fine, to avoid buffering a file we'll
        refuse. The file is buffered in the site's temporary directory. Other
        methods use the default behaviour.
        """
        if self.channel._command.decode().upper() != 'PUT':
            return super().gotLength(length)

        try:
            upload_id, filename = self.upload_data
        except exceptions.DataError as e:
            log.warning(f"Invalid PUT request, we stop here: {e}")
            return self.refuseRequest()

        try:
            # the slot is removed: an upload ID can only be used once
            client, upload_request, timer = self.file_sharing.expected_uploads.pop(upload_id)
        except KeyError:
            log.warning(f"unknown (expired?) upload ID received for a PUT: {upload_id!r}")
            return self.refuseRequest()

        # active is a method: it must be called (the bound method itself is always
        # true). This check also avoids cancelling an already called timer below
        if not timer.active():
            log.warning(f"upload id {upload_id!r} used for a PUT, but it is expired")
            return self.refuseRequest()

        timer.cancel()

        if upload_request.filename != filename:
            log.warning(
                f"invalid filename for PUT (upload id: {upload_id!r}, URL: {self.channel._path.decode()}). Original "
                f"{upload_request.filename!r} doesn't match {filename!r}"
            )
            return self.refuseRequest()

        self.upload_request_data = (client, upload_request)

        file_tmp_path = files_utils.get_unique_name(
            self.file_tmp_dir/upload_id)

        self.content = open(file_tmp_path, 'w+b')
273
274
class FileSharingSite(server.Site):
    """Site serving the file sharing HTTP resource

    It keeps a reference to the file sharing plugin and to the directory used to
    buffer request bodies, both are accessed by ``FileSharingRequest``.
    """
    requestFactory = FileSharingRequest

    def __init__(self, file_sharing):
        """
        @param file_sharing: file sharing plugin instance, its ``host`` is used to
            locate the buffer directory
        """
        self.file_sharing = file_sharing
        buffer_dir = file_sharing.host.getLocalPath(
            None, C.FILES_TMP_DIR, TMP_BUFFER_DIR, component=True, profile=False
        )
        self.file_tmp_dir = buffer_dir
        # whatever is still in the buffer directory comes from a previous run
        for leftover in buffer_dir.iterdir():
            log.debug(f"purging old buffer file at {leftover}")
            leftover.unlink()
        super().__init__(HTTPFileServer())

    def getContentFile(self, length):
        """Open a new, randomly named, file in the buffer directory

        @param length: unused
        @return: file object opened in binary read/write mode
        """
        destination = self.file_tmp_dir/shortuuid.uuid()
        return open(destination, 'w+b')
291
292
293 class FileSharing:
294
67 def __init__(self, host): 295 def __init__(self, host):
68 log.info(_("File Sharing initialization")) 296 log.info(_("File Sharing initialization"))
69 self.host = host 297 self.host = host
70 self._f = host.plugins["FILE"] 298 self._f = host.plugins["FILE"]
71 self._jf = host.plugins["XEP-0234"] 299 self._jf = host.plugins["XEP-0234"]
72 self._h = host.plugins["XEP-0300"] 300 self._h = host.plugins["XEP-0300"]
73 self._t = host.plugins["XEP-0264"] 301 self._t = host.plugins["XEP-0264"]
302 host.plugins["XEP-0363"].registerHandler(self._onHTTPUpload)
74 host.trigger.add("FILE_getDestDir", self._getDestDirTrigger) 303 host.trigger.add("FILE_getDestDir", self._getDestDirTrigger)
75 host.trigger.add( 304 host.trigger.add(
76 "XEP-0234_fileSendingRequest", self._fileSendingRequestTrigger, priority=1000 305 "XEP-0234_fileSendingRequest", self._fileSendingRequestTrigger, priority=1000
77 ) 306 )
78 host.trigger.add("XEP-0234_buildFileElement", self._addFileComments) 307 host.trigger.add("XEP-0234_buildFileElement", self._addFileComments)
79 host.trigger.add("XEP-0234_parseFileElement", self._getFileComments) 308 host.trigger.add("XEP-0234_parseFileElement", self._getFileComments)
80 host.trigger.add("XEP-0329_compGetFilesFromNode", self._addCommentsData) 309 host.trigger.add("XEP-0329_compGetFilesFromNode", self._addCommentsData)
81 self.files_path = host.getLocalPath(None, C.FILES_DIR, profile=False) 310 self.files_path = host.getLocalPath(None, C.FILES_DIR, profile=False)
311 self.http_port = host.memory.getConfig(
312 'component file_sharing', 'http_upload_port', 8888)
313 connection_type = host.memory.getConfig(
314 'component file_sharing', 'http_upload_connection_type', 'https')
315 if connection_type not in ('http', 'https'):
316 raise exceptions.ConfigError(
317 f'bad http_upload_connection_type, you must use one of "http" or "https"'
318 )
319 self.server = FileSharingSite(self)
320 self.expected_uploads = {}
321 if connection_type == 'http':
322 reactor.listenTCP(self.http_port, self.server)
323 else:
324 options = tls.getOptionsFromConfig(host.memory.config, "component file_sharing")
325 tls.TLSOptionsCheck(options)
326 context_factory = tls.getTLSContextFactory(options)
327 reactor.listenSSL(self.http_port, self.server, context_factory)
328
82 329
83 def getHandler(self, client): 330 def getHandler(self, client):
84 return Comments_handler(self) 331 return Comments_handler(self)
85 332
86 def profileConnected(self, client): 333 def profileConnecting(self, client):
334 public_base_url = self.host.memory.getConfig(
335 'component file_sharing', 'http_upload_public_facing_url')
336 if public_base_url is None:
337 client._file_sharing_base_url = f"https://{client.host}:{self.http_port}"
338 else:
339 client._file_sharing_base_url = public_base_url
340 client._file_sharing_allowed_hosts = self.host.memory.getConfig(
341 'component file_sharing', 'http_upload_allowed_hosts_list') or [client.host]
87 path = client.file_tmp_dir = os.path.join( 342 path = client.file_tmp_dir = os.path.join(
88 self.host.memory.getConfig("", "local_dir"), 343 self.host.memory.getConfig("", "local_dir"),
89 C.FILES_TMP_DIR, 344 C.FILES_TMP_DIR,
90 regex.pathEscape(client.profile), 345 regex.pathEscape(client.profile),
91 ) 346 )
92 if not os.path.exists(path): 347 if not os.path.exists(path):
93 os.makedirs(path) 348 os.makedirs(path)
94 349
95 @defer.inlineCallbacks 350 async def registerReceivedFile(
96 def _fileTransferedCb(self, __, client, peer_jid, file_data, file_path): 351 self, client, peer_jid, file_data, file_path, public_id=None, extra=None):
97 """post file reception tasks 352 """Post file reception tasks
98 353
99 on file is received, this method create hash/thumbnails if necessary 354 once file is received, this method create hash/thumbnails if necessary
100 move the file to the right location, and create metadata entry in database 355 move the file to the right location, and create metadata entry in database
101 """ 356 """
102 name = file_data["name"] 357 name = file_data["name"]
103 extra = {} 358 if extra is None:
104 359 extra = {}
105 if file_data["hash_algo"] == HASH_ALGO: 360
361 if file_data.get("hash_algo") == HASH_ALGO:
106 log.debug(_("Reusing already generated hash")) 362 log.debug(_("Reusing already generated hash"))
107 file_hash = file_data["hash_hasher"].hexdigest() 363 file_hash = file_data["hash_hasher"].hexdigest()
108 else: 364 else:
109 hasher = self._h.getHasher(HASH_ALGO) 365 hasher = self._h.getHasher(HASH_ALGO)
110 with open("file_path") as f: 366 with file_path.open('rb') as f:
111 file_hash = yield self._h.calculateHash(f, hasher) 367 file_hash = await self._h.calculateHash(f, hasher)
112 final_path = os.path.join(self.files_path, file_hash) 368 final_path = self.files_path/file_hash
113 369
114 if os.path.isfile(final_path): 370 if final_path.is_file():
115 log.debug( 371 log.debug(
116 "file [{file_hash}] already exists, we can remove temporary one".format( 372 "file [{file_hash}] already exists, we can remove temporary one".format(
117 file_hash=file_hash 373 file_hash=file_hash
118 ) 374 )
119 ) 375 )
120 os.unlink(file_path) 376 file_path.unlink()
121 else: 377 else:
122 os.rename(file_path, final_path) 378 file_path.rename(final_path)
123 log.debug( 379 log.debug(
124 "file [{file_hash}] moved to {files_path}".format( 380 "file [{file_hash}] moved to {files_path}".format(
125 file_hash=file_hash, files_path=self.files_path 381 file_hash=file_hash, files_path=self.files_path
126 ) 382 )
127 ) 383 )
132 388
133 if mime_type is not None and mime_type.startswith("image"): 389 if mime_type is not None and mime_type.startswith("image"):
134 thumbnails = extra.setdefault(C.KEY_THUMBNAILS, []) 390 thumbnails = extra.setdefault(C.KEY_THUMBNAILS, [])
135 for max_thumb_size in (self._t.SIZE_SMALL, self._t.SIZE_MEDIUM): 391 for max_thumb_size in (self._t.SIZE_SMALL, self._t.SIZE_MEDIUM):
136 try: 392 try:
137 thumb_size, thumb_id = yield self._t.generateThumbnail( 393 thumb_size, thumb_id = await self._t.generateThumbnail(
138 final_path, 394 final_path,
139 max_thumb_size, 395 max_thumb_size,
140 #  we keep thumbnails for 6 months 396 #  we keep thumbnails for 6 months
141 60 * 60 * 24 * 31 * 6, 397 60 * 60 * 24 * 31 * 6,
142 ) 398 )
153 hash_algo=HASH_ALGO, 409 hash_algo=HASH_ALGO,
154 size=file_data["size"], 410 size=file_data["size"],
155 path=file_data.get("path"), 411 path=file_data.get("path"),
156 namespace=file_data.get("namespace"), 412 namespace=file_data.get("namespace"),
157 mime_type=mime_type, 413 mime_type=mime_type,
414 public_id=public_id,
158 owner=peer_jid, 415 owner=peer_jid,
159 extra=extra, 416 extra=extra,
160 ) 417 )
161 418
162 def _getDestDirTrigger( 419 def _getDestDirTrigger(
169 assert "stream_object" not in transfer_data 426 assert "stream_object" not in transfer_data
170 assert C.KEY_PROGRESS_ID in file_data 427 assert C.KEY_PROGRESS_ID in file_data
171 filename = file_data["name"] 428 filename = file_data["name"]
172 assert filename and not "/" in filename 429 assert filename and not "/" in filename
173 file_tmp_dir = self.host.getLocalPath( 430 file_tmp_dir = self.host.getLocalPath(
174 client, C.FILES_TMP_DIR, peer_jid.userhost(), component=True, profile=False 431 None, C.FILES_TMP_DIR, peer_jid.userhost(), component=True, profile=False
175 ) 432 )
176 file_tmp_path = file_data["file_path"] = os.path.join( 433 file_tmp_path = file_data['file_path'] = files_utils.get_unique_name(
177 file_tmp_dir, file_data["name"] 434 file_tmp_dir/filename)
178 )
179 435
180 transfer_data["finished_d"].addCallback( 436 transfer_data["finished_d"].addCallback(
181 self._fileTransferedCb, client, peer_jid, file_data, file_tmp_path 437 lambda __: defer.ensureDeferred(
438 self.registerReceivedFile(client, peer_jid, file_data, file_tmp_path)
439 )
182 ) 440 )
183 441
184 self._f.openFileWrite( 442 self._f.openFileWrite(
185 client, file_tmp_path, transfer_data, file_data, stream_object 443 client, file_tmp_path, transfer_data, file_data, stream_object
186 ) 444 )
254 False, 512 False,
255 self._retrieveFiles( 513 self._retrieveFiles(
256 client, session, content_data, content_name, file_data, file_elt 514 client, session, content_data, content_name, file_data, file_elt
257 ), 515 ),
258 ) 516 )
517
518 ## HTTP Upload ##
519
def _purge_slot(self, upload_id):
    """Forget an upload slot

    An error is logged if no slot is registered for this upload ID.

    @param upload_id(str): ID of the slot to remove from ``expected_uploads``
    """
    if upload_id in self.expected_uploads:
        del self.expected_uploads[upload_id]
    else:
        log.error(f"trying to purge an inexisting upload slot ({upload_id})")
525
def _onHTTPUpload(self, client, request):
    """Allocate an upload slot for an HTTP upload request

    The slot is stored in ``expected_uploads`` with a timer which purges it after
    30 seconds. The same URL is used for PUT and GET.

    @param client: component client which received the request
    @param request: upload request, its ``filename`` and ``from_`` are used here
    @return: slot (``Slot`` of the XEP-0363 plugin) with the URL to use
    @raise error.StanzaError: "bad-request" if the filename contains a "/",
        "forbidden" if the host of the requester is not in the allowed hosts
    """
    # filename should be already cleaned, but it's better to double check. An
    # explicit error is raised instead of an assert, as asserts are removed when
    # Python runs with -O, and the filename is used to build the URL
    if '/' in request.filename:
        raise error.StanzaError("bad-request")
    if request.from_.host not in client._file_sharing_allowed_hosts:
        raise error.StanzaError("forbidden")

    upload_id = shortuuid.ShortUUID().random(length=30)
    # internal sanity check: the ID is one element of the URL path
    assert '/' not in upload_id
    timer = reactor.callLater(30, self._purge_slot, upload_id)
    self.expected_uploads[upload_id] = (client, request, timer)
    url = urljoin(client._file_sharing_base_url, f"{upload_id}/{request.filename}")
    slot = self.host.plugins["XEP-0363"].Slot(
        put=url,
        get=url,
        headers=[],
    )
    return slot
259 543
260 ## comments triggers ## 544 ## comments triggers ##
261 545
262 def _addFileComments(self, file_elt, extra_args): 546 def _addFileComments(self, file_elt, extra_args):
263 try: 547 try: