comparison libervia/backend/plugins/plugin_sec_aesgcm.py @ 4071:4b842c1fb686

refactoring: renamed `sat` package to `libervia.backend`
author Goffi <goffi@goffi.org>
date Fri, 02 Jun 2023 11:49:51 +0200
parents sat/plugins/plugin_sec_aesgcm.py@c23cad65ae99
children 0d7bb4df2343
comparison
equal deleted inserted replaced
4070:d10748475025 4071:4b842c1fb686
1 #!/usr/bin/env python3
2
3 # SàT plugin for handling AES-GCM file encryption
4 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)
5
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Affero General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Affero General Public License for more details.
15
16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19 import re
20 from textwrap import dedent
21 from functools import partial
22 from urllib import parse
23 import mimetypes
24 import secrets
25 from cryptography.hazmat.primitives import ciphers
26 from cryptography.hazmat.primitives.ciphers import modes
27 from cryptography.hazmat import backends
28 from cryptography.exceptions import AlreadyFinalized
29 import treq
30 from twisted.internet import defer
31 from libervia.backend.core.i18n import _
32 from libervia.backend.core.constants import Const as C
33 from libervia.backend.core import exceptions
34 from libervia.backend.tools import stream
35 from libervia.backend.core.log import getLogger
36 from libervia.backend.tools.web import treq_client_no_ssl
37
# Module-level logger, named after this module.
38 log = getLogger(__name__)
39
# Plugin metadata, keyed by constants from ``Const``.
# NOTE(review): presumably read by the plugin loader — confirm against the host.
# C.PI_MAIN names the class defined in this file ("AESGCM"); C.PI_DEPENDENCIES
# lists the plugins that the class looks up in ``host.plugins``.
40 PLUGIN_INFO = {
41 C.PI_NAME: "AES-GCM",
42 C.PI_IMPORT_NAME: "AES-GCM",
43 C.PI_TYPE: "SEC",
44 C.PI_PROTOCOLS: ["OMEMO Media sharing"],
45 C.PI_DEPENDENCIES: ["XEP-0363", "XEP-0384", "DOWNLOAD", "ATTACH"],
46 C.PI_MAIN: "AESGCM",
47 C.PI_HANDLER: "no",
48 C.PI_DESCRIPTION: dedent(_("""\
49 Implementation of AES-GCM scheme, a way to encrypt files (not official XMPP standard).
50 See https://xmpp.org/extensions/inbox/omemo-media-sharing.html for details
51 """)),
52 }
53
# Pattern matching ``aesgcm://`` links; it is used with ``.sub()`` to pull such
# links out of received message bodies.
54 AESGCM_RE = re.compile(
55 r'aesgcm:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9'
56 r'()@:%_\+.~#?&\/\/=]*)')
57
58
59 class AESGCM(object):
60
def __init__(self, host):
    """Hook the plugin into the host.

    Keeps a reference to the host and to the "XEP-0363" and "ATTACH" plugins,
    registers the ``aesgcm`` download scheme and the encrypted attachment
    handler, then subscribes to the upload and message reception triggers.

    @param host: object exposing ``plugins`` (mapping of plugin instances) and
        ``trigger`` (with an ``add`` method)
    """
    self.host = host
    log.info(_("AESGCM plugin initialization"))

    plugins = host.plugins
    self._http_upload = plugins['XEP-0363']
    self._attach = plugins["ATTACH"]

    plugins["DOWNLOAD"].register_scheme("aesgcm", self.download)
    self._attach.register(self.can_handle_attachment, self.attach, encrypted=True)

    # trigger name -> callback, registered in this order
    for trigger_name, callback in (
        ("XEP-0363_upload_pre_slot", self._upload_pre_slot),
        ("XEP-0363_upload", self._upload_trigger),
        ("message_received", self._message_received_trigger),
    ):
        host.trigger.add(trigger_name, callback)
74
75 async def download(self, client, uri_parsed, dest_path, options):
76 fragment = bytes.fromhex(uri_parsed.fragment)
77
78 # legacy method use 16 bits IV, but OMEMO media sharing published spec indicates
79 # which is 12 bits IV (AES-GCM spec recommandation), so we have to determine
80 # which size has been used.
81 if len(fragment) == 48:
82 iv_size = 16
83 elif len(fragment) == 44:
84 iv_size = 12
85 else:
86 raise ValueError(
87 f"Invalid URL fragment, can't decrypt file at {uri_parsed.get_url()}")
88
89 iv, key = fragment[:iv_size], fragment[iv_size:]
90
91 decryptor = ciphers.Cipher(
92 ciphers.algorithms.AES(key),
93 modes.GCM(iv),
94 backend=backends.default_backend(),
95 ).decryptor()
96
97 download_url = parse.urlunparse(
98 ('https', uri_parsed.netloc, uri_parsed.path, '', '', ''))
99
100 if options.get('ignore_tls_errors', False):
101 log.warning(
102 "TLS certificate check disabled, this is highly insecure"
103 )
104 treq_client = treq_client_no_ssl
105 else:
106 treq_client = treq
107
108 head_data = await treq_client.head(download_url)
109 content_length = int(head_data.headers.getRawHeaders('content-length')[0])
110 # the 128 bits tag is put at the end
111 file_size = content_length - 16
112
113 file_obj = stream.SatFile(
114 self.host,
115 client,
116 dest_path,
117 mode="wb",
118 size = file_size,
119 )
120
121 progress_id = file_obj.uid
122
123 resp = await treq_client.get(download_url, unbuffered=True)
124 if resp.code == 200:
125 d = treq.collect(resp, partial(
126 self.on_data_download,
127 client=client,
128 file_obj=file_obj,
129 decryptor=decryptor))
130 else:
131 d = defer.Deferred()
132 self.host.plugins["DOWNLOAD"].errback_download(file_obj, d, resp)
133 return progress_id, d
134
async def can_handle_attachment(self, client, data):
    """Tell whether attachments can be handled for this client.

    The answer is True as soon as the HTTP upload plugin finds an upload entity,
    and False when that lookup raises ``exceptions.NotFound``.

    @param client: client session, forwarded to the HTTP upload plugin
    @param data: message data (not used here)
    @return (bool): True if an HTTP upload entity is available
    """
    try:
        await self._http_upload.get_http_upload_entity(client)
        return True
    except exceptions.NotFound:
        return False
142
async def _upload_cb(self, client, filepath, filename, extra):
    """Upload one file through HTTP upload with AES-GCM encryption requested.

    ``extra`` is modified in place: its ``encryption`` key is set to
    ``C.ENC_AES_GCM`` before the upload is delegated to the HTTP upload plugin.

    @return: whatever ``file_http_upload`` returns
    """
    extra['encryption'] = C.ENC_AES_GCM
    upload = self._http_upload.file_http_upload
    result = await upload(
        client=client, filepath=filepath, filename=filename, extra=extra
    )
    return result
151
async def attach(self, client, data):
    """Send attachments one file per message, with their URL in the body.

    XXX: attachments are removed from the original message and resent one by one
    because the unofficial OMEMO media sharing XEP allows a single file per
    message.
    TODO: to be removed when a better mechanism is available with OMEMO (the 0.4
    version of OMEMO makes it possible to encrypt other stanza elements than
    body).

    @param client: client session used to send the additional messages
    @param data: message data; ``extra``, ``message``, ``xml``, ``to``,
        ``subject`` and ``type`` keys are read, ``extra`` and ``xml`` are modified
    @raise exceptions.CancelError: nothing is left to send in the original message
    """
    def body_is_empty():
        body = data['message']
        return not body or body == {'': ''}

    attachments = data["extra"][C.KEY_ATTACHMENTS]
    if body_is_empty():
        # no text: the first attachment is uploaded and sent with this message,
        # the others get their own message
        to_resend = attachments[1:]
        del attachments[1:]
        await self._attach.upload_files(client, data, upload_cb=self._upload_cb)
    else:
        # we have a message, every attachment must be sent separately
        to_resend = attachments[:]
        attachments.clear()
        del data["extra"][C.KEY_ATTACHMENTS]

    xml_elt = data["xml"]
    body_elt = xml_elt.body
    if body_elt is None:
        body_elt = xml_elt.addElement("body")

    for kept in attachments:
        body_elt.addContent(kept["url"])

    for attachment in to_resend:
        # each remaining attachment goes in a separate message
        await client.sendMessage(
            to_jid=data['to'],
            message={'': ''},
            subject=data['subject'],
            mess_type=data['type'],
            extra={C.KEY_ATTACHMENTS: [attachment]},
        )

    if data['extra'] or not body_is_empty() or data['subject']:
        return
    # nothing left to send, we can cancel the message
    raise exceptions.CancelError("Cancelled by AESGCM attachment handling")
192
def on_data_download(self, data, client, file_obj, decryptor):
    """Decrypt a chunk of downloaded data and write it to the destination file.

    Everything up to ``file_obj.size`` is encrypted payload; what comes after is
    the 16 bytes GCM tag. The tag may arrive split over several chunks, so the
    partial tag is kept in ``decryptor._sat_tag`` until the 16 bytes are there.
    Decryption is then finalized and the file is closed.

    @param data(bytes): chunk of downloaded data
    @param client: client session (not used here)
    @param file_obj: destination file; ``size``, ``tell``, ``write`` and
        ``close`` are used
    @param decryptor: AES-GCM decryption context
        NOTE(review): must accept arbitrary attributes (``_sat_tag``) — confirm
        with the ``cryptography`` version in use
    @raise ValueError: more than 16 bytes were received after the payload
    """
    bytes_left = file_obj.size - file_obj.tell()
    if len(data) <= bytes_left:
        # we are still fully inside the encrypted payload
        file_obj.write(decryptor.update(data))
        return

    # we're reaching end of file with this bunch of data
    if bytes_left > 0:
        file_obj.write(decryptor.update(data[:bytes_left]))
        tag_part = data[bytes_left:]
    else:
        tag_part = data

    # prepend what we may already have from previous bunches of data
    tag = getattr(decryptor, "_sat_tag", b"") + tag_part
    if len(tag) < 16:
        # the tag is still incomplete, we'll get the rest in next bunch
        decryptor._sat_tag = tag
        return

    # we have the complete tag, it must be 128 bits
    if len(tag) != 16:
        raise ValueError(f"Invalid tag: {tag}")
    remain = decryptor.finalize_with_tag(tag)
    file_obj.write(remain)
    file_obj.close()
223
def _upload_pre_slot(self, client, extra, file_metadata):
    """Adjust the announced file size before an upload slot is requested.

    When AES-GCM encryption is requested in ``extra``, 16 is added to
    ``file_metadata["size"]`` because the tag is appended to the file.

    @return (bool): always True
    """
    if extra.get('encryption') == C.ENC_AES_GCM:
        # the tag is appended to the file
        file_metadata["size"] += 16
    return True
230
def _encrypt(self, data, encryptor):
    """Encrypt a chunk of data read from the file being uploaded.

    @param data(bytes): chunk to encrypt; an empty value means that the end of
        file has been reached
    @param encryptor: encryption context; ``update``, ``finalize`` and ``tag``
        are used
    @return (bytes): encrypted chunk; at end of file, the finalization data
        followed by the tag, then ``b''`` once the context is already finalized
    """
    if data:
        return encryptor.update(data)
    try:
        # end of file is reached, we must finalize
        final_block = encryptor.finalize()
        return final_block + encryptor.tag
    except AlreadyFinalized:
        # as we have already finalized, we can now send EOF
        return b''
243
def _upload_trigger(self, client, extra, sat_file, file_producer, slot):
    """Set up on the fly AES-GCM encryption for an HTTP upload.

    Nothing is done unless AES-GCM encryption is requested in ``extra``.
    Otherwise a random 12 bytes IV and 32 bytes key are generated, ``slot.get``
    is rewritten as an ``aesgcm`` URL carrying them in its fragment, and
    ``sat_file.data_cb`` is set so that data is encrypted while being read.

    @return (bool): always True
    @raise exceptions.InternalError: ``sat_file.data_cb`` is already set
    """
    if extra.get('encryption') != C.ENC_AES_GCM:
        return True
    log.debug("encrypting file with AES-GCM")

    iv, key = secrets.token_bytes(12), secrets.token_bytes(32)
    fragment = f'{iv.hex()}{key.hex()}'

    # the get URL is replaced by one using the aesgcm scheme, with the encoded
    # IV + key as fragment
    kept_parts = parse.urlparse(slot.get)[1:5]
    slot.get = parse.urlunparse(('aesgcm', *kept_parts, fragment))

    # encrypted data is bigger than the original file, so the size must be
    # checked against what is really read to avoid a warning on close()
    sat_file.check_size_with_read = True

    # file_producer gets its length directly from the file, which is wrong once
    # the size has been changed for encryption: without this the producer would
    # stop reading prematurely
    file_producer.length = sat_file.size

    cipher = ciphers.Cipher(
        ciphers.algorithms.AES(key),
        modes.GCM(iv),
        backend=backends.default_backend(),
    )
    encryptor = cipher.encryptor()

    current_cb = sat_file.data_cb
    if current_cb is not None:
        raise exceptions.InternalError(
            f"data_cb was expected to be None, it is set to {current_cb}")

    # with data_cb we encrypt the file on the fly
    sat_file.data_cb = partial(self._encrypt, encryptor=encryptor)
    return True
278
279
def _pop_aesgcm_links(self, match, links):
    """Regex substitution callback collecting the matched links.

    @param match: regex match object for one link
    @param links(list): collected links; the matched text is appended unless it
        is already there
    @return (str): always an empty string, so the link is removed from the text
    """
    url = match.group(0)
    already_seen = url in links
    if not already_seen:
        links.append(url)
    return ""
285
def _check_aesgcm_attachments(self, client, data):
    """Move ``aesgcm://`` links found in message bodies to attachments.

    Links are removed from every body (a body left empty is deleted), then one
    attachment per distinct link is appended to
    ``data['extra'][C.KEY_ATTACHMENTS]``. Attachments are built a single time,
    after all bodies have been scanned, so a link is never attached once per
    body language. ``data['extra']`` is left untouched when no link is found.

    @param client: client session; ``client.encryption.isEncrypted`` is used
    @param data: message data; ``message`` and ``extra`` are modified in place
    @return: the same ``data`` object
    """
    if not data.get('message'):
        return data
    links = []
    collect_link = partial(self._pop_aesgcm_links, links=links)

    # we iterate on a copy, as bodies may be deleted during the loop
    for lang, message in list(data['message'].items()):
        message = AESGCM_RE.sub(collect_link, message)
        if links:
            message = message.strip()
            if message:
                data['message'][lang] = message
            else:
                del data['message'][lang]

    if not links:
        return data

    mess_encrypted = client.encryption.isEncrypted(data)
    attachments = data['extra'].setdefault(C.KEY_ATTACHMENTS, [])
    for link in links:
        attachment = {"url": link}
        path = parse.urlparse(link).path
        media_type = mimetypes.guess_type(path, strict=False)[0]
        if media_type is not None:
            attachment[C.KEY_ATTACHMENTS_MEDIA_TYPE] = media_type

        if mess_encrypted:
            # we don't add the encrypted flag if the message itself is not
            # encrypted, because the decryption key is part of the link,
            # so sending it over unencrypted channel is like having no
            # encryption at all.
            attachment['encrypted'] = True
        attachments.append(attachment)

    return data
321
def _message_received_trigger(self, client, message_elt, post_treat):
    """Schedule the detection of ``aesgcm://`` links on a received message.

    A ``post_treat`` callback is used instead of the "message_parse" trigger
    because we need to know if the "encrypted" flag is set on the message to
    decide if the same flag is added to the attachments.

    @return (bool): always True
    """
    check_attachments = partial(self._check_aesgcm_attachments, client)
    post_treat.addCallback(check_attachments)
    return True