diff libervia/backend/plugins/plugin_comp_email_gateway/__init__.py @ 4337:95792a1f26c7

component email gateway: attachments handling: attachments are now stored, and metadata are created in database. rel 453
author Goffi <goffi@goffi.org>
date Tue, 03 Dec 2024 00:13:23 +0100
parents 055930cc81f9
children 7c0b7ecb816f
line wrap: on
line diff
--- a/libervia/backend/plugins/plugin_comp_email_gateway/__init__.py	Tue Dec 03 00:13:23 2024 +0100
+++ b/libervia/backend/plugins/plugin_comp_email_gateway/__init__.py	Tue Dec 03 00:13:23 2024 +0100
@@ -21,11 +21,16 @@
 from email.mime.text import MIMEText
 from email.utils import formataddr, getaddresses, parseaddr
 from functools import partial
+import hashlib
+from pathlib import Path
 import re
-from typing import cast
+import shutil
+import tempfile
+from typing import TYPE_CHECKING, NamedTuple, cast
 
 from pydantic import BaseModel
 from twisted.internet import defer, reactor
+from twisted.internet.threads import deferToThread
 from twisted.mail import smtp
 from twisted.words.protocols.jabber import jid
 from twisted.words.protocols.jabber import error as jabber_error
@@ -37,7 +42,7 @@
 
 from libervia.backend.core import exceptions
 from libervia.backend.core.constants import Const as C
-from libervia.backend.core.core_types import SatXMPPEntity
+from libervia.backend.core.core_types import SatXMPPComponent, SatXMPPEntity
 from libervia.backend.core.i18n import D_, _
 from libervia.backend.core.log import getLogger
 from libervia.backend.memory.persistent import LazyPersistentBinaryDict
@@ -45,17 +50,21 @@
 from libervia.backend.memory.sqla_mapping import PrivateIndBin
 from libervia.backend.models.core import MessageData
 from libervia.backend.plugins.plugin_xep_0033 import (
-    RECIPIENT_FIELDS,
     AddressType,
     AddressesData,
+    RECIPIENT_FIELDS,
 )
 from libervia.backend.plugins.plugin_xep_0077 import XEP_0077
 from libervia.backend.plugins.plugin_xep_0106 import XEP_0106
-from libervia.backend.plugins.plugin_xep_0131 import XEP_0131, HeadersData, Urgency
+from libervia.backend.plugins.plugin_xep_0131 import HeadersData, Urgency, XEP_0131
+from libervia.backend.plugins.plugin_xep_0498 import XEP_0498
 from libervia.backend.tools.utils import aio
 
+from .imap import IMAPClientFactory
 from .models import Credentials, UserData
-from .imap import IMAPClientFactory
+
+if TYPE_CHECKING:
+    from libervia.backend.core.main import LiberviaBackend
 
 
 log = getLogger(__name__)
@@ -69,7 +78,7 @@
     C.PI_MODES: [C.PLUG_MODE_COMPONENT],
     C.PI_TYPE: C.PLUG_TYPE_ENTRY_POINT,
     C.PI_PROTOCOLS: [],
-    C.PI_DEPENDENCIES: ["XEP-0033", "XEP-0077", "XEP-0106"],
+    C.PI_DEPENDENCIES: ["XEP-0033", "XEP-0077", "XEP-0106", "XEP-0498"],
     C.PI_RECOMMENDATIONS: [],
     C.PI_MAIN: "EmailGatewayComponent",
     C.PI_HANDLER: C.BOOL_TRUE,
@@ -86,6 +95,12 @@
 email_pattern = re.compile(r"[^@]+@[^@]+\.[^@]+")
 
 
+class FileMetadata(NamedTuple):
+    path: Path
+    hash: str
+    size: int
+
+
 class SendMailExtra(BaseModel):
     addresses: AddressesData | None = None
     headers: HeadersData | None = None
@@ -95,9 +110,9 @@
     IMPORT_NAME = IMPORT_NAME
     verbose = 0
 
-    def __init__(self, host):
+    def __init__(self, host: "LiberviaBackend") -> None:
         self.host = host
-        self.client: SatXMPPEntity | None = None
+        self.client: SatXMPPComponent | None = None
         self.initalized = False
         self.storage: LazyPersistentBinaryDict | None = None
         self._iq_register = cast(XEP_0077, host.plugins["XEP-0077"])
@@ -106,9 +121,11 @@
         )
         self._e = cast(XEP_0106, host.plugins["XEP-0106"])
         self._shim = cast(XEP_0131, host.plugins["XEP-0131"])
+        self._pfs = cast(XEP_0498, host.plugins["XEP-0498"])
         # TODO: For the moment, all credentials are kept in cache; we should only keep the
         #   X latest.
         self.users_data: dict[jid.JID, UserData] = {}
+        self.files_path = self.host.get_local_path(None, C.FILES_DIR)
         host.trigger.add_with_check(
             "message_received", self, self._message_received_trigger, priority=-1000
         )
@@ -162,6 +179,7 @@
         return EmailGatewayHandler()
 
     async def profile_connecting(self, client: SatXMPPEntity) -> None:
+        assert isinstance(client, SatXMPPComponent)
         self.client = client
         if not self.initalized:
             await self._init()
@@ -756,7 +774,12 @@
                 mode="json", exclude_none=True
             )
 
+        # Handle attachments
+        for part in email.iter_attachments():
+            await self.handle_attachment(part, user_jid)
+
         client = self.client.get_virtual_client(from_jid)
+
         await client.sendMessage(
             user_jid,
             {"": body},
@@ -764,6 +787,63 @@
             extra=extra,
         )
 
+    async def handle_attachment(self, part: EmailMessage, recipient_jid: jid.JID) -> None:
+        """Handle an attachment from an email.
+
+        @param part: The object representing the attachment.
+        @param recipient_jid: JID of the recipient to whom the attachment is being sent.
+        """
+        assert self.client is not None
+        content_type = part.get_content_type()
+        filename = part.get_filename() or "attachment"
+        log.debug(f"Handling attachment: {filename} ({content_type})")
+        file_metadata = await deferToThread(self._save_attachment, part)
+        if file_metadata is not None:
+            log.debug(f"Attachment {filename!r} saved to {file_metadata.path}")
+            try:
+                await self.host.memory.set_file(
+                    self.client,
+                    filename,
+                    file_hash=file_metadata.hash,
+                    hash_algo="sha-256",
+                    size=file_metadata.size,
+                    namespace=PLUGIN_INFO[C.PI_IMPORT_NAME],
+                    mime_type=content_type,
+                    owner=recipient_jid,
+                )
+            except Exception:
+                log.exception(f"Failed to register file {filename!r}")
+
+    def _save_attachment(self, part: EmailMessage) -> FileMetadata | None:
+        """Save the attachment to files path.
+
+        This method must be executed in a thread with deferToThread to avoid blocking the
+        reactor with IO operations if the attachment is large.
+
+        @param part: The object representing the attachment.
+        @return: Attachment data, or None if an error occurs.
+        @raises IOError: Can't save the attachment.
+        """
+        temp_file = None
+        try:
+            with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                payload = part.get_payload(decode=True)
+                if isinstance(payload, bytes):
+                    temp_file.write(payload)
+                    file_hash = hashlib.sha256(payload).hexdigest()
+                    file_path = self.files_path / file_hash
+                    shutil.move(temp_file.name, file_path)
+                    file_size = len(payload)
+                    return FileMetadata(path=file_path, hash=file_hash, size=file_size)
+                else:
+                    log.warning(f"Can't write payload of type {type(payload)}.")
+                    return None
+        except Exception as e:
+            raise IOError(f"Failed to save attachment: {e}")
+        finally:
+            if temp_file is not None and Path(temp_file.name).exists():
+                Path(temp_file.name).unlink()
+
     async def connect_imap(self, from_jid: jid.JID, user_data: UserData) -> None:
         """Connect to IMAP service.