Mercurial > libervia-backend
view sat/plugins/plugin_xep_0420.py @ 4010:818db4ca3717
tools (xml_tools): accept several namespaces in `findAncestor`
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 16 Mar 2023 16:43:08 +0100 |
parents | cecf45416403 |
children |
line wrap: on
line source
#!/usr/bin/env python3

# Libervia plugin for Stanza Content Encryption
# Copyright (C) 2022-2022 Tim Henkes (me@syndace.dev)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from abc import ABC, abstractmethod
from datetime import datetime
import enum
import secrets
import string
from typing import Dict, NamedTuple, Optional, Set, Tuple, cast
from typing_extensions import Final

from lxml import etree

from sat.core import exceptions
from sat.core.constants import Const as C
from sat.core.i18n import D_
from sat.core.log import Logger, getLogger
from sat.core.sat_main import SAT
from sat.tools.xml_tools import ElementParser

from sat.plugins.plugin_xep_0033 import NS_ADDRESS
from sat.plugins.plugin_xep_0082 import XEP_0082
from sat.plugins.plugin_xep_0334 import NS_HINTS
from sat.plugins.plugin_xep_0359 import NS_SID
from sat.plugins.plugin_xep_0380 import NS_EME
from twisted.words.protocols.jabber import jid
from twisted.words.xish import domish


__all__ = [  # pylint: disable=unused-variable
    "PLUGIN_INFO",
    "NS_SCE",
    "XEP_0420",
    "ProfileRequirementsNotMet",
    "AffixVerificationFailed",
    "SCECustomAffix",
    "SCEAffixPolicy",
    "SCEProfile",
    "SCEAffixValues"
]


log = cast(Logger, getLogger(__name__))  # type: ignore[no-untyped-call]


PLUGIN_INFO = {
    C.PI_NAME: "SCE",
    C.PI_IMPORT_NAME: "XEP-0420",
    C.PI_TYPE: "SEC",
    C.PI_PROTOCOLS: [ "XEP-0420" ],
    C.PI_DEPENDENCIES: [ "XEP-0334", "XEP-0082" ],
    C.PI_RECOMMENDATIONS: [ "XEP-0045", "XEP-0033", "XEP-0359" ],
    C.PI_MAIN: "XEP_0420",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: D_("Implementation of Stanza Content Encryption"),
}


NS_SCE: Final = "urn:xmpp:sce:1"


class ProfileRequirementsNotMet(Exception):
    """
    Raised by :meth:`XEP_0420.unpack_stanza` in case the requirements formulated by the
    profile are not met.
    """


class AffixVerificationFailed(Exception):
    """
    Raised by :meth:`XEP_0420.unpack_stanza` in case of affix verification failure.
    """


class SCECustomAffix(ABC):
    """
    Interface for custom affixes of SCE profiles.
    """

    @property
    @abstractmethod
    def element_name(self) -> str:
        """
        @return: The name of the affix's XML element.
        """

    @property
    @abstractmethod
    def element_schema(self) -> str:
        """
        @return: The XML schema definition of the affix element's XML structure, i.e. the
            ``<xs:element/>`` schema element. This element will be referenced using
            ``<xs:element ref="{element_name}"/>``.
        """

    @abstractmethod
    def create(self, stanza: domish.Element) -> domish.Element:
        """
        @param stanza: The stanza element which has been processed by
            :meth:`XEP_0420.pack_stanza`, i.e. all encryptable children have been removed
            and only the root ``<message/>`` or ``<iq/>`` and unencryptable children
            remain. Do not modify.
        @return: An affix element to include in the envelope. The element must have the
            name :attr:`element_name` and must validate using :attr:`element_schema`.
        @raise ValueError: if the affix couldn't be built due to missing information on
            the stanza.
        """

    @abstractmethod
    def verify(self, stanza: domish.Element, element: domish.Element) -> None:
        """
        @param stanza: The stanza element before being processed by
            :meth:`XEP_0420.unpack_stanza`, i.e. all encryptable children have been
            removed and only the root ``<message/>`` or ``<iq/>`` and unencryptable
            children remain. Do not modify.
        @param element: The affix element to verify.
        @raise AffixVerificationFailed: on verification failure.
        """


@enum.unique
class SCEAffixPolicy(enum.Enum):
    """
    Policy for the presence of an affix in an SCE envelope.
    """

    REQUIRED: str = "REQUIRED"
    OPTIONAL: str = "OPTIONAL"
    NOT_NEEDED: str = "NOT_NEEDED"


class SCEProfile(NamedTuple):
    # pylint: disable=invalid-name
    """
    An SCE profile, i.e. the definition which affixes are required, optional or not needed
    at all by an SCE-enabled encryption protocol.
    """

    rpad_policy: SCEAffixPolicy
    time_policy: SCEAffixPolicy
    to_policy: SCEAffixPolicy
    from_policy: SCEAffixPolicy
    custom_policies: Dict[SCECustomAffix, SCEAffixPolicy]


class SCEAffixValues(NamedTuple):
    # pylint: disable=invalid-name
    """
    Structure returned by :meth:`XEP_0420.unpack_stanza` with the parsed/processed values
    of all affixes included in the envelope. For custom affixes, the whole affix element
    is returned.
    """

    rpad: Optional[str]
    timestamp: Optional[datetime]
    recipient: Optional[jid.JID]
    sender: Optional[jid.JID]
    custom: Dict[SCECustomAffix, domish.Element]


# XML schema template used to validate decrypted envelopes. The two placeholders are
# filled in by :meth:`XEP_0420.unpack_stanza` with the references to/definitions of the
# custom affixes of the profile. Note that the attributes of the built-in affixes are not
# declared with ``use="required"``, thus their presence is not guaranteed by the schema.
ENVELOPE_SCHEMA = """<?xml version="1.0" encoding="utf8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
    targetNamespace="urn:xmpp:sce:1"
    xmlns="urn:xmpp:sce:1">

    <xs:element name="envelope">
        <xs:complexType>
            <xs:all>
                <xs:element ref="content"/>
                <xs:element ref="rpad" minOccurs="0"/>
                <xs:element ref="time" minOccurs="0"/>
                <xs:element ref="to" minOccurs="0"/>
                <xs:element ref="from" minOccurs="0"/>
                {custom_affix_references}
            </xs:all>
        </xs:complexType>
    </xs:element>

    <xs:element name="content">
        <xs:complexType>
            <xs:sequence>
                <xs:any minOccurs="0" maxOccurs="unbounded" processContents="skip"/>
            </xs:sequence>
        </xs:complexType>
    </xs:element>

    <xs:element name="rpad" type="xs:string"/>

    <xs:element name="time">
        <xs:complexType>
            <xs:attribute name="stamp" type="xs:dateTime"/>
        </xs:complexType>
    </xs:element>

    <xs:element name="to">
        <xs:complexType>
            <xs:attribute name="jid" type="xs:string"/>
        </xs:complexType>
    </xs:element>

    <xs:element name="from">
        <xs:complexType>
            <xs:attribute name="jid" type="xs:string"/>
        </xs:complexType>
    </xs:element>

    {custom_affix_definitions}
</xs:schema>
"""


class XEP_0420:  # pylint: disable=invalid-name
    """
    Implementation of XEP-0420: Stanza Content Encryption under namespace
    ``urn:xmpp:sce:1``.

    This is a passive plugin, i.e. it doesn't hook into any triggers to process stanzas
    actively, but offers API for other plugins to use.
    """

    # Set of namespaces whose elements are never allowed to be transferred in an encrypted
    # envelope.
    MUST_BE_PLAINTEXT_NAMESPACES: Set[str] = {
        NS_HINTS,
        NS_SID,  # TODO: Not sure whether this ban applies to both stanza-id and origin-id
        NS_ADDRESS,
        # Not part of the specification (yet), but just doesn't make sense in an encrypted
        # envelope:
        NS_EME
    }

    # Set of (namespace, element name) tuples that define elements which are never allowed
    # to be transferred in an encrypted envelope. If all elements under a certain
    # namespace are forbidden, the namespace can be added to
    # :attr:`MUST_BE_PLAINTEXT_NAMESPACES` instead.
    # Note: only full namespaces are forbidden by the spec for now, the following is for
    # potential future use.
    MUST_BE_PLAINTEXT_ELEMENTS: Set[Tuple[str, str]] = set()

    def __init__(self, sat: SAT) -> None:
        """
        @param sat: The SAT instance.
        """

    @staticmethod
    def pack_stanza(profile: SCEProfile, stanza: domish.Element) -> bytes:
        """Pack a stanza according to Stanza Content Encryption.

        Removes all elements from the stanza except for a few exceptions that explicitly
        need to be transferred in plaintext, e.g. because they contain hints/instructions
        for the server on how to process the stanza. Together with the affix elements as
        requested by the profile, the removed elements are added to an envelope XML
        structure that builds the plaintext to be encrypted by the SCE-enabled encryption
        scheme. Optional affixes are always added to the structure, i.e. they are treated
        by the packing code as if they were required.

        Once built, the envelope structure is serialized to a byte string and returned for
        the encryption scheme to encrypt and add to the stanza.

        @param profile: The SCE profile, i.e. the definition of affixes to include in the
            envelope.
        @param stanza: The stanza to process. Will be modified by the call.
        @return: The serialized envelope structure that builds the plaintext for the
            encryption scheme to process.
        @raise ValueError: if the <to/> or <from/> affixes are requested but the stanza
            doesn't have the "to"/"from" attribute set to extract the value from. Can also
            be raised by custom affixes.

        @warning: It is up to the calling code to add a <store/> message processing hint
            if applicable.
        """

        # Prepare the envelope and content elements
        envelope = domish.Element((NS_SCE, "envelope"))
        content = envelope.addElement((NS_SCE, "content"))

        # Note the serialized byte size of the content element before adding any children
        empty_content_byte_size = len(content.toXml().encode("utf-8"))

        # Move elements that are not explicitly forbidden from being encrypted from the
        # stanza to the content element.
        for child in list(stanza.elements()):
            if (
                child.uri not in XEP_0420.MUST_BE_PLAINTEXT_NAMESPACES
                and (child.uri, child.name) not in XEP_0420.MUST_BE_PLAINTEXT_ELEMENTS
            ):
                # Remove the child from the stanza
                stanza.children.remove(child)

                # A namespace of ``None`` can be used on domish elements to inherit the
                # namespace from the parent. When moving elements from the stanza root to
                # the content element, however, we don't want elements to inherit the
                # namespace of the content element. Thus, check for elements with ``None``
                # for their namespace and set the namespace to jabber:client, which is the
                # namespace of the parent element.
                if child.uri is None:
                    child.uri = C.NS_CLIENT
                    child.defaultUri = C.NS_CLIENT

                # Add the child with corrected namespaces to the content element
                content.addChild(child)

        # Add the affixes requested by the profile
        if profile.rpad_policy is not SCEAffixPolicy.NOT_NEEDED:
            # The specification defines the rpad affix to contain "[...] a randomly
            # generated sequence of random length between 0 and 200 characters." This
            # implementation differs a bit from the specification in that a minimum size
            # other than 0 is chosen depending on the serialized size of the content
            # element. This is to prevent the scenario where the encrypted content is
            # short and the rpad is also randomly chosen to be short, which could allow
            # guessing the content of a short message. To do so, the rpad length is first
            # chosen to pad the content to at least 53 bytes, then afterwards another 0 to
            # 200 bytes are added. Note that single-byte characters are used by this
            # implementation, thus the number of characters equals the number of bytes.
            content_byte_size = len(content.toXml().encode("utf-8"))
            content_byte_size_diff = content_byte_size - empty_content_byte_size
            rpad_length = max(0, 53 - content_byte_size_diff) + secrets.randbelow(201)

            # The alphabet is loop-invariant, build it once instead of per character.
            rpad_alphabet = string.digits + string.ascii_letters + string.punctuation
            rpad_content = "".join(
                secrets.choice(rpad_alphabet) for __ in range(rpad_length)
            )
            envelope.addElement((NS_SCE, "rpad"), content=rpad_content)

        if profile.time_policy is not SCEAffixPolicy.NOT_NEEDED:
            time_element = envelope.addElement((NS_SCE, "time"))
            time_element["stamp"] = XEP_0082.format_datetime()

        if profile.to_policy is not SCEAffixPolicy.NOT_NEEDED:
            recipient = stanza.getAttribute("to", None)
            if recipient is not None:
                to_element = envelope.addElement((NS_SCE, "to"))
                to_element["jid"] = jid.JID(recipient).userhost()
            elif profile.to_policy is SCEAffixPolicy.REQUIRED:
                raise ValueError(
                    "<to/> affix requested, but stanza doesn't have the 'to' attribute"
                    " set."
                )

        if profile.from_policy is not SCEAffixPolicy.NOT_NEEDED:
            sender = stanza.getAttribute("from", None)
            if sender is not None:
                from_element = envelope.addElement((NS_SCE, "from"))
                from_element["jid"] = jid.JID(sender).userhost()
            elif profile.from_policy is SCEAffixPolicy.REQUIRED:
                raise ValueError(
                    "<from/> affix requested, but stanza doesn't have the 'from'"
                    " attribute set."
                )

        for affix, policy in profile.custom_policies.items():
            if policy is not SCEAffixPolicy.NOT_NEEDED:
                envelope.addChild(affix.create(stanza))

        return envelope.toXml().encode("utf-8")

    @staticmethod
    def unpack_stanza(
        profile: SCEProfile,
        stanza: domish.Element,
        envelope_serialized: bytes
    ) -> SCEAffixValues:
        """Unpack a stanza packed according to Stanza Content Encryption.

        Parses the serialized envelope as XML, verifies included affixes and makes sure
        the requirements of the profile are met, and restores the stanza by moving
        decrypted elements from the envelope back to the stanza top level.

        @param profile: The SCE profile, i.e. the definition of affixes that have to/may
            be included in the envelope.
        @param stanza: The stanza to process. Will be modified by the call.
        @param envelope_serialized: The serialized envelope, i.e. the plaintext produced
            by the decryption scheme utilizing SCE.
        @return: The parsed and processed values of all affixes that were present on the
            envelope, notably including the timestamp.
        @raise exceptions.ParsingError: if the serialized envelope element is malformed.
        @raise ProfileRequirementsNotMet: if one or more affixes required by the profile
            are missing from the envelope.
        @raise AffixVerificationFailed: if an affix included in the envelope fails to
            validate, which includes affixes that lack their attribute or carry a
            malformed value. It doesn't matter whether the affix is required by the
            profile or not, all affixes included in the envelope are validated and cause
            this exception to be raised on failure.

        @warning: It is up to the calling code to verify the timestamp, if returned, since
            the requirements on the timestamp may vary between SCE-enabled protocols.
        """

        try:
            envelope_serialized_string = envelope_serialized.decode("utf-8")
        except UnicodeError as e:
            raise exceptions.ParsingError(
                "Serialized envelope can't be parsed as utf-8."
            ) from e

        custom_affixes = set(profile.custom_policies)

        # Make sure the envelope adheres to the schema
        schema_source = ENVELOPE_SCHEMA.format(
            custom_affix_references="".join(
                f'<xs:element ref="{custom_affix.element_name}" minOccurs="0"/>'
                for custom_affix in custom_affixes
            ),
            custom_affix_definitions="".join(
                custom_affix.element_schema for custom_affix in custom_affixes
            )
        )
        parser = etree.XMLParser(
            schema=etree.XMLSchema(etree.XML(schema_source.encode("utf-8")))
        )

        try:
            etree.fromstring(envelope_serialized_string, parser)
        except etree.XMLSyntaxError as e:
            raise exceptions.ParsingError(
                "Serialized envelope doesn't pass schema validation."
            ) from e

        # Prepare the envelope and content elements
        envelope = cast(domish.Element, ElementParser()(envelope_serialized_string))
        content = next(envelope.elements(NS_SCE, "content"))

        # Verify the affixes
        rpad_element = cast(
            Optional[domish.Element],
            next(envelope.elements(NS_SCE, "rpad"), None)
        )
        time_element = cast(
            Optional[domish.Element],
            next(envelope.elements(NS_SCE, "time"), None)
        )
        to_element = cast(
            Optional[domish.Element],
            next(envelope.elements(NS_SCE, "to"), None)
        )
        from_element = cast(
            Optional[domish.Element],
            next(envelope.elements(NS_SCE, "from"), None)
        )

        # The rpad doesn't need verification.
        rpad_value = None if rpad_element is None else str(rpad_element)

        # The time affix isn't verified other than that the timestamp is parseable. The
        # schema doesn't require the "stamp" attribute, thus a missing attribute
        # (KeyError) is treated like an unparseable timestamp.
        try:
            timestamp_value = None if time_element is None else \
                XEP_0082.parse_datetime(time_element["stamp"])
        except (KeyError, ValueError) as e:
            raise AffixVerificationFailed("Malformed time affix.") from e

        # The to affix is verified by comparing the to attribute of the stanza with the
        # JID referenced by the affix. Note that only bare JIDs are compared as per the
        # specification.
        recipient_value: Optional[jid.JID] = None
        if to_element is not None:
            # The schema neither requires the "jid" attribute nor checks its format, so
            # report both a missing attribute and an unparseable JID as verification
            # failure instead of leaking KeyError/InvalidFormat to the caller.
            try:
                recipient_value = jid.JID(to_element["jid"])
            except (KeyError, jid.InvalidFormat) as e:
                raise AffixVerificationFailed("Malformed to affix.") from e

            recipient_actual = stanza.getAttribute("to", None)
            if recipient_actual is None:
                raise AffixVerificationFailed(
                    "'To' affix is included in the envelope, but the stanza is lacking a"
                    " 'to' attribute to compare the value to."
                )

            recipient_actual_bare_jid = jid.JID(recipient_actual).userhost()
            recipient_target_bare_jid = recipient_value.userhost()

            if recipient_actual_bare_jid != recipient_target_bare_jid:
                raise AffixVerificationFailed(
                    f"Mismatch between actual and target recipient bare JIDs:"
                    f" {recipient_actual_bare_jid} vs {recipient_target_bare_jid}."
                )

        # The from affix is verified by comparing the from attribute of the stanza with
        # the JID referenced by the affix. Note that only bare JIDs are compared as per
        # the specification.
        sender_value: Optional[jid.JID] = None
        if from_element is not None:
            # Same reasoning as for the to affix above.
            try:
                sender_value = jid.JID(from_element["jid"])
            except (KeyError, jid.InvalidFormat) as e:
                raise AffixVerificationFailed("Malformed from affix.") from e

            sender_actual = stanza.getAttribute("from", None)
            if sender_actual is None:
                raise AffixVerificationFailed(
                    "'From' affix is included in the envelope, but the stanza is lacking"
                    " a 'from' attribute to compare the value to."
                )

            sender_actual_bare_jid = jid.JID(sender_actual).userhost()
            sender_target_bare_jid = sender_value.userhost()

            if sender_actual_bare_jid != sender_target_bare_jid:
                raise AffixVerificationFailed(
                    f"Mismatch between actual and target sender bare JIDs:"
                    f" {sender_actual_bare_jid} vs {sender_target_bare_jid}."
                )

        # Find and verify custom affixes
        custom_values: Dict[SCECustomAffix, domish.Element] = {}
        for affix in custom_affixes:
            element_name = affix.element_name
            element = cast(
                Optional[domish.Element],
                next(envelope.elements(NS_SCE, element_name), None)
            )
            if element is not None:
                affix.verify(stanza, element)
                custom_values[affix] = element

        # Check whether all affixes required by the profile are present
        rpad_missing = \
            profile.rpad_policy is SCEAffixPolicy.REQUIRED and rpad_element is None
        time_missing = \
            profile.time_policy is SCEAffixPolicy.REQUIRED and time_element is None
        to_missing = \
            profile.to_policy is SCEAffixPolicy.REQUIRED and to_element is None
        from_missing = \
            profile.from_policy is SCEAffixPolicy.REQUIRED and from_element is None
        custom_missing = any(
            affix not in custom_values
            for affix, policy in profile.custom_policies.items()
            if policy is SCEAffixPolicy.REQUIRED
        )

        if rpad_missing or time_missing or to_missing or from_missing or custom_missing:
            # Report the actual presence of each affix in the envelope. The ``*_missing``
            # flags above only cover required affixes and would thus report an absent
            # optional affix as present.
            custom_missing_string = "".join(
                f", [custom]{custom_affix.element_name}="
                f"{'present' if custom_affix in custom_values else 'missing'}"
                for custom_affix in custom_affixes
            )

            raise ProfileRequirementsNotMet(
                f"SCE envelope is missing affixes required by the profile {profile}."
                f" Affix presence:"
                f" rpad={'missing' if rpad_element is None else 'present'}"
                f", time={'missing' if time_element is None else 'present'}"
                f", to={'missing' if to_element is None else 'present'}"
                f", from={'missing' if from_element is None else 'present'}"
                + custom_missing_string
            )

        # Move elements that are not explicitly forbidden from being encrypted from the
        # content element to the stanza.
        for child in list(content.elements()):
            if (
                child.uri in XEP_0420.MUST_BE_PLAINTEXT_NAMESPACES
                or (child.uri, child.name) in XEP_0420.MUST_BE_PLAINTEXT_ELEMENTS
            ):
                # Forbidden elements are not restored to the stanza, only reported.
                log.warning(
                    f"An element that MUST be transferred in plaintext was found in an"
                    f" SCE envelope: {child.toXml()}"
                )
            else:
                # Remove the child from the content element
                content.children.remove(child)

                # Add the child to the stanza
                stanza.addChild(child)

        return SCEAffixValues(
            rpad_value,
            timestamp_value,
            recipient_value,
            sender_value,
            custom_values
        )