diff sat/tools/xml_tools.py @ 3914:4cb38c8312a1

plugin XEP-0384, xml_tools: avoid `getItems` timeout + fix empty node crash + parsing: - use `max_items` in `getItems` calls for bundles, as otherwise some pubsub services may return full nodes, which may be huge if `max_items=1` is not set on the node, possibly resulting in timeouts. - the plugin was crashing when the TWOMEMO devices list node had no items at all. This is not the case anymore. - a naive parsing method has been implemented in `xml_tools` to replace the serialisation/deserialisation method. This should be more efficient and will avoid annoying `ns0:` prefixes in XML logs.
author Goffi <goffi@goffi.org>
date Sat, 24 Sep 2022 16:37:46 +0200
parents 384b7e6c2dbf
children 323017a4e4d2
line wrap: on
line diff
--- a/sat/tools/xml_tools.py	Sat Sep 24 16:31:39 2022 +0200
+++ b/sat/tools/xml_tools.py	Sat Sep 24 16:37:46 2022 +0200
@@ -18,7 +18,7 @@
 
 
 import re
-from typing import Optional
+from typing import Optional, Tuple
 import html.entities
 from collections import OrderedDict
 from xml.dom import minidom, NotFoundErr
@@ -30,6 +30,7 @@
 from sat.core.i18n import _
 from sat.core.constants import Const as C
 from sat.core.log import getLogger
+import xml.etree.ElementTree as ET
 
 
 log = getLogger(__name__)
@@ -1957,3 +1958,55 @@
 def ppElt(elt):
     """Pretty print a domish.Element"""
     print(pFmtElt(elt))
+
+
+# ElementTree
+
+def et_get_namespace_and_name(et_elt: ET.Element) -> Tuple[Optional[str], str]:
+    """Split the tag of an ElementTree element into namespace and local name
+
+    @param et_elt: ElementTree element whose tag is read
+    @return: namespace and local name; namespace is None for an unqualified tag
+    @raise ValueError: the tag is empty, or its "{" has no closing "}"
+    """
+    tag = et_elt.tag
+    if not tag:
+        raise ValueError("no name set in ET element")
+    if tag[0] != "{":
+        return None, tag
+    namespace, closing, local_name = tag[1:].partition("}")
+    if not closing:
+        raise ValueError("Invalid ET name")
+    return namespace, local_name
+
+
+def et_elt_2_domish_elt(et_elt: ET.Element) -> domish.Element:
+    """Convert ElementTree element to Twisted's domish.Element
+
+    Note: this is a naive implementation, adapted to XMPP, and some content is ignored
+        (attribute namespaces, tail)
+
+    @param et_elt: ElementTree element to convert, children are converted recursively
+    @return: new domish.Element with the name, namespace, attributes, text and
+        children of the ElementTree element
+    @raise ValueError: the tag of the element (or of a descendant) is empty or
+        malformed
+    """
+    namespace, name = et_get_namespace_and_name(et_elt)
+    # the attributes are copied: domish.Element keeps the mapping it is given, so
+    # passing ``et_elt.attrib`` directly would make both elements share the same dict
+    elt = domish.Element((namespace, name), attribs=dict(et_elt.attrib))
+    if et_elt.text:
+        elt.addContent(et_elt.text)
+    for child in et_elt:
+        elt.addChild(et_elt_2_domish_elt(child))
+    return elt
+
+
+def domish_elt_2_et_elt(elt: domish.Element) -> ET.Element:
+    """Convert Twisted's domish.Element to ElementTree equivalent
+
+    Note: this is a naive implementation, adapted to XMPP, and some text content may be
+        missing (content put after a tag, i.e. what would go to the "tail" attribute of ET
+        Element)
+
+    @param elt: domish element to convert, child elements are converted recursively
+    @return: new ElementTree element
+    """
+    tag = f"{{{elt.uri}}}{elt.name}" if elt.uri else elt.name
+    et_elt = ET.Element(tag, attrib=elt.attributes)
+    # str(elt) is computed once and reused for both the emptiness check and the text
+    content = str(elt)
+    if content:
+        et_elt.text = content
+    for child in elt.elements():
+        et_elt.append(domish_elt_2_et_elt(child))
+    return et_elt