comparison sat/tools/xml_tools.py @ 3914:4cb38c8312a1

plugin XEP-0384, xml_tools: avoid `getItems` timeout + fix empty node crash + parsing: - use `max_items` in `getItems` calls for bundles, as otherwise some pubsub services may return full nodes, which may be huge if `max_items=1` is not set on the node, possibly resulting in timeouts. - the plugin was crashing when the TWOMEMO devices list node had no items at all. This is not the case anymore. - a naive parsing method has been implemented in `xml_tools` to replace the serialisation/deserialisation method. This should be more efficient and will avoid annoying `ns0:` prefixes in XML logs.
author Goffi <goffi@goffi.org>
date Sat, 24 Sep 2022 16:37:46 +0200
parents 384b7e6c2dbf
children 323017a4e4d2
comparison
equal deleted inserted replaced
3913:944f51f9c2b4 3914:4cb38c8312a1
16 # You should have received a copy of the GNU Affero General Public License 16 # You should have received a copy of the GNU Affero General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>. 17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18
19 19
20 import re 20 import re
21 from typing import Optional 21 from typing import Optional, Tuple
22 import html.entities 22 import html.entities
23 from collections import OrderedDict 23 from collections import OrderedDict
24 from xml.dom import minidom, NotFoundErr 24 from xml.dom import minidom, NotFoundErr
25 from twisted.words.xish import domish 25 from twisted.words.xish import domish
26 from twisted.words.protocols.jabber import jid 26 from twisted.words.protocols.jabber import jid
28 from wokkel import data_form 28 from wokkel import data_form
29 from sat.core import exceptions 29 from sat.core import exceptions
30 from sat.core.i18n import _ 30 from sat.core.i18n import _
31 from sat.core.constants import Const as C 31 from sat.core.constants import Const as C
32 from sat.core.log import getLogger 32 from sat.core.log import getLogger
33 import xml.etree.ElementTree as ET
33 34
34 35
35 log = getLogger(__name__) 36 log = getLogger(__name__)
36 37
37 """This library help manage XML used in SàT (parameters, registration, etc)""" 38 """This library help manage XML used in SàT (parameters, registration, etc)"""
1955 1956
1956 1957
def ppElt(elt):
    """Print a human readable rendering of a domish.Element

    @param elt: element to display; it is formatted with pFmtElt and the result is
        written with print
    """
    formatted = pFmtElt(elt)
    print(formatted)
1961
1962
1963 # ElementTree
1964
def et_get_namespace_and_name(et_elt: ET.Element) -> Tuple[Optional[str], str]:
    """Split the tag of an ElementTree element into its namespace and local name

    ElementTree stores qualified names as "{namespace}name"; a tag which doesn't start
    with "{" has no namespace.

    @param et_elt: ElementTree element
    @return: namespace and name of the element
        if no namespace is specified, None is returned as namespace
    @raise ValueError: the tag is empty, or its opening "{" is never closed
    """
    tag = et_elt.tag
    if not tag:
        raise ValueError("no name set in ET element")
    if tag[0] != "{":
        # unqualified name, the whole tag is the local name
        return None, tag
    # everything up to the first "}" is the namespace, the remainder is the name
    namespace, closing_brace, local_name = tag[1:].partition("}")
    if not closing_brace:
        raise ValueError("Invalid ET name")
    return namespace, local_name
1981
1982
def et_elt_2_domish_elt(et_elt: ET.Element) -> domish.Element:
    """Convert ElementTree element to Twisted's domish.Element

    Note: this is a naive implementation, adapted to XMPP, and some content is ignored
        (text put after a child element, i.e. the "tail" attribute of ET elements)

    @param et_elt: ElementTree element to convert
    @return: new domish.Element with the same qualified name, attributes, leading text
        and (recursively converted) child elements
    @raise ValueError: the tag of the element or of one of its descendants is invalid
    """
    namespace, name = et_get_namespace_and_name(et_elt)
    # A fresh dict is built rather than handing ``et_elt.attrib`` over, so that the
    # two elements never share the same mutable attributes mapping.
    attribs = {}
    for key, value in et_elt.attrib.items():
        if key[:1] == "{":
            # ElementTree writes namespaced attributes as "{uri}name", while domish
            # expects them as (uri, name) tuple keys (e.g. for xml:lang)
            attr_ns, closing_brace, attr_name = key[1:].partition("}")
            if closing_brace and attr_name:
                key = (attr_ns, attr_name) if attr_ns else attr_name
        attribs[key] = value
    elt = domish.Element((namespace, name), attribs=attribs)
    if et_elt.text:
        elt.addContent(et_elt.text)
    for child in et_elt:
        elt.addChild(et_elt_2_domish_elt(child))
    return elt
1996
1997
def domish_elt_2_et_elt(elt: domish.Element) -> ET.Element:
    """Convert Twisted's domish.Element to ElementTree equivalent

    Note: this is a naive implementation, adapted to XMPP, and some text content may be
        missing (content put after a tag, i.e. what would go to the "tail" attribute of
        ET Element)

    @param elt: domish element to convert
    @return: new ElementTree element with the same qualified name, attributes, text
        (as returned by ``str(elt)``) and recursively converted child elements
    """
    tag = f"{{{elt.uri}}}{elt.name}" if elt.uri else elt.name
    attrib = {}
    for key, value in elt.attributes.items():
        if isinstance(key, tuple):
            # domish stores namespaced attributes (e.g. xml:lang) under (uri, name)
            # tuple keys; ElementTree needs them as "{uri}name" strings
            attr_ns, attr_name = key
            key = f"{{{attr_ns}}}{attr_name}" if attr_ns else attr_name
        attrib[key] = value
    et_elt = ET.Element(tag, attrib=attrib)
    # text is computed once and reused
    content = str(elt)
    if content:
        et_elt.text = content
    for child in elt.elements():
        et_elt.append(domish_elt_2_et_elt(child))
    return et_elt