annotate sat_frontends/jp/xml_tools.py @ 2786:be8405795e09

plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
author Goffi <goffi@goffi.org>
date Sat, 19 Jan 2019 11:39:02 +0100
parents ff1b40823b07
children 710de41da2f2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2777
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
1 #!/usr/bin/env python2
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
3
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # jp: a SàT command line tool
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org)
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
6
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
11
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
16
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
19
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
20 from sat.core.i18n import _
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
21 from sat_frontends.jp.constants import Const as C
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
22
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
def etreeParse(cmd, raw_xml):
    """Load lxml on demand and parse raw XML into its root element

    lxml is imported inside this function rather than at module level; when
    the import fails an error message is displayed through ``cmd.disp`` and
    ``cmd.host.quit`` is called with C.EXIT_ERROR.

    @param cmd(CommandBase): current command instance
    @param raw_xml(file, str): an XML bytestring, string or file-like object
    @return (tuple(etree.Element, module)): parsed root element, etree module
    """
    try:
        from lxml import etree
    except ImportError:
        cmd.disp(
            u'lxml module must be installed, please install it with "pip install lxml"',
            error=True,
        )
        cmd.host.quit(C.EXIT_ERROR)

    try:
        if not isinstance(raw_xml, basestring):
            # not a string (e.g. a file-like object): let lxml read it and
            # keep only the root of the resulting tree
            root = etree.parse(raw_xml).getroot()
        else:
            # in-memory XML: use a parser created with remove_blank_text=True
            blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
            root = etree.fromstring(raw_xml, blank_stripping_parser)
    except Exception as parse_error:
        # whatever went wrong is reported through the command's parser
        cmd.parser.error(
            _(u"Can't parse the payload XML in input: {msg}").format(msg=parse_error)
        )
    return root, etree
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
49
ff1b40823b07 jp (pubsub): new "transform" command:
Goffi <goffi@goffi.org>
parents:
diff changeset
def getPayload(cmd, element):
    """Retrieve payload element and exit with an error if not found

    If the given element is a pubsub <item> (with or without the
    "http://jabber.org/protocol/pubsub" namespace), its single child is
    returned. Any other element is considered to already be the payload and
    is returned unchanged.

    An <item> must have exactly one child: when it has none or more than one,
    an error is displayed and ``cmd.host.quit`` is called with
    C.EXIT_DATA_ERROR.

    @param cmd(CommandBase): current command instance
    @param element(etree.Element): root element
    @return element(etree.Element): payload element
    """
    if element.tag not in ("item", "{http://jabber.org/protocol/pubsub}item"):
        # not an <item> wrapper: the element itself is the payload
        return element

    children_count = len(element)
    if children_count != 1:
        if children_count > 1:
            msg = _(u"<item> can only have one child element (the payload)")
        else:
            # an empty <item> used to crash with IndexError on element[0]
            msg = _(u"<item> has no child element (the payload is missing)")
        cmd.disp(msg, error=True)
        # NOTE(review): quit() is presumably terminating the program, so the
        # return below is not expected to be reached in this case -- confirm
        cmd.host.quit(C.EXIT_DATA_ERROR)
    return element[0]