Mercurial > libervia-backend
changeset 1818:7e6342de71fb
core (xml_tools): unescape HTML sequences to avoid parsing error in ElementParser
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 22 Jan 2016 20:24:17 +0100 |
parents | 7ef0f5f90862 |
children | c271087d2020 |
files | src/tools/xml_tools.py |
diffstat | 1 files changed, 21 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/src/tools/xml_tools.py Fri Jan 22 20:24:17 2016 +0100
+++ b/src/tools/xml_tools.py Fri Jan 22 20:24:17 2016 +0100
@@ -29,11 +29,15 @@
 from twisted.internet import defer
 from sat.core import exceptions
 from collections import OrderedDict
+import htmlentitydefs
+import re
 
 """This library help manage XML used in SàT (parameters, registration, etc)"""
 
 SAT_FORM_PREFIX = "SAT_FORM_"
 SAT_PARAM_SEPARATOR = "_XMLUI_PARAM_" # used to have unique elements names
+html_entity_re = re.compile(r'&([a-zA-Z]+?);')
+XML_ENTITIES = ('quot', 'amp', 'apos', 'lt', 'gt')
 
 # Helper functions
 
@@ -1320,11 +1324,20 @@
 # Misc other funtions
 
 class ElementParser(object):
-    """callable class to parse XML string into Element
+    """callable class to parse XML string into Element"""
+    # XXX: Found at http://stackoverflow.com/questions/2093400/how-to-create-twisted-words-xish-domish-element-entirely-from-raw-xml/2095942#2095942
 
-    Found at http://stackoverflow.com/questions/2093400/how-to-create-twisted-words-xish-domish-element-entirely-from-raw-xml/2095942#2095942
-    (c) Karl Anderson
-    """
+    def _escapeHTML(self, matchobj):
+        entity = matchobj.group(1)
+        if entity in XML_ENTITIES:
+            # we don't escape XML entities
+            return matchobj.group(0)
+        else:
+            try:
+                return unichr(htmlentitydefs.name2codepoint[entity])
+            except KeyError:
+                log.warning(u"removing unknown entity {}".format(entity))
+                return u''
 
     def __call__(self, raw_xml, force_spaces=False, namespace=None):
         """
@@ -1339,6 +1352,9 @@
         else:
             raw_xml = u"<div>{}</div>".format(raw_xml)
 
+        # avoid ParserError on HTML escaped chars
+        raw_xml = html_entity_re.sub(self._escapeHTML, raw_xml)
+
         self.result = None
 
         def onStart(elem):
@@ -1362,7 +1378,7 @@
         top_elt = self.result.firstChildElement()
         # we now can check if there was a unique element on the top
         # and remove our wrapping <div/> is this was the case
-        if len(top_elt.children) == 1:
+        if len(top_elt.children) == 1 and domish.IElement.providedBy(top_elt.children[0]):
             top_elt = top_elt.firstChildElement()
         return top_elt
 