Mercurial > libervia-backend
diff src/plugins/plugin_misc_text_syntaxes.py @ 1805:3c40fa0dcd7a
plugin text syntaxes: various minor improvements:
- use tuple instead of list for STYLES_WHITELIST
- specify where to get python markdown and html2text
- renamed clean_xhtml to cleanXHTML for consistency
- check name for conflict in addSyntax
author | Goffi <goffi@goffi.org> |
---|---|
date | Sun, 17 Jan 2016 17:33:10 +0100 |
parents | 14a97a5fe1c0 |
children | 90ae9a2462c2 |
line wrap: on
line diff
--- a/src/plugins/plugin_misc_text_syntaxes.py Sat Jan 16 10:09:16 2016 +0100 +++ b/src/plugins/plugin_misc_text_syntaxes.py Sun Jan 17 17:33:10 2016 +0100 @@ -39,7 +39,8 @@ _SYNTAX_CURRENT = "@CURRENT@" # TODO: check/adapt following list -STYLES_WHITELIST = ["azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width"] # based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) +# list initialy based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) +STYLES_WHITELIST = ("azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width") SAFE_ATTRS = html.defs.safe_attrs.union(('style',)) STYLES_VALUES_REGEX = r'^(' + '|'.join(['([a-z-]+)', # alphabetical names @@ -112,7 +113,8 @@ return h.handle(html) self.addSyntax(self.SYNTAX_MARKDOWN, 
markdown.markdown, _html2text, [TextSyntaxes.OPT_DEFAULT]) except ImportError: - log.warning("markdown or html2text not found, can't use Markdown syntax") + log.warning(u"markdown or html2text not found, can't use Markdown syntax") + log.info(u"You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/") host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssbs', out_sign='s', async=True, method=self.convert) @@ -125,7 +127,7 @@ if TextSyntaxes.OPT_HIDDEN not in flags: syntaxes.append(syntax) - syntaxes.sort(key=unicode.lower) + syntaxes.sort(key=lambda synt: synt.lower()) options = [] for syntax in syntaxes: @@ -143,7 +145,7 @@ """ return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile) - def clean_xhtml(self, xhtml): + def cleanXHTML(self, xhtml): """ Clean XHTML text by removing potentially dangerous/malicious parts @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) """ @@ -198,6 +200,9 @@ @param profile: needed only when syntax_from or syntax_to is set to _SYNTAX_CURRENT @return(unicode): converted text """ + # FIXME: convert should be abled to handle domish.Element directly + # when dealing with XHTML + # TODO: a way for parser to return parsing errors/warnings if syntax_from == _SYNTAX_CURRENT: syntax_from = self.getCurrentSyntax(profile) @@ -218,7 +223,7 @@ #TODO: keep only body element and change it to a div here ? 
if safe: - d.addCallback(self.clean_xhtml) + d.addCallback(self.cleanXHTML) if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]: d.addCallback(syntaxes[syntax_to]["from"]) @@ -240,12 +245,13 @@ TextSyntaxes.OPT_HIDDEN: do not show in parameters TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback may then return a deferred) """ - name = unicode(name) - flags = flags or [] + flags = flags if flags is not None else [] if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags: raise ValueError(u"{} and {} are mutually exclusive".format(TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT)) syntaxes = TextSyntaxes.params_data['syntaxes'] + if name in syntaxes: + raise exceptions.ConflitError(u"This syntax name already exists: {}".format(name)) syntaxes[name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags} if TextSyntaxes.OPT_DEFAULT in flags: syntaxes = TextSyntaxes.params_data['default'] = name