changeset 1805:3c40fa0dcd7a

plugin text syntaxes: various minor improvements: - use tuple instead of list for STYLES_WHITELIST - specify where to get python markdown and html2text - renamed clean_xhtml to cleanXHTML for consistency - check name for conflict in addSyntax
author Goffi <goffi@goffi.org>
date Sun, 17 Jan 2016 17:33:10 +0100
parents 33e73c70d78a
children fd788d24277a
files src/plugins/plugin_misc_text_syntaxes.py
diffstat 1 files changed, 13 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/src/plugins/plugin_misc_text_syntaxes.py	Sat Jan 16 10:09:16 2016 +0100
+++ b/src/plugins/plugin_misc_text_syntaxes.py	Sun Jan 17 17:33:10 2016 +0100
@@ -39,7 +39,8 @@
 _SYNTAX_CURRENT = "@CURRENT@"
 
 # TODO: check/adapt following list
-STYLES_WHITELIST = ["azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width"] # based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html)
+# list initially based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html)
+STYLES_WHITELIST = ("azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width")
 
 SAFE_ATTRS = html.defs.safe_attrs.union(('style',))
 STYLES_VALUES_REGEX = r'^(' + '|'.join(['([a-z-]+)', # alphabetical names
@@ -112,7 +113,8 @@
                 return h.handle(html)
             self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, _html2text, [TextSyntaxes.OPT_DEFAULT])
         except ImportError:
-            log.warning("markdown or html2text not found, can't use Markdown syntax")
+            log.warning(u"markdown or html2text not found, can't use Markdown syntax")
+            log.info(u"You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/")
         host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssbs', out_sign='s',
                               async=True, method=self.convert)
 
@@ -125,7 +127,7 @@
             if TextSyntaxes.OPT_HIDDEN not in flags:
                 syntaxes.append(syntax)
 
-        syntaxes.sort(key=unicode.lower)
+        syntaxes.sort(key=lambda synt: synt.lower())
         options = []
 
         for syntax in syntaxes:
@@ -143,7 +145,7 @@
         """
         return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile)
 
-    def clean_xhtml(self, xhtml):
+    def cleanXHTML(self, xhtml):
         """ Clean XHTML text by removing potentially dangerous/malicious parts
         @param xhtml: raw xhtml text to clean (or lxml's HtmlElement)
         """
@@ -198,6 +200,9 @@
         @param profile: needed only when syntax_from or syntax_to is set to _SYNTAX_CURRENT
         @return(unicode): converted text
         """
+        # FIXME: convert should be able to handle domish.Element directly
+        #        when dealing with XHTML
+        # TODO: a way for parser to return parsing errors/warnings
 
         if syntax_from == _SYNTAX_CURRENT:
             syntax_from = self.getCurrentSyntax(profile)
@@ -218,7 +223,7 @@
         #TODO: keep only body element and change it to a div here ?
 
         if safe:
-            d.addCallback(self.clean_xhtml)
+            d.addCallback(self.cleanXHTML)
 
         if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]:
             d.addCallback(syntaxes[syntax_to]["from"])
@@ -240,12 +245,13 @@
             TextSyntaxes.OPT_HIDDEN: do not show in parameters
             TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback may then return a deferred)
         """
-        name = unicode(name)
-        flags = flags or []
+        flags = flags if flags is not None else []
         if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags:
             raise ValueError(u"{} and {} are mutually exclusive".format(TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT))
 
         syntaxes = TextSyntaxes.params_data['syntaxes']
+        if name in syntaxes:
+            raise exceptions.ConflitError(u"This syntax name already exists: {}".format(name))
         syntaxes[name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags}
         if TextSyntaxes.OPT_DEFAULT in flags:
             syntaxes = TextSyntaxes.params_data['default'] = name