comparison src/plugins/plugin_misc_text_syntaxes.py @ 1805:3c40fa0dcd7a

plugin text syntaxes: various minor improvements: - use tuple instead of list for STYLES_WHITELIST - specify where to get Python Markdown and html2text - renamed clean_xhtml to cleanXHTML for consistency - check name for conflict in addSyntax
author Goffi <goffi@goffi.org>
date Sun, 17 Jan 2016 17:33:10 +0100
parents 14a97a5fe1c0
children 90ae9a2462c2
comparison
equal deleted inserted replaced
1804:33e73c70d78a 1805:3c40fa0dcd7a
37 NAME = "Syntax" 37 NAME = "Syntax"
38 _SYNTAX_XHTML = "XHTML" 38 _SYNTAX_XHTML = "XHTML"
39 _SYNTAX_CURRENT = "@CURRENT@" 39 _SYNTAX_CURRENT = "@CURRENT@"
40 40
41 # TODO: check/adapt following list 41 # TODO: check/adapt following list
42 STYLES_WHITELIST = ["azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width"] # based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) 42 # list initialy based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html)
43 STYLES_WHITELIST = ("azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width")
43 44
44 SAFE_ATTRS = html.defs.safe_attrs.union(('style',)) 45 SAFE_ATTRS = html.defs.safe_attrs.union(('style',))
45 STYLES_VALUES_REGEX = r'^(' + '|'.join(['([a-z-]+)', # alphabetical names 46 STYLES_VALUES_REGEX = r'^(' + '|'.join(['([a-z-]+)', # alphabetical names
46 '(#[0-9a-f]+)', # hex value 47 '(#[0-9a-f]+)', # hex value
47 '(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))', # values with units (or not) 48 '(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))', # values with units (or not)
110 h = html2text.HTML2Text(baseurl=baseurl) 111 h = html2text.HTML2Text(baseurl=baseurl)
111 h.body_width = 0 # do not truncate the lines, it breaks the long URLs 112 h.body_width = 0 # do not truncate the lines, it breaks the long URLs
112 return h.handle(html) 113 return h.handle(html)
113 self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, _html2text, [TextSyntaxes.OPT_DEFAULT]) 114 self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, _html2text, [TextSyntaxes.OPT_DEFAULT])
114 except ImportError: 115 except ImportError:
115 log.warning("markdown or html2text not found, can't use Markdown syntax") 116 log.warning(u"markdown or html2text not found, can't use Markdown syntax")
117 log.info(u"You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/")
116 host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssbs', out_sign='s', 118 host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssbs', out_sign='s',
117 async=True, method=self.convert) 119 async=True, method=self.convert)
118 120
119 def _updateParamOptions(self): 121 def _updateParamOptions(self):
120 data_synt = TextSyntaxes.params_data['syntaxes'] 122 data_synt = TextSyntaxes.params_data['syntaxes']
123 for syntax in data_synt.keys(): 125 for syntax in data_synt.keys():
124 flags = data_synt[syntax]["flags"] 126 flags = data_synt[syntax]["flags"]
125 if TextSyntaxes.OPT_HIDDEN not in flags: 127 if TextSyntaxes.OPT_HIDDEN not in flags:
126 syntaxes.append(syntax) 128 syntaxes.append(syntax)
127 129
128 syntaxes.sort(key=unicode.lower) 130 syntaxes.sort(key=lambda synt: synt.lower())
129 options = [] 131 options = []
130 132
131 for syntax in syntaxes: 133 for syntax in syntaxes:
132 selected = 'selected="true"' if syntax == _SYNTAX_XHTML else '' 134 selected = 'selected="true"' if syntax == _SYNTAX_XHTML else ''
133 options.append(u'<option value="%s" %s/>' % (syntax, selected)) 135 options.append(u'<option value="%s" %s/>' % (syntax, selected))
141 @param profile: %(doc_profile)s 143 @param profile: %(doc_profile)s
142 @return: profile selected syntax 144 @return: profile selected syntax
143 """ 145 """
144 return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile) 146 return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile)
145 147
146 def clean_xhtml(self, xhtml): 148 def cleanXHTML(self, xhtml):
147 """ Clean XHTML text by removing potentially dangerous/malicious parts 149 """ Clean XHTML text by removing potentially dangerous/malicious parts
148 @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) 150 @param xhtml: raw xhtml text to clean (or lxml's HtmlElement)
149 """ 151 """
150 def blocking_cleaning(xhtml): 152 def blocking_cleaning(xhtml):
151 """ Clean XHTML and style attributes """ 153 """ Clean XHTML and style attributes """
196 @param syntax_to: dest syntax (e.g.: "XHTML") 198 @param syntax_to: dest syntax (e.g.: "XHTML")
197 @param safe: clean resulting XHTML to avoid malicious code if True 199 @param safe: clean resulting XHTML to avoid malicious code if True
198 @param profile: needed only when syntax_from or syntax_to is set to _SYNTAX_CURRENT 200 @param profile: needed only when syntax_from or syntax_to is set to _SYNTAX_CURRENT
199 @return(unicode): converted text 201 @return(unicode): converted text
200 """ 202 """
203 # FIXME: convert should be able to handle domish.Element directly
204 # when dealing with XHTML
205 # TODO: a way for parser to return parsing errors/warnings
201 206
202 if syntax_from == _SYNTAX_CURRENT: 207 if syntax_from == _SYNTAX_CURRENT:
203 syntax_from = self.getCurrentSyntax(profile) 208 syntax_from = self.getCurrentSyntax(profile)
204 if syntax_to == _SYNTAX_CURRENT: 209 if syntax_to == _SYNTAX_CURRENT:
205 syntax_to = self.getCurrentSyntax(profile) 210 syntax_to = self.getCurrentSyntax(profile)
216 d = deferToThread(syntaxes[syntax_from]["to"], text) 221 d = deferToThread(syntaxes[syntax_from]["to"], text)
217 222
218 #TODO: keep only body element and change it to a div here ? 223 #TODO: keep only body element and change it to a div here ?
219 224
220 if safe: 225 if safe:
221 d.addCallback(self.clean_xhtml) 226 d.addCallback(self.cleanXHTML)
222 227
223 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]: 228 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]:
224 d.addCallback(syntaxes[syntax_to]["from"]) 229 d.addCallback(syntaxes[syntax_to]["from"])
225 else: 230 else:
226 d.addCallback(lambda xhtml: deferToThread(syntaxes[syntax_to]["from"], xhtml)) 231 d.addCallback(lambda xhtml: deferToThread(syntaxes[syntax_to]["from"], xhtml))
238 @param flags: set of optional flags, can be: 243 @param flags: set of optional flags, can be:
239 TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one) 244 TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one)
240 TextSyntaxes.OPT_HIDDEN: do not show in parameters 245 TextSyntaxes.OPT_HIDDEN: do not show in parameters
241 TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback may then return a deferred) 246 TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback may then return a deferred)
242 """ 247 """
243 name = unicode(name) 248 flags = flags if flags is not None else []
244 flags = flags or []
245 if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags: 249 if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags:
246 raise ValueError(u"{} and {} are mutually exclusive".format(TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT)) 250 raise ValueError(u"{} and {} are mutually exclusive".format(TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT))
247 251
248 syntaxes = TextSyntaxes.params_data['syntaxes'] 252 syntaxes = TextSyntaxes.params_data['syntaxes']
253 if name in syntaxes:
254 raise exceptions.ConflitError(u"This syntax name already exists: {}".format(name))
249 syntaxes[name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags} 255 syntaxes[name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags}
250 if TextSyntaxes.OPT_DEFAULT in flags: 256 if TextSyntaxes.OPT_DEFAULT in flags:
251 syntaxes = TextSyntaxes.params_data['default'] = name 257 syntaxes = TextSyntaxes.params_data['default'] = name
252 258
253 self._updateParamOptions() 259 self._updateParamOptions()