Mercurial > libervia-backend
comparison src/plugins/plugin_misc_text_syntaxes.py @ 1805:3c40fa0dcd7a
plugin text syntaxes: various minor improvements:
- use tuple instead of list for STYLES_WHITELIST
- specify where to get Python Markdown and html2text
- renamed clean_xhtml to cleanXHTML for consistency
- check name for conflict in addSyntax
author | Goffi <goffi@goffi.org> |
---|---|
date | Sun, 17 Jan 2016 17:33:10 +0100 |
parents | 14a97a5fe1c0 |
children | 90ae9a2462c2 |
comparison
equal
deleted
inserted
replaced
1804:33e73c70d78a | 1805:3c40fa0dcd7a |
---|---|
37 NAME = "Syntax" | 37 NAME = "Syntax" |
38 _SYNTAX_XHTML = "XHTML" | 38 _SYNTAX_XHTML = "XHTML" |
39 _SYNTAX_CURRENT = "@CURRENT@" | 39 _SYNTAX_CURRENT = "@CURRENT@" |
40 | 40 |
41 # TODO: check/adapt following list | 41 # TODO: check/adapt following list |
42 STYLES_WHITELIST = ["azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width"] # based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) | 42 # list initialy based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) |
43 STYLES_WHITELIST = ("azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width") | |
43 | 44 |
44 SAFE_ATTRS = html.defs.safe_attrs.union(('style',)) | 45 SAFE_ATTRS = html.defs.safe_attrs.union(('style',)) |
45 STYLES_VALUES_REGEX = r'^(' + '|'.join(['([a-z-]+)', # alphabetical names | 46 STYLES_VALUES_REGEX = r'^(' + '|'.join(['([a-z-]+)', # alphabetical names |
46 '(#[0-9a-f]+)', # hex value | 47 '(#[0-9a-f]+)', # hex value |
47 '(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))', # values with units (or not) | 48 '(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))', # values with units (or not) |
110 h = html2text.HTML2Text(baseurl=baseurl) | 111 h = html2text.HTML2Text(baseurl=baseurl) |
111 h.body_width = 0 # do not truncate the lines, it breaks the long URLs | 112 h.body_width = 0 # do not truncate the lines, it breaks the long URLs |
112 return h.handle(html) | 113 return h.handle(html) |
113 self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, _html2text, [TextSyntaxes.OPT_DEFAULT]) | 114 self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, _html2text, [TextSyntaxes.OPT_DEFAULT]) |
114 except ImportError: | 115 except ImportError: |
115 log.warning("markdown or html2text not found, can't use Markdown syntax") | 116 log.warning(u"markdown or html2text not found, can't use Markdown syntax") |
117 log.info(u"You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/") | |
116 host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssbs', out_sign='s', | 118 host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssbs', out_sign='s', |
117 async=True, method=self.convert) | 119 async=True, method=self.convert) |
118 | 120 |
119 def _updateParamOptions(self): | 121 def _updateParamOptions(self): |
120 data_synt = TextSyntaxes.params_data['syntaxes'] | 122 data_synt = TextSyntaxes.params_data['syntaxes'] |
123 for syntax in data_synt.keys(): | 125 for syntax in data_synt.keys(): |
124 flags = data_synt[syntax]["flags"] | 126 flags = data_synt[syntax]["flags"] |
125 if TextSyntaxes.OPT_HIDDEN not in flags: | 127 if TextSyntaxes.OPT_HIDDEN not in flags: |
126 syntaxes.append(syntax) | 128 syntaxes.append(syntax) |
127 | 129 |
128 syntaxes.sort(key=unicode.lower) | 130 syntaxes.sort(key=lambda synt: synt.lower()) |
129 options = [] | 131 options = [] |
130 | 132 |
131 for syntax in syntaxes: | 133 for syntax in syntaxes: |
132 selected = 'selected="true"' if syntax == _SYNTAX_XHTML else '' | 134 selected = 'selected="true"' if syntax == _SYNTAX_XHTML else '' |
133 options.append(u'<option value="%s" %s/>' % (syntax, selected)) | 135 options.append(u'<option value="%s" %s/>' % (syntax, selected)) |
141 @param profile: %(doc_profile)s | 143 @param profile: %(doc_profile)s |
142 @return: profile selected syntax | 144 @return: profile selected syntax |
143 """ | 145 """ |
144 return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile) | 146 return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile) |
145 | 147 |
146 def clean_xhtml(self, xhtml): | 148 def cleanXHTML(self, xhtml): |
147 """ Clean XHTML text by removing potentially dangerous/malicious parts | 149 """ Clean XHTML text by removing potentially dangerous/malicious parts |
148 @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) | 150 @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) |
149 """ | 151 """ |
150 def blocking_cleaning(xhtml): | 152 def blocking_cleaning(xhtml): |
151 """ Clean XHTML and style attributes """ | 153 """ Clean XHTML and style attributes """ |
196 @param syntax_to: dest syntax (e.g.: "XHTML") | 198 @param syntax_to: dest syntax (e.g.: "XHTML") |
197 @param safe: clean resulting XHTML to avoid malicious code if True | 199 @param safe: clean resulting XHTML to avoid malicious code if True |
198 @param profile: needed only when syntax_from or syntax_to is set to _SYNTAX_CURRENT | 200 @param profile: needed only when syntax_from or syntax_to is set to _SYNTAX_CURRENT |
199 @return(unicode): converted text | 201 @return(unicode): converted text |
200 """ | 202 """ |
203 # FIXME: convert should be abled to handle domish.Element directly | |
204 # when dealing with XHTML | |
205 # TODO: a way for parser to return parsing errors/warnings | |
201 | 206 |
202 if syntax_from == _SYNTAX_CURRENT: | 207 if syntax_from == _SYNTAX_CURRENT: |
203 syntax_from = self.getCurrentSyntax(profile) | 208 syntax_from = self.getCurrentSyntax(profile) |
204 if syntax_to == _SYNTAX_CURRENT: | 209 if syntax_to == _SYNTAX_CURRENT: |
205 syntax_to = self.getCurrentSyntax(profile) | 210 syntax_to = self.getCurrentSyntax(profile) |
216 d = deferToThread(syntaxes[syntax_from]["to"], text) | 221 d = deferToThread(syntaxes[syntax_from]["to"], text) |
217 | 222 |
218 #TODO: keep only body element and change it to a div here ? | 223 #TODO: keep only body element and change it to a div here ? |
219 | 224 |
220 if safe: | 225 if safe: |
221 d.addCallback(self.clean_xhtml) | 226 d.addCallback(self.cleanXHTML) |
222 | 227 |
223 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]: | 228 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]: |
224 d.addCallback(syntaxes[syntax_to]["from"]) | 229 d.addCallback(syntaxes[syntax_to]["from"]) |
225 else: | 230 else: |
226 d.addCallback(lambda xhtml: deferToThread(syntaxes[syntax_to]["from"], xhtml)) | 231 d.addCallback(lambda xhtml: deferToThread(syntaxes[syntax_to]["from"], xhtml)) |
238 @param flags: set of optional flags, can be: | 243 @param flags: set of optional flags, can be: |
239 TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one) | 244 TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one) |
240 TextSyntaxes.OPT_HIDDEN: do not show in parameters | 245 TextSyntaxes.OPT_HIDDEN: do not show in parameters |
241 TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback may then return a deferred) | 246 TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback may then return a deferred) |
242 """ | 247 """ |
243 name = unicode(name) | 248 flags = flags if flags is not None else [] |
244 flags = flags or [] | |
245 if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags: | 249 if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags: |
246 raise ValueError(u"{} and {} are mutually exclusive".format(TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT)) | 250 raise ValueError(u"{} and {} are mutually exclusive".format(TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT)) |
247 | 251 |
248 syntaxes = TextSyntaxes.params_data['syntaxes'] | 252 syntaxes = TextSyntaxes.params_data['syntaxes'] |
253 if name in syntaxes: | |
254 raise exceptions.ConflitError(u"This syntax name already exists: {}".format(name)) | |
249 syntaxes[name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags} | 255 syntaxes[name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags} |
250 if TextSyntaxes.OPT_DEFAULT in flags: | 256 if TextSyntaxes.OPT_DEFAULT in flags: |
251 syntaxes = TextSyntaxes.params_data['default'] = name | 257 syntaxes = TextSyntaxes.params_data['default'] = name |
252 | 258 |
253 self._updateParamOptions() | 259 self._updateParamOptions() |