Mercurial > libervia-backend
comparison sat/plugins/plugin_misc_text_syntaxes.py @ 2624:56f94936df1e
code style reformatting using black
author | Goffi <goffi@goffi.org> |
---|---|
date | Wed, 27 Jun 2018 20:14:46 +0200 |
parents | 26edcf3a30eb |
children | 003b8b4b56a7 |
comparison
equal
deleted
inserted
replaced
2623:49533de4540b | 2624:56f94936df1e |
---|---|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 |
20 from sat.core.i18n import _, D_ | 20 from sat.core.i18n import _, D_ |
21 from sat.core.constants import Const as C | 21 from sat.core.constants import Const as C |
22 from sat.core.log import getLogger | 22 from sat.core.log import getLogger |
23 | |
23 log = getLogger(__name__) | 24 log = getLogger(__name__) |
24 | 25 |
25 from twisted.internet import defer | 26 from twisted.internet import defer |
26 from twisted.internet.threads import deferToThread | 27 from twisted.internet.threads import deferToThread |
27 from sat.core import exceptions | 28 from sat.core import exceptions |
29 | |
28 try: | 30 try: |
29 from lxml import html | 31 from lxml import html |
30 from lxml.html import clean | 32 from lxml.html import clean |
31 except ImportError: | 33 except ImportError: |
32 raise exceptions.MissingModule(u"Missing module lxml, please download/install it from http://lxml.de/") | 34 raise exceptions.MissingModule( |
35 u"Missing module lxml, please download/install it from http://lxml.de/" | |
36 ) | |
33 from cgi import escape | 37 from cgi import escape |
34 import re | 38 import re |
35 | 39 |
36 | 40 |
37 CATEGORY = D_("Composition") | 41 CATEGORY = D_("Composition") |
39 _SYNTAX_XHTML = "XHTML" | 43 _SYNTAX_XHTML = "XHTML" |
40 _SYNTAX_CURRENT = "@CURRENT@" | 44 _SYNTAX_CURRENT = "@CURRENT@" |
41 | 45 |
42 # TODO: check/adapt following list | 46 # TODO: check/adapt following list |
43 # list initialy based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) | 47 # list initialy based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) |
44 STYLES_WHITELIST = ("azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width") | 48 STYLES_WHITELIST = ( |
45 | 49 "azimuth", |
46 SAFE_ATTRS = html.defs.safe_attrs.union(('style', 'poster', 'controls')) | 50 "background-color", |
47 STYLES_VALUES_REGEX = r'^(' + '|'.join(['([a-z-]+)', # alphabetical names | 51 "border-bottom-color", |
48 '(#[0-9a-f]+)', # hex value | 52 "border-collapse", |
49 '(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))', # values with units (or not) | 53 "border-color", |
50 'rgb\( *((\d+(.\d+)?), *){2}(\d+(.\d+)?) *\)', # rgb function | 54 "border-left-color", |
51 'rgba\( *((\d+(.\d+)?), *){3}(\d+(.\d+)?) *\)', # rgba function | 55 "border-right-color", |
52 ]) + ') *(!important)?$' # we accept "!important" at the end | 56 "border-top-color", |
57 "clear", | |
58 "color", | |
59 "cursor", | |
60 "direction", | |
61 "display", | |
62 "elevation", | |
63 "float", | |
64 "font", | |
65 "font-family", | |
66 "font-size", | |
67 "font-style", | |
68 "font-variant", | |
69 "font-weight", | |
70 "height", | |
71 "letter-spacing", | |
72 "line-height", | |
73 "overflow", | |
74 "pause", | |
75 "pause-after", | |
76 "pause-before", | |
77 "pitch", | |
78 "pitch-range", | |
79 "richness", | |
80 "speak", | |
81 "speak-header", | |
82 "speak-numeral", | |
83 "speak-punctuation", | |
84 "speech-rate", | |
85 "stress", | |
86 "text-align", | |
87 "text-decoration", | |
88 "text-indent", | |
89 "unicode-bidi", | |
90 "vertical-align", | |
91 "voice-family", | |
92 "volume", | |
93 "white-space", | |
94 "width", | |
95 ) | |
96 | |
97 SAFE_ATTRS = html.defs.safe_attrs.union(("style", "poster", "controls")) | |
98 STYLES_VALUES_REGEX = ( | |
99 r"^(" | |
100 + "|".join( | |
101 [ | |
102 "([a-z-]+)", # alphabetical names | |
103 "(#[0-9a-f]+)", # hex value | |
104 "(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))", # values with units (or not) | |
105 "rgb\( *((\d+(.\d+)?), *){2}(\d+(.\d+)?) *\)", # rgb function | |
106 "rgba\( *((\d+(.\d+)?), *){3}(\d+(.\d+)?) *\)", # rgba function | |
107 ] | |
108 ) | |
109 + ") *(!important)?$" | |
110 ) # we accept "!important" at the end | |
53 STYLES_ACCEPTED_VALUE = re.compile(STYLES_VALUES_REGEX) | 111 STYLES_ACCEPTED_VALUE = re.compile(STYLES_VALUES_REGEX) |
54 | 112 |
55 PLUGIN_INFO = { | 113 PLUGIN_INFO = { |
56 C.PI_NAME: "Text syntaxes", | 114 C.PI_NAME: "Text syntaxes", |
57 C.PI_IMPORT_NAME: "TEXT-SYNTAXES", | 115 C.PI_IMPORT_NAME: "TEXT-SYNTAXES", |
58 C.PI_TYPE: "MISC", | 116 C.PI_TYPE: "MISC", |
59 C.PI_PROTOCOLS: [], | 117 C.PI_PROTOCOLS: [], |
60 C.PI_DEPENDENCIES: [], | 118 C.PI_DEPENDENCIES: [], |
61 C.PI_MAIN: "TextSyntaxes", | 119 C.PI_MAIN: "TextSyntaxes", |
62 C.PI_HANDLER: "no", | 120 C.PI_HANDLER: "no", |
63 C.PI_DESCRIPTION: _("""Management of various text syntaxes (XHTML-IM, Markdown, etc)""") | 121 C.PI_DESCRIPTION: _( |
122 """Management of various text syntaxes (XHTML-IM, Markdown, etc)""" | |
123 ), | |
64 } | 124 } |
65 | 125 |
66 | 126 |
67 class TextSyntaxes(object): | 127 class TextSyntaxes(object): |
68 """ Text conversion class | 128 """ Text conversion class |
89 </individual> | 149 </individual> |
90 </params> | 150 </params> |
91 """ | 151 """ |
92 | 152 |
93 params_data = { | 153 params_data = { |
94 'category_name': CATEGORY, | 154 "category_name": CATEGORY, |
95 'category_label': _(CATEGORY), | 155 "category_label": _(CATEGORY), |
96 'name': NAME, | 156 "name": NAME, |
97 'label': _(NAME), | 157 "label": _(NAME), |
98 'syntaxes': syntaxes, | 158 "syntaxes": syntaxes, |
99 } | 159 } |
100 | 160 |
101 def __init__(self, host): | 161 def __init__(self, host): |
102 log.info(_("Text syntaxes plugin initialization")) | 162 log.info(_("Text syntaxes plugin initialization")) |
103 self.host = host | 163 self.host = host |
104 self.addSyntax(self.SYNTAX_XHTML, lambda xhtml: defer.succeed(xhtml), lambda xhtml: defer.succeed(xhtml), | 164 self.addSyntax( |
105 TextSyntaxes.OPT_NO_THREAD) | 165 self.SYNTAX_XHTML, |
166 lambda xhtml: defer.succeed(xhtml), | |
167 lambda xhtml: defer.succeed(xhtml), | |
168 TextSyntaxes.OPT_NO_THREAD, | |
169 ) | |
106 # TODO: text => XHTML should add <a/> to url like in frontends | 170 # TODO: text => XHTML should add <a/> to url like in frontends |
107 # it's probably best to move sat_frontends.tools.strings to sat.tools.common or similar | 171 # it's probably best to move sat_frontends.tools.strings to sat.tools.common or similar |
108 self.addSyntax(self.SYNTAX_TEXT, lambda text: escape(text), lambda xhtml: self._removeMarkups(xhtml), [TextSyntaxes.OPT_HIDDEN]) | 172 self.addSyntax( |
173 self.SYNTAX_TEXT, | |
174 lambda text: escape(text), | |
175 lambda xhtml: self._removeMarkups(xhtml), | |
176 [TextSyntaxes.OPT_HIDDEN], | |
177 ) | |
109 try: | 178 try: |
110 import markdown, html2text | 179 import markdown, html2text |
111 | 180 |
112 def _html2text(html, baseurl=''): | 181 def _html2text(html, baseurl=""): |
113 h = html2text.HTML2Text(baseurl=baseurl) | 182 h = html2text.HTML2Text(baseurl=baseurl) |
114 h.body_width = 0 # do not truncate the lines, it breaks the long URLs | 183 h.body_width = 0 # do not truncate the lines, it breaks the long URLs |
115 return h.handle(html) | 184 return h.handle(html) |
116 self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, _html2text, [TextSyntaxes.OPT_DEFAULT]) | 185 |
186 self.addSyntax( | |
187 self.SYNTAX_MARKDOWN, | |
188 markdown.markdown, | |
189 _html2text, | |
190 [TextSyntaxes.OPT_DEFAULT], | |
191 ) | |
117 except ImportError: | 192 except ImportError: |
118 log.warning(u"markdown or html2text not found, can't use Markdown syntax") | 193 log.warning(u"markdown or html2text not found, can't use Markdown syntax") |
119 log.info(u"You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/") | 194 log.info( |
120 host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssbs', out_sign='s', | 195 u"You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/" |
121 async=True, method=self.convert) | 196 ) |
122 host.bridge.addMethod("syntaxGet", ".plugin", in_sign='s', out_sign='s', | 197 host.bridge.addMethod( |
123 method=self.getSyntax) | 198 "syntaxConvert", |
199 ".plugin", | |
200 in_sign="sssbs", | |
201 out_sign="s", | |
202 async=True, | |
203 method=self.convert, | |
204 ) | |
205 host.bridge.addMethod( | |
206 "syntaxGet", ".plugin", in_sign="s", out_sign="s", method=self.getSyntax | |
207 ) | |
124 | 208 |
125 def _updateParamOptions(self): | 209 def _updateParamOptions(self): |
126 data_synt = TextSyntaxes.syntaxes | 210 data_synt = TextSyntaxes.syntaxes |
127 default_synt = TextSyntaxes.default_syntax | 211 default_synt = TextSyntaxes.default_syntax |
128 syntaxes = [] | 212 syntaxes = [] |
134 | 218 |
135 syntaxes.sort(key=lambda synt: synt.lower()) | 219 syntaxes.sort(key=lambda synt: synt.lower()) |
136 options = [] | 220 options = [] |
137 | 221 |
138 for syntax in syntaxes: | 222 for syntax in syntaxes: |
139 selected = 'selected="true"' if syntax == default_synt else '' | 223 selected = 'selected="true"' if syntax == default_synt else "" |
140 options.append(u'<option value="%s" %s/>' % (syntax, selected)) | 224 options.append(u'<option value="%s" %s/>' % (syntax, selected)) |
141 | 225 |
142 TextSyntaxes.params_data["options"] = u'\n'.join(options) | 226 TextSyntaxes.params_data["options"] = u"\n".join(options) |
143 self.host.memory.updateParams(TextSyntaxes.params % TextSyntaxes.params_data) | 227 self.host.memory.updateParams(TextSyntaxes.params % TextSyntaxes.params_data) |
144 | 228 |
145 def getCurrentSyntax(self, profile): | 229 def getCurrentSyntax(self, profile): |
146 """ Return the selected syntax for the given profile | 230 """ Return the selected syntax for the given profile |
147 | 231 |
148 @param profile: %(doc_profile)s | 232 @param profile: %(doc_profile)s |
149 @return: profile selected syntax | 233 @return: profile selected syntax |
150 """ | 234 """ |
151 return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile) | 235 return self.host.memory.getParamA(NAME, CATEGORY, profile_key=profile) |
152 | 236 |
153 def _logError(self, failure, action=u"converting syntax"): | 237 def _logError(self, failure, action=u"converting syntax"): |
154 log.error(u"Error while {action}: {failure}".format(action=action, failure=failure)) | 238 log.error( |
239 u"Error while {action}: {failure}".format(action=action, failure=failure) | |
240 ) | |
155 return failure | 241 return failure |
156 | 242 |
157 def cleanXHTML(self, xhtml): | 243 def cleanXHTML(self, xhtml): |
158 """ Clean XHTML text by removing potentially dangerous/malicious parts | 244 """ Clean XHTML text by removing potentially dangerous/malicious parts |
159 @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) | 245 @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) |
160 """ | 246 """ |
247 | |
161 def blocking_cleaning(xhtml): | 248 def blocking_cleaning(xhtml): |
162 """ Clean XHTML and style attributes """ | 249 """ Clean XHTML and style attributes """ |
163 | 250 |
164 def clean_style(styles_raw): | 251 def clean_style(styles_raw): |
165 """" Remove styles not in the whitelist, | 252 """" Remove styles not in the whitelist, |
166 or where the value doesn't match the regex """ | 253 or where the value doesn't match the regex """ |
167 styles = styles_raw.split(";") | 254 styles = styles_raw.split(";") |
168 cleaned_styles = [] | 255 cleaned_styles = [] |
169 for style in styles: | 256 for style in styles: |
170 try: | 257 try: |
171 key, value = style.split(':') | 258 key, value = style.split(":") |
172 except ValueError: | 259 except ValueError: |
173 continue | 260 continue |
174 key = key.lower().strip() | 261 key = key.lower().strip() |
175 if key not in STYLES_WHITELIST: | 262 if key not in STYLES_WHITELIST: |
176 continue | 263 continue |
178 if not STYLES_ACCEPTED_VALUE.match(value): | 265 if not STYLES_ACCEPTED_VALUE.match(value): |
179 continue | 266 continue |
180 if value == "none": | 267 if value == "none": |
181 continue | 268 continue |
182 cleaned_styles.append((key, value)) | 269 cleaned_styles.append((key, value)) |
183 return "; ".join(["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]) | 270 return "; ".join( |
271 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] | |
272 ) | |
184 | 273 |
185 if isinstance(xhtml, basestring): | 274 if isinstance(xhtml, basestring): |
186 xhtml_elt = html.fromstring(xhtml) | 275 xhtml_elt = html.fromstring(xhtml) |
187 elif isinstance(xhtml, html.HtmlElement): | 276 elif isinstance(xhtml, html.HtmlElement): |
188 xhtml_elt = xhtml | 277 xhtml_elt = xhtml |
189 else: | 278 else: |
190 log.error("Only strings and HtmlElements can be cleaned") | 279 log.error("Only strings and HtmlElements can be cleaned") |
191 raise exceptions.DataError | 280 raise exceptions.DataError |
192 cleaner = clean.Cleaner(style=False, | 281 cleaner = clean.Cleaner( |
193 add_nofollow=False, | 282 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS |
194 safe_attrs=SAFE_ATTRS) | 283 ) |
195 xhtml_elt = cleaner.clean_html(xhtml_elt) | 284 xhtml_elt = cleaner.clean_html(xhtml_elt) |
196 for elt in xhtml_elt.xpath("//*[@style]"): | 285 for elt in xhtml_elt.xpath("//*[@style]"): |
197 elt.set("style", clean_style(elt.get('style'))) | 286 elt.set("style", clean_style(elt.get("style"))) |
198 return html.tostring(xhtml_elt, encoding=unicode, method='xml') | 287 return html.tostring(xhtml_elt, encoding=unicode, method="xml") |
199 | 288 |
200 d = deferToThread(blocking_cleaning, xhtml) | 289 d = deferToThread(blocking_cleaning, xhtml) |
201 d.addErrback(self._logError, action=u"cleaning syntax") | 290 d.addErrback(self._logError, action=u"cleaning syntax") |
202 return d | 291 return d |
203 | 292 |
204 def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None): | 293 def convert( |
294 self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None | |
295 ): | |
205 """Convert a text between two syntaxes | 296 """Convert a text between two syntaxes |
206 | 297 |
207 @param text: text to convert | 298 @param text: text to convert |
208 @param syntax_from: source syntax (e.g. "markdown") | 299 @param syntax_from: source syntax (e.g. "markdown") |
209 @param syntax_to: dest syntax (e.g.: "XHTML") | 300 @param syntax_to: dest syntax (e.g.: "XHTML") |
233 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_from]["flags"]: | 324 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_from]["flags"]: |
234 d = defer.maybeDeferred(syntaxes[syntax_from]["to"], text) | 325 d = defer.maybeDeferred(syntaxes[syntax_from]["to"], text) |
235 else: | 326 else: |
236 d = deferToThread(syntaxes[syntax_from]["to"], text) | 327 d = deferToThread(syntaxes[syntax_from]["to"], text) |
237 | 328 |
238 #TODO: keep only body element and change it to a div here ? | 329 # TODO: keep only body element and change it to a div here ? |
239 | 330 |
240 if safe: | 331 if safe: |
241 d.addCallback(self.cleanXHTML) | 332 d.addCallback(self.cleanXHTML) |
242 | 333 |
243 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]: | 334 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]: |
247 | 338 |
248 # converters can add new lines that disturb the microblog change detection | 339 # converters can add new lines that disturb the microblog change detection |
249 d.addCallback(lambda text: text.rstrip()) | 340 d.addCallback(lambda text: text.rstrip()) |
250 return d | 341 return d |
251 | 342 |
252 def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags = None): | 343 def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags=None): |
253 """Add a new syntax to the manager | 344 """Add a new syntax to the manager |
254 | 345 |
255 @param name: unique name of the syntax | 346 @param name: unique name of the syntax |
256 @param to_xhtml_cb: callback to convert from syntax to XHTML | 347 @param to_xhtml_cb: callback to convert from syntax to XHTML |
257 @param from_xhtml_cb: callback to convert from XHTML to syntax | 348 @param from_xhtml_cb: callback to convert from XHTML to syntax |
260 TextSyntaxes.OPT_HIDDEN: do not show in parameters | 351 TextSyntaxes.OPT_HIDDEN: do not show in parameters |
261 TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback may then return a deferred) | 352 TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback may then return a deferred) |
262 """ | 353 """ |
263 flags = flags if flags is not None else [] | 354 flags = flags if flags is not None else [] |
264 if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags: | 355 if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags: |
265 raise ValueError(u"{} and {} are mutually exclusive".format(TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT)) | 356 raise ValueError( |
357 u"{} and {} are mutually exclusive".format( | |
358 TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT | |
359 ) | |
360 ) | |
266 | 361 |
267 syntaxes = TextSyntaxes.syntaxes | 362 syntaxes = TextSyntaxes.syntaxes |
268 key = name.lower().strip() | 363 key = name.lower().strip() |
269 if key in syntaxes: | 364 if key in syntaxes: |
270 raise exceptions.ConflictError(u"This syntax key already exists: {}".format(key)) | 365 raise exceptions.ConflictError( |
271 syntaxes[key] = {"name": name, "to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags} | 366 u"This syntax key already exists: {}".format(key) |
367 ) | |
368 syntaxes[key] = { | |
369 "name": name, | |
370 "to": to_xhtml_cb, | |
371 "from": from_xhtml_cb, | |
372 "flags": flags, | |
373 } | |
272 if TextSyntaxes.OPT_DEFAULT in flags: | 374 if TextSyntaxes.OPT_DEFAULT in flags: |
273 TextSyntaxes.default_syntaxe = key | 375 TextSyntaxes.default_syntaxe = key |
274 | 376 |
275 self._updateParamOptions() | 377 self._updateParamOptions() |
276 | 378 |
288 """Remove XHTML markups from the given string. | 390 """Remove XHTML markups from the given string. |
289 | 391 |
290 @param xhtml: the XHTML string to be cleaned | 392 @param xhtml: the XHTML string to be cleaned |
291 @return: the cleaned string | 393 @return: the cleaned string |
292 """ | 394 """ |
293 cleaner = clean.Cleaner(kill_tags=['style']) | 395 cleaner = clean.Cleaner(kill_tags=["style"]) |
294 cleaned = cleaner.clean_html(html.fromstring(xhtml)) | 396 cleaned = cleaner.clean_html(html.fromstring(xhtml)) |
295 return html.tostring(cleaned, encoding=unicode, method="text") | 397 return html.tostring(cleaned, encoding=unicode, method="text") |