Mercurial > libervia-backend
comparison libervia/backend/plugins/plugin_misc_text_syntaxes.py @ 4270:0d7bb4df2343
Reformatted code base using black.
author | Goffi <goffi@goffi.org> |
---|---|
date | Wed, 19 Jun 2024 18:44:57 +0200 |
parents | ba28ca268f4a |
children | 45662662a432 |
comparison
equal
deleted
inserted
replaced
4269:64a85ce8be70 | 4270:0d7bb4df2343 |
---|---|
113 "menuitem", | 113 "menuitem", |
114 "meta", | 114 "meta", |
115 "param", | 115 "param", |
116 "source", | 116 "source", |
117 "track", | 117 "track", |
118 "wbr") | 118 "wbr", |
119 ) | |
119 | 120 |
120 SAFE_ATTRS = html.defs.safe_attrs.union({"style", "poster", "controls"}) - {"id"} | 121 SAFE_ATTRS = html.defs.safe_attrs.union({"style", "poster", "controls"}) - {"id"} |
121 SAFE_CLASSES = { | 122 SAFE_CLASSES = { |
122 # those classes are used for code highlighting | 123 # those classes are used for code highlighting |
123 "bp", "c", "ch", "cm", "cp", "cpf", "cs", "dl", "err", "fm", "gd", "ge", "get", "gh", | 124 "bp", |
124 "gi", "go", "gp", "gr", "gs", "gt", "gu", "highlight", "hll", "il", "k", "kc", "kd", | 125 "c", |
125 "kn", "kp", "kr", "kt", "m", "mb", "mf", "mh", "mi", "mo", "na", "nb", "nc", "nd", | 126 "ch", |
126 "ne", "nf", "ni", "nl", "nn", "no", "nt", "nv", "o", "ow", "s", "sa", "sb", "sc", | 127 "cm", |
127 "sd", "se", "sh", "si", "sr", "ss", "sx", "vc", "vg", "vi", "vm", "w", "write", | 128 "cp", |
129 "cpf", | |
130 "cs", | |
131 "dl", | |
132 "err", | |
133 "fm", | |
134 "gd", | |
135 "ge", | |
136 "get", | |
137 "gh", | |
138 "gi", | |
139 "go", | |
140 "gp", | |
141 "gr", | |
142 "gs", | |
143 "gt", | |
144 "gu", | |
145 "highlight", | |
146 "hll", | |
147 "il", | |
148 "k", | |
149 "kc", | |
150 "kd", | |
151 "kn", | |
152 "kp", | |
153 "kr", | |
154 "kt", | |
155 "m", | |
156 "mb", | |
157 "mf", | |
158 "mh", | |
159 "mi", | |
160 "mo", | |
161 "na", | |
162 "nb", | |
163 "nc", | |
164 "nd", | |
165 "ne", | |
166 "nf", | |
167 "ni", | |
168 "nl", | |
169 "nn", | |
170 "no", | |
171 "nt", | |
172 "nv", | |
173 "o", | |
174 "ow", | |
175 "s", | |
176 "sa", | |
177 "sb", | |
178 "sc", | |
179 "sd", | |
180 "se", | |
181 "sh", | |
182 "si", | |
183 "sr", | |
184 "ss", | |
185 "sx", | |
186 "vc", | |
187 "vg", | |
188 "vi", | |
189 "vm", | |
190 "w", | |
191 "write", | |
128 } | 192 } |
129 STYLES_VALUES_REGEX = ( | 193 STYLES_VALUES_REGEX = ( |
130 r"^(" | 194 r"^(" |
131 + "|".join( | 195 + "|".join( |
132 [ | 196 [ |
155 ), | 219 ), |
156 } | 220 } |
157 | 221 |
158 | 222 |
159 class TextSyntaxes(object): | 223 class TextSyntaxes(object): |
160 """ Text conversion class | 224 """Text conversion class |
161 XHTML utf-8 is used as intermediate language for conversions | 225 XHTML utf-8 is used as intermediate language for conversions |
162 """ | 226 """ |
163 | 227 |
164 OPT_DEFAULT = "DEFAULT" | 228 OPT_DEFAULT = "DEFAULT" |
165 OPT_HIDDEN = "HIDDEN" | 229 OPT_HIDDEN = "HIDDEN" |
167 SYNTAX_XHTML = _SYNTAX_XHTML | 231 SYNTAX_XHTML = _SYNTAX_XHTML |
168 SYNTAX_MARKDOWN = "markdown" | 232 SYNTAX_MARKDOWN = "markdown" |
169 SYNTAX_TEXT = "text" | 233 SYNTAX_TEXT = "text" |
170 # default_syntax must be lower case | 234 # default_syntax must be lower case |
171 default_syntax = SYNTAX_XHTML | 235 default_syntax = SYNTAX_XHTML |
172 | |
173 | 236 |
174 def __init__(self, host): | 237 def __init__(self, host): |
175 log.info(_("Text syntaxes plugin initialization")) | 238 log.info(_("Text syntaxes plugin initialization")) |
176 self.host = host | 239 self.host = host |
177 self.syntaxes = {} | 240 self.syntaxes = {} |
216 | 279 |
217 # XXX: we disable raw HTML parsing by default, to avoid parsing error | 280 # XXX: we disable raw HTML parsing by default, to avoid parsing error |
218 # when the user is not aware of markdown and HTML | 281 # when the user is not aware of markdown and HTML |
219 class EscapeHTML(Extension): | 282 class EscapeHTML(Extension): |
220 def extendMarkdown(self, md): | 283 def extendMarkdown(self, md): |
221 md.preprocessors.deregister('html_block') | 284 md.preprocessors.deregister("html_block") |
222 md.inlinePatterns.deregister('html') | 285 md.inlinePatterns.deregister("html") |
223 | 286 |
224 def _html2text(html, baseurl=""): | 287 def _html2text(html, baseurl=""): |
225 h = html2text.HTML2Text(baseurl=baseurl) | 288 h = html2text.HTML2Text(baseurl=baseurl) |
226 h.body_width = 0 # do not truncate the lines, it breaks the long URLs | 289 h.body_width = 0 # do not truncate the lines, it breaks the long URLs |
227 return h.handle(html) | 290 return h.handle(html) |
228 | 291 |
229 self.add_syntax( | 292 self.add_syntax( |
230 self.SYNTAX_MARKDOWN, | 293 self.SYNTAX_MARKDOWN, |
231 partial(markdown.markdown, | 294 partial( |
232 extensions=[ | 295 markdown.markdown, |
233 EscapeHTML(), | 296 extensions=[ |
234 'nl2br', | 297 EscapeHTML(), |
235 'codehilite', | 298 "nl2br", |
236 'fenced_code', | 299 "codehilite", |
237 'sane_lists', | 300 "fenced_code", |
238 'tables', | 301 "sane_lists", |
239 ], | 302 "tables", |
240 extension_configs = { | 303 ], |
241 "codehilite": { | 304 extension_configs={ |
242 "css_class": "highlight", | 305 "codehilite": { |
243 } | 306 "css_class": "highlight", |
244 }), | 307 } |
308 }, | |
309 ), | |
245 _html2text, | 310 _html2text, |
246 [TextSyntaxes.OPT_DEFAULT], | 311 [TextSyntaxes.OPT_DEFAULT], |
247 ) | 312 ) |
248 except ImportError: | 313 except ImportError: |
249 log.warning("markdown or html2text not found, can't use Markdown syntax") | 314 log.warning("markdown or html2text not found, can't use Markdown syntax") |
285 | 350 |
286 self.params_data["options"] = "\n".join(options) | 351 self.params_data["options"] = "\n".join(options) |
287 self.host.memory.update_params(self.params % self.params_data) | 352 self.host.memory.update_params(self.params % self.params_data) |
288 | 353 |
289 def get_current_syntax(self, profile): | 354 def get_current_syntax(self, profile): |
290 """ Return the selected syntax for the given profile | 355 """Return the selected syntax for the given profile |
291 | 356 |
292 @param profile: %(doc_profile)s | 357 @param profile: %(doc_profile)s |
293 @return: profile selected syntax | 358 @return: profile selected syntax |
294 """ | 359 """ |
295 return self.host.memory.param_get_a(NAME, CATEGORY, profile_key=profile) | 360 return self.host.memory.param_get_a(NAME, CATEGORY, profile_key=profile) |
299 "Error while {action}: {failure}".format(action=action, failure=failure) | 364 "Error while {action}: {failure}".format(action=action, failure=failure) |
300 ) | 365 ) |
301 return failure | 366 return failure |
302 | 367 |
303 def clean_style(self, styles_raw: str) -> str: | 368 def clean_style(self, styles_raw: str) -> str: |
304 """"Clean unsafe CSS styles | 369 """ "Clean unsafe CSS styles |
305 | 370 |
306 Remove styles not in the whitelist, or where the value doesn't match the regex | 371 Remove styles not in the whitelist, or where the value doesn't match the regex |
307 @param styles_raw: CSS styles | 372 @param styles_raw: CSS styles |
308 @return: cleaned styles | 373 @return: cleaned styles |
309 """ | 374 """ |
321 if not STYLES_ACCEPTED_VALUE.match(value): | 386 if not STYLES_ACCEPTED_VALUE.match(value): |
322 continue | 387 continue |
323 if value == "none": | 388 if value == "none": |
324 continue | 389 continue |
325 cleaned_styles.append((key, value)) | 390 cleaned_styles.append((key, value)) |
326 return "; ".join( | 391 return "; ".join(["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]) |
327 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] | |
328 ) | |
329 | 392 |
330 def clean_classes(self, classes_raw: str) -> str: | 393 def clean_classes(self, classes_raw: str) -> str: |
331 """Remove any non whitelisted class | 394 """Remove any non whitelisted class |
332 | 395 |
333 @param classes_raw: classes set on an element | 396 @param classes_raw: classes set on an element |
353 elif isinstance(xhtml, html.HtmlElement): | 416 elif isinstance(xhtml, html.HtmlElement): |
354 xhtml_elt = xhtml | 417 xhtml_elt = xhtml |
355 else: | 418 else: |
356 log.error("Only strings and HtmlElements can be cleaned") | 419 log.error("Only strings and HtmlElements can be cleaned") |
357 raise exceptions.DataError | 420 raise exceptions.DataError |
358 cleaner = clean.Cleaner( | 421 cleaner = clean.Cleaner(style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS) |
359 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS | |
360 ) | |
361 xhtml_elt = cleaner.clean_html(xhtml_elt) | 422 xhtml_elt = cleaner.clean_html(xhtml_elt) |
362 for elt in xhtml_elt.xpath("//*[@style]"): | 423 for elt in xhtml_elt.xpath("//*[@style]"): |
363 elt.set("style", self.clean_style(elt.get("style"))) | 424 elt.set("style", self.clean_style(elt.get("style"))) |
364 for elt in xhtml_elt.xpath("//*[@class]"): | 425 for elt in xhtml_elt.xpath("//*[@class]"): |
365 elt.set("class", self.clean_classes(elt.get("class"))) | 426 elt.set("class", self.clean_classes(elt.get("class"))) |
367 for element in xhtml_elt.iter(tag=etree.Element): | 428 for element in xhtml_elt.iter(tag=etree.Element): |
368 if not element.text: | 429 if not element.text: |
369 if element.tag in VOID_ELEMENTS: | 430 if element.tag in VOID_ELEMENTS: |
370 element.text = None | 431 element.text = None |
371 else: | 432 else: |
372 element.text = '' | 433 element.text = "" |
373 return html.tostring(xhtml_elt, encoding=str, method="xml") | 434 return html.tostring(xhtml_elt, encoding=str, method="xml") |
374 | 435 |
375 def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, | 436 def convert( |
376 profile=None): | 437 self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None |
438 ): | |
377 """Convert a text between two syntaxes | 439 """Convert a text between two syntaxes |
378 | 440 |
379 @param text: text to convert | 441 @param text: text to convert |
380 @param syntax_from: source syntax (e.g. "markdown") | 442 @param syntax_from: source syntax (e.g. "markdown") |
381 @param syntax_to: dest syntax (e.g.: "XHTML") | 443 @param syntax_to: dest syntax (e.g.: "XHTML") |