comparison libervia/backend/plugins/plugin_misc_text_syntaxes.py @ 4270:0d7bb4df2343

Reformatted code base using black.
author Goffi <goffi@goffi.org>
date Wed, 19 Jun 2024 18:44:57 +0200
parents ba28ca268f4a
children
comparison
equal deleted inserted replaced
4269:64a85ce8be70 4270:0d7bb4df2343
113 "menuitem", 113 "menuitem",
114 "meta", 114 "meta",
115 "param", 115 "param",
116 "source", 116 "source",
117 "track", 117 "track",
118 "wbr") 118 "wbr",
119 )
119 120
120 SAFE_ATTRS = html.defs.safe_attrs.union({"style", "poster", "controls"}) - {"id"} 121 SAFE_ATTRS = html.defs.safe_attrs.union({"style", "poster", "controls"}) - {"id"}
121 SAFE_CLASSES = { 122 SAFE_CLASSES = {
122 # those classes are used for code highlighting 123 # those classes are used for code highlighting
123 "bp", "c", "ch", "cm", "cp", "cpf", "cs", "dl", "err", "fm", "gd", "ge", "get", "gh", 124 "bp",
124 "gi", "go", "gp", "gr", "gs", "gt", "gu", "highlight", "hll", "il", "k", "kc", "kd", 125 "c",
125 "kn", "kp", "kr", "kt", "m", "mb", "mf", "mh", "mi", "mo", "na", "nb", "nc", "nd", 126 "ch",
126 "ne", "nf", "ni", "nl", "nn", "no", "nt", "nv", "o", "ow", "s", "sa", "sb", "sc", 127 "cm",
127 "sd", "se", "sh", "si", "sr", "ss", "sx", "vc", "vg", "vi", "vm", "w", "write", 128 "cp",
129 "cpf",
130 "cs",
131 "dl",
132 "err",
133 "fm",
134 "gd",
135 "ge",
136 "get",
137 "gh",
138 "gi",
139 "go",
140 "gp",
141 "gr",
142 "gs",
143 "gt",
144 "gu",
145 "highlight",
146 "hll",
147 "il",
148 "k",
149 "kc",
150 "kd",
151 "kn",
152 "kp",
153 "kr",
154 "kt",
155 "m",
156 "mb",
157 "mf",
158 "mh",
159 "mi",
160 "mo",
161 "na",
162 "nb",
163 "nc",
164 "nd",
165 "ne",
166 "nf",
167 "ni",
168 "nl",
169 "nn",
170 "no",
171 "nt",
172 "nv",
173 "o",
174 "ow",
175 "s",
176 "sa",
177 "sb",
178 "sc",
179 "sd",
180 "se",
181 "sh",
182 "si",
183 "sr",
184 "ss",
185 "sx",
186 "vc",
187 "vg",
188 "vi",
189 "vm",
190 "w",
191 "write",
128 } 192 }
129 STYLES_VALUES_REGEX = ( 193 STYLES_VALUES_REGEX = (
130 r"^(" 194 r"^("
131 + "|".join( 195 + "|".join(
132 [ 196 [
155 ), 219 ),
156 } 220 }
157 221
158 222
159 class TextSyntaxes(object): 223 class TextSyntaxes(object):
160 """ Text conversion class 224 """Text conversion class
161 XHTML utf-8 is used as intermediate language for conversions 225 XHTML utf-8 is used as intermediate language for conversions
162 """ 226 """
163 227
164 OPT_DEFAULT = "DEFAULT" 228 OPT_DEFAULT = "DEFAULT"
165 OPT_HIDDEN = "HIDDEN" 229 OPT_HIDDEN = "HIDDEN"
167 SYNTAX_XHTML = _SYNTAX_XHTML 231 SYNTAX_XHTML = _SYNTAX_XHTML
168 SYNTAX_MARKDOWN = "markdown" 232 SYNTAX_MARKDOWN = "markdown"
169 SYNTAX_TEXT = "text" 233 SYNTAX_TEXT = "text"
170 # default_syntax must be lower case 234 # default_syntax must be lower case
171 default_syntax = SYNTAX_XHTML 235 default_syntax = SYNTAX_XHTML
172
173 236
174 def __init__(self, host): 237 def __init__(self, host):
175 log.info(_("Text syntaxes plugin initialization")) 238 log.info(_("Text syntaxes plugin initialization"))
176 self.host = host 239 self.host = host
177 self.syntaxes = {} 240 self.syntaxes = {}
216 279
217 # XXX: we disable raw HTML parsing by default, to avoid parsing error 280 # XXX: we disable raw HTML parsing by default, to avoid parsing error
218 # when the user is not aware of markdown and HTML 281 # when the user is not aware of markdown and HTML
219 class EscapeHTML(Extension): 282 class EscapeHTML(Extension):
220 def extendMarkdown(self, md): 283 def extendMarkdown(self, md):
221 md.preprocessors.deregister('html_block') 284 md.preprocessors.deregister("html_block")
222 md.inlinePatterns.deregister('html') 285 md.inlinePatterns.deregister("html")
223 286
224 def _html2text(html, baseurl=""): 287 def _html2text(html, baseurl=""):
225 h = html2text.HTML2Text(baseurl=baseurl) 288 h = html2text.HTML2Text(baseurl=baseurl)
226 h.body_width = 0 # do not truncate the lines, it breaks the long URLs 289 h.body_width = 0 # do not truncate the lines, it breaks the long URLs
227 return h.handle(html) 290 return h.handle(html)
228 291
229 self.add_syntax( 292 self.add_syntax(
230 self.SYNTAX_MARKDOWN, 293 self.SYNTAX_MARKDOWN,
231 partial(markdown.markdown, 294 partial(
232 extensions=[ 295 markdown.markdown,
233 EscapeHTML(), 296 extensions=[
234 'nl2br', 297 EscapeHTML(),
235 'codehilite', 298 "nl2br",
236 'fenced_code', 299 "codehilite",
237 'sane_lists', 300 "fenced_code",
238 'tables', 301 "sane_lists",
239 ], 302 "tables",
240 extension_configs = { 303 ],
241 "codehilite": { 304 extension_configs={
242 "css_class": "highlight", 305 "codehilite": {
243 } 306 "css_class": "highlight",
244 }), 307 }
308 },
309 ),
245 _html2text, 310 _html2text,
246 [TextSyntaxes.OPT_DEFAULT], 311 [TextSyntaxes.OPT_DEFAULT],
247 ) 312 )
248 except ImportError: 313 except ImportError:
249 log.warning("markdown or html2text not found, can't use Markdown syntax") 314 log.warning("markdown or html2text not found, can't use Markdown syntax")
285 350
286 self.params_data["options"] = "\n".join(options) 351 self.params_data["options"] = "\n".join(options)
287 self.host.memory.update_params(self.params % self.params_data) 352 self.host.memory.update_params(self.params % self.params_data)
288 353
289 def get_current_syntax(self, profile): 354 def get_current_syntax(self, profile):
290 """ Return the selected syntax for the given profile 355 """Return the selected syntax for the given profile
291 356
292 @param profile: %(doc_profile)s 357 @param profile: %(doc_profile)s
293 @return: profile selected syntax 358 @return: profile selected syntax
294 """ 359 """
295 return self.host.memory.param_get_a(NAME, CATEGORY, profile_key=profile) 360 return self.host.memory.param_get_a(NAME, CATEGORY, profile_key=profile)
299 "Error while {action}: {failure}".format(action=action, failure=failure) 364 "Error while {action}: {failure}".format(action=action, failure=failure)
300 ) 365 )
301 return failure 366 return failure
302 367
303 def clean_style(self, styles_raw: str) -> str: 368 def clean_style(self, styles_raw: str) -> str:
304 """"Clean unsafe CSS styles 369 """ "Clean unsafe CSS styles
305 370
306 Remove styles not in the whitelist, or where the value doesn't match the regex 371 Remove styles not in the whitelist, or where the value doesn't match the regex
307 @param styles_raw: CSS styles 372 @param styles_raw: CSS styles
308 @return: cleaned styles 373 @return: cleaned styles
309 """ 374 """
321 if not STYLES_ACCEPTED_VALUE.match(value): 386 if not STYLES_ACCEPTED_VALUE.match(value):
322 continue 387 continue
323 if value == "none": 388 if value == "none":
324 continue 389 continue
325 cleaned_styles.append((key, value)) 390 cleaned_styles.append((key, value))
326 return "; ".join( 391 return "; ".join(["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles])
327 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]
328 )
329 392
330 def clean_classes(self, classes_raw: str) -> str: 393 def clean_classes(self, classes_raw: str) -> str:
331 """Remove any non whitelisted class 394 """Remove any non whitelisted class
332 395
333 @param classes_raw: classes set on an element 396 @param classes_raw: classes set on an element
353 elif isinstance(xhtml, html.HtmlElement): 416 elif isinstance(xhtml, html.HtmlElement):
354 xhtml_elt = xhtml 417 xhtml_elt = xhtml
355 else: 418 else:
356 log.error("Only strings and HtmlElements can be cleaned") 419 log.error("Only strings and HtmlElements can be cleaned")
357 raise exceptions.DataError 420 raise exceptions.DataError
358 cleaner = clean.Cleaner( 421 cleaner = clean.Cleaner(style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS)
359 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS
360 )
361 xhtml_elt = cleaner.clean_html(xhtml_elt) 422 xhtml_elt = cleaner.clean_html(xhtml_elt)
362 for elt in xhtml_elt.xpath("//*[@style]"): 423 for elt in xhtml_elt.xpath("//*[@style]"):
363 elt.set("style", self.clean_style(elt.get("style"))) 424 elt.set("style", self.clean_style(elt.get("style")))
364 for elt in xhtml_elt.xpath("//*[@class]"): 425 for elt in xhtml_elt.xpath("//*[@class]"):
365 elt.set("class", self.clean_classes(elt.get("class"))) 426 elt.set("class", self.clean_classes(elt.get("class")))
367 for element in xhtml_elt.iter(tag=etree.Element): 428 for element in xhtml_elt.iter(tag=etree.Element):
368 if not element.text: 429 if not element.text:
369 if element.tag in VOID_ELEMENTS: 430 if element.tag in VOID_ELEMENTS:
370 element.text = None 431 element.text = None
371 else: 432 else:
372 element.text = '' 433 element.text = ""
373 return html.tostring(xhtml_elt, encoding=str, method="xml") 434 return html.tostring(xhtml_elt, encoding=str, method="xml")
374 435
375 def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, 436 def convert(
376 profile=None): 437 self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None
438 ):
377 """Convert a text between two syntaxes 439 """Convert a text between two syntaxes
378 440
379 @param text: text to convert 441 @param text: text to convert
380 @param syntax_from: source syntax (e.g. "markdown") 442 @param syntax_from: source syntax (e.g. "markdown")
381 @param syntax_to: dest syntax (e.g.: "XHTML") 443 @param syntax_to: dest syntax (e.g.: "XHTML")