Mercurial > libervia-backend
comparison sat/plugins/plugin_misc_text_syntaxes.py @ 3709:09f5ac48ffe3
merge bookmark @
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 12 Nov 2021 17:21:24 +0100 |
parents | f568f304c982 0bbdc50aa405 |
children | 33d75cd3c371 |
comparison
equal
deleted
inserted
replaced
3684:8353cc3b8db9 | 3709:09f5ac48ffe3 |
---|---|
15 # GNU Affero General Public License for more details. | 15 # GNU Affero General Public License for more details. |
16 | 16 |
17 # You should have received a copy of the GNU Affero General Public License | 17 # You should have received a copy of the GNU Affero General Public License |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 |
20 from functools import partial | |
21 from html import escape | |
20 import re | 22 import re |
21 from html import escape | 23 from typing import Set |
22 from functools import partial | |
23 from sat.core.i18n import _, D_ | |
24 from sat.core.constants import Const as C | |
25 from sat.core.log import getLogger | |
26 | 24 |
27 from twisted.internet import defer | 25 from twisted.internet import defer |
28 from twisted.internet.threads import deferToThread | 26 from twisted.internet.threads import deferToThread |
27 | |
29 from sat.core import exceptions | 28 from sat.core import exceptions |
29 from sat.core.constants import Const as C | |
30 from sat.core.i18n import D_, _ | |
31 from sat.core.log import getLogger | |
30 from sat.tools import xml_tools | 32 from sat.tools import xml_tools |
31 | 33 |
32 try: | 34 try: |
33 from lxml import html | 35 from lxml import html |
34 from lxml.html import clean | 36 from lxml.html import clean |
113 "param", | 115 "param", |
114 "source", | 116 "source", |
115 "track", | 117 "track", |
116 "wbr") | 118 "wbr") |
117 | 119 |
118 SAFE_ATTRS = html.defs.safe_attrs.union(("style", "poster", "controls")) | 120 SAFE_ATTRS = html.defs.safe_attrs.union({"style", "poster", "controls"}) - {"id"} |
121 SAFE_CLASSES = { | |
122 # those classes are used for code highlighting | |
123 "bp", "c", "ch", "cm", "cp", "cpf", "cs", "dl", "err", "fm", "gd", "ge", "get", "gh", | |
124 "gi", "go", "gp", "gr", "gs", "gt", "gu", "highlight", "hll", "il", "k", "kc", "kd", | |
125 "kn", "kp", "kr", "kt", "m", "mb", "mf", "mh", "mi", "mo", "na", "nb", "nc", "nd", | |
126 "ne", "nf", "ni", "nl", "nn", "no", "nt", "nv", "o", "ow", "s", "sa", "sb", "sc", | |
127 "sd", "se", "sh", "si", "sr", "ss", "sx", "vc", "vg", "vi", "vm", "w", "write", | |
128 } | |
119 STYLES_VALUES_REGEX = ( | 129 STYLES_VALUES_REGEX = ( |
120 r"^(" | 130 r"^(" |
121 + "|".join( | 131 + "|".join( |
122 [ | 132 [ |
123 "([a-z-]+)", # alphabetical names | 133 "([a-z-]+)", # alphabetical names |
235 [TextSyntaxes.OPT_DEFAULT], | 245 [TextSyntaxes.OPT_DEFAULT], |
236 ) | 246 ) |
237 except ImportError: | 247 except ImportError: |
238 log.warning("markdown or html2text not found, can't use Markdown syntax") | 248 log.warning("markdown or html2text not found, can't use Markdown syntax") |
239 log.info( | 249 log.info( |
240 "You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/" | 250 "You can download/install them from https://pythonhosted.org/Markdown/ " |
251 "and https://github.com/Alir3z4/html2text/" | |
241 ) | 252 ) |
242 host.bridge.addMethod( | 253 host.bridge.addMethod( |
243 "syntaxConvert", | 254 "syntaxConvert", |
244 ".plugin", | 255 ".plugin", |
245 in_sign="sssbs", | 256 in_sign="sssbs", |
286 log.error( | 297 log.error( |
287 "Error while {action}: {failure}".format(action=action, failure=failure) | 298 "Error while {action}: {failure}".format(action=action, failure=failure) |
288 ) | 299 ) |
289 return failure | 300 return failure |
290 | 301 |
291 def cleanStyle(self, styles): | 302 def cleanStyle(self, styles_raw: str) -> str: |
292 """"Clean unsafe CSS styles | 303 """"Clean unsafe CSS styles |
293 | 304 |
294 Remove styles not in the whitelist, or where the value doesn't match the regex | 305 Remove styles not in the whitelist, or where the value doesn't match the regex |
295 @param styles_raw(unicode): CSS styles | 306 @param styles_raw: CSS styles |
296 @return (unicode): cleaned styles | 307 @return: cleaned styles |
297 """ | 308 """ |
298 styles = styles.split(";") | 309 styles: List[str] = styles_raw.split(";") |
299 cleaned_styles = [] | 310 cleaned_styles = [] |
300 for style in styles: | 311 for style in styles: |
301 try: | 312 try: |
302 key, value = style.split(":") | 313 key, value = style.split(":") |
303 except ValueError: | 314 except ValueError: |
313 cleaned_styles.append((key, value)) | 324 cleaned_styles.append((key, value)) |
314 return "; ".join( | 325 return "; ".join( |
315 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] | 326 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] |
316 ) | 327 ) |
317 | 328 |
329 def cleanClasses(self, classes_raw: str) -> str: | |
330 """Remove any non whitelisted class | |
331 | |
332 @param classes_raw: classes set on an element | |
333 @return: remaining classes (can be empty string) | |
334 """ | |
335 return " ".join(SAFE_CLASSES.intersection(classes_raw.split())) | |
336 | |
318 def cleanXHTML(self, xhtml): | 337 def cleanXHTML(self, xhtml): |
319 """Clean XHTML text by removing potentially dangerous/malicious parts | 338 """Clean XHTML text by removing potentially dangerous/malicious parts |
320 | 339 |
321 @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean | 340 @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean |
322 @return (unicode): cleaned XHTML | 341 @return (unicode): cleaned XHTML |
339 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS | 358 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS |
340 ) | 359 ) |
341 xhtml_elt = cleaner.clean_html(xhtml_elt) | 360 xhtml_elt = cleaner.clean_html(xhtml_elt) |
342 for elt in xhtml_elt.xpath("//*[@style]"): | 361 for elt in xhtml_elt.xpath("//*[@style]"): |
343 elt.set("style", self.cleanStyle(elt.get("style"))) | 362 elt.set("style", self.cleanStyle(elt.get("style"))) |
363 for elt in xhtml_elt.xpath("//*[@class]"): | |
364 elt.set("class", self.cleanClasses(elt.get("class"))) | |
344 # we remove self-closing elements for non-void elements | 365 # we remove self-closing elements for non-void elements |
345 for element in xhtml_elt.iter(tag=etree.Element): | 366 for element in xhtml_elt.iter(tag=etree.Element): |
346 if not element.text: | 367 if not element.text: |
347 if element.tag in VOID_ELEMENTS: | 368 if element.tag in VOID_ELEMENTS: |
348 element.text = None | 369 element.text = None |