Mercurial > libervia-backend
comparison sat/plugins/plugin_misc_text_syntaxes.py @ 3693:0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
for now, the only allowed classes are the ones used for code highlighting.
fix 389
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 21 Oct 2021 11:55:39 +0200 |
parents | be6d91572633 |
children | 09f5ac48ffe3 |
comparison
equal
deleted
inserted
replaced
3692:f35ba048687e | 3693:0bbdc50aa405 |
---|---|
15 # GNU Affero General Public License for more details. | 15 # GNU Affero General Public License for more details. |
16 | 16 |
17 # You should have received a copy of the GNU Affero General Public License | 17 # You should have received a copy of the GNU Affero General Public License |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 |
20 from functools import partial | |
21 from html import escape | |
20 import re | 22 import re |
21 from html import escape | 23 from typing import Set |
22 from functools import partial | |
23 from sat.core.i18n import _, D_ | |
24 from sat.core.constants import Const as C | |
25 from sat.core.log import getLogger | |
26 | 24 |
27 from twisted.internet import defer | 25 from twisted.internet import defer |
28 from twisted.internet.threads import deferToThread | 26 from twisted.internet.threads import deferToThread |
27 | |
29 from sat.core import exceptions | 28 from sat.core import exceptions |
29 from sat.core.constants import Const as C | |
30 from sat.core.i18n import D_, _ | |
31 from sat.core.log import getLogger | |
30 from sat.tools import xml_tools | 32 from sat.tools import xml_tools |
31 | 33 |
32 try: | 34 try: |
33 from lxml import html | 35 from lxml import html |
34 from lxml.html import clean | 36 from lxml.html import clean |
113 "param", | 115 "param", |
114 "source", | 116 "source", |
115 "track", | 117 "track", |
116 "wbr") | 118 "wbr") |
117 | 119 |
118 SAFE_ATTRS = html.defs.safe_attrs.union(("style", "poster", "controls")) | 120 SAFE_ATTRS = html.defs.safe_attrs.union({"style", "poster", "controls"}) - {"id"} |
121 SAFE_CLASSES = { | |
122 # those classes are used for code highlighting | |
123 "bp", "c", "ch", "cm", "cp", "cpf", "cs", "dl", "err", "fm", "gd", "ge", "get", "gh", | |
124 "gi", "go", "gp", "gr", "gs", "gt", "gu", "highlight", "hll", "il", "k", "kc", "kd", | |
125 "kn", "kp", "kr", "kt", "m", "mb", "mf", "mh", "mi", "mo", "na", "nb", "nc", "nd", | |
126 "ne", "nf", "ni", "nl", "nn", "no", "nt", "nv", "o", "ow", "s", "sa", "sb", "sc", | |
127 "sd", "se", "sh", "si", "sr", "ss", "sx", "vc", "vg", "vi", "vm", "w", "write", | |
128 } | |
119 STYLES_VALUES_REGEX = ( | 129 STYLES_VALUES_REGEX = ( |
120 r"^(" | 130 r"^(" |
121 + "|".join( | 131 + "|".join( |
122 [ | 132 [ |
123 "([a-z-]+)", # alphabetical names | 133 "([a-z-]+)", # alphabetical names |
233 [TextSyntaxes.OPT_DEFAULT], | 243 [TextSyntaxes.OPT_DEFAULT], |
234 ) | 244 ) |
235 except ImportError: | 245 except ImportError: |
236 log.warning("markdown or html2text not found, can't use Markdown syntax") | 246 log.warning("markdown or html2text not found, can't use Markdown syntax") |
237 log.info( | 247 log.info( |
238 "You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/" | 248 "You can download/install them from https://pythonhosted.org/Markdown/ " |
249 "and https://github.com/Alir3z4/html2text/" | |
239 ) | 250 ) |
240 host.bridge.addMethod( | 251 host.bridge.addMethod( |
241 "syntaxConvert", | 252 "syntaxConvert", |
242 ".plugin", | 253 ".plugin", |
243 in_sign="sssbs", | 254 in_sign="sssbs", |
284 log.error( | 295 log.error( |
285 "Error while {action}: {failure}".format(action=action, failure=failure) | 296 "Error while {action}: {failure}".format(action=action, failure=failure) |
286 ) | 297 ) |
287 return failure | 298 return failure |
288 | 299 |
289 def cleanStyle(self, styles): | 300 def cleanStyle(self, styles_raw: str) -> str: |
290 """"Clean unsafe CSS styles | 301 """"Clean unsafe CSS styles |
291 | 302 |
292 Remove styles not in the whitelist, or where the value doesn't match the regex | 303 Remove styles not in the whitelist, or where the value doesn't match the regex |
293 @param styles_raw(unicode): CSS styles | 304 @param styles_raw: CSS styles |
294 @return (unicode): cleaned styles | 305 @return: cleaned styles |
295 """ | 306 """ |
296 styles = styles.split(";") | 307 styles: List[str] = styles_raw.split(";") |
297 cleaned_styles = [] | 308 cleaned_styles = [] |
298 for style in styles: | 309 for style in styles: |
299 try: | 310 try: |
300 key, value = style.split(":") | 311 key, value = style.split(":") |
301 except ValueError: | 312 except ValueError: |
311 cleaned_styles.append((key, value)) | 322 cleaned_styles.append((key, value)) |
312 return "; ".join( | 323 return "; ".join( |
313 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] | 324 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] |
314 ) | 325 ) |
315 | 326 |
327 def cleanClasses(self, classes_raw: str) -> str: | |
328 """Remove any non whitelisted class | |
329 | |
330 @param classes_raw: classes set on an element | |
331 @return: remaining classes (can be empty string) | |
332 """ | |
333 return " ".join(SAFE_CLASSES.intersection(classes_raw.split())) | |
334 | |
316 def cleanXHTML(self, xhtml): | 335 def cleanXHTML(self, xhtml): |
317 """Clean XHTML text by removing potentially dangerous/malicious parts | 336 """Clean XHTML text by removing potentially dangerous/malicious parts |
318 | 337 |
319 @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean | 338 @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean |
320 @return (unicode): cleaned XHTML | 339 @return (unicode): cleaned XHTML |
337 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS | 356 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS |
338 ) | 357 ) |
339 xhtml_elt = cleaner.clean_html(xhtml_elt) | 358 xhtml_elt = cleaner.clean_html(xhtml_elt) |
340 for elt in xhtml_elt.xpath("//*[@style]"): | 359 for elt in xhtml_elt.xpath("//*[@style]"): |
341 elt.set("style", self.cleanStyle(elt.get("style"))) | 360 elt.set("style", self.cleanStyle(elt.get("style"))) |
361 for elt in xhtml_elt.xpath("//*[@class]"): | |
362 elt.set("class", self.cleanClasses(elt.get("class"))) | |
342 # we remove self-closing elements for non-void elements | 363 # we remove self-closing elements for non-void elements |
343 for element in xhtml_elt.iter(tag=etree.Element): | 364 for element in xhtml_elt.iter(tag=etree.Element): |
344 if not element.text: | 365 if not element.text: |
345 if element.tag in VOID_ELEMENTS: | 366 if element.tag in VOID_ELEMENTS: |
346 element.text = None | 367 element.text = None |