libervia-backend: sat/plugins/plugin_misc_text

comparison sat/plugins/plugin_misc_text_syntaxes.py @ 3709:09f5ac48ffe3

merge bookmark @

author	Goffi <goffi@goffi.org>
date	Fri, 12 Nov 2021 17:21:24 +0100
parents	f568f304c982 0bbdc50aa405
children	33d75cd3c371

comparison

equal deleted inserted replaced

-:8353cc3b8db9
+:09f5ac48ffe3
 # GNU Affero General Public License for more details.
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+from functools import partial
+from html import escape
 import re
-from html import escape
+from typing import Set
-from functools import partial
-from sat.core.i18n import _, D_
-from sat.core.constants import Const as C
-from sat.core.log import getLogger
 from twisted.internet import defer
 from twisted.internet.threads import deferToThread
 from sat.core import exceptions
+from sat.core.constants import Const as C
+from sat.core.i18n import D_, _
+from sat.core.log import getLogger
 from sat.tools import xml_tools
 try:
 from lxml import html
 from lxml.html import clean
 "param",
 "source",
 "track",
 "wbr")
-SAFE_ATTRS = html.defs.safe_attrs.union(("style", "poster", "controls"))
+SAFE_ATTRS = html.defs.safe_attrs.union({"style", "poster", "controls"}) - {"id"}
+# whitelist of CSS class names: cleanClasses() keeps only the classes listed here,
+# and cleanXHTML() applies it to every element carrying a "class" attribute
+SAFE_CLASSES = {
+# these classes are used for code highlighting
+"bp", "c", "ch", "cm", "cp", "cpf", "cs", "dl", "err", "fm", "gd", "ge", "get", "gh",
+"gi", "go", "gp", "gr", "gs", "gt", "gu", "highlight", "hll", "il", "k", "kc", "kd",
+"kn", "kp", "kr", "kt", "m", "mb", "mf", "mh", "mi", "mo", "na", "nb", "nc", "nd",
+"ne", "nf", "ni", "nl", "nn", "no", "nt", "nv", "o", "ow", "s", "sa", "sb", "sc",
+"sd", "se", "sh", "si", "sr", "ss", "sx", "vc", "vg", "vi", "vm", "w", "write",
+}
 STYLES_VALUES_REGEX = (
 r"^("
 + "|".join(
 [
 "([a-z-]+)",  # alphabetical names
 [TextSyntaxes.OPT_DEFAULT],
 )
 except ImportError:
 log.warning("markdown or html2text not found, can't use Markdown syntax")
 log.info(
-"You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/"
+"You can download/install them from https://pythonhosted.org/Markdown/ "
+"and https://github.com/Alir3z4/html2text/"
 )
 host.bridge.addMethod(
 "syntaxConvert",
 ".plugin",
 in_sign="sssbs",
 log.error(
 "Error while {action}: {failure}".format(action=action, failure=failure)
 )
 return failure
-def cleanStyle(self, styles):
+def cleanStyle(self, styles_raw: str) -> str:
 """"Clean unsafe CSS styles
 Remove styles not in the whitelist, or where the value doesn't match the regex
-@param styles_raw(unicode): CSS styles
+@param styles_raw: CSS styles
-@return (unicode): cleaned styles
+@return: cleaned styles
 """
-styles = styles.split(";")
+styles: List[str] = styles_raw.split(";")
 cleaned_styles = []
 for style in styles:
 try:
 key, value = style.split(":")
 except ValueError:
 cleaned_styles.append((key, value))
 return "; ".join(
 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]
 )
+def cleanClasses(self, classes_raw: str) -> str:
+    """Remove any non whitelisted class
+
+    Whitelisted classes are returned in the order they appear in the attribute,
+    each one at most once, so the result is deterministic.
+    @param classes_raw: classes set on an element (whitespace separated)
+    @return: remaining classes (can be empty string)
+    """
+    # a set intersection would lose the original order (and set iteration order
+    # of strings may change between runs); dict.fromkeys deduplicates while
+    # keeping the first-seen order
+    kept = dict.fromkeys(
+        class_ for class_ in classes_raw.split() if class_ in SAFE_CLASSES
+    )
+    return " ".join(kept)
 def cleanXHTML(self, xhtml):
 """Clean XHTML text by removing potentially dangerous/malicious parts
 @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean
 @return (unicode): cleaned XHTML
 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS
 )
 xhtml_elt = cleaner.clean_html(xhtml_elt)
 for elt in xhtml_elt.xpath("//*[@style]"):
 elt.set("style", self.cleanStyle(elt.get("style")))
+for elt in xhtml_elt.xpath("//*[@class]"):
+elt.set("class", self.cleanClasses(elt.get("class")))
 # we remove self-closing elements for non-void elements
 for element in xhtml_elt.iter(tag=etree.Element):
 if not element.text:
 if element.tag in VOID_ELEMENTS:
 element.text = None

Mercurial > libervia-backend

comparison sat/plugins/plugin_misc_text_syntaxes.py @ 3709:09f5ac48ffe3