Mercurial > libervia-backend
diff sat/plugins/plugin_misc_text_syntaxes.py @ 2624:56f94936df1e
code style reformatting using black
author | Goffi <goffi@goffi.org> |
---|---|
date | Wed, 27 Jun 2018 20:14:46 +0200 |
parents | 26edcf3a30eb |
children | 003b8b4b56a7 |
line wrap: on
line diff
--- a/sat/plugins/plugin_misc_text_syntaxes.py Wed Jun 27 07:51:29 2018 +0200 +++ b/sat/plugins/plugin_misc_text_syntaxes.py Wed Jun 27 20:14:46 2018 +0200 @@ -20,16 +20,20 @@ from sat.core.i18n import _, D_ from sat.core.constants import Const as C from sat.core.log import getLogger + log = getLogger(__name__) from twisted.internet import defer from twisted.internet.threads import deferToThread from sat.core import exceptions + try: from lxml import html from lxml.html import clean except ImportError: - raise exceptions.MissingModule(u"Missing module lxml, please download/install it from http://lxml.de/") + raise exceptions.MissingModule( + u"Missing module lxml, please download/install it from http://lxml.de/" + ) from cgi import escape import re @@ -41,15 +45,69 @@ # TODO: check/adapt following list # list initialy based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) -STYLES_WHITELIST = ("azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width") +STYLES_WHITELIST = ( + "azimuth", + "background-color", + "border-bottom-color", + "border-collapse", + "border-color", + "border-left-color", + "border-right-color", + "border-top-color", + "clear", + "color", + "cursor", + "direction", + "display", + "elevation", + "float", + "font", + "font-family", + "font-size", + "font-style", + "font-variant", + "font-weight", + "height", + "letter-spacing", + "line-height", + "overflow", + "pause", + "pause-after", + "pause-before", + "pitch", + "pitch-range", + "richness", + "speak", + "speak-header", + "speak-numeral", + "speak-punctuation", + "speech-rate", + "stress", + "text-align", + "text-decoration", + "text-indent", + "unicode-bidi", + "vertical-align", + "voice-family", + "volume", + "white-space", + "width", +) -SAFE_ATTRS = html.defs.safe_attrs.union(('style', 'poster', 'controls')) -STYLES_VALUES_REGEX = r'^(' + '|'.join(['([a-z-]+)', # alphabetical names - '(#[0-9a-f]+)', # hex value - '(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))', # values with units (or not) - 'rgb\( *((\d+(.\d+)?), *){2}(\d+(.\d+)?) *\)', # rgb function - 'rgba\( *((\d+(.\d+)?), *){3}(\d+(.\d+)?) *\)', # rgba function - ]) + ') *(!important)?$' # we accept "!important" at the end +SAFE_ATTRS = html.defs.safe_attrs.union(("style", "poster", "controls")) +STYLES_VALUES_REGEX = ( + r"^(" + + "|".join( + [ + "([a-z-]+)", # alphabetical names + "(#[0-9a-f]+)", # hex value + "(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))", # values with units (or not) + "rgb\( *((\d+(.\d+)?), *){2}(\d+(.\d+)?) *\)", # rgb function + "rgba\( *((\d+(.\d+)?), *){3}(\d+(.\d+)?) *\)", # rgba function + ] + ) + + ") *(!important)?$" +) # we accept "!important" at the end STYLES_ACCEPTED_VALUE = re.compile(STYLES_VALUES_REGEX) PLUGIN_INFO = { @@ -60,7 +118,9 @@ C.PI_DEPENDENCIES: [], C.PI_MAIN: "TextSyntaxes", C.PI_HANDLER: "no", - C.PI_DESCRIPTION: _("""Management of various text syntaxes (XHTML-IM, Markdown, etc)""") + C.PI_DESCRIPTION: _( + """Management of various text syntaxes (XHTML-IM, Markdown, etc)""" + ), } @@ -91,36 +151,60 @@ """ params_data = { - 'category_name': CATEGORY, - 'category_label': _(CATEGORY), - 'name': NAME, - 'label': _(NAME), - 'syntaxes': syntaxes, - } + "category_name": CATEGORY, + "category_label": _(CATEGORY), + "name": NAME, + "label": _(NAME), + "syntaxes": syntaxes, + } def __init__(self, host): log.info(_("Text syntaxes plugin initialization")) self.host = host - self.addSyntax(self.SYNTAX_XHTML, lambda xhtml: defer.succeed(xhtml), lambda xhtml: defer.succeed(xhtml), - TextSyntaxes.OPT_NO_THREAD) + self.addSyntax( + self.SYNTAX_XHTML, + lambda xhtml: defer.succeed(xhtml), + lambda xhtml: defer.succeed(xhtml), + TextSyntaxes.OPT_NO_THREAD, + ) # TODO: text => XHTML should add <a/> to url like in frontends # it's probably best to move sat_frontends.tools.strings to sat.tools.common or similar - self.addSyntax(self.SYNTAX_TEXT, lambda text: escape(text), lambda xhtml: self._removeMarkups(xhtml), [TextSyntaxes.OPT_HIDDEN]) + self.addSyntax( + self.SYNTAX_TEXT, + lambda text: escape(text), + lambda xhtml: self._removeMarkups(xhtml), + [TextSyntaxes.OPT_HIDDEN], + ) try: import markdown, html2text - def _html2text(html, baseurl=''): + def _html2text(html, baseurl=""): h = html2text.HTML2Text(baseurl=baseurl) h.body_width = 0 # do not truncate the lines, it breaks the long URLs return h.handle(html) - self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, _html2text, [TextSyntaxes.OPT_DEFAULT]) + + self.addSyntax( + self.SYNTAX_MARKDOWN, + markdown.markdown, + _html2text, + [TextSyntaxes.OPT_DEFAULT], + ) except ImportError: log.warning(u"markdown or html2text not found, can't use Markdown syntax") - log.info(u"You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/") - host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssbs', out_sign='s', - async=True, method=self.convert) - host.bridge.addMethod("syntaxGet", ".plugin", in_sign='s', out_sign='s', - method=self.getSyntax) + log.info( + u"You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/" + ) + host.bridge.addMethod( + "syntaxConvert", + ".plugin", + in_sign="sssbs", + out_sign="s", + async=True, + method=self.convert, + ) + host.bridge.addMethod( + "syntaxGet", ".plugin", in_sign="s", out_sign="s", method=self.getSyntax + ) def _updateParamOptions(self): data_synt = TextSyntaxes.syntaxes @@ -136,10 +220,10 @@ options = [] for syntax in syntaxes: - selected = 'selected="true"' if syntax == default_synt else '' + selected = 'selected="true"' if syntax == default_synt else "" options.append(u'<option value="%s" %s/>' % (syntax, selected)) - TextSyntaxes.params_data["options"] = u'\n'.join(options) + TextSyntaxes.params_data["options"] = u"\n".join(options) self.host.memory.updateParams(TextSyntaxes.params % TextSyntaxes.params_data) def getCurrentSyntax(self, profile): @@ -148,16 +232,19 @@ @param profile: %(doc_profile)s @return: profile selected syntax """ - return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile) + return self.host.memory.getParamA(NAME, CATEGORY, profile_key=profile) def _logError(self, failure, action=u"converting syntax"): - log.error(u"Error while {action}: {failure}".format(action=action, failure=failure)) + log.error( + u"Error while {action}: {failure}".format(action=action, failure=failure) + ) return failure def cleanXHTML(self, xhtml): """ Clean XHTML text by removing potentially dangerous/malicious parts @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) """ + def blocking_cleaning(xhtml): """ Clean XHTML and style attributes """ @@ -168,7 +255,7 @@ cleaned_styles = [] for style in styles: try: - key, value = style.split(':') + key, value = style.split(":") except ValueError: continue key = key.lower().strip() @@ -180,7 +267,9 @@ if value == "none": continue cleaned_styles.append((key, value)) - return "; ".join(["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]) + return "; ".join( + ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] + ) if isinstance(xhtml, basestring): xhtml_elt = html.fromstring(xhtml) @@ -189,19 +278,21 @@ else: log.error("Only strings and HtmlElements can be cleaned") raise exceptions.DataError - cleaner = clean.Cleaner(style=False, - add_nofollow=False, - safe_attrs=SAFE_ATTRS) + cleaner = clean.Cleaner( + style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS + ) xhtml_elt = cleaner.clean_html(xhtml_elt) for elt in xhtml_elt.xpath("//*[@style]"): - elt.set("style", clean_style(elt.get('style'))) - return html.tostring(xhtml_elt, encoding=unicode, method='xml') + elt.set("style", clean_style(elt.get("style"))) + return html.tostring(xhtml_elt, encoding=unicode, method="xml") d = deferToThread(blocking_cleaning, xhtml) d.addErrback(self._logError, action=u"cleaning syntax") return d - def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None): + def convert( + self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None + ): """Convert a text between two syntaxes @param text: text to convert @@ -235,7 +326,7 @@ else: d = deferToThread(syntaxes[syntax_from]["to"], text) - #TODO: keep only body element and change it to a div here ? + # TODO: keep only body element and change it to a div here ? if safe: d.addCallback(self.cleanXHTML) @@ -249,7 +340,7 @@ d.addCallback(lambda text: text.rstrip()) return d - def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags = None): + def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags=None): """Add a new syntax to the manager @param name: unique name of the syntax @@ -262,13 +353,24 @@ """ flags = flags if flags is not None else [] if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags: - raise ValueError(u"{} and {} are mutually exclusive".format(TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT)) + raise ValueError( + u"{} and {} are mutually exclusive".format( + TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT + ) + ) syntaxes = TextSyntaxes.syntaxes key = name.lower().strip() if key in syntaxes: - raise exceptions.ConflictError(u"This syntax key already exists: {}".format(key)) - syntaxes[key] = {"name": name, "to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags} + raise exceptions.ConflictError( + u"This syntax key already exists: {}".format(key) + ) + syntaxes[key] = { + "name": name, + "to": to_xhtml_cb, + "from": from_xhtml_cb, + "flags": flags, + } if TextSyntaxes.OPT_DEFAULT in flags: TextSyntaxes.default_syntaxe = key @@ -290,6 +392,6 @@ @param xhtml: the XHTML string to be cleaned @return: the cleaned string """ - cleaner = clean.Cleaner(kill_tags=['style']) + cleaner = clean.Cleaner(kill_tags=["style"]) cleaned = cleaner.clean_html(html.fromstring(xhtml)) return html.tostring(cleaned, encoding=unicode, method="text")