Mercurial > libervia-backend
annotate libervia/backend/plugins/plugin_misc_text_syntaxes.py @ 4329:73d83cb53673
plugin blog import dotclear: repalce deprecated `cgi` by `html`
author | Goffi <goffi@goffi.org> |
---|---|
date | Wed, 20 Nov 2024 14:35:09 +0100 |
parents | 45662662a432 |
children | 111dce64dcb5 |
rev | line source |
---|---|
3028 | 1 #!/usr/bin/env python3 |
3137 | 2 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SAT plugin for managing various text syntaxes |
3479 | 5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
20 from functools import partial |
3075
501a1a3c8594
plugin text syntaxes: don't use anymore deprecated cgi.escape
Goffi <goffi@goffi.org>
parents:
3040
diff
changeset
|
21 from html import escape |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
22 import re |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
23 from typing import Set |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
24 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 from twisted.internet import defer |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
26 from twisted.internet.threads import deferToThread |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
27 |
4071
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
28 from libervia.backend.core import exceptions |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
29 from libervia.backend.core.constants import Const as C |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
30 from libervia.backend.core.i18n import D_, _ |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
31 from libervia.backend.core.log import getLogger |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
32 from libervia.backend.tools import xml_tools |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
33 |
1542
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
34 try: |
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
35 from lxml import html |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
36 from lxml import etree |
1542
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
37 except ImportError: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
38 raise exceptions.MissingModule( |
4325
45662662a432
plugin text syntaxes: update exception message to be more explicit when `lxml_html_clean` is missing.
Goffi <goffi@goffi.org>
parents:
4270
diff
changeset
|
39 'Missing module "lxml", please download/install it from http://lxml.de/' |
45662662a432
plugin text syntaxes: update exception message to be more explicit when `lxml_html_clean` is missing.
Goffi <goffi@goffi.org>
parents:
4270
diff
changeset
|
40 ) |
45662662a432
plugin text syntaxes: update exception message to be more explicit when `lxml_html_clean` is missing.
Goffi <goffi@goffi.org>
parents:
4270
diff
changeset
|
41 try: |
45662662a432
plugin text syntaxes: update exception message to be more explicit when `lxml_html_clean` is missing.
Goffi <goffi@goffi.org>
parents:
4270
diff
changeset
|
42 from lxml.html import clean |
45662662a432
plugin text syntaxes: update exception message to be more explicit when `lxml_html_clean` is missing.
Goffi <goffi@goffi.org>
parents:
4270
diff
changeset
|
43 except ImportError: |
45662662a432
plugin text syntaxes: update exception message to be more explicit when `lxml_html_clean` is missing.
Goffi <goffi@goffi.org>
parents:
4270
diff
changeset
|
44 raise exceptions.MissingModule( |
45662662a432
plugin text syntaxes: update exception message to be more explicit when `lxml_html_clean` is missing.
Goffi <goffi@goffi.org>
parents:
4270
diff
changeset
|
45 'Missing module "lxml_html_clean". Please download and install it from ' |
45662662a432
plugin text syntaxes: update exception message to be more explicit when `lxml_html_clean` is missing.
Goffi <goffi@goffi.org>
parents:
4270
diff
changeset
|
46 'http://lxml.de/. Note that this is a separate package to install in addition ' |
45662662a432
plugin text syntaxes: update exception message to be more explicit when `lxml_html_clean` is missing.
Goffi <goffi@goffi.org>
parents:
4270
diff
changeset
|
47 'to "lxml".' |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
48 ) |
674
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
49 |
2873
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
50 log = getLogger(__name__) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
51 |
771 | 52 CATEGORY = D_("Composition") |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
53 NAME = "Syntax" |
2869
148d30147890
plugin text syntaxes: fixed default syntax
Goffi <goffi@goffi.org>
parents:
2786
diff
changeset
|
54 _SYNTAX_XHTML = "xhtml" # must be lower case |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
55 _SYNTAX_CURRENT = "@CURRENT@" |
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
56 |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
57 # TODO: check/adapt following list |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
58 # list initialy based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
59 STYLES_WHITELIST = ( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
60 "azimuth", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
61 "background-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
62 "border-bottom-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
63 "border-collapse", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
64 "border-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
65 "border-left-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
66 "border-right-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
67 "border-top-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
68 "clear", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
69 "color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
70 "cursor", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
71 "direction", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
72 "display", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
73 "elevation", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
74 "float", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
75 "font", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
76 "font-family", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
77 "font-size", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
78 "font-style", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
79 "font-variant", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
80 "font-weight", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
81 "height", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
82 "letter-spacing", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
83 "line-height", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
84 "overflow", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
85 "pause", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
86 "pause-after", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
87 "pause-before", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
88 "pitch", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
89 "pitch-range", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
90 "richness", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
91 "speak", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
92 "speak-header", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
93 "speak-numeral", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
94 "speak-punctuation", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
95 "speech-rate", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
96 "stress", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
97 "text-align", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
98 "text-decoration", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
99 "text-indent", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
100 "unicode-bidi", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
101 "vertical-align", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
102 "voice-family", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
103 "volume", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
104 "white-space", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
105 "width", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
106 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
107 |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
108 # cf. https://www.w3.org/TR/html/syntax.html#void-elements |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
109 VOID_ELEMENTS = ( |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
110 "area", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
111 "base", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
112 "br", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
113 "col", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
114 "embed", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
115 "hr", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
116 "img", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
117 "input", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
118 "keygen", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
119 "link", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
120 "menuitem", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
121 "meta", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
122 "param", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
123 "source", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
124 "track", |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
125 "wbr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
126 ) |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
127 |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
128 SAFE_ATTRS = html.defs.safe_attrs.union({"style", "poster", "controls"}) - {"id"} |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
129 SAFE_CLASSES = { |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
130 # those classes are used for code highlighting |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
131 "bp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
132 "c", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
133 "ch", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
134 "cm", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
135 "cp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
136 "cpf", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
137 "cs", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
138 "dl", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
139 "err", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
140 "fm", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
141 "gd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
142 "ge", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
143 "get", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
144 "gh", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
145 "gi", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
146 "go", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
147 "gp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
148 "gr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
149 "gs", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
150 "gt", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
151 "gu", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
152 "highlight", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
153 "hll", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
154 "il", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
155 "k", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
156 "kc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
157 "kd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
158 "kn", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
159 "kp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
160 "kr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
161 "kt", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
162 "m", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
163 "mb", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
164 "mf", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
165 "mh", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
166 "mi", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
167 "mo", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
168 "na", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
169 "nb", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
170 "nc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
171 "nd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
172 "ne", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
173 "nf", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
174 "ni", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
175 "nl", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
176 "nn", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
177 "no", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
178 "nt", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
179 "nv", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
180 "o", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
181 "ow", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
182 "s", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
183 "sa", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
184 "sb", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
185 "sc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
186 "sd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
187 "se", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
188 "sh", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
189 "si", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
190 "sr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
191 "ss", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
192 "sx", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
193 "vc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
194 "vg", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
195 "vi", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
196 "vm", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
197 "w", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
198 "write", |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
199 } |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
200 STYLES_VALUES_REGEX = ( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
201 r"^(" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
202 + "|".join( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
203 [ |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
204 "([a-z-]+)", # alphabetical names |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
205 "(#[0-9a-f]+)", # hex value |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
206 "(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))", # values with units (or not) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
207 "rgb\( *((\d+(.\d+)?), *){2}(\d+(.\d+)?) *\)", # rgb function |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
208 "rgba\( *((\d+(.\d+)?), *){3}(\d+(.\d+)?) *\)", # rgba function |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
209 ] |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
210 ) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
211 + ") *(!important)?$" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
212 ) # we accept "!important" at the end |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
213 STYLES_ACCEPTED_VALUE = re.compile(STYLES_VALUES_REGEX) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
214 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
215 PLUGIN_INFO = { |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
216 C.PI_NAME: "Text syntaxes", |
2780
85d3240a400f
plugin text syntaxes: changed import name to TEXT_SYNTAX (better with underscore for autocompletion)
Goffi <goffi@goffi.org>
parents:
2771
diff
changeset
|
217 C.PI_IMPORT_NAME: "TEXT_SYNTAXES", |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
218 C.PI_TYPE: "MISC", |
3726
33d75cd3c371
plugin XEP-0060, XEP-0163, XEP-0277, text syntaxes: make those plugins usable with components
Goffi <goffi@goffi.org>
parents:
3709
diff
changeset
|
219 C.PI_MODES: C.PLUG_MODE_BOTH, |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
220 C.PI_PROTOCOLS: [], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
221 C.PI_DEPENDENCIES: [], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
222 C.PI_MAIN: "TextSyntaxes", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
223 C.PI_HANDLER: "no", |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
224 C.PI_DESCRIPTION: _( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
225 """Management of various text syntaxes (XHTML-IM, Markdown, etc)""" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
226 ), |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
227 } |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
228 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
229 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
230 class TextSyntaxes(object): |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
231 """Text conversion class |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
232 XHTML utf-8 is used as intermediate language for conversions |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
233 """ |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
234 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
235 OPT_DEFAULT = "DEFAULT" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
236 OPT_HIDDEN = "HIDDEN" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
237 OPT_NO_THREAD = "NO_THREAD" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
238 SYNTAX_XHTML = _SYNTAX_XHTML |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
239 SYNTAX_MARKDOWN = "markdown" |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
240 SYNTAX_TEXT = "text" |
2869
148d30147890
plugin text syntaxes: fixed default syntax
Goffi <goffi@goffi.org>
parents:
2786
diff
changeset
|
241 # default_syntax must be lower case |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
242 default_syntax = SYNTAX_XHTML |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
243 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
244 def __init__(self, host): |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
968
diff
changeset
|
245 log.info(_("Text syntaxes plugin initialization")) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
246 self.host = host |
3620
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
247 self.syntaxes = {} |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
248 |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
249 self.params = """ |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
250 <params> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
251 <individual> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
252 <category name="%(category_name)s" label="%(category_label)s"> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
253 <param name="%(name)s" label="%(label)s" type="list" security="0"> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
254 %(options)s |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
255 </param> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
256 </category> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
257 </individual> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
258 </params> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
259 """ |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
260 |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
261 self.params_data = { |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
262 "category_name": CATEGORY, |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
263 "category_label": _(CATEGORY), |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
264 "name": NAME, |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
265 "label": _(NAME), |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
266 "syntaxes": self.syntaxes, |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
267 } |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
268 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
269 self.add_syntax( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
270 self.SYNTAX_XHTML, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
271 lambda xhtml: defer.succeed(xhtml), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
272 lambda xhtml: defer.succeed(xhtml), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
273 TextSyntaxes.OPT_NO_THREAD, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
274 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
1811
diff
changeset
|
275 # TODO: text => XHTML should add <a/> to url like in frontends |
4074
26b7ed2817da
refactoring: rename `sat_frontends` to `libervia.frontends`
Goffi <goffi@goffi.org>
parents:
4071
diff
changeset
|
276 # it's probably best to move libervia.frontends.tools.strings to sat.tools.common or similar |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
277 self.add_syntax( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
278 self.SYNTAX_TEXT, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
279 lambda text: escape(text), |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
280 lambda xhtml: self._remove_markups(xhtml), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
281 [TextSyntaxes.OPT_HIDDEN], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
282 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
283 try: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
284 import markdown, html2text |
2873
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
285 from markdown.extensions import Extension |
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
286 |
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
287 # XXX: we disable raw HTML parsing by default, to avoid parsing error |
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
288 # when the user is not aware of markdown and HTML |
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
289 class EscapeHTML(Extension): |
4059
00dbc3370d35
plugin text syntaxes: fix `EscapeHTML` following massive snake_case renaming
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
290 def extendMarkdown(self, md): |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
291 md.preprocessors.deregister("html_block") |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
292 md.inlinePatterns.deregister("html") |
841
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
293 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
294 def _html2text(html, baseurl=""): |
841
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
295 h = html2text.HTML2Text(baseurl=baseurl) |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
296 h.body_width = 0 # do not truncate the lines, it breaks the long URLs |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
297 return h.handle(html) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
298 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
299 self.add_syntax( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
300 self.SYNTAX_MARKDOWN, |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
301 partial( |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
302 markdown.markdown, |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
303 extensions=[ |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
304 EscapeHTML(), |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
305 "nl2br", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
306 "codehilite", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
307 "fenced_code", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
308 "sane_lists", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
309 "tables", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
310 ], |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
311 extension_configs={ |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
312 "codehilite": { |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
313 "css_class": "highlight", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
314 } |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
315 }, |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
316 ), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
317 _html2text, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
318 [TextSyntaxes.OPT_DEFAULT], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
319 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
320 except ImportError: |
3028 | 321 log.warning("markdown or html2text not found, can't use Markdown syntax") |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
322 log.info( |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
323 "You can download/install them from https://pythonhosted.org/Markdown/ " |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
324 "and https://github.com/Alir3z4/html2text/" |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
325 ) |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
326 host.bridge.add_method( |
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
327 "syntax_convert", |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
328 ".plugin", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
329 in_sign="sssbs", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
330 out_sign="s", |
3028 | 331 async_=True, |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
332 method=self.convert, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
333 ) |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
334 host.bridge.add_method( |
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
335 "syntax_get", ".plugin", in_sign="s", out_sign="s", method=self.get_syntax |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
336 ) |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
337 if xml_tools.clean_xhtml is None: |
3028 | 338 log.debug("Installing cleaning method") |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
339 xml_tools.clean_xhtml = self.clean_xhtml |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
340 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
341 def _update_param_options(self): |
3620
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
342 data_synt = self.syntaxes |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
343 default_synt = TextSyntaxes.default_syntax |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
344 syntaxes = [] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
345 |
3028 | 346 for syntax in list(data_synt.keys()): |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
347 flags = data_synt[syntax]["flags"] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
348 if TextSyntaxes.OPT_HIDDEN not in flags: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
349 syntaxes.append(syntax) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
350 |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
351 syntaxes.sort(key=lambda synt: synt.lower()) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
352 options = [] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
353 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
354 for syntax in syntaxes: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
355 selected = 'selected="true"' if syntax == default_synt else "" |
3028 | 356 options.append('<option value="%s" %s/>' % (syntax, selected)) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
357 |
3620
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
358 self.params_data["options"] = "\n".join(options) |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
359 self.host.memory.update_params(self.params % self.params_data) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
360 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
361 def get_current_syntax(self, profile): |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
362 """Return the selected syntax for the given profile |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
363 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
364 @param profile: %(doc_profile)s |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
365 @return: profile selected syntax |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
366 """ |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
367 return self.host.memory.param_get_a(NAME, CATEGORY, profile_key=profile) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
368 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
369 def _log_error(self, failure, action="converting syntax"): |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
370 log.error( |
3028 | 371 "Error while {action}: {failure}".format(action=action, failure=failure) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
372 ) |
2106
5874da3811b7
plugin text syntaxes: log error on cleanXHTML failure
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
373 return failure |
5874da3811b7
plugin text syntaxes: log error on cleanXHTML failure
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
374 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
375 def clean_style(self, styles_raw: str) -> str: |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
376 """ "Clean unsafe CSS styles |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
377 |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
378 Remove styles not in the whitelist, or where the value doesn't match the regex |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
379 @param styles_raw: CSS styles |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
380 @return: cleaned styles |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
381 """ |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
382 styles: List[str] = styles_raw.split(";") |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
383 cleaned_styles = [] |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
384 for style in styles: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
385 try: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
386 key, value = style.split(":") |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
387 except ValueError: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
388 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
389 key = key.lower().strip() |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
390 if key not in STYLES_WHITELIST: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
391 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
392 value = value.lower().strip() |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
393 if not STYLES_ACCEPTED_VALUE.match(value): |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
394 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
395 if value == "none": |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
396 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
397 cleaned_styles.append((key, value)) |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
398 return "; ".join(["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]) |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
399 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
400 def clean_classes(self, classes_raw: str) -> str: |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
401 """Remove any non whitelisted class |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
402 |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
403 @param classes_raw: classes set on an element |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
404 @return: remaining classes (can be empty string) |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
405 """ |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
406 return " ".join(SAFE_CLASSES.intersection(classes_raw.split())) |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
407 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
408 def clean_xhtml(self, xhtml): |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
409 """Clean XHTML text by removing potentially dangerous/malicious parts |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
410 |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
411 @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
412 @return (unicode): cleaned XHTML |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
413 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
414 |
3028 | 415 if isinstance(xhtml, str): |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
416 try: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
417 xhtml_elt = html.fromstring(xhtml) |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
418 except etree.ParserError as e: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
419 if not xhtml.strip(): |
3028 | 420 return "" |
421 log.error("Can't clean XHTML: {xhtml}".format(xhtml=xhtml)) | |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
422 raise e |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
423 elif isinstance(xhtml, html.HtmlElement): |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
424 xhtml_elt = xhtml |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
425 else: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
426 log.error("Only strings and HtmlElements can be cleaned") |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
427 raise exceptions.DataError |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
428 cleaner = clean.Cleaner(style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS) |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
429 xhtml_elt = cleaner.clean_html(xhtml_elt) |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
430 for elt in xhtml_elt.xpath("//*[@style]"): |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
431 elt.set("style", self.clean_style(elt.get("style"))) |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
432 for elt in xhtml_elt.xpath("//*[@class]"): |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
433 elt.set("class", self.clean_classes(elt.get("class"))) |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
434 # we remove self-closing elements for non-void elements |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
435 for element in xhtml_elt.iter(tag=etree.Element): |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
436 if not element.text: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
437 if element.tag in VOID_ELEMENTS: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
438 element.text = None |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
439 else: |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
440 element.text = "" |
3028 | 441 return html.tostring(xhtml_elt, encoding=str, method="xml") |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
442 |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
443 def convert( |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
444 self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
445 ): |
1803
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
446 """Convert a text between two syntaxes |
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
447 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
448 @param text: text to convert |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
449 @param syntax_from: source syntax (e.g. "markdown") |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
450 @param syntax_to: dest syntax (e.g.: "XHTML") |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
451 @param safe: clean resulting XHTML to avoid malicious code if True |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
452 @param profile: needed only when syntax_from or syntax_to is set to |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
453 _SYNTAX_CURRENT |
1803
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
454 @return(unicode): converted text |
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
455 """ |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
456 # FIXME: convert should be abled to handle domish.Element directly |
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
457 # when dealing with XHTML |
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
458 # TODO: a way for parser to return parsing errors/warnings |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
459 |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
460 if syntax_from == _SYNTAX_CURRENT: |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
461 syntax_from = self.get_current_syntax(profile) |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
462 else: |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
463 syntax_from = syntax_from.lower().strip() |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
464 if syntax_to == _SYNTAX_CURRENT: |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
465 syntax_to = self.get_current_syntax(profile) |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
466 else: |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
467 syntax_to = syntax_to.lower().strip() |
3620
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
468 syntaxes = self.syntaxes |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
469 if syntax_from not in syntaxes: |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
470 raise exceptions.NotFound(syntax_from) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
471 if syntax_to not in syntaxes: |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
472 raise exceptions.NotFound(syntax_to) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
473 d = None |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
474 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
475 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_from]["flags"]: |
1803
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
476 d = defer.maybeDeferred(syntaxes[syntax_from]["to"], text) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
477 else: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
478 d = deferToThread(syntaxes[syntax_from]["to"], text) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
479 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
480 # TODO: keep only body element and change it to a div here ? |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
481 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
482 if safe: |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
483 d.addCallback(self.clean_xhtml) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
484 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
485 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
486 d.addCallback(syntaxes[syntax_to]["from"]) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
487 else: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
488 d.addCallback(lambda xhtml: deferToThread(syntaxes[syntax_to]["from"], xhtml)) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
489 |
836
2cc0201b4613
plugin text_syntaxes: rstrip the conversion result to avoid new lines systematically added by converters (e.g. html2text do this)
souliane <souliane@mailoo.org>
parents:
832
diff
changeset
|
490 # converters can add new lines that disturb the microblog change detection |
2cc0201b4613
plugin text_syntaxes: rstrip the conversion result to avoid new lines systematically added by converters (e.g. html2text do this)
souliane <souliane@mailoo.org>
parents:
832
diff
changeset
|
491 d.addCallback(lambda text: text.rstrip()) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
492 return d |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
493 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
494 def add_syntax(self, name, to_xhtml_cb, from_xhtml_cb, flags=None): |
1803
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
495 """Add a new syntax to the manager |
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
496 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
497 @param name: unique name of the syntax |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
498 @param to_xhtml_cb: callback to convert from syntax to XHTML |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
499 @param from_xhtml_cb: callback to convert from XHTML to syntax |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
500 @param flags: set of optional flags, can be: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
501 TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
502 TextSyntaxes.OPT_HIDDEN: do not show in parameters |
1803
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
503 TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback may then return a deferred) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
504 """ |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
505 flags = flags if flags is not None else [] |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
506 if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
507 raise ValueError( |
3028 | 508 "{} and {} are mutually exclusive".format( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
509 TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
510 ) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
511 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
512 |
3620
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
513 syntaxes = self.syntaxes |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
514 key = name.lower().strip() |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
515 if key in syntaxes: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
516 raise exceptions.ConflictError( |
3028 | 517 "This syntax key already exists: {}".format(key) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
518 ) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
519 syntaxes[key] = { |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
520 "name": name, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
521 "to": to_xhtml_cb, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
522 "from": from_xhtml_cb, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
523 "flags": flags, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
524 } |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
525 if TextSyntaxes.OPT_DEFAULT in flags: |
2869
148d30147890
plugin text syntaxes: fixed default syntax
Goffi <goffi@goffi.org>
parents:
2786
diff
changeset
|
526 TextSyntaxes.default_syntax = key |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
527 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
528 self._update_param_options() |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
529 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
530 def get_syntax(self, name): |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
531 """get syntax key corresponding to a name |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
532 |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
533 @raise exceptions.NotFound: syntax doesn't exist |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
534 """ |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
535 key = name.lower().strip() |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
536 if key in self.syntaxes: |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
537 return key |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
538 raise exceptions.NotFound |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
539 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
540 def _remove_markups(self, xhtml): |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
541 """Remove XHTML markups from the given string. |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
542 |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
543 @param xhtml: the XHTML string to be cleaned |
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
544 @return: the cleaned string |
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
545 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
546 cleaner = clean.Cleaner(kill_tags=["style"]) |
4258
ba28ca268f4a
plugin text syntaxes: fix empty string handling in `_remove_markups`.
Goffi <goffi@goffi.org>
parents:
4074
diff
changeset
|
547 if not xhtml: |
ba28ca268f4a
plugin text syntaxes: fix empty string handling in `_remove_markups`.
Goffi <goffi@goffi.org>
parents:
4074
diff
changeset
|
548 return "" |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
549 cleaned = cleaner.clean_html(html.fromstring(xhtml)) |
3028 | 550 return html.tostring(cleaned, encoding=str, method="text") |