Mercurial > libervia-backend
annotate libervia/backend/plugins/plugin_misc_text_syntaxes.py @ 4314:6a70fcd93a7a
plugin XEP-0131: Stanza Headers and Internet Metadata implementation:
- SHIM is now supported and put in `msg_data["extra"]["headers"]`.
- `Keywords` are converted to and from a list of strings in the `msg_data["extra"]["keywords"]`
field (if present in headers on message sending, values are merged).
- Minimum Python version upgraded to 3.11 due to use of `StrEnum`.
rel 451
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 28 Sep 2024 15:56:04 +0200 |
parents | 0d7bb4df2343 |
children |
rev | line source |
---|---|
3028 | 1 #!/usr/bin/env python3 |
3137 | 2 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SAT plugin for managing various text syntaxes |
3479 | 5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
20 from functools import partial |
3075
501a1a3c8594
plugin text syntaxes: don't use anymore deprecated cgi.escape
Goffi <goffi@goffi.org>
parents:
3040
diff
changeset
|
21 from html import escape |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
22 import re |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
23 from typing import Set |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
24 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 from twisted.internet import defer |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
26 from twisted.internet.threads import deferToThread |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
27 |
4071
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
28 from libervia.backend.core import exceptions |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
29 from libervia.backend.core.constants import Const as C |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
30 from libervia.backend.core.i18n import D_, _ |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
31 from libervia.backend.core.log import getLogger |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
32 from libervia.backend.tools import xml_tools |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
33 |
1542
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
34 try: |
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
35 from lxml import html |
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
36 from lxml.html import clean |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
37 from lxml import etree |
1542
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
38 except ImportError: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
39 raise exceptions.MissingModule( |
3028 | 40 "Missing module lxml, please download/install it from http://lxml.de/" |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
41 ) |
674
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
42 |
2873
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
43 log = getLogger(__name__) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
44 |
771 | 45 CATEGORY = D_("Composition") |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
46 NAME = "Syntax" |
2869
148d30147890
plugin text syntaxes: fixed default syntax
Goffi <goffi@goffi.org>
parents:
2786
diff
changeset
|
47 _SYNTAX_XHTML = "xhtml" # must be lower case |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
48 _SYNTAX_CURRENT = "@CURRENT@" |
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
49 |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
50 # TODO: check/adapt following list |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
51 # list initially based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
52 STYLES_WHITELIST = ( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
53 "azimuth", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
54 "background-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
55 "border-bottom-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
56 "border-collapse", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
57 "border-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
58 "border-left-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
59 "border-right-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
60 "border-top-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
61 "clear", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
62 "color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
63 "cursor", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
64 "direction", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
65 "display", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
66 "elevation", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
67 "float", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
68 "font", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
69 "font-family", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
70 "font-size", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
71 "font-style", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
72 "font-variant", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
73 "font-weight", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
74 "height", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
75 "letter-spacing", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
76 "line-height", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
77 "overflow", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
78 "pause", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
79 "pause-after", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
80 "pause-before", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
81 "pitch", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
82 "pitch-range", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
83 "richness", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
84 "speak", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
85 "speak-header", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
86 "speak-numeral", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
87 "speak-punctuation", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
88 "speech-rate", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
89 "stress", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
90 "text-align", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
91 "text-decoration", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
92 "text-indent", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
93 "unicode-bidi", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
94 "vertical-align", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
95 "voice-family", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
96 "volume", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
97 "white-space", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
98 "width", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
99 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
100 |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
101 # cf. https://www.w3.org/TR/html/syntax.html#void-elements |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
102 VOID_ELEMENTS = ( |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
103 "area", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
104 "base", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
105 "br", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
106 "col", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
107 "embed", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
108 "hr", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
109 "img", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
110 "input", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
111 "keygen", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
112 "link", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
113 "menuitem", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
114 "meta", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
115 "param", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
116 "source", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
117 "track", |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
118 "wbr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
119 ) |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
120 |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
121 SAFE_ATTRS = html.defs.safe_attrs.union({"style", "poster", "controls"}) - {"id"} |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
122 SAFE_CLASSES = { |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
123 # those classes are used for code highlighting |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
124 "bp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
125 "c", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
126 "ch", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
127 "cm", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
128 "cp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
129 "cpf", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
130 "cs", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
131 "dl", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
132 "err", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
133 "fm", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
134 "gd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
135 "ge", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
136 "get", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
137 "gh", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
138 "gi", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
139 "go", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
140 "gp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
141 "gr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
142 "gs", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
143 "gt", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
144 "gu", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
145 "highlight", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
146 "hll", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
147 "il", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
148 "k", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
149 "kc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
150 "kd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
151 "kn", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
152 "kp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
153 "kr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
154 "kt", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
155 "m", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
156 "mb", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
157 "mf", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
158 "mh", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
159 "mi", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
160 "mo", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
161 "na", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
162 "nb", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
163 "nc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
164 "nd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
165 "ne", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
166 "nf", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
167 "ni", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
168 "nl", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
169 "nn", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
170 "no", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
171 "nt", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
172 "nv", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
173 "o", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
174 "ow", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
175 "s", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
176 "sa", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
177 "sb", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
178 "sc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
179 "sd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
180 "se", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
181 "sh", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
182 "si", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
183 "sr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
184 "ss", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
185 "sx", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
186 "vc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
187 "vg", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
188 "vi", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
189 "vm", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
190 "w", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
191 "write", |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
192 } |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
193 STYLES_VALUES_REGEX = ( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
194 r"^(" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
195 + "|".join( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
196 [ |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
197 "([a-z-]+)", # alphabetical names |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
198 "(#[0-9a-f]+)", # hex value |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
199 "(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))", # values with units (or not) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
200 "rgb\( *((\d+(.\d+)?), *){2}(\d+(.\d+)?) *\)", # rgb function |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
201 "rgba\( *((\d+(.\d+)?), *){3}(\d+(.\d+)?) *\)", # rgba function |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
202 ] |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
203 ) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
204 + ") *(!important)?$" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
205 ) # we accept "!important" at the end |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
206 STYLES_ACCEPTED_VALUE = re.compile(STYLES_VALUES_REGEX) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
207 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
208 PLUGIN_INFO = { |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
209 C.PI_NAME: "Text syntaxes", |
2780
85d3240a400f
plugin text syntaxes: changed import name to TEXT_SYNTAX (better with underscore for autocompletion)
Goffi <goffi@goffi.org>
parents:
2771
diff
changeset
|
210 C.PI_IMPORT_NAME: "TEXT_SYNTAXES", |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
211 C.PI_TYPE: "MISC", |
3726
33d75cd3c371
plugin XEP-0060, XEP-0163, XEP-0277, text syntaxes: make those plugins usable with components
Goffi <goffi@goffi.org>
parents:
3709
diff
changeset
|
212 C.PI_MODES: C.PLUG_MODE_BOTH, |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
213 C.PI_PROTOCOLS: [], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
214 C.PI_DEPENDENCIES: [], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
215 C.PI_MAIN: "TextSyntaxes", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
216 C.PI_HANDLER: "no", |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
217 C.PI_DESCRIPTION: _( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
218 """Management of various text syntaxes (XHTML-IM, Markdown, etc)""" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
219 ), |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
220 } |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
221 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
222 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
223 class TextSyntaxes(object): |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
224 """Text conversion class |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
225 XHTML utf-8 is used as intermediate language for conversions |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
226 """ |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
227 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
228 OPT_DEFAULT = "DEFAULT" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
229 OPT_HIDDEN = "HIDDEN" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
230 OPT_NO_THREAD = "NO_THREAD" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
231 SYNTAX_XHTML = _SYNTAX_XHTML |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
232 SYNTAX_MARKDOWN = "markdown" |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
233 SYNTAX_TEXT = "text" |
2869
148d30147890
plugin text syntaxes: fixed default syntax
Goffi <goffi@goffi.org>
parents:
2786
diff
changeset
|
234 # default_syntax must be lower case |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
235 default_syntax = SYNTAX_XHTML |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
236 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
def __init__(self, host):
    """Register the built-in syntaxes and expose the plugin through the bridge

    XHTML and plain text are always registered. Markdown is registered only when
    ``markdown`` and ``html2text`` can both be imported; otherwise a warning is
    logged and the plugin keeps working without it.

    @param host: backend host; its ``bridge`` receives the ``syntax_convert`` and
        ``syntax_get`` methods
    """
    log.info(_("Text syntaxes plugin initialization"))
    self.host = host
    self.syntaxes = {}

    # XML template of the parameter, filled with self.params_data
    # (see _update_param_options)
    self.params = """
        <params>
        <individual>
        <category name="%(category_name)s" label="%(category_label)s">
            <param name="%(name)s" label="%(label)s" type="list" security="0">
                %(options)s
            </param>
        </category>
        </individual>
        </params>
    """

    self.params_data = {
        "category_name": CATEGORY,
        "category_label": _(CATEGORY),
        "name": NAME,
        "label": _(NAME),
        "syntaxes": self.syntaxes,
    }

    # XHTML is the pivot syntax, so both conversions are the identity
    self.add_syntax(
        self.SYNTAX_XHTML,
        lambda xhtml: defer.succeed(xhtml),
        lambda xhtml: defer.succeed(xhtml),
        # flags are given as a list like for the other syntaxes: with a bare
        # string, "FLAG in flags" would be a substring test
        [TextSyntaxes.OPT_NO_THREAD],
    )
    # TODO: text => XHTML should add <a/> to url like in frontends
    #       it's probably best to move libervia.frontends.tools.strings to sat.tools.common or similar
    self.add_syntax(
        self.SYNTAX_TEXT,
        lambda text: escape(text),
        lambda xhtml: self._remove_markups(xhtml),
        [TextSyntaxes.OPT_HIDDEN],
    )

    # only the imports are guarded, so an ImportError coming from anywhere else
    # is not mistaken for a missing optional dependency
    try:
        import markdown
        import html2text
        from markdown.extensions import Extension
    except ImportError:
        log.warning("markdown or html2text not found, can't use Markdown syntax")
        log.info(
            "You can download/install them from https://pythonhosted.org/Markdown/ "
            "and https://github.com/Alir3z4/html2text/"
        )
    else:
        # XXX: we disable raw HTML parsing by default, to avoid parsing error
        #      when the user is not aware of markdown and HTML
        class EscapeHTML(Extension):
            def extendMarkdown(self, md):
                md.preprocessors.deregister("html_block")
                md.inlinePatterns.deregister("html")

        def _html2text(html, baseurl=""):
            h = html2text.HTML2Text(baseurl=baseurl)
            h.body_width = 0  # do not truncate the lines, it breaks the long URLs
            return h.handle(html)

        self.add_syntax(
            self.SYNTAX_MARKDOWN,
            partial(
                markdown.markdown,
                extensions=[
                    EscapeHTML(),
                    "nl2br",
                    "codehilite",
                    "fenced_code",
                    "sane_lists",
                    "tables",
                ],
                extension_configs={
                    "codehilite": {
                        "css_class": "highlight",
                    }
                },
            ),
            _html2text,
            [TextSyntaxes.OPT_DEFAULT],
        )

    host.bridge.add_method(
        "syntax_convert",
        ".plugin",
        in_sign="sssbs",
        out_sign="s",
        async_=True,
        method=self.convert,
    )
    host.bridge.add_method(
        "syntax_get", ".plugin", in_sign="s", out_sign="s", method=self.get_syntax
    )
    # install our cleaner in xml_tools only if nothing is set there yet
    if xml_tools.clean_xhtml is None:
        log.debug("Installing cleaning method")
        xml_tools.clean_xhtml = self.clean_xhtml
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
333 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
def _update_param_options(self):
    """Regenerate the parameter options from the registered syntaxes

    Every syntax which is not flagged as hidden becomes an ``<option/>``; options
    are sorted case-insensitively and the one equal to
    ``TextSyntaxes.default_syntax`` is marked as selected. The resulting XML is
    then sent to ``host.memory.update_params``.
    """
    registered = self.syntaxes
    visible_names = sorted(
        (
            name
            for name in registered
            if TextSyntaxes.OPT_HIDDEN not in registered[name]["flags"]
        ),
        key=lambda name: name.lower(),
    )

    preselected = TextSyntaxes.default_syntax
    option_elts = [
        '<option value="%s" %s/>'
        % (name, 'selected="true"' if name == preselected else "")
        for name in visible_names
    ]

    self.params_data["options"] = "\n".join(option_elts)
    params_xml = self.params % self.params_data
    self.host.memory.update_params(params_xml)
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
353 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
def get_current_syntax(self, profile):
    """Give the syntax currently selected in the parameters of a profile

    The value is read from the ``NAME`` parameter of the ``CATEGORY`` category.

    @param profile: %(doc_profile)s
    @return: profile selected syntax
    """
    memory = self.host.memory
    return memory.param_get_a(NAME, CATEGORY, profile_key=profile)
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
361 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
def _log_error(self, failure, action="converting syntax"):
    """Log an error message about a failure, then give the failure back

    @param failure: failure to report
    @param action: what was being done when the failure happened, inserted in
        the logged message
    @return: the ``failure`` argument, unchanged
    """
    message = f"Error while {action}: {failure}"
    log.error(message)
    return failure
5874da3811b7
plugin text syntaxes: log error on cleanXHTML failure
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
367 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
def clean_style(self, styles_raw: str) -> str:
    """Clean unsafe CSS styles

    A declaration is kept only when all of the following hold:
        - it contains exactly one ":" separating the property from its value
        - the property is in STYLES_WHITELIST
        - the value matches STYLES_ACCEPTED_VALUE
        - the value is not "none"
    Properties and values are lower-cased and stripped before being checked.

    @param styles_raw: CSS styles
    @return: cleaned styles, as "property: value" items joined with "; "
    """
    kept = []
    for declaration in styles_raw.split(";"):
        parts = declaration.split(":")
        if len(parts) != 2:
            # no colon at all, or more than one: the declaration is dropped
            continue
        prop = parts[0].lower().strip()
        css_value = parts[1].lower().strip()
        if (
            prop in STYLES_WHITELIST
            and STYLES_ACCEPTED_VALUE.match(css_value)
            and css_value != "none"
        ):
            kept.append(f"{prop}: {css_value}")
    return "; ".join(kept)
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
392 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
def clean_classes(self, classes_raw: str) -> str:
    """Remove any non whitelisted class

    Kept classes stay in the order in which they first appear in ``classes_raw``
    and duplicates are collapsed, so a given input always gives the same output.

    @param classes_raw: classes set on an element
    @return: remaining classes (can be empty string)
    """
    # dict.fromkeys de-duplicates while keeping first-seen order; joining a set
    # intersection directly would emit the classes in arbitrary order
    unique_classes = dict.fromkeys(classes_raw.split())
    return " ".join(name for name in unique_classes if name in SAFE_CLASSES)
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
400 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
401 def clean_xhtml(self, xhtml): |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
402 """Clean XHTML text by removing potentially dangerous/malicious parts |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
403 |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
404 @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
405 @return (unicode): cleaned XHTML |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
406 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
407 |
3028 | 408 if isinstance(xhtml, str): |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
409 try: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
410 xhtml_elt = html.fromstring(xhtml) |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
411 except etree.ParserError as e: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
412 if not xhtml.strip(): |
3028 | 413 return "" |
414 log.error("Can't clean XHTML: {xhtml}".format(xhtml=xhtml)) | |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
415 raise e |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
416 elif isinstance(xhtml, html.HtmlElement): |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
417 xhtml_elt = xhtml |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
418 else: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
419 log.error("Only strings and HtmlElements can be cleaned") |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
420 raise exceptions.DataError |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
421 cleaner = clean.Cleaner(style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS) |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
422 xhtml_elt = cleaner.clean_html(xhtml_elt) |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
423 for elt in xhtml_elt.xpath("//*[@style]"): |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
424 elt.set("style", self.clean_style(elt.get("style"))) |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
425 for elt in xhtml_elt.xpath("//*[@class]"): |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
426 elt.set("class", self.clean_classes(elt.get("class"))) |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
427 # we remove self-closing elements for non-void elements |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
428 for element in xhtml_elt.iter(tag=etree.Element): |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
429 if not element.text: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
430 if element.tag in VOID_ELEMENTS: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
431 element.text = None |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
432 else: |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
433 element.text = "" |
3028 | 434 return html.tostring(xhtml_elt, encoding=str, method="xml") |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
435 |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
def convert(
    self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None
):
    """Convert a text from one syntax to another, with XHTML as pivot format

    The text is first handed to the "to" callback of the source syntax, the
    result is optionally cleaned, then handed to the "from" callback of the
    destination syntax.

    @param text: text to convert
    @param syntax_from: source syntax (e.g. "markdown")
    @param syntax_to: dest syntax (e.g.: "XHTML")
    @param safe: clean resulting XHTML to avoid malicious code if True
    @param profile: needed only when syntax_from or syntax_to is set to
        _SYNTAX_CURRENT
    @return: Deferred firing with the converted text, trailing whitespace
        removed
    @raise exceptions.NotFound: syntax_from or syntax_to is not registered
    """
    # FIXME: convert should be able to handle domish.Element directly
    #        when dealing with XHTML
    # TODO: a way for parser to return parsing errors/warnings

    def normalise(syntax):
        # _SYNTAX_CURRENT is a placeholder, resolved through the profile
        if syntax == _SYNTAX_CURRENT:
            return self.get_current_syntax(profile)
        return syntax.lower().strip()

    syntax_from = normalise(syntax_from)
    syntax_to = normalise(syntax_to)

    registered = self.syntaxes
    # source syntax is checked first, then the destination one
    for wanted in (syntax_from, syntax_to):
        if wanted not in registered:
            raise exceptions.NotFound(wanted)

    if TextSyntaxes.OPT_NO_THREAD in registered[syntax_from]["flags"]:
        # the callback may return either a plain value or a Deferred
        conversion = defer.maybeDeferred(registered[syntax_from]["to"], text)
    else:
        conversion = deferToThread(registered[syntax_from]["to"], text)

    # TODO: keep only body element and change it to a div here ?

    if safe:
        conversion.addCallback(self.clean_xhtml)

    def from_xhtml_in_thread(xhtml):
        # the destination callback is looked up when the XHTML is available
        return deferToThread(registered[syntax_to]["from"], xhtml)

    if TextSyntaxes.OPT_NO_THREAD in registered[syntax_to]["flags"]:
        conversion.addCallback(registered[syntax_to]["from"])
    else:
        conversion.addCallback(from_xhtml_in_thread)

    def strip_trailing(converted):
        return converted.rstrip()

    # converters can add new lines that disturb the microblog change detection
    conversion.addCallback(strip_trailing)
    return conversion
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
486 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
def add_syntax(self, name, to_xhtml_cb, from_xhtml_cb, flags=None):
    """Register a new syntax in the manager

    The syntax is stored under its normalised key (name lowercased and
    stripped), then the parameter options are updated.

    @param name: unique name of the syntax
    @param to_xhtml_cb: callback to convert from syntax to XHTML
    @param from_xhtml_cb: callback to convert from XHTML to syntax
    @param flags: set of optional flags, can be:
        TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one)
        TextSyntaxes.OPT_HIDDEN: do not show in parameters
        TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting
            (the callback may then return a deferred)
    @raise ValueError: OPT_HIDDEN and OPT_DEFAULT are both set
    @raise exceptions.ConflictError: a syntax with the same key is already
        registered
    """
    if flags is None:
        flags = []

    is_hidden = TextSyntaxes.OPT_HIDDEN in flags
    if is_hidden and TextSyntaxes.OPT_DEFAULT in flags:
        message = "{} and {} are mutually exclusive".format(
            TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT
        )
        raise ValueError(message)

    registered = self.syntaxes
    key = name.lower().strip()
    if key in registered:
        message = "This syntax key already exists: {}".format(key)
        raise exceptions.ConflictError(message)

    entry = {
        "name": name,
        "to": to_xhtml_cb,
        "from": from_xhtml_cb,
        "flags": flags,
    }
    registered[key] = entry

    if TextSyntaxes.OPT_DEFAULT in flags:
        # the default is kept on the class itself, not on the instance
        TextSyntaxes.default_syntax = key

    self._update_param_options()
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
522 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
def get_syntax(self, name):
    """Get the syntax key corresponding to a name

    The name is normalised (lowercased and stripped) in the same way as when
    a syntax is registered.

    @param name: name of the syntax to look for
    @return: normalised key of the syntax
    @raise exceptions.NotFound: syntax doesn't exist
    """
    key = name.lower().strip()
    if key in self.syntaxes:
        return key
    # the missing key is attached to the exception, as done in convert()
    raise exceptions.NotFound(key)
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
532 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
533 def _remove_markups(self, xhtml): |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
534 """Remove XHTML markups from the given string. |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
535 |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
536 @param xhtml: the XHTML string to be cleaned |
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
537 @return: the cleaned string |
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
538 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
539 cleaner = clean.Cleaner(kill_tags=["style"]) |
4258
ba28ca268f4a
plugin text syntaxes: fix empty string handling in `_remove_markups`.
Goffi <goffi@goffi.org>
parents:
4074
diff
changeset
|
540 if not xhtml: |
ba28ca268f4a
plugin text syntaxes: fix empty string handling in `_remove_markups`.
Goffi <goffi@goffi.org>
parents:
4074
diff
changeset
|
541 return "" |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
542 cleaned = cleaner.clean_html(html.fromstring(xhtml)) |
3028 | 543 return html.tostring(cleaned, encoding=str, method="text") |