Mercurial > libervia-backend
annotate libervia/backend/plugins/plugin_misc_text_syntaxes.py @ 4309:b56b1eae7994
component email gateway: add multicasting:
XEP-0033 multicasting is now supported both for incoming and outgoing messages. XEP-0033
metadata are converted to suitable Email headers and vice versa.
Email address and JID are both supported, and delivery is done by the gateway when
suitable on incoming messages.
rel 450
author | Goffi <goffi@goffi.org> |
---|---|
date | Thu, 26 Sep 2024 16:12:01 +0200 |
parents | 0d7bb4df2343 |
children |
rev | line source |
---|---|
3028 | 1 #!/usr/bin/env python3 |
3137 | 2 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SAT plugin for managing various text syntaxes |
3479 | 5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
20 from functools import partial |
3075
501a1a3c8594
plugin text syntaxes: don't use anymore deprecated cgi.escape
Goffi <goffi@goffi.org>
parents:
3040
diff
changeset
|
21 from html import escape |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
22 import re |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
23 from typing import Set |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
24 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 from twisted.internet import defer |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
26 from twisted.internet.threads import deferToThread |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
27 |
4071
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
28 from libervia.backend.core import exceptions |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
29 from libervia.backend.core.constants import Const as C |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
30 from libervia.backend.core.i18n import D_, _ |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
31 from libervia.backend.core.log import getLogger |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4059
diff
changeset
|
32 from libervia.backend.tools import xml_tools |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
33 |
1542
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
34 try: |
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
35 from lxml import html |
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
36 from lxml.html import clean |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
37 from lxml import etree |
1542
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
38 except ImportError: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
39 raise exceptions.MissingModule( |
3028 | 40 "Missing module lxml, please download/install it from http://lxml.de/" |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
41 ) |
674
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
42 |
2873
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
43 log = getLogger(__name__) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
44 |
771 | 45 CATEGORY = D_("Composition") |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
46 NAME = "Syntax" |
2869
148d30147890
plugin text syntaxes: fixed default syntax
Goffi <goffi@goffi.org>
parents:
2786
diff
changeset
|
47 _SYNTAX_XHTML = "xhtml" # must be lower case |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
48 _SYNTAX_CURRENT = "@CURRENT@" |
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
49 |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
50 # TODO: check/adapt following list |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
51 # list initially based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
52 STYLES_WHITELIST = ( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
53 "azimuth", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
54 "background-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
55 "border-bottom-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
56 "border-collapse", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
57 "border-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
58 "border-left-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
59 "border-right-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
60 "border-top-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
61 "clear", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
62 "color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
63 "cursor", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
64 "direction", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
65 "display", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
66 "elevation", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
67 "float", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
68 "font", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
69 "font-family", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
70 "font-size", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
71 "font-style", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
72 "font-variant", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
73 "font-weight", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
74 "height", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
75 "letter-spacing", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
76 "line-height", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
77 "overflow", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
78 "pause", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
79 "pause-after", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
80 "pause-before", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
81 "pitch", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
82 "pitch-range", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
83 "richness", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
84 "speak", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
85 "speak-header", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
86 "speak-numeral", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
87 "speak-punctuation", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
88 "speech-rate", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
89 "stress", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
90 "text-align", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
91 "text-decoration", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
92 "text-indent", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
93 "unicode-bidi", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
94 "vertical-align", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
95 "voice-family", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
96 "volume", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
97 "white-space", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
98 "width", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
99 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
100 |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
101 # cf. https://www.w3.org/TR/html/syntax.html#void-elements |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
102 VOID_ELEMENTS = ( |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
103 "area", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
104 "base", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
105 "br", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
106 "col", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
107 "embed", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
108 "hr", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
109 "img", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
110 "input", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
111 "keygen", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
112 "link", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
113 "menuitem", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
114 "meta", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
115 "param", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
116 "source", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
117 "track", |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
118 "wbr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
119 ) |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
120 |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
121 SAFE_ATTRS = html.defs.safe_attrs.union({"style", "poster", "controls"}) - {"id"} |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
122 SAFE_CLASSES = { |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
123 # those classes are used for code highlighting |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
124 "bp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
125 "c", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
126 "ch", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
127 "cm", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
128 "cp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
129 "cpf", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
130 "cs", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
131 "dl", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
132 "err", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
133 "fm", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
134 "gd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
135 "ge", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
136 "get", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
137 "gh", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
138 "gi", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
139 "go", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
140 "gp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
141 "gr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
142 "gs", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
143 "gt", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
144 "gu", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
145 "highlight", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
146 "hll", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
147 "il", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
148 "k", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
149 "kc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
150 "kd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
151 "kn", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
152 "kp", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
153 "kr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
154 "kt", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
155 "m", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
156 "mb", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
157 "mf", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
158 "mh", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
159 "mi", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
160 "mo", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
161 "na", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
162 "nb", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
163 "nc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
164 "nd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
165 "ne", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
166 "nf", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
167 "ni", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
168 "nl", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
169 "nn", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
170 "no", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
171 "nt", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
172 "nv", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
173 "o", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
174 "ow", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
175 "s", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
176 "sa", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
177 "sb", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
178 "sc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
179 "sd", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
180 "se", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
181 "sh", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
182 "si", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
183 "sr", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
184 "ss", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
185 "sx", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
186 "vc", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
187 "vg", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
188 "vi", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
189 "vm", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
190 "w", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
191 "write", |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
192 } |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
193 STYLES_VALUES_REGEX = ( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
194 r"^(" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
195 + "|".join( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
196 [ |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
197 "([a-z-]+)", # alphabetical names |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
198 "(#[0-9a-f]+)", # hex value |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
199 "(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))", # values with units (or not) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
200 "rgb\( *((\d+(.\d+)?), *){2}(\d+(.\d+)?) *\)", # rgb function |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
201 "rgba\( *((\d+(.\d+)?), *){3}(\d+(.\d+)?) *\)", # rgba function |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
202 ] |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
203 ) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
204 + ") *(!important)?$" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
205 ) # we accept "!important" at the end |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
206 STYLES_ACCEPTED_VALUE = re.compile(STYLES_VALUES_REGEX) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
207 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
208 PLUGIN_INFO = { |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
209 C.PI_NAME: "Text syntaxes", |
2780
85d3240a400f
plugin text syntaxes: changed import name to TEXT_SYNTAX (better with underscore for autocompletion)
Goffi <goffi@goffi.org>
parents:
2771
diff
changeset
|
210 C.PI_IMPORT_NAME: "TEXT_SYNTAXES", |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
211 C.PI_TYPE: "MISC", |
3726
33d75cd3c371
plugin XEP-0060, XEP-0163, XEP-0277, text syntaxes: make those plugins usable with components
Goffi <goffi@goffi.org>
parents:
3709
diff
changeset
|
212 C.PI_MODES: C.PLUG_MODE_BOTH, |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
213 C.PI_PROTOCOLS: [], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
214 C.PI_DEPENDENCIES: [], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
215 C.PI_MAIN: "TextSyntaxes", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
216 C.PI_HANDLER: "no", |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
217 C.PI_DESCRIPTION: _( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
218 """Management of various text syntaxes (XHTML-IM, Markdown, etc)""" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
219 ), |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
220 } |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
221 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
222 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
223 class TextSyntaxes(object): |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
224 """Text conversion class |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
225 XHTML utf-8 is used as intermediate language for conversions |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
226 """ |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
227 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
    # Flags which can be given to add_syntax when a syntax is registered.
    # OPT_DEFAULT is passed when Markdown is registered in __init__
    # NOTE(review): presumably marks the syntax as the default one; confirm
    #   against add_syntax
    OPT_DEFAULT = "DEFAULT"
    # syntaxes carrying OPT_HIDDEN are left out of the options list built by
    # _update_param_options
    OPT_HIDDEN = "HIDDEN"
    # OPT_NO_THREAD is passed for the XHTML identity conversion in __init__
    # NOTE(review): presumably asks the converter to call the callbacks directly
    #   instead of in a thread; confirm against convert
    OPT_NO_THREAD = "NO_THREAD"
    # names of the syntaxes registered in __init__
    SYNTAX_XHTML = _SYNTAX_XHTML
    SYNTAX_MARKDOWN = "markdown"
    SYNTAX_TEXT = "text"
    # default_syntax must be lower case
    default_syntax = SYNTAX_XHTML
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
236 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
237 def __init__(self, host): |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
968
diff
changeset
|
238 log.info(_("Text syntaxes plugin initialization")) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
239 self.host = host |
3620
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
240 self.syntaxes = {} |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
241 |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
242 self.params = """ |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
243 <params> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
244 <individual> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
245 <category name="%(category_name)s" label="%(category_label)s"> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
246 <param name="%(name)s" label="%(label)s" type="list" security="0"> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
247 %(options)s |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
248 </param> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
249 </category> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
250 </individual> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
251 </params> |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
252 """ |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
253 |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
254 self.params_data = { |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
255 "category_name": CATEGORY, |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
256 "category_label": _(CATEGORY), |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
257 "name": NAME, |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
258 "label": _(NAME), |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
259 "syntaxes": self.syntaxes, |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
260 } |
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
261 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
262 self.add_syntax( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
263 self.SYNTAX_XHTML, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
264 lambda xhtml: defer.succeed(xhtml), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
265 lambda xhtml: defer.succeed(xhtml), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
266 TextSyntaxes.OPT_NO_THREAD, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
267 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
1811
diff
changeset
|
268 # TODO: text => XHTML should add <a/> to url like in frontends |
4074
26b7ed2817da
refactoring: rename `sat_frontends` to `libervia.frontends`
Goffi <goffi@goffi.org>
parents:
4071
diff
changeset
|
269 # it's probably best to move libervia.frontends.tools.strings to sat.tools.common or similar |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
270 self.add_syntax( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
271 self.SYNTAX_TEXT, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
272 lambda text: escape(text), |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
273 lambda xhtml: self._remove_markups(xhtml), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
274 [TextSyntaxes.OPT_HIDDEN], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
275 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
276 try: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
277 import markdown, html2text |
2873
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
278 from markdown.extensions import Extension |
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
279 |
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
280 # XXX: we disable raw HTML parsing by default, to avoid parsing error |
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
281 # when the user is not aware of markdown and HTML |
e1207b8ad97c
plugin text syntaxes: disable raw HTML parsing in mardown by default
Goffi <goffi@goffi.org>
parents:
2869
diff
changeset
|
282 class EscapeHTML(Extension): |
4059
00dbc3370d35
plugin text syntaxes: fix `EscapeHTML` following massive snake_case renaming
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
283 def extendMarkdown(self, md): |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
284 md.preprocessors.deregister("html_block") |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
285 md.inlinePatterns.deregister("html") |
841
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
286 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
287 def _html2text(html, baseurl=""): |
841
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
288 h = html2text.HTML2Text(baseurl=baseurl) |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
289 h.body_width = 0 # do not truncate the lines, it breaks the long URLs |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
290 return h.handle(html) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
291 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
292 self.add_syntax( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
293 self.SYNTAX_MARKDOWN, |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
294 partial( |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
295 markdown.markdown, |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
296 extensions=[ |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
297 EscapeHTML(), |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
298 "nl2br", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
299 "codehilite", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
300 "fenced_code", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
301 "sane_lists", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
302 "tables", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
303 ], |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
304 extension_configs={ |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
305 "codehilite": { |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
306 "css_class": "highlight", |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
307 } |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
308 }, |
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
309 ), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
310 _html2text, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
311 [TextSyntaxes.OPT_DEFAULT], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
312 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
313 except ImportError: |
3028 | 314 log.warning("markdown or html2text not found, can't use Markdown syntax") |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
315 log.info( |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
316 "You can download/install them from https://pythonhosted.org/Markdown/ " |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
317 "and https://github.com/Alir3z4/html2text/" |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
318 ) |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
319 host.bridge.add_method( |
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
320 "syntax_convert", |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
321 ".plugin", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
322 in_sign="sssbs", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
323 out_sign="s", |
3028 | 324 async_=True, |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
325 method=self.convert, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
326 ) |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
327 host.bridge.add_method( |
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
328 "syntax_get", ".plugin", in_sign="s", out_sign="s", method=self.get_syntax |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
329 ) |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
330 if xml_tools.clean_xhtml is None: |
3028 | 331 log.debug("Installing cleaning method") |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
332 xml_tools.clean_xhtml = self.clean_xhtml |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
333 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
334 def _update_param_options(self): |
3620
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
335 data_synt = self.syntaxes |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
336 default_synt = TextSyntaxes.default_syntax |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
337 syntaxes = [] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
338 |
3028 | 339 for syntax in list(data_synt.keys()): |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
340 flags = data_synt[syntax]["flags"] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
341 if TextSyntaxes.OPT_HIDDEN not in flags: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
342 syntaxes.append(syntax) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
343 |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
344 syntaxes.sort(key=lambda synt: synt.lower()) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
345 options = [] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
346 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
347 for syntax in syntaxes: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
348 selected = 'selected="true"' if syntax == default_synt else "" |
3028 | 349 options.append('<option value="%s" %s/>' % (syntax, selected)) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
350 |
3620
f568f304c982
plugin text syntaxes: remove side effect on init:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
351 self.params_data["options"] = "\n".join(options) |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
352 self.host.memory.update_params(self.params % self.params_data) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
353 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
354 def get_current_syntax(self, profile): |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
355 """Return the selected syntax for the given profile |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
356 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
357 @param profile: %(doc_profile)s |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
358 @return: profile selected syntax |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
359 """ |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
360 return self.host.memory.param_get_a(NAME, CATEGORY, profile_key=profile) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
361 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
362 def _log_error(self, failure, action="converting syntax"): |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
363 log.error( |
3028 | 364 "Error while {action}: {failure}".format(action=action, failure=failure) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
365 ) |
2106
5874da3811b7
plugin text syntaxes: log error on cleanXHTML failure
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
366 return failure |
5874da3811b7
plugin text syntaxes: log error on cleanXHTML failure
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
367 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
368 def clean_style(self, styles_raw: str) -> str: |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
369 """ "Clean unsafe CSS styles |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
370 |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
371 Remove styles not in the whitelist, or where the value doesn't match the regex |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
372 @param styles_raw: CSS styles |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
373 @return: cleaned styles |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
374 """ |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
375 styles: List[str] = styles_raw.split(";") |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
376 cleaned_styles = [] |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
377 for style in styles: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
378 try: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
379 key, value = style.split(":") |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
380 except ValueError: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
381 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
382 key = key.lower().strip() |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
383 if key not in STYLES_WHITELIST: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
384 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
385 value = value.lower().strip() |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
386 if not STYLES_ACCEPTED_VALUE.match(value): |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
387 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
388 if value == "none": |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
389 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
390 cleaned_styles.append((key, value)) |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
391 return "; ".join(["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]) |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
392 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
393 def clean_classes(self, classes_raw: str) -> str: |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
394 """Remove any non whitelisted class |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
395 |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
396 @param classes_raw: classes set on an element |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
397 @return: remaining classes (can be empty string) |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
398 """ |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
399 return " ".join(SAFE_CLASSES.intersection(classes_raw.split())) |
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
400 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
401 def clean_xhtml(self, xhtml): |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
402 """Clean XHTML text by removing potentially dangerous/malicious parts |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
403 |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
404 @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
405 @return (unicode): cleaned XHTML |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
406 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
407 |
3028 | 408 if isinstance(xhtml, str): |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
409 try: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
410 xhtml_elt = html.fromstring(xhtml) |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
411 except etree.ParserError as e: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
412 if not xhtml.strip(): |
3028 | 413 return "" |
414 log.error("Can't clean XHTML: {xhtml}".format(xhtml=xhtml)) | |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
415 raise e |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
416 elif isinstance(xhtml, html.HtmlElement): |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
417 xhtml_elt = xhtml |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
418 else: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
419 log.error("Only strings and HtmlElements can be cleaned") |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
420 raise exceptions.DataError |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
421 cleaner = clean.Cleaner(style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS) |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
422 xhtml_elt = cleaner.clean_html(xhtml_elt) |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
423 for elt in xhtml_elt.xpath("//*[@style]"): |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
424 elt.set("style", self.clean_style(elt.get("style"))) |
3693
0bbdc50aa405
plugin text syntaxes: remove `id` attributes and whitelist allowed classes:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
425 for elt in xhtml_elt.xpath("//*[@class]"): |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
426 elt.set("class", self.clean_classes(elt.get("class"))) |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
427 # we remove self-closing elements for non-void elements |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
428 for element in xhtml_elt.iter(tag=etree.Element): |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
429 if not element.text: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
430 if element.tag in VOID_ELEMENTS: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
431 element.text = None |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
432 else: |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
433 element.text = "" |
3028 | 434 return html.tostring(xhtml_elt, encoding=str, method="xml") |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
435 |
4270
0d7bb4df2343
Reformatted code base using black.
Goffi <goffi@goffi.org>
parents:
4258
diff
changeset
|
def convert(
    self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None
):
    """Convert a text from one registered syntax to another, going through XHTML

    The text is first converted to XHTML with the "to" callback of the source
    syntax, then converted with the "from" callback of the destination syntax.

    @param text: text to convert
    @param syntax_from: source syntax (e.g. "markdown"), or _SYNTAX_CURRENT to
        use the value returned by get_current_syntax for the profile
    @param syntax_to: dest syntax (e.g.: "XHTML"), or _SYNTAX_CURRENT to use the
        value returned by get_current_syntax for the profile
    @param safe: if True, the intermediate XHTML is passed through clean_xhtml
        to avoid malicious code
    @param profile: needed only when syntax_from or syntax_to is set to
        _SYNTAX_CURRENT
    @return (Deferred): fires with the converted text, right-stripped
    @raise exceptions.NotFound: syntax_from or syntax_to is not in self.syntaxes
    """
    # FIXME: convert should be able to handle domish.Element directly
    #        when dealing with XHTML
    # TODO: a way for parser to return parsing errors/warnings

    def resolve(syntax):
        # _SYNTAX_CURRENT stands for the syntax of the profile; any other value
        # is lowercased and stripped before being looked up
        if syntax == _SYNTAX_CURRENT:
            return self.get_current_syntax(profile)
        return syntax.lower().strip()

    syntax_from = resolve(syntax_from)
    syntax_to = resolve(syntax_to)

    registry = self.syntaxes
    for wanted in (syntax_from, syntax_to):
        if wanted not in registry:
            raise exceptions.NotFound(wanted)

    source = registry[syntax_from]
    # callbacks flagged with OPT_NO_THREAD are called through maybeDeferred,
    # all the other ones are deferred to a thread
    launch = (
        defer.maybeDeferred
        if TextSyntaxes.OPT_NO_THREAD in source["flags"]
        else deferToThread
    )
    conversion = launch(source["to"], text)

    # TODO: keep only body element and change it to a div here ?

    if safe:
        conversion.addCallback(self.clean_xhtml)

    if TextSyntaxes.OPT_NO_THREAD in registry[syntax_to]["flags"]:
        conversion.addCallback(registry[syntax_to]["from"])
    else:
        conversion.addCallback(
            lambda xhtml: deferToThread(registry[syntax_to]["from"], xhtml)
        )

    # converters can add new lines that disturb the microblog change detection
    conversion.addCallback(lambda converted: converted.rstrip())
    return conversion
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
486 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
def add_syntax(self, name, to_xhtml_cb, from_xhtml_cb, flags=None):
    """Register a new syntax in the manager

    The syntax is stored in self.syntaxes under its lowercased and stripped
    name, then _update_param_options is called.

    @param name: unique name of the syntax
    @param to_xhtml_cb: callback to convert from syntax to XHTML
    @param from_xhtml_cb: callback to convert from XHTML to syntax
    @param flags: set of optional flags, can be:
        TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one)
        TextSyntaxes.OPT_HIDDEN: do not show in parameters
        TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the
            callback may then return a deferred)
    @raise ValueError: OPT_HIDDEN and OPT_DEFAULT are both present in flags
    @raise exceptions.ConflictError: the key is already present in self.syntaxes
    """
    if flags is None:
        flags = []

    if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags:
        message = "{} and {} are mutually exclusive".format(
            TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT
        )
        raise ValueError(message)

    registry = self.syntaxes
    key = name.lower().strip()
    if key in registry:
        raise exceptions.ConflictError(
            "This syntax key already exists: {}".format(key)
        )

    # the original name is kept untouched in the entry, only the key is
    # normalised
    entry = {
        "name": name,
        "to": to_xhtml_cb,
        "from": from_xhtml_cb,
        "flags": flags,
    }
    registry[key] = entry

    if TextSyntaxes.OPT_DEFAULT in flags:
        # the default syntax is set on the class itself, not on the instance
        TextSyntaxes.default_syntax = key

    self._update_param_options()
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
522 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
def get_syntax(self, name):
    """Get the syntax key corresponding to a name

    The key is the lowercased and stripped name.

    @param name: name of the syntax
    @return: key under which the syntax is stored in self.syntaxes
    @raise exceptions.NotFound: syntax doesn't exist
    """
    key = name.lower().strip()
    if key not in self.syntaxes:
        # attach the key which was looked up, so the failure is self-explanatory
        raise exceptions.NotFound(key)
    return key
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
532 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3726
diff
changeset
|
533 def _remove_markups(self, xhtml): |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
534 """Remove XHTML markups from the given string. |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
535 |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
536 @param xhtml: the XHTML string to be cleaned |
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
537 @return: the cleaned string |
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
538 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
539 cleaner = clean.Cleaner(kill_tags=["style"]) |
4258
ba28ca268f4a
plugin text syntaxes: fix empty string handling in `_remove_markups`.
Goffi <goffi@goffi.org>
parents:
4074
diff
changeset
|
540 if not xhtml: |
ba28ca268f4a
plugin text syntaxes: fix empty string handling in `_remove_markups`.
Goffi <goffi@goffi.org>
parents:
4074
diff
changeset
|
541 return "" |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
542 cleaned = cleaner.clean_html(html.fromstring(xhtml)) |
3028 | 543 return html.tostring(cleaned, encoding=str, method="text") |