Mercurial > libervia-backend
annotate sat/plugins/plugin_misc_text_syntaxes.py @ 2786:be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 19 Jan 2019 11:39:02 +0100 |
parents | b17e6fa1e607 |
children | 148d30147890 |
rev | line source |
---|---|
1934
2daf7b4c6756
use of /usr/bin/env instead of /usr/bin/python in shebang
Goffi <goffi@goffi.org>
parents:
1867
diff
changeset
|
1 #!/usr/bin/env python2 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SAT plugin for managing various text syntaxes |
2771 | 5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
771 | 20 from sat.core.i18n import _, D_ |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
21 from sat.core.constants import Const as C |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
968
diff
changeset
|
22 from sat.core.log import getLogger |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
23 |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
968
diff
changeset
|
24 log = getLogger(__name__) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
26 from twisted.internet import defer |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
27 from twisted.internet.threads import deferToThread |
705
6c8a119dcc94
plugin text syntaxes: clean_xhtml now accept lxml's HtmlElement to avoid parsing two times the same xml
Goffi <goffi@goffi.org>
parents:
702
diff
changeset
|
28 from sat.core import exceptions |
2782
b17e6fa1e607
core (XMLUI): new XHTMLBox widget:
Goffi <goffi@goffi.org>
parents:
2781
diff
changeset
|
29 from sat.tools import xml_tools |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
30 |
1542
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
31 try: |
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
32 from lxml import html |
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
33 from lxml.html import clean |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
34 from lxml import etree |
1542
94901070478e
plugins: added new MissingModule exceptions to plugins using third party modules
Goffi <goffi@goffi.org>
parents:
1458
diff
changeset
|
35 except ImportError: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
36 raise exceptions.MissingModule( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
37 u"Missing module lxml, please download/install it from http://lxml.de/" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
38 ) |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
39 from cgi import escape |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
40 import re |
674
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
41 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
42 |
771 | 43 CATEGORY = D_("Composition") |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
44 NAME = "Syntax" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
45 _SYNTAX_XHTML = "XHTML" |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
46 _SYNTAX_CURRENT = "@CURRENT@" |
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
47 |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
48 # TODO: check/adapt following list |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
49 # list initialy based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
50 STYLES_WHITELIST = ( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
51 "azimuth", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
52 "background-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
53 "border-bottom-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
54 "border-collapse", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
55 "border-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
56 "border-left-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
57 "border-right-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
58 "border-top-color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
59 "clear", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
60 "color", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
61 "cursor", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
62 "direction", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
63 "display", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
64 "elevation", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
65 "float", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
66 "font", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
67 "font-family", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
68 "font-size", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
69 "font-style", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
70 "font-variant", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
71 "font-weight", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
72 "height", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
73 "letter-spacing", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
74 "line-height", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
75 "overflow", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
76 "pause", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
77 "pause-after", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
78 "pause-before", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
79 "pitch", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
80 "pitch-range", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
81 "richness", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
82 "speak", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
83 "speak-header", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
84 "speak-numeral", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
85 "speak-punctuation", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
86 "speech-rate", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
87 "stress", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
88 "text-align", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
89 "text-decoration", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
90 "text-indent", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
91 "unicode-bidi", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
92 "vertical-align", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
93 "voice-family", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
94 "volume", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
95 "white-space", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
96 "width", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
97 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
98 |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
99 # cf. https://www.w3.org/TR/html/syntax.html#void-elements |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
100 VOID_ELEMENTS = ( |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
101 "area", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
102 "base", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
103 "br", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
104 "col", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
105 "embed", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
106 "hr", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
107 "img", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
108 "input", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
109 "keygen", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
110 "link", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
111 "menuitem", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
112 "meta", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
113 "param", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
114 "source", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
115 "track", |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
116 "wbr") |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
117 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
118 SAFE_ATTRS = html.defs.safe_attrs.union(("style", "poster", "controls")) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
119 STYLES_VALUES_REGEX = ( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
120 r"^(" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
121 + "|".join( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
122 [ |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
123 "([a-z-]+)", # alphabetical names |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
124 "(#[0-9a-f]+)", # hex value |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
125 "(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))", # values with units (or not) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
126 "rgb\( *((\d+(.\d+)?), *){2}(\d+(.\d+)?) *\)", # rgb function |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
127 "rgba\( *((\d+(.\d+)?), *){3}(\d+(.\d+)?) *\)", # rgba function |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
128 ] |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
129 ) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
130 + ") *(!important)?$" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
131 ) # we accept "!important" at the end |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
132 STYLES_ACCEPTED_VALUE = re.compile(STYLES_VALUES_REGEX) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
133 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
134 PLUGIN_INFO = { |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
135 C.PI_NAME: "Text syntaxes", |
2780
85d3240a400f
plugin text syntaxes: changed import name to TEXT_SYNTAX (better with underscore for autocompletion)
Goffi <goffi@goffi.org>
parents:
2771
diff
changeset
|
136 C.PI_IMPORT_NAME: "TEXT_SYNTAXES", |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
137 C.PI_TYPE: "MISC", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
138 C.PI_PROTOCOLS: [], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
139 C.PI_DEPENDENCIES: [], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
140 C.PI_MAIN: "TextSyntaxes", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
2106
diff
changeset
|
141 C.PI_HANDLER: "no", |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
142 C.PI_DESCRIPTION: _( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
143 """Management of various text syntaxes (XHTML-IM, Markdown, etc)""" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
144 ), |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
145 } |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
146 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
147 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
148 class TextSyntaxes(object): |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
149 """ Text conversion class |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
150 XHTML utf-8 is used as intermediate language for conversions |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
151 """ |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
152 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
153 OPT_DEFAULT = "DEFAULT" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
154 OPT_HIDDEN = "HIDDEN" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
155 OPT_NO_THREAD = "NO_THREAD" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
156 SYNTAX_XHTML = _SYNTAX_XHTML |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
157 SYNTAX_MARKDOWN = "markdown" |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
158 SYNTAX_TEXT = "text" |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
159 syntaxes = {} |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
160 default_syntax = SYNTAX_XHTML |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
161 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
162 params = """ |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
163 <params> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
164 <individual> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
165 <category name="%(category_name)s" label="%(category_label)s"> |
968
75f3b3b430ff
tools, frontends, memory: param definition and XMLUI handle multi-selection for list widgets:
souliane <souliane@mailoo.org>
parents:
852
diff
changeset
|
166 <param name="%(name)s" label="%(label)s" type="list" security="0"> |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
167 %(options)s |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
168 </param> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
169 </category> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
170 </individual> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
171 </params> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
172 """ |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
173 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
174 params_data = { |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
175 "category_name": CATEGORY, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
176 "category_label": _(CATEGORY), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
177 "name": NAME, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
178 "label": _(NAME), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
179 "syntaxes": syntaxes, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
180 } |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
181 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
182 def __init__(self, host): |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
968
diff
changeset
|
183 log.info(_("Text syntaxes plugin initialization")) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
184 self.host = host |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
185 self.addSyntax( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
186 self.SYNTAX_XHTML, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
187 lambda xhtml: defer.succeed(xhtml), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
188 lambda xhtml: defer.succeed(xhtml), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
189 TextSyntaxes.OPT_NO_THREAD, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
190 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
1811
diff
changeset
|
191 # TODO: text => XHTML should add <a/> to url like in frontends |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
1811
diff
changeset
|
192 # it's probably best to move sat_frontends.tools.strings to sat.tools.common or similar |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
193 self.addSyntax( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
194 self.SYNTAX_TEXT, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
195 lambda text: escape(text), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
196 lambda xhtml: self._removeMarkups(xhtml), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
197 [TextSyntaxes.OPT_HIDDEN], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
198 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
199 try: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
200 import markdown, html2text |
841
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
201 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
202 def _html2text(html, baseurl=""): |
841
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
203 h = html2text.HTML2Text(baseurl=baseurl) |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
204 h.body_width = 0 # do not truncate the lines, it breaks the long URLs |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
205 return h.handle(html) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
206 |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
207 self.addSyntax( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
208 self.SYNTAX_MARKDOWN, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
209 markdown.markdown, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
210 _html2text, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
211 [TextSyntaxes.OPT_DEFAULT], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
212 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
213 except ImportError: |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
214 log.warning(u"markdown or html2text not found, can't use Markdown syntax") |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
215 log.info( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
216 u"You can download/install them from https://pythonhosted.org/Markdown/ and https://github.com/Alir3z4/html2text/" |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
217 ) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
218 host.bridge.addMethod( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
219 "syntaxConvert", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
220 ".plugin", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
221 in_sign="sssbs", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
222 out_sign="s", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
223 async=True, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
224 method=self.convert, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
225 ) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
226 host.bridge.addMethod( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
227 "syntaxGet", ".plugin", in_sign="s", out_sign="s", method=self.getSyntax |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
228 ) |
2782
b17e6fa1e607
core (XMLUI): new XHTMLBox widget:
Goffi <goffi@goffi.org>
parents:
2781
diff
changeset
|
229 if xml_tools.cleanXHTML is None: |
b17e6fa1e607
core (XMLUI): new XHTMLBox widget:
Goffi <goffi@goffi.org>
parents:
2781
diff
changeset
|
230 log.debug(u"Installing cleaning method") |
b17e6fa1e607
core (XMLUI): new XHTMLBox widget:
Goffi <goffi@goffi.org>
parents:
2781
diff
changeset
|
231 xml_tools.cleanXHTML = self.cleanXHTML |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
232 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
233 def _updateParamOptions(self): |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
234 data_synt = TextSyntaxes.syntaxes |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
235 default_synt = TextSyntaxes.default_syntax |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
236 syntaxes = [] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
237 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
238 for syntax in data_synt.keys(): |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
239 flags = data_synt[syntax]["flags"] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
240 if TextSyntaxes.OPT_HIDDEN not in flags: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
241 syntaxes.append(syntax) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
242 |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
243 syntaxes.sort(key=lambda synt: synt.lower()) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
244 options = [] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
245 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
246 for syntax in syntaxes: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
247 selected = 'selected="true"' if syntax == default_synt else "" |
968
75f3b3b430ff
tools, frontends, memory: param definition and XMLUI handle multi-selection for list widgets:
souliane <souliane@mailoo.org>
parents:
852
diff
changeset
|
248 options.append(u'<option value="%s" %s/>' % (syntax, selected)) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
249 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
250 TextSyntaxes.params_data["options"] = u"\n".join(options) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
251 self.host.memory.updateParams(TextSyntaxes.params % TextSyntaxes.params_data) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
252 |
702
a25db3fe3959
plugin XEP-0071: rich messages management for sendMessage
Goffi <goffi@goffi.org>
parents:
699
diff
changeset
|
253 def getCurrentSyntax(self, profile): |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
254 """ Return the selected syntax for the given profile |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
255 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
256 @param profile: %(doc_profile)s |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
257 @return: profile selected syntax |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
258 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
259 return self.host.memory.getParamA(NAME, CATEGORY, profile_key=profile) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
260 |
2106
5874da3811b7
plugin text syntaxes: log error on cleanXHTML failure
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
261 def _logError(self, failure, action=u"converting syntax"): |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
262 log.error( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
263 u"Error while {action}: {failure}".format(action=action, failure=failure) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
264 ) |
2106
5874da3811b7
plugin text syntaxes: log error on cleanXHTML failure
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
265 return failure |
5874da3811b7
plugin text syntaxes: log error on cleanXHTML failure
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
266 |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
267 def cleanStyle(self, styles): |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
268 """"Clean unsafe CSS styles |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
269 |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
270 Remove styles not in the whitelist, or where the value doesn't match the regex |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
271 @param styles_raw(unicode): CSS styles |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
272 @return (unicode): cleaned styles |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
273 """ |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
274 styles = styles.split(";") |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
275 cleaned_styles = [] |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
276 for style in styles: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
277 try: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
278 key, value = style.split(":") |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
279 except ValueError: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
280 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
281 key = key.lower().strip() |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
282 if key not in STYLES_WHITELIST: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
283 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
284 value = value.lower().strip() |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
285 if not STYLES_ACCEPTED_VALUE.match(value): |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
286 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
287 if value == "none": |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
288 continue |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
289 cleaned_styles.append((key, value)) |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
290 return "; ".join( |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
291 ["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles] |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
292 ) |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
293 |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
294 def cleanXHTML(self, xhtml): |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
295 """Clean XHTML text by removing potentially dangerous/malicious parts |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
296 |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
297 @param xhtml(unicode, lxml.etree._Element): raw HTML/XHTML text to clean |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
298 @return (unicode): cleaned XHTML |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
299 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
300 |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
301 if isinstance(xhtml, basestring): |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
302 try: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
303 xhtml_elt = html.fromstring(xhtml) |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
304 except etree.ParserError as e: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
305 if not xhtml.strip(): |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
306 return u"" |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
307 log.error(u"Can't clean XHTML: {xhtml}".format(xhtml=xhtml)) |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
308 raise e |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
309 elif isinstance(xhtml, html.HtmlElement): |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
310 xhtml_elt = xhtml |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
311 else: |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
312 log.error("Only strings and HtmlElements can be cleaned") |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
313 raise exceptions.DataError |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
314 cleaner = clean.Cleaner( |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
315 style=False, add_nofollow=False, safe_attrs=SAFE_ATTRS |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
316 ) |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
317 xhtml_elt = cleaner.clean_html(xhtml_elt) |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
318 for elt in xhtml_elt.xpath("//*[@style]"): |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
319 elt.set("style", self.cleanStyle(elt.get("style"))) |
2786
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
320 # we remove self-closing elements for non-void elements |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
321 for element in xhtml_elt.iter(tag=etree.Element): |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
322 if not element.text: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
323 if element.tag in VOID_ELEMENTS: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
324 element.text = None |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
325 else: |
be8405795e09
plugin text syntaxes: handle empty content in cleanXHTML + don't use self-closing tags for non-void elements.
Goffi <goffi@goffi.org>
parents:
2782
diff
changeset
|
326 element.text = u'' |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
327 return html.tostring(xhtml_elt, encoding=unicode, method="xml") |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
328 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
329 def convert( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
330 self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
331 ): |
1803
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
332 """Convert a text between two syntaxes |
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
333 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
334 @param text: text to convert |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
335 @param syntax_from: source syntax (e.g. "markdown") |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
336 @param syntax_to: dest syntax (e.g.: "XHTML") |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
337 @param safe: clean resulting XHTML to avoid malicious code if True |
2781
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
338 @param profile: needed only when syntax_from or syntax_to is set to |
816be0a23877
plugin text syntaxes: cleanStyle is an independent method, cleanXHTML is now blocking (no need to launch thread for that)
Goffi <goffi@goffi.org>
parents:
2780
diff
changeset
|
339 _SYNTAX_CURRENT |
1803
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
340 @return(unicode): converted text |
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
341 """ |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
342 # FIXME: convert should be abled to handle domish.Element directly |
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
343 # when dealing with XHTML |
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
344 # TODO: a way for parser to return parsing errors/warnings |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
345 |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
346 if syntax_from == _SYNTAX_CURRENT: |
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
347 syntax_from = self.getCurrentSyntax(profile) |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
348 else: |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
349 syntax_from = syntax_from.lower().strip() |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
350 if syntax_to == _SYNTAX_CURRENT: |
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
351 syntax_to = self.getCurrentSyntax(profile) |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
352 else: |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
353 syntax_to = syntax_to.lower().strip() |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
354 syntaxes = TextSyntaxes.syntaxes |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
355 if syntax_from not in syntaxes: |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
356 raise exceptions.NotFound(syntax_from) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
357 if syntax_to not in syntaxes: |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
358 raise exceptions.NotFound(syntax_to) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
359 d = None |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
360 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
361 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_from]["flags"]: |
1803
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
362 d = defer.maybeDeferred(syntaxes[syntax_from]["to"], text) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
363 else: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
364 d = deferToThread(syntaxes[syntax_from]["to"], text) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
365 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
366 # TODO: keep only body element and change it to a div here ? |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
367 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
368 if safe: |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
369 d.addCallback(self.cleanXHTML) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
370 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
371 if TextSyntaxes.OPT_NO_THREAD in syntaxes[syntax_to]["flags"]: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
372 d.addCallback(syntaxes[syntax_to]["from"]) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
373 else: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
374 d.addCallback(lambda xhtml: deferToThread(syntaxes[syntax_to]["from"], xhtml)) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
375 |
836
2cc0201b4613
plugin text_syntaxes: rstrip the conversion result to avoid new lines systematically added by converters (e.g. html2text do this)
souliane <souliane@mailoo.org>
parents:
832
diff
changeset
|
376 # converters can add new lines that disturb the microblog change detection |
2cc0201b4613
plugin text_syntaxes: rstrip the conversion result to avoid new lines systematically added by converters (e.g. html2text do this)
souliane <souliane@mailoo.org>
parents:
832
diff
changeset
|
377 d.addCallback(lambda text: text.rstrip()) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
378 return d |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
379 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
380 def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags=None): |
1803
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
381 """Add a new syntax to the manager |
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
382 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
383 @param name: unique name of the syntax |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
384 @param to_xhtml_cb: callback to convert from syntax to XHTML |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
385 @param from_xhtml_cb: callback to convert from XHTML to syntax |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
386 @param flags: set of optional flags, can be: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
387 TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
388 TextSyntaxes.OPT_HIDDEN: do not show in parameters |
1803
14a97a5fe1c0
plugin text syntaxes: a non blocking syntax callback can now return a unicode directly instead of a Deferred
Goffi <goffi@goffi.org>
parents:
1766
diff
changeset
|
389 TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback may then return a deferred) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
390 """ |
1805
3c40fa0dcd7a
pluging text syntaxes: various minor improvments:
Goffi <goffi@goffi.org>
parents:
1803
diff
changeset
|
391 flags = flags if flags is not None else [] |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
392 if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
393 raise ValueError( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
394 u"{} and {} are mutually exclusive".format( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
395 TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
396 ) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
397 ) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
398 |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
399 syntaxes = TextSyntaxes.syntaxes |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
400 key = name.lower().strip() |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
401 if key in syntaxes: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
402 raise exceptions.ConflictError( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
403 u"This syntax key already exists: {}".format(key) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
404 ) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
405 syntaxes[key] = { |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
406 "name": name, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
407 "to": to_xhtml_cb, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
408 "from": from_xhtml_cb, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
409 "flags": flags, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
410 } |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
411 if TextSyntaxes.OPT_DEFAULT in flags: |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
412 TextSyntaxes.default_syntaxe = key |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
413 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
414 self._updateParamOptions() |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
415 |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
416 def getSyntax(self, name): |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
417 """get syntax key corresponding to a name |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
418 |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
419 @raise exceptions.NotFound: syntax doesn't exist |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
420 """ |
2324
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
421 key = name.lower().strip() |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
422 if key in self.syntaxes: |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
423 return key |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
424 raise exceptions.NotFound |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
425 |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
426 def _removeMarkups(self, xhtml): |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
427 """Remove XHTML markups from the given string. |
fe922e6fabd4
plugin text syntaxes: various improvments:
Goffi <goffi@goffi.org>
parents:
2145
diff
changeset
|
428 |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
429 @param xhtml: the XHTML string to be cleaned |
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
430 @return: the cleaned string |
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
431 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
432 cleaner = clean.Cleaner(kill_tags=["style"]) |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
433 cleaned = cleaner.clean_html(html.fromstring(xhtml)) |
852
4cc55e05266d
plugin text syntaxes: fixed cleaners encoding
Goffi <goffi@goffi.org>
parents:
841
diff
changeset
|
434 return html.tostring(cleaned, encoding=unicode, method="text") |