Mercurial > libervia-backend
annotate src/plugins/plugin_misc_text_syntaxes.py @ 1524:7b0fcefd52d4
plugin XEP-0047, XEP-0096: In-Band Bytestream plugin cleaning:
- some renaming, comment improvements, etc.
- progress callback is no longer managed here, as it will be managed by the application
- file data is no longer used, besides file_obj
- a proper Deferred is used instead of success and error callbacks
- cleaner error-sending method
plugin XEP-0096 has been updated to handle these changes. It is temporarily partially broken, though.
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 25 Sep 2015 19:19:12 +0200 |
parents | 832846fefe85 |
children | 94901070478e |
rev | line source |
---|---|
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
1 #!/usr/bin/python |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SAT plugin for managing various text syntaxes |
1396 | 5 # Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Jérôme Poisson (goffi@goffi.org) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
771 | 20 from sat.core.i18n import _, D_ |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
968
diff
changeset
|
21 from sat.core.log import getLogger |
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
968
diff
changeset
|
22 log = getLogger(__name__) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
23 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
24 from twisted.internet import defer |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 from twisted.internet.threads import deferToThread |
705
6c8a119dcc94
plugin text syntaxes: clean_xhtml now accept lxml's HtmlElement to avoid parsing two times the same xml
Goffi <goffi@goffi.org>
parents:
702
diff
changeset
|
26 from sat.core import exceptions |
674
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
27 from lxml import html |
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
28 from lxml.html import clean |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
29 from cgi import escape |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
30 import re |
674
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
31 |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
32 |
771 | 33 CATEGORY = D_("Composition") |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 NAME = "Syntax" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 _SYNTAX_XHTML = "XHTML" |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
36 _SYNTAX_CURRENT = "@CURRENT@" |
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
37 |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
38 # TODO: check/adapt following list |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
39 STYLES_WHITELIST = ["azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width"] # based on feedparser list (http://pythonhosted.org/feedparser/html-sanitization.html) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
40 |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
41 SAFE_ATTRS = html.defs.safe_attrs.union(('style',)) |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
42 STYLES_VALUES_REGEX = r'^(' + '|'.join(['([a-z-]+)', # alphabetical names |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
43 '(#[0-9a-f]+)', # hex value |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
44 '(\d+(.\d+)? *(|%|em|ex|px|in|cm|mm|pt|pc))', # values with units (or not) |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
45 'rgb\( *((\d+(.\d+)?), *){2}(\d+(.\d+)?) *\)', # rgb function |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
46 'rgba\( *((\d+(.\d+)?), *){3}(\d+(.\d+)?) *\)', # rgba function |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
47 ]) + ') *(!important)?$' # we accept "!important" at the end |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
48 STYLES_ACCEPTED_VALUE = re.compile(STYLES_VALUES_REGEX) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
49 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
50 PLUGIN_INFO = { |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
51 "name": "Text syntaxes", |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
52 "import_name": "TEXT-SYNTAXES", |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
53 "type": "MISC", |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
54 "protocols": [], |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
55 "dependencies": [], |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
56 "main": "TextSyntaxes", |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
57 "handler": "no", |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
58 "description": _("""Management of various text syntaxes (XHTML-IM, Markdown, etc)""") |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
59 } |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
60 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
61 class UnknownSyntax(Exception): |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
62 pass |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
63 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
64 class TextSyntaxes(object): |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
65 """ Text conversion class |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
66 XHTML utf-8 is used as intermediate language for conversions |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
67 """ |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
68 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
69 OPT_DEFAULT = "DEFAULT" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
70 OPT_HIDDEN = "HIDDEN" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
71 OPT_NO_THREAD = "NO_THREAD" |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
72 SYNTAX_XHTML = _SYNTAX_XHTML |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
73 SYNTAX_MARKDOWN = "markdown" |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
74 SYNTAX_TEXT = "text" |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
75 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
76 params = """ |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
77 <params> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
78 <individual> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
79 <category name="%(category_name)s" label="%(category_label)s"> |
968
75f3b3b430ff
tools, frontends, memory: param definition and XMLUI handle multi-selection for list widgets:
souliane <souliane@mailoo.org>
parents:
852
diff
changeset
|
80 <param name="%(name)s" label="%(label)s" type="list" security="0"> |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
81 %(options)s |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
82 </param> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
83 </category> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
84 </individual> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
85 </params> |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
86 """ |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
87 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
88 params_data = { |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
89 'category_name': CATEGORY, |
771 | 90 'category_label': _(CATEGORY), |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
91 'name': NAME, |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
92 'label': _(NAME), |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
93 'syntaxes': {}, |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
94 } |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
95 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
96 def __init__(self, host): |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
968
diff
changeset
|
97 log.info(_("Text syntaxes plugin initialization")) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
98 self.host = host |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
99 self.syntaxes = {} |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
100 self.addSyntax(self.SYNTAX_XHTML, lambda xhtml: defer.succeed(xhtml), lambda xhtml: defer.succeed(xhtml), |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
101 TextSyntaxes.OPT_NO_THREAD) |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
102 self.addSyntax(self.SYNTAX_TEXT, lambda text: escape(text), lambda xhtml: self._removeMarkups(xhtml), [TextSyntaxes.OPT_HIDDEN]) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
103 try: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
104 import markdown, html2text |
841
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
105 |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
106 def _html2text(html, baseurl=''): |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
107 h = html2text.HTML2Text(baseurl=baseurl) |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
108 h.body_width = 0 # do not truncate the lines, it breaks the long URLs |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
109 return h.handle(html) |
831f208b4ea3
plugin text_syntaxes: html2text was breaking the long URLs
souliane <souliane@mailoo.org>
parents:
836
diff
changeset
|
110 self.addSyntax(self.SYNTAX_MARKDOWN, markdown.markdown, _html2text, [TextSyntaxes.OPT_DEFAULT]) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
111 except ImportError: |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
968
diff
changeset
|
112 log.warning("markdown or html2text not found, can't use Markdown syntax") |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
113 host.bridge.addMethod("syntaxConvert", ".plugin", in_sign='sssbs', out_sign='s', |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
114 async=True, method=self.convert) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
115 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
116 def _updateParamOptions(self): |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
117 data_synt = TextSyntaxes.params_data['syntaxes'] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
118 syntaxes = [] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
119 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
120 for syntax in data_synt.keys(): |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
121 flags = data_synt[syntax]["flags"] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
122 if TextSyntaxes.OPT_HIDDEN not in flags: |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
123 syntaxes.append(syntax) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
124 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
125 syntaxes.sort(key=unicode.lower) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
126 options = [] |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
127 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
128 for syntax in syntaxes: |
968
75f3b3b430ff
tools, frontends, memory: param definition and XMLUI handle multi-selection for list widgets:
souliane <souliane@mailoo.org>
parents:
852
diff
changeset
|
129 selected = 'selected="true"' if syntax == _SYNTAX_XHTML else '' |
75f3b3b430ff
tools, frontends, memory: param definition and XMLUI handle multi-selection for list widgets:
souliane <souliane@mailoo.org>
parents:
852
diff
changeset
|
130 options.append(u'<option value="%s" %s/>' % (syntax, selected)) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
131 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
132 TextSyntaxes.params_data["options"] = u'\n'.join(options) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
133 self.host.memory.updateParams(TextSyntaxes.params % TextSyntaxes.params_data) |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
134 |
702
a25db3fe3959
plugin XEP-0071: rich messages management for sendMessage
Goffi <goffi@goffi.org>
parents:
699
diff
changeset
|
135 def getCurrentSyntax(self, profile): |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
136 """ Return the selected syntax for the given profile |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
137 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
138 @param profile: %(doc_profile)s |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
139 @return: profile selected syntax |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
140 """ |
699
e819630c6737
plugin text syntaxes: fixed bad parameter order for getParamA
Goffi <goffi@goffi.org>
parents:
695
diff
changeset
|
141 return self.host.memory.getParamA(NAME, CATEGORY , profile_key=profile) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
142 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
143 def clean_xhtml(self, xhtml): |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
144 """ Clean XHTML text by removing potentially dangerous/malicious parts |
705
6c8a119dcc94
plugin text syntaxes: clean_xhtml now accept lxml's HtmlElement to avoid parsing two times the same xml
Goffi <goffi@goffi.org>
parents:
702
diff
changeset
|
145 @param xhtml: raw xhtml text to clean (or lxml's HtmlElement) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
146 """ |
674
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
147 def blocking_cleaning(xhtml): |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
148 """ Clean XHTML and style attributes """ |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
149 |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
150 def clean_style(styles_raw): |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
151 """" Remove styles not in the whitelist, |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
152 or where the value doesn't match the regex """ |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
153 styles = styles_raw.split(";") |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
154 cleaned_styles = [] |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
155 for style in styles: |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
156 try: |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
157 key, value = style.split(':') |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
158 except ValueError: |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
159 continue |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
160 key = key.lower().strip() |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
161 if key not in STYLES_WHITELIST: |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
162 continue |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
163 value = value.lower().strip() |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
164 if not STYLES_ACCEPTED_VALUE.match(value): |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
165 continue |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
166 if value == "none": |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
167 continue |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
168 cleaned_styles.append((key, value)) |
1458
832846fefe85
plugin text-syntaxes: minor variable renaming
Goffi <goffi@goffi.org>
parents:
1396
diff
changeset
|
169 return "; ".join(["%s: %s" % (key_, value_) for key_, value_ in cleaned_styles]) |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
170 |
705
6c8a119dcc94
plugin text syntaxes: clean_xhtml now accept lxml's HtmlElement to avoid parsing two times the same xml
Goffi <goffi@goffi.org>
parents:
702
diff
changeset
|
171 if isinstance(xhtml, basestring): |
6c8a119dcc94
plugin text syntaxes: clean_xhtml now accept lxml's HtmlElement to avoid parsing two times the same xml
Goffi <goffi@goffi.org>
parents:
702
diff
changeset
|
172 xhtml_elt = html.fromstring(xhtml) |
6c8a119dcc94
plugin text syntaxes: clean_xhtml now accept lxml's HtmlElement to avoid parsing two times the same xml
Goffi <goffi@goffi.org>
parents:
702
diff
changeset
|
173 elif isinstance(xhtml, html.HtmlElement): |
6c8a119dcc94
plugin text syntaxes: clean_xhtml now accept lxml's HtmlElement to avoid parsing two times the same xml
Goffi <goffi@goffi.org>
parents:
702
diff
changeset
|
174 xhtml_elt = xhtml |
6c8a119dcc94
plugin text syntaxes: clean_xhtml now accept lxml's HtmlElement to avoid parsing two times the same xml
Goffi <goffi@goffi.org>
parents:
702
diff
changeset
|
175 else: |
993
301b342c697a
core: use of the new core.log module:
Goffi <goffi@goffi.org>
parents:
968
diff
changeset
|
176 log.error("Only strings and HtmlElements can be cleaned") |
705
6c8a119dcc94
plugin text syntaxes: clean_xhtml now accept lxml's HtmlElement to avoid parsing two times the same xml
Goffi <goffi@goffi.org>
parents:
702
diff
changeset
|
177 raise exceptions.DataError |
674
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
178 cleaner = clean.Cleaner(style=False, |
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
179 add_nofollow=False, |
692
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
180 safe_attrs=SAFE_ATTRS) |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
181 xhtml_elt = cleaner.clean_html(xhtml_elt) |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
182 for elt in xhtml_elt.xpath("//*[@style]"): |
e98db42cd78c
plugin text syntaxes: styles sanitisation
Goffi <goffi@goffi.org>
parents:
674
diff
changeset
|
183 elt.set("style", clean_style(elt.get('style'))) |
852
4cc55e05266d
plugin text syntaxes: fixed cleaners encoding
Goffi <goffi@goffi.org>
parents:
841
diff
changeset
|
184 return html.tostring(xhtml_elt, encoding=unicode, method='xml') |
674
fb0b1100c908
plugin text_syntaxes: fixed clean_xhml (it now return XHTML instead of HTML)
Goffi <goffi@goffi.org>
parents:
665
diff
changeset
|
185 |
705
6c8a119dcc94
plugin text syntaxes: clean_xhtml now accept lxml's HtmlElement to avoid parsing two times the same xml
Goffi <goffi@goffi.org>
parents:
702
diff
changeset
|
186 return deferToThread(blocking_cleaning, xhtml) |
665
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
187 |
744
312a2842b2b8
plugins text-syntaxes: added a default value to use the current user syntax in convert
souliane <souliane@mailoo.org>
parents:
705
diff
changeset
|
def convert(self, text, syntax_from, syntax_to=_SYNTAX_XHTML, safe=True, profile=None):
    """Convert a text from one syntax to another, using XHTML as pivot

    The source syntax "to" callback is run first, then (optionally)
    clean_xhtml, then the destination syntax "from" callback.
    @param text: text to convert
    @param syntax_from: name of the source syntax (e.g. "markdown")
    @param syntax_to: name of the destination syntax (e.g.: "XHTML")
    @param safe: if True, the intermediate result is passed through
        clean_xhtml to avoid malicious code
    @param profile: needed only when syntax_from or syntax_to is set to
        _SYNTAX_CURRENT
    @return: deferred chain whose final result is the converted text,
        stripped of trailing whitespace
    @raise UnknownSyntax: syntax_from or syntax_to is not registered
    """
    # resolve the "current syntax" placeholder, source first
    if syntax_from == _SYNTAX_CURRENT:
        syntax_from = self.getCurrentSyntax(profile)
    if syntax_to == _SYNTAX_CURRENT:
        syntax_to = self.getCurrentSyntax(profile)

    registered = TextSyntaxes.params_data['syntaxes']
    for syntax_name in (syntax_from, syntax_to):
        if syntax_name not in registered:
            raise UnknownSyntax(syntax_name)

    # source syntax ==> XHTML
    source = registered[syntax_from]
    if TextSyntaxes.OPT_NO_THREAD in source["flags"]:
        # the callback is called directly and its result is used as the deferred
        d = source["to"](text)
    else:
        d = deferToThread(source["to"], text)

    #TODO: keep only body element and change it to a div here ?

    if safe:
        d.addCallback(self.clean_xhtml)

    # XHTML ==> destination syntax
    if TextSyntaxes.OPT_NO_THREAD in registered[syntax_to]["flags"]:
        d.addCallback(registered[syntax_to]["from"])
    else:
        def from_xhtml_in_thread(xhtml):
            # the callback is looked up when the chain fires, not when it is built
            return deferToThread(registered[syntax_to]["from"], xhtml)
        d.addCallback(from_xhtml_in_thread)

    def strip_trailing(converted):
        # converters can add new lines that disturb the microblog change detection
        return converted.rstrip()

    d.addCallback(strip_trailing)
    return d
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
226 |
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
def addSyntax(self, name, to_xhtml_cb, from_xhtml_cb, flags=None):
    """Add a new syntax to the manager

    The syntax is stored in TextSyntaxes.params_data['syntaxes'] under its
    name, then the parameters options are refreshed.
    @param name: unique name of the syntax (converted to unicode)
    @param to_xhtml_cb: callback to convert from syntax to XHTML
    @param from_xhtml_cb: callback to convert from XHTML to syntax
    @param flags: set of optional flags, can be:
        TextSyntaxes.OPT_DEFAULT: use as the default syntax (replace former one)
        TextSyntaxes.OPT_HIDDEN: do not show in parameters
        TextSyntaxes.OPT_NO_THREAD: do not defer to thread when converting (the callback must then return a deferred)
    @raise ValueError: OPT_HIDDEN and OPT_DEFAULT are both in flags
    """
    name = unicode(name)
    flags = flags or []
    if TextSyntaxes.OPT_HIDDEN in flags and TextSyntaxes.OPT_DEFAULT in flags:
        raise ValueError("%s and %s are mutually exclusive" % (TextSyntaxes.OPT_HIDDEN, TextSyntaxes.OPT_DEFAULT))

    params_data = TextSyntaxes.params_data
    params_data['syntaxes'][name] = {"to": to_xhtml_cb, "from": from_xhtml_cb, "flags": flags}
    if TextSyntaxes.OPT_DEFAULT in flags:
        # only the 'default' entry is updated; the syntaxes dict must stay untouched
        params_data['default'] = name

    self._updateParamOptions()
6a64e0a759e6
plugin text syntaxes: this plugin manage rich text syntaxes conversions and cleaning.
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
249 |
832
c4b22aedb7d7
plugin groupblog, XEP-0071, XEP-0277, text_syntaxes: manage raw/rich/xhtml data for content/title:
souliane <souliane@mailoo.org>
parents:
811
diff
changeset
|
def _removeMarkups(self, xhtml):
    """Strip the XHTML markups from a string and keep only its text

    @param xhtml: the XHTML string to be cleaned
    @return: the text serialisation (unicode) of the cleaned document
    """
    # 'style' is passed as a tag to kill, on top of the Cleaner defaults
    markup_cleaner = clean.Cleaner(kill_tags=['style'])
    parsed = html.fromstring(xhtml)
    without_markup = markup_cleaner.clean_html(parsed)
    # method="text" serialises the text content only
    return html.tostring(without_markup, encoding=unicode, method="text")