comparison sat/plugins/plugin_misc_text_syntaxes.py @ 2873:e1207b8ad97c

plugin text syntaxes: disable raw HTML parsing in markdown by default
author Goffi <goffi@goffi.org>
date Mon, 25 Mar 2019 08:14:00 +0100
parents 148d30147890
children a02ad4bc0a6d
comparison
equal deleted inserted replaced
2872:6b00f88316bf 2873:e1207b8ad97c
15 # GNU Affero General Public License for more details. 15 # GNU Affero General Public License for more details.
16 16
17 # You should have received a copy of the GNU Affero General Public License 17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
20 import re
21 from cgi import escape
22 from functools import partial
20 from sat.core.i18n import _, D_ 23 from sat.core.i18n import _, D_
21 from sat.core.constants import Const as C 24 from sat.core.constants import Const as C
22 from sat.core.log import getLogger 25 from sat.core.log import getLogger
23
24 log = getLogger(__name__)
25 26
26 from twisted.internet import defer 27 from twisted.internet import defer
27 from twisted.internet.threads import deferToThread 28 from twisted.internet.threads import deferToThread
28 from sat.core import exceptions 29 from sat.core import exceptions
29 from sat.tools import xml_tools 30 from sat.tools import xml_tools
34 from lxml import etree 35 from lxml import etree
35 except ImportError: 36 except ImportError:
36 raise exceptions.MissingModule( 37 raise exceptions.MissingModule(
37 u"Missing module lxml, please download/install it from http://lxml.de/" 38 u"Missing module lxml, please download/install it from http://lxml.de/"
38 ) 39 )
39 from cgi import escape 40
40 import re 41 log = getLogger(__name__)
41
42 42
43 CATEGORY = D_("Composition") 43 CATEGORY = D_("Composition")
44 NAME = "Syntax" 44 NAME = "Syntax"
45 _SYNTAX_XHTML = "xhtml" # must be lower case 45 _SYNTAX_XHTML = "xhtml" # must be lower case
46 _SYNTAX_CURRENT = "@CURRENT@" 46 _SYNTAX_CURRENT = "@CURRENT@"
197 lambda xhtml: self._removeMarkups(xhtml), 197 lambda xhtml: self._removeMarkups(xhtml),
198 [TextSyntaxes.OPT_HIDDEN], 198 [TextSyntaxes.OPT_HIDDEN],
199 ) 199 )
200 try: 200 try:
201 import markdown, html2text 201 import markdown, html2text
202 from markdown.extensions import Extension
203
204 # XXX: we disable raw HTML parsing by default, to avoid parsing errors
205 # when the user is not aware of markdown and HTML
206 class EscapeHTML(Extension):
207 def extendMarkdown(self, md):
208 md.preprocessors.deregister('html_block')
209 md.inlinePatterns.deregister('html')
202 210
203 def _html2text(html, baseurl=""): 211 def _html2text(html, baseurl=""):
204 h = html2text.HTML2Text(baseurl=baseurl) 212 h = html2text.HTML2Text(baseurl=baseurl)
205 h.body_width = 0 # do not truncate the lines, it breaks the long URLs 213 h.body_width = 0 # do not truncate the lines, it breaks the long URLs
206 return h.handle(html) 214 return h.handle(html)
207 215
208 self.addSyntax( 216 self.addSyntax(
209 self.SYNTAX_MARKDOWN, 217 self.SYNTAX_MARKDOWN,
210 markdown.markdown, 218 partial(markdown.markdown, extensions=[EscapeHTML()]),
211 _html2text, 219 _html2text,
212 [TextSyntaxes.OPT_DEFAULT], 220 [TextSyntaxes.OPT_DEFAULT],
213 ) 221 )
214 except ImportError: 222 except ImportError:
215 log.warning(u"markdown or html2text not found, can't use Markdown syntax") 223 log.warning(u"markdown or html2text not found, can't use Markdown syntax")