Mercurial > libervia-backend
changeset 1806:fd788d24277a
plugin syntax dc_wiki: first draft:
handle dotclear wiki syntax
/!\ only dc_wiki -> XHTML is handled for now
author | Goffi <goffi@goffi.org> |
---|---|
date | Sun, 17 Jan 2016 20:39:20 +0100 (2016-01-17) |
parents | 3c40fa0dcd7a |
children | 0d3110341947 |
files | src/core/constants.py src/plugins/plugin_syntax_dc_wiki.py |
diffstat | 2 files changed, 307 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/src/core/constants.py Sun Jan 17 17:33:10 2016 +0100 +++ b/src/core/constants.py Sun Jan 17 20:39:20 2016 +0100 @@ -143,6 +143,7 @@ PLUG_TYPE_MISC = "MISC" PLUG_TYPE_EXP = "EXP" PLUG_TYPE_SEC = "SEC" + PLUG_TYPE_SYNTAXE = "SYNTAXE" # names of widely used plugins
#!/usr/bin/python
# -*- coding: utf-8 -*-

# src/plugins/plugin_syntax_dc_wiki.py

# SàT plugin for Dotclear Wiki Syntax
# Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent

from sat.core.i18n import _
from sat.core.log import getLogger
log = getLogger(__name__)
from sat.core.constants import Const as C
from twisted.words.xish import domish
from sat.tools import xml_tools
import re

SYNTAX_NAME = "dc_wiki"

PLUGIN_INFO = {
    "name": "Dotclear Wiki Syntax Plugin",
    "import_name": "SYNT_DC_WIKI",
    "type": C.PLUG_TYPE_SYNTAXE,
    "dependencies": ["TEXT-SYNTAXES"],
    "main": "DCWikiSyntax",
    "handler": "",
    "description": _("""Implementation of Dotclear wiki syntax""")
}

NOTE_TPL = u'[{}]' # Note template
NOTE_A_REV_TPL = u'rev_note_{}'
NOTE_A_TPL = u'note_{}'

# Inline/line-level patterns. Each alternative carries exactly one named
# group; DCWikiParser._parse dispatches to the method "parser_<group name>".
# Order matters: alternatives are tried left to right, and the final "text"
# pattern consumes any single character nothing else matched.
wiki = [r"\\(?P<escape_char>[][!_+%'|\/*#@{}~?$()-])",
        r"^!!!!!(?P<h1_title>.+?)$",
        r"^!!!!(?P<h2_title>.+?)$",
        r"^!!!(?P<h3_title>.+?)$",
        r"^!!(?P<h4_title>.+?)$",
        r"^!(?P<h5_title>.+?)$",
        r"^----$(?P<horizontal_rule>)",
        r"^\*(?P<list_bullet>.*?)$",
        r"^#(?P<list_ordered>.*?)$",
        r"^ (?P<preformated>.*?)$",
        r"^> +?(?P<quote>.*?)$",
        r"''(?P<emphasis>.+?)''",
        r"__(?P<strong_emphasis>.+?)__",
        r"%%%(?P<line_break>)",
        r"\+\+(?P<insertion>.+?)\+\+",
        r"--(?P<deletion>.+?)--",
        r"\[(?P<link>.+?)\]",
        r"\(\((?P<image>.+?)\)\)",
        r"~(?P<anchor>.+?)~",
        r"\?\?(?P<acronym>.+?\|.+?)\?\?",
        r"{{(?P<inline_quote>.+?)}}",
        r"@@(?P<code>.+?)@@",
        r"\$\$(?P<footnote>.+?)\$\$",
        r"(?P<text>.+?)",
        ]

wiki_re = re.compile('|'.join(wiki), re.MULTILINE | re.DOTALL)
# Block-level split: either a raw "///html ... ///" section or a paragraph
# terminated by at least two newlines (or by the end of the input).
wiki_block_level_re = re.compile(r"^///html(?P<html>.+?)///\n\n|(?P<paragraph>.+?)(?:\n{2,}|\Z)", re.MULTILINE | re.DOTALL)


class DCWikiParser(object):
    """Convert Dotclear wiki markup into a domish element tree.

    Every ``parser_<name>`` method handles the regex group of the same name:
    it receives the text captured by that group and the element to which the
    generated content must be appended.
    """

    def __init__(self):
        # list of footnote <p/> elements, (re)initialised by parse()
        self._footnotes = None
        # The regex table defines groups h1_title .. h5_title, so heading
        # parsers must be registered for levels 1 to 5 (starting at 0 left
        # h5_title without a parser and created an unreachable h0 one).
        for level in range(1, 6):
            tag = 'h{}'.format(level)
            # "tag" is bound as a default argument to avoid late binding
            setattr(self,
                    'parser_{}_title'.format(tag),
                    lambda string, parent, tag=tag: self._parser_title(string, parent, tag))

    def parser_paragraph(self, string, parent):
        """Wrap a block of text in a <p/> and parse its inline markup."""
        p_elt = parent.addElement('p')
        self._parse(string, p_elt)

    def parser_html(self, string, parent):
        """Insert a raw HTML section, or skip it if it can't be parsed."""
        # NOTE(review): byte-string template; if ``string`` is unicode with
        # non-ASCII characters this may fail under Python 2 -- confirm.
        wrapped_html = "<div>{}</div>".format(string)
        try:
            div_elt = xml_tools.ElementParser()(wrapped_html)
        except domish.ParserError as e:
            log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e))
            return
        children = list(div_elt.elements())
        # avoid a useless <div><div>...</div></div> nesting
        if len(children) == 1 and children[0].name == 'div':
            div_elt = children[0]
        parent.addChild(div_elt)

    def parser_escape_char(self, string, parent):
        """Add a backslash-escaped character as plain content."""
        parent.addContent(string)

    def _parser_title(self, string, parent, name):
        """Add a heading element called ``name`` containing ``string``."""
        elt = parent.addElement(name)
        elt.addContent(string)

    def parser_horizontal_rule(self, string, parent):
        """Add an <hr/> element."""
        parent.addElement('hr')

    def _parser_list(self, string, parent, list_type):
        """Add a list item, creating the enclosing list element(s) if needed.

        @param string: item text, without the first list marker
        @param parent: element receiving the list
        @param list_type: name of the list element ('ul' or 'ol')
        """
        # Extra leading markers give the nesting depth.
        # NOTE(review): only '*' is counted here, even when list_type is
        # 'ol' -- confirm whether '#' should be counted for ordered lists.
        depth = 0
        while string[depth:depth+1] == '*':
            depth +=1

        string = string[depth:].lstrip()

        # reuse the existing list element at each level, else create it
        for i in range(depth+1):
            list_elt = getattr(parent, list_type)
            if not list_elt:
                parent = parent.addElement(list_type)
            else:
                parent = list_elt

        li_elt = parent.addElement('li')
        self._parse(string, li_elt)

    def parser_list_bullet(self, string, parent):
        """Add an item to a bullet list (<ul/>)."""
        self._parser_list(string, parent, 'ul')

    def parser_list_ordered(self, string, parent):
        """Add an item to an ordered list (<ol/>)."""
        self._parser_list(string, parent, 'ol')

    def parser_preformated(self, string, parent):
        """Append a line to the <pre/> child of parent, creating it if needed."""
        pre_elt = parent.pre
        if pre_elt is None:
            pre_elt = parent.addElement('pre')
        else:
            # we are on a new line, and this is important for <pre/>
            pre_elt.addContent('\n')
        pre_elt.addContent(string)

    def parser_quote(self, string, parent):
        """Append a quoted line to the <blockquote><p/></blockquote> of parent."""
        blockquote_elt = parent.blockquote
        if blockquote_elt is None:
            blockquote_elt = parent.addElement('blockquote')
        p_elt = blockquote_elt.p
        if p_elt is None:
            p_elt = blockquote_elt.addElement('p')
        else:
            # continuation of an existing quote: keep the line separation
            string = u'\n' + string

        self._parse(string, p_elt)

    def parser_emphasis(self, string, parent):
        """Add an <em/> element and parse its content."""
        em_elt = parent.addElement('em')
        self._parse(string, em_elt)

    def parser_strong_emphasis(self, string, parent):
        """Add a <strong/> element and parse its content."""
        strong_elt = parent.addElement('strong')
        self._parse(string, strong_elt)

    def parser_line_break(self, string, parent):
        """Add a <br/> element."""
        parent.addElement('br')

    def parser_insertion(self, string, parent):
        """Add an <ins/> element and parse its content."""
        ins_elt = parent.addElement('ins')
        self._parse(string, ins_elt)

    def parser_deletion(self, string, parent):
        """Add a <del/> element and parse its content."""
        del_elt = parent.addElement('del')
        self._parse(string, del_elt)

    def parser_link(self, string, parent):
        """Add an <a/> element from "url" or "name|url[|lang[|title]]".

        Fields beyond the fourth one are ignored with a warning.
        """
        url_data = string.split(u'|')
        a_elt = parent.addElement('a')
        length = len(url_data)
        # split() always returns at least one item, so a bare URL gives a
        # length of 1 (testing for 0 sent bare URLs to the branch below,
        # where url_data[1] raised IndexError)
        if length == 1:
            url = url_data[0]
            a_elt['href'] = url
            a_elt.addContent(url)
        else:
            name = url_data[0]
            url = url_data[1]
            a_elt['href'] = url
            a_elt.addContent(name)
            if length >= 3:
                a_elt['lang'] = url_data[2]
            if length >= 4:
                a_elt['title'] = url_data[3]
            if length > 4:
                log.warning(u"too much data for url, ignoring extra data")

    def parser_image(self, string, parent):
        """Add an <img/> element from "src[|alt[|position[|longdesc]]]".

        The position field is translated to an inline style; an unknown
        position is ignored with a warning.
        """
        image_data = string.split(u'|')
        img_elt = parent.addElement('img')

        for idx, attribute in enumerate(('src', 'alt', 'position', 'longdesc')):
            try:
                data = image_data[idx]
            except IndexError:
                # no more fields were given
                break

            if attribute != 'position':
                img_elt[attribute] = data
            else:
                data = data.lower()
                if data in ('l', 'g'):
                    img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0"
                elif data in ('r', 'd'):
                    img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em"
                elif data == 'c':
                    img_elt['style'] = "display:block; margin-left:auto; margin-right:auto"
                else:
                    log.warning(u"bad position argument for image, ignoring it")

    def parser_anchor(self, string, parent):
        """Add an empty <a/> element whose id is ``string``."""
        a_elt = parent.addElement('a')
        a_elt['id'] = string

    def parser_acronym(self, string, parent):
        """Add an <acronym/> element from "acronym|title"."""
        acronym, title = string.split(u'|',1)
        acronym_elt = parent.addElement('acronym', content=acronym)
        acronym_elt['title'] = title

    def parser_inline_quote(self, string, parent):
        """Add a <q/> element from "quote[|lang[|cite]]"."""
        quote_data = string.split(u'|')
        quote = quote_data[0]
        q_elt = parent.addElement('q', content=quote)
        for idx, attribute in enumerate(('lang', 'cite'), 1):
            try:
                data = quote_data[idx]
            except IndexError:
                # no more fields were given
                break
            q_elt[attribute] = data

    def parser_code(self, string, parent):
        """Add a <code/> element; its content is not parsed further."""
        parent.addElement('code', content=string)

    def parser_footnote(self, string, parent):
        """Add a footnote reference and store the note for the end of parsing.

        The reference and the note link to each other through their ids.
        """
        idx = len(self._footnotes) + 1
        note_txt = NOTE_TPL.format(idx)
        # reference put in the text flow
        sup_elt = parent.addElement('sup')
        sup_elt['class'] = 'note'
        a_elt = sup_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_REV_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx))

        # the note itself, linking back to the reference
        p_elt = domish.Element((None, 'p'))
        a_elt = p_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx))
        self._parse(string, p_elt)
        # footnotes are actually added at the end of the parsing
        self._footnotes.append(p_elt)

    def parser_text(self, string, parent):
        """Add plain text content."""
        parent.addContent(string)

    def _parse(self, string, parent, block_level=False):
        """Parse ``string`` and append the resulting content to ``parent``.

        @param string: wiki text to parse
        @param parent: element receiving the generated content
        @param block_level: if True, split the text in blocks (paragraphs
            and raw HTML sections) instead of parsing inline markup
        """
        regex = wiki_block_level_re if block_level else wiki_re

        for match in regex.finditer(string):
            if match.lastgroup is None:
                # no named group matched: keep the text unparsed
                parent.addContent(string)
                return
            matched = match.group(match.lastgroup)
            try:
                parser = getattr(self, 'parser_{}'.format(match.lastgroup))
            except AttributeError:
                # the matched text is dropped when no parser handles it
                log.warning(u"No parser found for {}".format(match.lastgroup))
                continue
            parser(matched, parent)

    def parse(self, string):
        """Convert a whole wiki text to XHTML.

        @param string: Dotclear wiki text
        @return: <div/> element holding the converted content, followed by
            a <div class="footnotes"/> if the text contains footnotes
        """
        self._footnotes = []
        div_elt = domish.Element((None, 'div'))
        self._parse(string, parent=div_elt, block_level=True)
        if self._footnotes:
            foot_div_elt = div_elt.addElement('div')
            foot_div_elt['class'] = 'footnotes'
            # we add a simple horizontal rule which can be customized
            # with footnotes class, instead of a text which would need
            # to be translated
            foot_div_elt.addElement('hr')
            for elt in self._footnotes:
                foot_div_elt.addChild(elt)
        return div_elt


class DCWikiSyntax(object):
    """Plugin entry point: register the dc_wiki syntax with TEXT-SYNTAXES."""

    def __init__(self, host):
        log.info(_(u"Dotclear wiki syntax plugin initialization"))
        self.host = host
        self._dc_parser = DCWikiParser()
        self._stx = self.host.plugins["TEXT-SYNTAXES"]
        self._stx.addSyntax(SYNTAX_NAME, self.parseWiki, self.parseXHTML, [self._stx.OPT_NO_THREAD])

    def parseWiki(self, wiki_stx):
        """Convert Dotclear wiki text to serialised XHTML.

        @param wiki_stx: Dotclear wiki text
        @return: XML serialisation of the resulting <div/> element
        """
        div_elt = self._dc_parser.parse(wiki_stx)
        return div_elt.toXml()

    def parseXHTML(self, xhtml):
        """XHTML to dc_wiki conversion, not handled for now.

        @raise NotImplementedError: always
        """
        raise NotImplementedError