# HG changeset patch # User Goffi # Date 1453059560 -3600 # Node ID fd788d24277adade140983e556b70946d4be7f5f # Parent 3c40fa0dcd7aed7371bda63b6f38ced4aa7e6c25 plugin syntax dc_wiki: first draft: handle dotclear wiki syntax /!\ only dc_wiki -> XHTML is handled for now diff -r 3c40fa0dcd7a -r fd788d24277a src/core/constants.py --- a/src/core/constants.py Sun Jan 17 17:33:10 2016 +0100 +++ b/src/core/constants.py Sun Jan 17 20:39:20 2016 +0100 @@ -143,6 +143,7 @@ PLUG_TYPE_MISC = "MISC" PLUG_TYPE_EXP = "EXP" PLUG_TYPE_SEC = "SEC" + PLUG_TYPE_SYNTAXE = "SYNTAXE" # names of widely used plugins diff -r 3c40fa0dcd7a -r fd788d24277a src/plugins/plugin_syntax_dc_wiki.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/plugin_syntax_dc_wiki.py Sun Jan 17 20:39:20 2016 +0100 @@ -0,0 +1,306 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# SàT plugin for Dotclear Wiki Syntax +# Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +# XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent + +from sat.core.i18n import _ +from sat.core.log import getLogger +log = getLogger(__name__) +from sat.core.constants import Const as C +from twisted.words.xish import domish +from sat.tools import xml_tools +import re + +SYNTAX_NAME = "dc_wiki" + +PLUGIN_INFO = { + "name": "Dotclear Wiki Syntax Plugin", + "import_name": "SYNT_DC_WIKI", + "type": C.PLUG_TYPE_SYNTAXE, + "dependencies": ["TEXT-SYNTAXES"], + "main": "DCWikiSyntax", + "handler": "", + "description": _("""Implementation of Dotclear wiki syntax""") +} + +NOTE_TPL = u'[{}]' # Note template +NOTE_A_REV_TPL = u'rev_note_{}' +NOTE_A_TPL = u'note_{}' + +wiki = [r"\\(?P[][!_+%'|\/*#@{}~?$()-])", + r"^!!!!!(?P.+?)$", + r"^!!!!(?P.+?)$", + r"^!!!(?P.+?)$", + r"^!!(?P.+?)$", + r"^!(?P.+?)$", + r"^----$(?P)", + r"^\*(?P.*?)$", + r"^#(?P.*?)$", + r"^ (?P.*?)$", + r"^> +?(?P.*?)$", + r"''(?P.+?)''", + r"__(?P.+?)__", + r"%%%(?P)", + r"\+\+(?P.+?)\+\+", + r"--(?P.+?)--", + r"\[(?P.+?)\]", + r"\(\((?P.+?)\)\)", + r"~(?P.+?)~", + r"\?\?(?P.+?\|.+?)\?\?", + r"{{(?P.+?)}}", + r"@@(?P.+?)@@", + r"\$\$(?P.+?)\$\$", + r"(?P.+?)", + ] + +wiki_re = re.compile('|'.join(wiki), re.MULTILINE | re.DOTALL) +wiki_block_level_re = re.compile(r"^///html(?P.+?)///\n\n|(?P.+?)(?:\n{2,}|\Z)", re.MULTILINE | re.DOTALL) + + +class DCWikiParser(object): + + def __init__(self): + self._footnotes = None + for i in xrange(5): + setattr(self, + 'parser_h{}_title'.format(i), + lambda string, parent, i=i: self._parser_title(string, parent, 'h{}'.format(i))) + + def parser_paragraph(self, string, parent): + p_elt = parent.addElement('p') + self._parse(string, p_elt) + + def parser_html(self, string, parent): + wrapped_html = "
{}
".format(string) + try: + div_elt = xml_tools.ElementParser()(wrapped_html) + except domish.ParserError as e: + log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e)) + return + children = list(div_elt.elements()) + if len(children) == 1 and children[0].name == 'div': + div_elt = children[0] + parent.addChild(div_elt) + + def parser_escape_char(self, string, parent): + parent.addContent(string) + + def _parser_title(self, string, parent, name): + elt = parent.addElement(name) + elt.addContent(string) + + def parser_horizontal_rule(self, string, parent): + parent.addElement('hr') + + def _parser_list(self, string, parent, list_type): + depth = 0 + while string[depth:depth+1] == '*': + depth +=1 + + string = string[depth:].lstrip() + + for i in xrange(depth+1): + list_elt = getattr(parent, list_type) + if not list_elt: + parent = parent.addElement(list_type) + else: + parent = list_elt + + li_elt = parent.addElement('li') + self._parse(string, li_elt) + + def parser_list_bullet(self, string, parent): + self._parser_list(string, parent, 'ul') + + def parser_list_ordered(self, string, parent): + self._parser_list(string, parent, 'ol') + + def parser_preformated(self, string, parent): + pre_elt = parent.pre + if pre_elt is None: + pre_elt = parent.addElement('pre') + else: + # we are on a new line, and this is important for
+            pre_elt.addContent('\n')
+        pre_elt.addContent(string)
+
+    def parser_quote(self, string, parent):
+        blockquote_elt = parent.blockquote
+        if blockquote_elt is None:
+            blockquote_elt = parent.addElement('blockquote')
+        p_elt = blockquote_elt.p
+        if p_elt is None:
+            p_elt = blockquote_elt.addElement('p')
+        else:
+            string = u'\n' + string
+
+        self._parse(string, p_elt)
+
+    def parser_emphasis(self, string, parent):
+        em_elt = parent.addElement('em')
+        self._parse(string, em_elt)
+
+    def parser_strong_emphasis(self, string, parent):
+        strong_elt = parent.addElement('strong')
+        self._parse(string, strong_elt)
+
+    def parser_line_break(self, string, parent):
+        parent.addElement('br')
+
+    def parser_insertion(self, string, parent):
+        ins_elt = parent.addElement('ins')
+        self._parse(string, ins_elt)
+
+    def parser_deletion(self, string, parent):
+        del_elt = parent.addElement('del')
+        self._parse(string, del_elt)
+
+    def parser_link(self, string, parent):
+        url_data = string.split(u'|')
+        a_elt = parent.addElement('a')
+        length = len(url_data)
+        if length == 0:
+            url = url_data[0]
+            a_elt['href'] = url
+            a_elt.addContent(url)
+        else:
+            name = url_data[0]
+            url = url_data[1]
+            a_elt['href'] = url
+            a_elt.addContent(name)
+            if length >= 3:
+                a_elt['lang'] = url_data[2]
+            if length >= 4:
+                a_elt['title'] = url_data[3]
+            if length > 4:
+                log.warning(u"too much data for url, ignoring extra data")
+
+    def parser_image(self, string, parent):
+        image_data = string.split(u'|')
+        img_elt = parent.addElement('img')
+
+        for idx, attribute in enumerate(('src', 'alt', 'position', 'longdesc')):
+            try:
+                data = image_data[idx]
+            except IndexError:
+                break
+
+            if attribute != 'position':
+                img_elt[attribute] = data
+            else:
+                data = data.lower()
+                if data in ('l', 'g'):
+                    img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0"
+                elif data in ('r', 'd'):
+                    img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em"
+                elif data == 'c':
+                    img_elt['style'] = "display:block; margin-left:auto; margin-right:auto"
+                else:
+                    log.warning(u"bad position argument for image, ignoring it")
+
+    def parser_anchor(self, string, parent):
+        a_elt = parent.addElement('a')
+        a_elt['id'] = string
+
+    def parser_acronym(self, string, parent):
+        acronym, title = string.split(u'|',1)
+        acronym_elt = parent.addElement('acronym', content=acronym)
+        acronym_elt['title'] = title
+
+    def parser_inline_quote(self, string, parent):
+        quote_data = string.split(u'|')
+        quote = quote_data[0]
+        q_elt = parent.addElement('q', content=quote)
+        for idx, attribute in enumerate(('lang', 'cite'), 1):
+            try:
+                data = quote_data[idx]
+            except IndexError:
+                break
+            q_elt[attribute] = data
+
+    def parser_code(self, string, parent):
+        parent.addElement('code', content=string)
+
+    def parser_footnote(self, string, parent):
+        idx = len(self._footnotes) + 1
+        note_txt = NOTE_TPL.format(idx)
+        sup_elt = parent.addElement('sup')
+        sup_elt['class'] = 'note'
+        a_elt = sup_elt.addElement('a', content=note_txt)
+        a_elt['id'] = NOTE_A_REV_TPL.format(idx)
+        a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx))
+
+        p_elt = domish.Element((None, 'p'))
+        a_elt = p_elt.addElement('a', content=note_txt)
+        a_elt['id'] = NOTE_A_TPL.format(idx)
+        a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx))
+        self._parse(string, p_elt)
+        # footnotes are actually added at the end of the parsing
+        self._footnotes.append(p_elt)
+
+    def parser_text(self, string, parent):
+        parent.addContent(string)
+
+    def _parse(self, string, parent, block_level=False):
+        regex = wiki_block_level_re if block_level else wiki_re
+
+        for match in regex.finditer(string):
+            if match.lastgroup is None:
+                parent.addContent(string)
+                return
+            matched = match.group(match.lastgroup)
+            try:
+                parser = getattr(self, 'parser_{}'.format(match.lastgroup))
+            except AttributeError:
+                log.warning(u"No parser found for {}".format(match.lastgroup))
+                # parent.addContent(string)
+                continue
+            parser(matched, parent)
+
+    def parse(self, string):
+        self._footnotes = []
+        div_elt = domish.Element((None, 'div'))
+        self._parse(string, parent=div_elt, block_level=True)
+        if self._footnotes:
+            foot_div_elt = div_elt.addElement('div')
+            foot_div_elt['class'] = 'footnotes'
+            # we add a simple horizontal rule which can be customized
+            # with footnotes class, instead of a text which would need
+            # to be translated
+            foot_div_elt.addElement('hr')
+            for elt in self._footnotes:
+                foot_div_elt.addChild(elt)
+        return div_elt
+
+
+class DCWikiSyntax(object):
+
+    def __init__(self, host):
+        log.info(_(u"Dotclear wiki syntax plugin initialization"))
+        self.host = host
+        self._dc_parser = DCWikiParser()
+        self._stx = self.host.plugins["TEXT-SYNTAXES"]
+        self._stx.addSyntax(SYNTAX_NAME, self.parseWiki, self.parseXHTML, [self._stx.OPT_NO_THREAD])
+
+    def parseWiki(self, wiki_stx):
+        div_elt = self._dc_parser.parse(wiki_stx)
+        return div_elt.toXml()
+
+    def parseXHTML(self, xhtml):
+        raise NotImplementedError