view src/plugins/plugin_syntax_dc_wiki.py @ 1806:fd788d24277a

plugin syntax dc_wiki: first draft: handle dotclear wiki syntax /!\ only dc_wiki -> XHTML is handled for now
author Goffi <goffi@goffi.org>
date Sun, 17 Jan 2016 20:39:20 +0100
parents
children 0d3110341947
line wrap: on
line source

#!/usr/bin/python
# -*- coding: utf-8 -*-

# SàT plugin for Dotclear Wiki Syntax
# Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent

from sat.core.i18n import _
from sat.core.log import getLogger
log = getLogger(__name__)
from sat.core.constants import Const as C
from twisted.words.xish import domish
from sat.tools import xml_tools
import re

# Name under which this syntax is registered (passed to addSyntax() by
# DCWikiSyntax.__init__)
SYNTAX_NAME = "dc_wiki"

# Plugin metadata. "main" is the name of the plugin class defined in this
# module; "dependencies" lists the plugin whose instance is looked up in
# host.plugins by DCWikiSyntax.__init__.
# NOTE(review): presumably consumed by the plugin loader, which is not visible
# in this file -- confirm the exact meaning of each key there.
PLUGIN_INFO = {
    "name": "Dotclear Wiki Syntax Plugin",
    "import_name": "SYNT_DC_WIKI",
    "type": C.PLUG_TYPE_SYNTAXE,
    "dependencies": ["TEXT-SYNTAXES"],
    "main": "DCWikiSyntax",
    "handler": "",
    "description": _("""Implementation of Dotclear wiki syntax""")
}

# Templates used by DCWikiParser.parser_footnote; "{}" is replaced by the
# footnote number
NOTE_TPL = u'[{}]' # text of the footnote links
NOTE_A_REV_TPL = u'rev_note_{}' # id of the link located in the text body
NOTE_A_TPL = u'note_{}' # id of the link located in the footnote itself

# Inline and line-level patterns. They are joined with "|" below, so ORDER
# MATTERS: the first alternative that matches at a given position wins (longer
# title markers must come before shorter ones, and the catch-all "text" group
# must stay last). The name of the matching group selects the
# DCWikiParser.parser_<group name> handler.
wiki = [r"\\(?P<escape_char>[][!_+%'|\/*#@{}~?$()-])",
        r"^!!!!!(?P<h1_title>.+?)$",
        r"^!!!!(?P<h2_title>.+?)$",
        r"^!!!(?P<h3_title>.+?)$",
        r"^!!(?P<h4_title>.+?)$",
        r"^!(?P<h5_title>.+?)$",
        r"^----$(?P<horizontal_rule>)",
        r"^\*(?P<list_bullet>.*?)$",
        r"^#(?P<list_ordered>.*?)$",
        r"^ (?P<preformated>.*?)$",
        r"^> +?(?P<quote>.*?)$",
        r"''(?P<emphasis>.+?)''",
        r"__(?P<strong_emphasis>.+?)__",
        r"%%%(?P<line_break>)",
        r"\+\+(?P<insertion>.+?)\+\+",
        r"--(?P<deletion>.+?)--",
        r"\[(?P<link>.+?)\]",
        r"\(\((?P<image>.+?)\)\)",
        r"~(?P<anchor>.+?)~",
        r"\?\?(?P<acronym>.+?\|.+?)\?\?",
        r"{{(?P<inline_quote>.+?)}}",
        r"@@(?P<code>.+?)@@",
        r"\$\$(?P<footnote>.+?)\$\$",
        # catch-all: any single character not consumed by a pattern above
        r"(?P<text>.+?)",
       ]

wiki_re = re.compile('|'.join(wiki), re.MULTILINE | re.DOTALL)
# Block-level split: either a raw "///html ... ///" block or a paragraph.
# Both are terminated by a blank line or by the end of the input; the html
# block also accepts end of input (optionally after a single newline) so that
# a document finishing with an html block is not rendered as a paragraph.
wiki_block_level_re = re.compile(r"^///html(?P<html>.+?)///(?:\n{2,}|\n?\Z)|(?P<paragraph>.+?)(?:\n{2,}|\Z)", re.MULTILINE | re.DOTALL)


class DCWikiParser(object):
    """Convert Dotclear wiki markup to a tree of domish elements.

    The text is split with wiki_block_level_re / wiki_re; for each match, the
    name of the matching group selects the ``parser_<group name>`` handler,
    which is called with the matched text and the element to fill.
    """

    # wiki list marker -> name of the list element it opens
    _LIST_MARKERS = {u'*': 'ul', u'#': 'ol'}

    def __init__(self):
        # footnotes (<p/> elements) found during a parse() call,
        # reset at the beginning of each parse()
        self._footnotes = None
        # title groups are named h1_title to h5_title in wiki_re, so handlers
        # are needed for levels 1 to 5 ("level=level" binds the current value)
        for level in range(1, 6):
            setattr(self,
                'parser_h{}_title'.format(level),
                lambda string, parent, level=level: self._parser_title(string, parent, 'h{}'.format(level)))

    def parser_paragraph(self, string, parent):
        """Add a <p/> to parent and parse the inline markup of string in it."""
        p_elt = parent.addElement('p')
        self._parse(string, p_elt)

    def parser_html(self, string, parent):
        """Add the content of a ``///html`` block to parent.

        The content is wrapped in a <div/> before being parsed; if it can't
        be parsed, a warning is logged and the block is ignored.
        """
        # unicode template: on Python 2, formatting a byte string template
        # with non-ASCII unicode content raises UnicodeEncodeError
        wrapped_html = u"<div>{}</div>".format(string)
        try:
            div_elt = xml_tools.ElementParser()(wrapped_html)
        except domish.ParserError as e:
            log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e))
            return
        children = list(div_elt.elements())
        if len(children) == 1 and children[0].name == 'div':
            # the content was already a single <div/>, no need for our wrapper
            div_elt = children[0]
        parent.addChild(div_elt)

    def parser_escape_char(self, string, parent):
        """Add the escaped character as plain text (the backslash is dropped)."""
        parent.addContent(string)

    def _parser_title(self, string, parent, name):
        """Add a title element called name (e.g. 'h3') with string as text."""
        elt = parent.addElement(name)
        elt.addContent(string)

    def parser_horizontal_rule(self, string, parent):
        """Add a <hr/> to parent."""
        parent.addElement('hr')

    def _parser_list(self, string, parent, list_type):
        """Add a list item, creating or reusing the enclosing list(s).

        :param string: content of the line after its first list marker; extra
            leading markers ('*' or '#') each add one nesting level
        :param parent: element where the outermost list is looked for/created
        :param list_type: 'ul' or 'ol', type of the outermost list
        """
        markers = self._LIST_MARKERS
        # one list element per marker: the first marker was consumed by the
        # regex and is given by list_type, the others are still in string
        list_types = [list_type]
        depth = 0
        while string[depth:depth+1] in markers:
            list_types.append(markers[string[depth]])
            depth += 1

        string = string[depth:].lstrip()

        for tag in list_types:
            # reuse the list started by a previous line if there is one
            list_elt = getattr(parent, tag)
            if list_elt is None:
                parent = parent.addElement(tag)
            else:
                parent = list_elt

        li_elt = parent.addElement('li')
        self._parse(string, li_elt)

    def parser_list_bullet(self, string, parent):
        """Handle a line starting with '*' (unordered list item)."""
        self._parser_list(string, parent, 'ul')

    def parser_list_ordered(self, string, parent):
        """Handle a line starting with '#' (ordered list item)."""
        self._parser_list(string, parent, 'ol')

    def parser_preformated(self, string, parent):
        """Append a preformatted line to the <pre/> of parent (created if needed)."""
        pre_elt = parent.pre
        if pre_elt is None:
            pre_elt = parent.addElement('pre')
        else:
            # we are on a new line, and this is important for <pre/>
            pre_elt.addContent('\n')
        pre_elt.addContent(string)

    def parser_quote(self, string, parent):
        """Append a quoted line to the <blockquote><p/></blockquote> of parent.

        Both elements are created on the first quoted line; following lines
        are added to the same <p/>, separated by a line feed.
        """
        blockquote_elt = parent.blockquote
        if blockquote_elt is None:
            blockquote_elt = parent.addElement('blockquote')
        p_elt = blockquote_elt.p
        if p_elt is None:
            p_elt = blockquote_elt.addElement('p')
        else:
            string = u'\n' + string

        self._parse(string, p_elt)

    def parser_emphasis(self, string, parent):
        """Add an <em/> and parse string in it."""
        em_elt = parent.addElement('em')
        self._parse(string, em_elt)

    def parser_strong_emphasis(self, string, parent):
        """Add a <strong/> and parse string in it."""
        strong_elt = parent.addElement('strong')
        self._parse(string, strong_elt)

    def parser_line_break(self, string, parent):
        """Add a <br/> to parent."""
        parent.addElement('br')

    def parser_insertion(self, string, parent):
        """Add an <ins/> and parse string in it."""
        ins_elt = parent.addElement('ins')
        self._parse(string, ins_elt)

    def parser_deletion(self, string, parent):
        """Add a <del/> and parse string in it."""
        del_elt = parent.addElement('del')
        self._parse(string, del_elt)

    def parser_link(self, string, parent):
        """Add an <a/> built from ``url`` or ``name|url[|lang[|title]]``.

        Fields after the 4th one are ignored (a warning is logged).
        """
        url_data = string.split(u'|')
        a_elt = parent.addElement('a')
        length = len(url_data)
        if length == 1:
            # bare link: the URL is also used as link text
            url = url_data[0]
            a_elt['href'] = url
            a_elt.addContent(url)
        else:
            name = url_data[0]
            url = url_data[1]
            a_elt['href'] = url
            a_elt.addContent(name)
            if length >= 3:
                a_elt['lang'] = url_data[2]
            if length >= 4:
                a_elt['title'] = url_data[3]
            if length > 4:
                log.warning(u"too much data for url, ignoring extra data")

    def parser_image(self, string, parent):
        """Add an <img/> built from ``src[|alt[|position[|longdesc]]]``.

        position is translated to a style attribute: l/g for left, r/d for
        right, c for centered; any other value is ignored with a warning.
        """
        image_data = string.split(u'|')
        img_elt = parent.addElement('img')

        for idx, attribute in enumerate(('src', 'alt', 'position', 'longdesc')):
            try:
                data = image_data[idx]
            except IndexError:
                # remaining fields were not given
                break

            if attribute != 'position':
                img_elt[attribute] = data
            else:
                data = data.lower()
                if data in ('l', 'g'):
                    img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0"
                elif data in ('r', 'd'):
                    img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em"
                elif data == 'c':
                    img_elt['style'] = "display:block; margin-left:auto; margin-right:auto"
                else:
                    log.warning(u"bad position argument for image, ignoring it")

    def parser_anchor(self, string, parent):
        """Add an empty <a/> whose id is string."""
        a_elt = parent.addElement('a')
        a_elt['id'] = string

    def parser_acronym(self, string, parent):
        """Add an <acronym/> built from ``acronym|title``."""
        acronym, title = string.split(u'|',1)
        acronym_elt = parent.addElement('acronym', content=acronym)
        acronym_elt['title'] = title

    def parser_inline_quote(self, string, parent):
        """Add a <q/> built from ``quote[|lang[|cite]]``."""
        quote_data = string.split(u'|')
        quote = quote_data[0]
        q_elt = parent.addElement('q', content=quote)
        for idx, attribute in enumerate(('lang', 'cite'), 1):
            try:
                data = quote_data[idx]
            except IndexError:
                break
            q_elt[attribute] = data

    def parser_code(self, string, parent):
        """Add a <code/> with string as (unparsed) text."""
        parent.addElement('code', content=string)

    def parser_footnote(self, string, parent):
        """Add a footnote reference to parent and store the footnote itself.

        The reference and the footnote link to each other through the ids
        built with NOTE_A_REV_TPL and NOTE_A_TPL.
        """
        idx = len(self._footnotes) + 1
        note_txt = NOTE_TPL.format(idx)
        sup_elt = parent.addElement('sup')
        sup_elt['class'] = 'note'
        a_elt = sup_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_REV_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx))

        p_elt = domish.Element((None, 'p'))
        a_elt = p_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx))
        # footnotes are actually added at the end of the parsing
        # the footnote is stored before its content is parsed, so that a
        # footnote nested in this one gets the next number, not the same one
        self._footnotes.append(p_elt)
        self._parse(string, p_elt)

    def parser_text(self, string, parent):
        """Add string to parent as plain text."""
        parent.addContent(string)

    def _parse(self, string, parent, block_level=False):
        """Parse string and fill parent using the parser_* handlers.

        :param string: wiki markup to parse
        :param parent: element where the result is added
        :param block_level: True to split string in blocks (paragraphs and
            html blocks), False to parse inline/line-level markup
        """
        regex = wiki_block_level_re if block_level else wiki_re

        for match in regex.finditer(string):
            if match.lastgroup is None:
                parent.addContent(string)
                return
            matched = match.group(match.lastgroup)
            try:
                parser = getattr(self, 'parser_{}'.format(match.lastgroup))
            except AttributeError:
                log.warning(u"No parser found for {}".format(match.lastgroup))
                # parent.addContent(string)
                continue
            parser(matched, parent)

    def parse(self, string):
        """Parse a whole wiki text.

        :param string: Dotclear wiki markup
        :return: a <div/> element containing the result, followed by a
            <div class="footnotes"/> if the text has footnotes
        """
        self._footnotes = []
        div_elt = domish.Element((None, 'div'))
        self._parse(string, parent=div_elt, block_level=True)
        if self._footnotes:
            foot_div_elt = div_elt.addElement('div')
            foot_div_elt['class'] = 'footnotes'
            # we add a simple horizontal rule which can be customized
            # with footnotes class, instead of a text which would need
            # to be translated
            foot_div_elt.addElement('hr')
            for elt in self._footnotes:
                foot_div_elt.addChild(elt)
        return div_elt


class DCWikiSyntax(object):
    """Plugin class registering the Dotclear wiki syntax in TEXT-SYNTAXES."""

    def __init__(self, host):
        """Create the parser and register the syntax.

        :param host: object whose ``plugins`` mapping holds the
            "TEXT-SYNTAXES" plugin instance
        """
        log.info(_(u"Dotclear wiki syntax plugin initialization"))
        self.host = host
        self._dc_parser = DCWikiParser()
        text_syntaxes = host.plugins["TEXT-SYNTAXES"]
        self._stx = text_syntaxes
        text_syntaxes.addSyntax(
            SYNTAX_NAME,
            self.parseWiki,
            self.parseXHTML,
            [text_syntaxes.OPT_NO_THREAD],
        )

    def parseWiki(self, wiki_stx):
        """Convert Dotclear wiki markup to its serialised XML form.

        :param wiki_stx: wiki markup to convert
        :return: result of toXml() on the <div/> built by the parser
        """
        return self._dc_parser.parse(wiki_stx).toXml()

    def parseXHTML(self, xhtml):
        """Convert XHTML to Dotclear wiki markup (not implemented yet)."""
        raise NotImplementedError