# HG changeset patch # User Goffi # Date 1453134614 -3600 # Node ID 18561326a5612502dbb88547adfd0478f217d5c3 # Parent 0d3110341947ae7626a12da1d3f17f5bbc6e7a54 plugin syntax dotclear: renamed syntax to wiki_dotclear, and renamed plugin file to plugin_syntax_wiki_dotclear.py diff -r 0d3110341947 -r 18561326a561 src/plugins/plugin_syntax_dc_wiki.py --- a/src/plugins/plugin_syntax_dc_wiki.py Mon Jan 18 17:04:29 2016 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,648 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -# SàT plugin for Dotclear Wiki Syntax -# Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org) - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
- -# XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent - -from sat.core.i18n import _ -from sat.core.log import getLogger -log = getLogger(__name__) -from sat.core.constants import Const as C -from sat.core import exceptions -from twisted.words.xish import domish -from sat.tools import xml_tools -import copy -import re - -SYNTAX_NAME = "dc_wiki" - -PLUGIN_INFO = { - "name": "Dotclear Wiki Syntax Plugin", - "import_name": "SYNT_DC_WIKI", - "type": C.PLUG_TYPE_SYNTAXE, - "dependencies": ["TEXT-SYNTAXES"], - "main": "DCWikiSyntax", - "handler": "", - "description": _("""Implementation of Dotclear wiki syntax""") -} - -NOTE_TPL = u'[{}]' # Note template -NOTE_A_REV_TPL = u'rev_note_{}' -NOTE_A_TPL = u'note_{}' -ESCAPE_CHARS_BASE = r"(?P[][{}%|\\/*#@{{}}~$-])" -ESCAPE_CHARS_EXTRA = r"!?_+'()" # These chars are not escaped in XHTML => dc_wiki conversion, - # but are used in the other direction -ESCAPE_CHARS = ESCAPE_CHARS_BASE.format('') -FLAG_UL = 'ul' # must be the name of the element -FLAG_OL = 'ol' -ELT_WITH_STYLE = ('img', 'div') # elements where a style attribute is expected - -wiki = [r'\\' + ESCAPE_CHARS_BASE.format(ESCAPE_CHARS_EXTRA), - r"^!!!!!(?P.+?)$", - r"^!!!!(?P.+?)$", - r"^!!!(?P.+?)$", - r"^!!(?P.+?)$", - r"^!(?P.+?)$", - r"^----$(?P)", - r"^\*(?P.*?)$", - r"^#(?P.*?)$", - r"^ (?P.*?)$", - r"^> +?(?P.*?)$", - r"''(?P.+?)''", - r"__(?P.+?)__", - r"%%%(?P)", - r"\+\+(?P.+?)\+\+", - r"--(?P.+?)--", - r"\[(?P.+?)\]", - r"\(\((?P.+?)\)\)", - r"~(?P.+?)~", - r"\?\?(?P.+?\|.+?)\?\?", - r"{{(?P.+?)}}", - r"@@(?P.+?)@@", - r"\$\$(?P.+?)\$\$", - r"(?P.+?)", - ] - -wiki_re = re.compile('|'.join(wiki), re.MULTILINE | re.DOTALL) -wiki_block_level_re = re.compile(r"^///html(?P.+?)///\n\n|(?P.+?)(?:\n{2,}|\Z)", re.MULTILINE | re.DOTALL) - - -class DCWikiParser(object): - - def __init__(self): - self._footnotes = None - for i in xrange(5): - setattr(self, - 'parser_h{}_title'.format(i), - lambda string, parent, i=i: 
self._parser_title(string, parent, 'h{}'.format(i))) - - def parser_paragraph(self, string, parent): - p_elt = parent.addElement('p') - self._parse(string, p_elt) - - def parser_html(self, string, parent): - wrapped_html = "
{}
".format(string) - try: - div_elt = xml_tools.ElementParser()(wrapped_html) - except domish.ParserError as e: - log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e)) - return - children = list(div_elt.elements()) - if len(children) == 1 and children[0].name == 'div': - div_elt = children[0] - parent.addChild(div_elt) - - def parser_escape_char(self, string, parent): - parent.addContent(string) - - def _parser_title(self, string, parent, name): - elt = parent.addElement(name) - elt.addContent(string) - - def parser_horizontal_rule(self, string, parent): - parent.addElement('hr') - - def _parser_list(self, string, parent, list_type): - depth = 0 - while string[depth:depth+1] == '*': - depth +=1 - - string = string[depth:].lstrip() - - for i in xrange(depth+1): - list_elt = getattr(parent, list_type) - if not list_elt: - parent = parent.addElement(list_type) - else: - parent = list_elt - - li_elt = parent.addElement('li') - self._parse(string, li_elt) - - def parser_list_bullet(self, string, parent): - self._parser_list(string, parent, 'ul') - - def parser_list_ordered(self, string, parent): - self._parser_list(string, parent, 'ol') - - def parser_preformated(self, string, parent): - pre_elt = parent.pre - if pre_elt is None: - pre_elt = parent.addElement('pre') - else: - # we are on a new line, and this is important for
-            pre_elt.addContent('\n')
-        pre_elt.addContent(string)
-
-    def parser_quote(self, string, parent):
-        blockquote_elt = parent.blockquote
-        if blockquote_elt is None:
-            blockquote_elt = parent.addElement('blockquote')
-        p_elt = blockquote_elt.p
-        if p_elt is None:
-            p_elt = blockquote_elt.addElement('p')
-        else:
-            string = u'\n' + string
-
-        self._parse(string, p_elt)
-
-    def parser_emphasis(self, string, parent):
-        em_elt = parent.addElement('em')
-        self._parse(string, em_elt)
-
-    def parser_strong_emphasis(self, string, parent):
-        strong_elt = parent.addElement('strong')
-        self._parse(string, strong_elt)
-
-    def parser_line_break(self, string, parent):
-        parent.addElement('br')
-
-    def parser_insertion(self, string, parent):
-        ins_elt = parent.addElement('ins')
-        self._parse(string, ins_elt)
-
-    def parser_deletion(self, string, parent):
-        del_elt = parent.addElement('del')
-        self._parse(string, del_elt)
-
-    def parser_link(self, string, parent):
-        url_data = string.split(u'|')
-        a_elt = parent.addElement('a')
-        length = len(url_data)
-        if length == 1:
-            url = url_data[0]
-            a_elt['href'] = url
-            a_elt.addContent(url)
-        else:
-            name = url_data[0]
-            url = url_data[1]
-            a_elt['href'] = url
-            a_elt.addContent(name)
-            if length >= 3:
-                a_elt['lang'] = url_data[2]
-            if length >= 4:
-                a_elt['title'] = url_data[3]
-            if length > 4:
-                log.warning(u"too much data for url, ignoring extra data")
-
-    def parser_image(self, string, parent):
-        image_data = string.split(u'|')
-        img_elt = parent.addElement('img')
-
-        for idx, attribute in enumerate(('src', 'alt', 'position', 'longdesc')):
-            try:
-                data = image_data[idx]
-            except IndexError:
-                break
-
-            if attribute != 'position':
-                img_elt[attribute] = data
-            else:
-                data = data.lower()
-                if data in ('l', 'g'):
-                    img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0"
-                elif data in ('r', 'd'):
-                    img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em"
-                elif data == 'c':
-                    img_elt['style'] = "display:block; margin-left:auto; margin-right:auto"
-                else:
-                    log.warning(u"bad position argument for image, ignoring it")
-
-    def parser_anchor(self, string, parent):
-        a_elt = parent.addElement('a')
-        a_elt['id'] = string
-
-    def parser_acronym(self, string, parent):
-        acronym, title = string.split(u'|',1)
-        acronym_elt = parent.addElement('acronym', content=acronym)
-        acronym_elt['title'] = title
-
-    def parser_inline_quote(self, string, parent):
-        quote_data = string.split(u'|')
-        quote = quote_data[0]
-        q_elt = parent.addElement('q', content=quote)
-        for idx, attribute in enumerate(('lang', 'cite'), 1):
-            try:
-                data = quote_data[idx]
-            except IndexError:
-                break
-            q_elt[attribute] = data
-
-    def parser_code(self, string, parent):
-        parent.addElement('code', content=string)
-
-    def parser_footnote(self, string, parent):
-        idx = len(self._footnotes) + 1
-        note_txt = NOTE_TPL.format(idx)
-        sup_elt = parent.addElement('sup')
-        sup_elt['class'] = 'note'
-        a_elt = sup_elt.addElement('a', content=note_txt)
-        a_elt['id'] = NOTE_A_REV_TPL.format(idx)
-        a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx))
-
-        p_elt = domish.Element((None, 'p'))
-        a_elt = p_elt.addElement('a', content=note_txt)
-        a_elt['id'] = NOTE_A_TPL.format(idx)
-        a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx))
-        self._parse(string, p_elt)
-        # footnotes are actually added at the end of the parsing
-        self._footnotes.append(p_elt)
-
-    def parser_text(self, string, parent):
-        parent.addContent(string)
-
-    def _parse(self, string, parent, block_level=False):
-        regex = wiki_block_level_re if block_level else wiki_re
-
-        for match in regex.finditer(string):
-            if match.lastgroup is None:
-                parent.addContent(string)
-                return
-            matched = match.group(match.lastgroup)
-            try:
-                parser = getattr(self, 'parser_{}'.format(match.lastgroup))
-            except AttributeError:
-                log.warning(u"No parser found for {}".format(match.lastgroup))
-                # parent.addContent(string)
-                continue
-            parser(matched, parent)
-
-    def parse(self, string):
-        self._footnotes = []
-        div_elt = domish.Element((None, 'div'))
-        self._parse(string, parent=div_elt, block_level=True)
-        if self._footnotes:
-            foot_div_elt = div_elt.addElement('div')
-            foot_div_elt['class'] = 'footnotes'
-            # we add a simple horizontal rule which can be customized
-            # with footnotes class, instead of a text which would need
-            # to be translated
-            foot_div_elt.addElement('hr')
-            for elt in self._footnotes:
-                foot_div_elt.addChild(elt)
-        return div_elt
-
-
-class XHTMLParser(object):
-
-    def __init__(self):
-        self.flags = None
-        self.toto = 0
-        self.footnotes = None # will hold a map from url to buffer id
-        for i in xrange(1,6):
-            setattr(self,
-                'parser_h{}'.format(i),
-                lambda elt, buf, level=i: self.parserHeading(elt, buf, level)
-                )
-
-    def parser_a(self, elt, buf):
-        try:
-            url = elt['href']
-        except KeyError:
-            # probably an anchor
-            try:
-                id_ = elt['id']
-                if not id_:
-                    # we don't want empty values
-                    raise KeyError
-            except KeyError:
-                self.parserGeneric(elt, buf)
-            else:
-                buf.append(u'~~{}~~'.format(id_))
-            return
-
-        link_data = [url]
-        name = unicode(elt)
-        if name != url:
-            link_data.insert(0, name)
-
-        lang = elt.getAttribute('lang')
-        title = elt.getAttribute('title')
-        if lang is not None:
-            link_data.append(lang)
-        elif title is not None:
-            link_data.appand(u'')
-        if title is not None:
-            link_data.append(title)
-        buf.append(u'[')
-        buf.append(u'|'.join(link_data))
-        buf.append(u']')
-
-    def parser_acronym(self, elt, buf):
-        try:
-            title = elt['title']
-        except KeyError:
-            log.debug(u"Acronyme without title, using generic parser")
-            self.parserGeneric(elt, buf)
-            return
-        buf.append(u'??{}|{}??'.format(unicode(elt), title))
-
-    def parser_blockquote(self, elt, buf):
-        # we remove wrapping 

to avoid empty line with "> " - children = list([child for child in elt.children if unicode(child).strip() not in ('', '\n')]) - if len(children) == 1 and children[0].name == 'p': - elt = children[0] - tmp_buf = [] - self.parseChildren(elt, tmp_buf) - blockquote = u'> ' + u'\n> '.join(u''.join(tmp_buf).split('\n')) - buf.append(blockquote) - - def parser_br(self, elt, buf): - buf.append(u'%%%') - - def parser_code(self, elt, buf): - buf.append(u'@@') - self.parseChildren(elt, buf) - buf.append(u'@@') - - def parser_del(self, elt, buf): - buf.append(u'--') - self.parseChildren(elt, buf) - buf.append(u'--') - - def parser_div(self, elt, buf): - if elt.getAttribute('class') == 'footnotes': - self.parserFootnote(elt, buf) - else: - self.parseChildren(elt, buf, block=True) - - def parser_em(self, elt, buf): - buf.append(u"''") - self.parseChildren(elt, buf) - buf.append(u"''") - - def parser_h6(self, elt, buf): - # XXX:

heading is not managed by wiki syntax - # so we handle it with a
- elt = copy.copy(elt) # we don't want to change to original element - elt.name = 'h5' - self._parse(elt, buf) - - def parser_hr(self, elt, buf): - buf.append(u'\n----\n') - - def parser_img(self, elt, buf): - try: - url = elt['src'] - except KeyError: - log.warning(u"Ignoring without src") - return - - image_data=[url] - - alt = elt.getAttribute('alt') - style = elt.getAttribute('style', '') - desc = elt.getAttribute('longdesc') - - if '0 1em 1em 0' in style: - position = 'L' - elif '0 0 1em 1em' in style: - position = 'R' - elif 'auto' in style: - position = 'C' - else: - position = None - - if alt: - image_data.append(alt) - elif position or desc: - image_data.append(u'') - - if position: - image_data.append(position) - elif desc: - image_data.append(u'') - - if desc: - image_data.append(desc) - - buf.append(u'((') - buf.append(u'|'.join(image_data)) - buf.append(u'))') - - def parser_ins(self, elt, buf): - buf.append(u'++') - self.parseChildren(elt, buf) - buf.append(u'++') - - def parser_li(self, elt, buf): - flag = None - current_flag = None - bullets = [] - for flag in reversed(self.flags): - if flag in (FLAG_UL, FLAG_OL): - if current_flag is None: - current_flag = flag - if flag == current_flag: - bullets.append(u'*' if flag == FLAG_UL else u'#') - else: - break - - if flag != current_flag and buf[-1] == u' ': - # this trick is to avoid a space when we switch - # from (un)ordered to the other type on the same row - # e.g. 
*# unorder + ordered item - del buf[-1] - - buf.extend(bullets) - - buf.append(u' ') - self.parseChildren(elt, buf) - buf.append(u'\n') - - def parser_ol(self, elt, buf): - self.parserList(elt, buf, FLAG_OL) - - def parser_p(self, elt, buf): - self.parseChildren(elt, buf) - buf.append(u'\n\n') - - def parser_pre(self, elt, buf): - pre = u''.join([child.toXml() if domish.IElement.providedBy(child) else unicode(child) for child in elt.children]) - pre = u' ' + u'\n '.join(pre.split('\n')) - buf.append(pre) - - def parser_q(self, elt, buf): - quote_data=[unicode(elt)] - - lang = elt.getAttribute('lang') - cite = elt.getAttribute('url') - - if lang: - quote_data.append(lang) - elif cite: - quote_data.append(u'') - - if cite: - quote_data.append(cite) - - buf.append(u'{{') - buf.append(u'|'.join(quote_data)) - buf.append(u'}}') - - def parser_span(self, elt, buf): - self.parseChildren(elt, buf, block=True) - - def parser_strong(self, elt, buf): - buf.append(u'__') - self.parseChildren(elt, buf) - buf.append(u'__') - - def parser_sup(self, elt, buf): - # sup is mainly used for footnotes, so we check if we have an anchor inside - children = list([child for child in elt.children if unicode(child).strip() not in ('', '\n')]) - if (len(children) == 1 and domish.IElement.providedBy(children[0]) - and children[0].name == 'a' and '#' in children[0].getAttribute('href', '')): - url = children[0]['href'] - note_id = url[url.find('#')+1:] - if not note_id: - log.warning("bad link found in footnote") - self.parserGeneric(elt, buf) - return - # this looks like a footnote - buf.append(u'$$') - buf.append(u' ') # placeholder - self.footnotes[note_id] = len(buf) - 1 - buf.append(u'$$') - else: - self.parserGeneric(elt, buf) - - def parser_ul(self, elt, buf): - self.parserList(elt, buf, FLAG_UL) - - def parserList(self, elt, buf, type_): - self.flags.append(type_) - self.parseChildren(elt, buf, block=True) - idx = 0 - for flag in reversed(self.flags): - idx -= 1 - if flag == type_: - 
del self.flags[idx] - break - - if idx == 0: - raise exceptions.InternalError(u"flag has been removed by an other parser") - - def parserHeading(self, elt, buf, level): - buf.append((6-level) * u'!') - for child in elt.children: - # we ignore other elements for a Hx title - self.parserText(child, buf) - buf.append(u'\n') - - def parserFootnote(self, elt, buf): - for elt in elt.elements(): - # all children other than

are ignored - if elt.name == 'p': - a_elt = elt.a - if a_elt is None: - log.warning(u"

element doesn't contain in footnote, ignoring it") - continue - try: - note_idx = self.footnotes[a_elt['id']] - except KeyError: - log.warning(u"Note id doesn't match any known note, ignoring it") - # we create a dummy element to parse all children after the - dummy_elt = domish.Element((None, 'note')) - a_idx = elt.children.index(a_elt) - dummy_elt.children = elt.children[a_idx+1:] - note_buf = [] - self.parseChildren(dummy_elt, note_buf) - # now we can replace the placeholder - buf[note_idx] = u''.join(note_buf) - - def parserText(self, txt, buf, keep_whitespaces=False): - txt = unicode(txt) - if not keep_whitespaces: - # we get text and only let one inter word space - txt = u' '.join(txt.split()) - txt = re.sub(ESCAPE_CHARS, r'\\\1', txt) - if txt: - buf.append(txt) - return txt - - def parserGeneric(self, elt, buf): - # as dotclear wiki syntax handle arbitrary XHTML code - # we use this feature to add elements that we don't know - buf.append(u"\n\n///html\n{}\n///\n\n".format(elt.toXml())) - - def parseChildren(self, elt, buf, block=False): - first_visible = True - for child in elt.children: - if not block and not first_visible and buf and buf[-1][-1] not in (' ','\n'): - # we add separation if it isn't already there - buf.append(u' ') - if domish.IElement.providedBy(child): - self._parse(child, buf) - first_visible = False - else: - appended = self.parserText(child, buf) - if appended: - first_visible = False - - def _parse(self, elt, buf): - elt_name = elt.name.lower() - style = elt.getAttribute('style') - if style and elt_name not in ELT_WITH_STYLE: - # if we have style we use generic parser to put raw HTML - # to avoid losing it - parser = self.parserGeneric - else: - try: - parser = getattr(self, "parser_{}".format(elt_name)) - except AttributeError: - log.debug("Can't find parser for {} element, using generic one".format(elt.name)) - parser = self.parserGeneric - parser(elt, buf) - - def parse(self, elt): - self.flags = [] - self.footnotes = {} - buf = [] 
- self._parse(elt, buf) - return u''.join(buf) - - def parseString(self, string): - wrapped_html = u"

".format(string) - try: - div_elt = xml_tools.ElementParser()(wrapped_html) - except domish.ParserError as e: - log.warning(u"Error while parsing HTML content: {}".format(e)) - return - children = list(div_elt.elements()) - if len(children) == 1 and children[0].name == 'div': - div_elt = children[0] - return self.parse(div_elt) - - -class DCWikiSyntax(object): - - def __init__(self, host): - log.info(_(u"Dotclear wiki syntax plugin initialization")) - self.host = host - self._dc_parser = DCWikiParser() - self._xhtml_parser = XHTMLParser() - self._stx = self.host.plugins["TEXT-SYNTAXES"] - self._stx.addSyntax(SYNTAX_NAME, self.parseWiki, self.parseXHTML, [self._stx.OPT_NO_THREAD]) - - def parseWiki(self, wiki_stx): - div_elt = self._dc_parser.parse(wiki_stx) - return div_elt.toXml() - - def parseXHTML(self, xhtml): - return self._xhtml_parser.parseString(xhtml) diff -r 0d3110341947 -r 18561326a561 src/plugins/plugin_syntax_wiki_dotclear.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/plugin_syntax_wiki_dotclear.py Mon Jan 18 17:30:14 2016 +0100 @@ -0,0 +1,647 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# SàT plugin for Dotclear Wiki Syntax +# Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +# XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent + +from sat.core.i18n import _ +from sat.core.log import getLogger +log = getLogger(__name__) +from sat.core.constants import Const as C +from sat.core import exceptions +from twisted.words.xish import domish +from sat.tools import xml_tools +import copy +import re + +PLUGIN_INFO = { + "name": "Dotclear Wiki Syntax Plugin", + "import_name": "SYNT_DC_WIKI", + "type": C.PLUG_TYPE_SYNTAXE, + "dependencies": ["TEXT-SYNTAXES"], + "main": "DCWikiSyntax", + "handler": "", + "description": _("""Implementation of Dotclear wiki syntax""") +} + +NOTE_TPL = u'[{}]' # Note template +NOTE_A_REV_TPL = u'rev_note_{}' +NOTE_A_TPL = u'note_{}' +ESCAPE_CHARS_BASE = r"(?P[][{}%|\\/*#@{{}}~$-])" +ESCAPE_CHARS_EXTRA = r"!?_+'()" # These chars are not escaped in XHTML => dc_wiki conversion, + # but are used in the other direction +ESCAPE_CHARS = ESCAPE_CHARS_BASE.format('') +FLAG_UL = 'ul' # must be the name of the element +FLAG_OL = 'ol' +ELT_WITH_STYLE = ('img', 'div') # elements where a style attribute is expected + +wiki = [r'\\' + ESCAPE_CHARS_BASE.format(ESCAPE_CHARS_EXTRA), + r"^!!!!!(?P.+?)$", + r"^!!!!(?P.+?)$", + r"^!!!(?P.+?)$", + r"^!!(?P.+?)$", + r"^!(?P.+?)$", + r"^----$(?P)", + r"^\*(?P.*?)$", + r"^#(?P.*?)$", + r"^ (?P.*?)$", + r"^> +?(?P.*?)$", + r"''(?P.+?)''", + r"__(?P.+?)__", + r"%%%(?P)", + r"\+\+(?P.+?)\+\+", + r"--(?P.+?)--", + r"\[(?P.+?)\]", + r"\(\((?P.+?)\)\)", + r"~(?P.+?)~", + r"\?\?(?P.+?\|.+?)\?\?", + r"{{(?P.+?)}}", + r"@@(?P.+?)@@", + r"\$\$(?P.+?)\$\$", + r"(?P.+?)", + ] + +wiki_re = re.compile('|'.join(wiki), re.MULTILINE | re.DOTALL) +wiki_block_level_re = re.compile(r"^///html(?P.+?)///\n\n|(?P.+?)(?:\n{2,}|\Z)", re.MULTILINE | re.DOTALL) + + +class DCWikiParser(object): + + def __init__(self): + self._footnotes = None + for i in xrange(5): + setattr(self, + 'parser_h{}_title'.format(i), + lambda string, parent, i=i: self._parser_title(string, 
parent, 'h{}'.format(i))) + + def parser_paragraph(self, string, parent): + p_elt = parent.addElement('p') + self._parse(string, p_elt) + + def parser_html(self, string, parent): + wrapped_html = "
{}
".format(string) + try: + div_elt = xml_tools.ElementParser()(wrapped_html) + except domish.ParserError as e: + log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e)) + return + children = list(div_elt.elements()) + if len(children) == 1 and children[0].name == 'div': + div_elt = children[0] + parent.addChild(div_elt) + + def parser_escape_char(self, string, parent): + parent.addContent(string) + + def _parser_title(self, string, parent, name): + elt = parent.addElement(name) + elt.addContent(string) + + def parser_horizontal_rule(self, string, parent): + parent.addElement('hr') + + def _parser_list(self, string, parent, list_type): + depth = 0 + while string[depth:depth+1] == '*': + depth +=1 + + string = string[depth:].lstrip() + + for i in xrange(depth+1): + list_elt = getattr(parent, list_type) + if not list_elt: + parent = parent.addElement(list_type) + else: + parent = list_elt + + li_elt = parent.addElement('li') + self._parse(string, li_elt) + + def parser_list_bullet(self, string, parent): + self._parser_list(string, parent, 'ul') + + def parser_list_ordered(self, string, parent): + self._parser_list(string, parent, 'ol') + + def parser_preformated(self, string, parent): + pre_elt = parent.pre + if pre_elt is None: + pre_elt = parent.addElement('pre') + else: + # we are on a new line, and this is important for
+            pre_elt.addContent('\n')
+        pre_elt.addContent(string)
+
+    def parser_quote(self, string, parent):
+        blockquote_elt = parent.blockquote
+        if blockquote_elt is None:
+            blockquote_elt = parent.addElement('blockquote')
+        p_elt = blockquote_elt.p
+        if p_elt is None:
+            p_elt = blockquote_elt.addElement('p')
+        else:
+            string = u'\n' + string
+
+        self._parse(string, p_elt)
+
+    def parser_emphasis(self, string, parent):
+        em_elt = parent.addElement('em')
+        self._parse(string, em_elt)
+
+    def parser_strong_emphasis(self, string, parent):
+        strong_elt = parent.addElement('strong')
+        self._parse(string, strong_elt)
+
+    def parser_line_break(self, string, parent):
+        parent.addElement('br')
+
+    def parser_insertion(self, string, parent):
+        ins_elt = parent.addElement('ins')
+        self._parse(string, ins_elt)
+
+    def parser_deletion(self, string, parent):
+        del_elt = parent.addElement('del')
+        self._parse(string, del_elt)
+
+    def parser_link(self, string, parent):
+        url_data = string.split(u'|')
+        a_elt = parent.addElement('a')
+        length = len(url_data)
+        if length == 1:
+            url = url_data[0]
+            a_elt['href'] = url
+            a_elt.addContent(url)
+        else:
+            name = url_data[0]
+            url = url_data[1]
+            a_elt['href'] = url
+            a_elt.addContent(name)
+            if length >= 3:
+                a_elt['lang'] = url_data[2]
+            if length >= 4:
+                a_elt['title'] = url_data[3]
+            if length > 4:
+                log.warning(u"too much data for url, ignoring extra data")
+
+    def parser_image(self, string, parent):
+        image_data = string.split(u'|')
+        img_elt = parent.addElement('img')
+
+        for idx, attribute in enumerate(('src', 'alt', 'position', 'longdesc')):
+            try:
+                data = image_data[idx]
+            except IndexError:
+                break
+
+            if attribute != 'position':
+                img_elt[attribute] = data
+            else:
+                data = data.lower()
+                if data in ('l', 'g'):
+                    img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0"
+                elif data in ('r', 'd'):
+                    img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em"
+                elif data == 'c':
+                    img_elt['style'] = "display:block; margin-left:auto; margin-right:auto"
+                else:
+                    log.warning(u"bad position argument for image, ignoring it")
+
+    def parser_anchor(self, string, parent):
+        a_elt = parent.addElement('a')
+        a_elt['id'] = string
+
+    def parser_acronym(self, string, parent):
+        acronym, title = string.split(u'|',1)
+        acronym_elt = parent.addElement('acronym', content=acronym)
+        acronym_elt['title'] = title
+
+    def parser_inline_quote(self, string, parent):
+        quote_data = string.split(u'|')
+        quote = quote_data[0]
+        q_elt = parent.addElement('q', content=quote)
+        for idx, attribute in enumerate(('lang', 'cite'), 1):
+            try:
+                data = quote_data[idx]
+            except IndexError:
+                break
+            q_elt[attribute] = data
+
+    def parser_code(self, string, parent):
+        parent.addElement('code', content=string)
+
+    def parser_footnote(self, string, parent):
+        idx = len(self._footnotes) + 1
+        note_txt = NOTE_TPL.format(idx)
+        sup_elt = parent.addElement('sup')
+        sup_elt['class'] = 'note'
+        a_elt = sup_elt.addElement('a', content=note_txt)
+        a_elt['id'] = NOTE_A_REV_TPL.format(idx)
+        a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx))
+
+        p_elt = domish.Element((None, 'p'))
+        a_elt = p_elt.addElement('a', content=note_txt)
+        a_elt['id'] = NOTE_A_TPL.format(idx)
+        a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx))
+        self._parse(string, p_elt)
+        # footnotes are actually added at the end of the parsing
+        self._footnotes.append(p_elt)
+
+    def parser_text(self, string, parent):
+        parent.addContent(string)
+
+    def _parse(self, string, parent, block_level=False):
+        regex = wiki_block_level_re if block_level else wiki_re
+
+        for match in regex.finditer(string):
+            if match.lastgroup is None:
+                parent.addContent(string)
+                return
+            matched = match.group(match.lastgroup)
+            try:
+                parser = getattr(self, 'parser_{}'.format(match.lastgroup))
+            except AttributeError:
+                log.warning(u"No parser found for {}".format(match.lastgroup))
+                # parent.addContent(string)
+                continue
+            parser(matched, parent)
+
+    def parse(self, string):
+        self._footnotes = []
+        div_elt = domish.Element((None, 'div'))
+        self._parse(string, parent=div_elt, block_level=True)
+        if self._footnotes:
+            foot_div_elt = div_elt.addElement('div')
+            foot_div_elt['class'] = 'footnotes'
+            # we add a simple horizontal rule which can be customized
+            # with footnotes class, instead of a text which would need
+            # to be translated
+            foot_div_elt.addElement('hr')
+            for elt in self._footnotes:
+                foot_div_elt.addChild(elt)
+        return div_elt
+
+
+class XHTMLParser(object):
+
+    def __init__(self):
+        self.flags = None
+        self.toto = 0
+        self.footnotes = None # will hold a map from url to buffer id
+        for i in xrange(1,6):
+            setattr(self,
+                'parser_h{}'.format(i),
+                lambda elt, buf, level=i: self.parserHeading(elt, buf, level)
+                )
+
+    def parser_a(self, elt, buf):
+        try:
+            url = elt['href']
+        except KeyError:
+            # probably an anchor
+            try:
+                id_ = elt['id']
+                if not id_:
+                    # we don't want empty values
+                    raise KeyError
+            except KeyError:
+                self.parserGeneric(elt, buf)
+            else:
+                buf.append(u'~~{}~~'.format(id_))
+            return
+
+        link_data = [url]
+        name = unicode(elt)
+        if name != url:
+            link_data.insert(0, name)
+
+        lang = elt.getAttribute('lang')
+        title = elt.getAttribute('title')
+        if lang is not None:
+            link_data.append(lang)
+        elif title is not None:
+            link_data.append(u'')
+        if title is not None:
+            link_data.append(title)
+        buf.append(u'[')
+        buf.append(u'|'.join(link_data))
+        buf.append(u']')
+
+    def parser_acronym(self, elt, buf):
+        try:
+            title = elt['title']
+        except KeyError:
+            log.debug(u"Acronyme without title, using generic parser")
+            self.parserGeneric(elt, buf)
+            return
+        buf.append(u'??{}|{}??'.format(unicode(elt), title))
+
+    def parser_blockquote(self, elt, buf):
+        # we remove wrapping 

to avoid empty line with "> " + children = list([child for child in elt.children if unicode(child).strip() not in ('', '\n')]) + if len(children) == 1 and children[0].name == 'p': + elt = children[0] + tmp_buf = [] + self.parseChildren(elt, tmp_buf) + blockquote = u'> ' + u'\n> '.join(u''.join(tmp_buf).split('\n')) + buf.append(blockquote) + + def parser_br(self, elt, buf): + buf.append(u'%%%') + + def parser_code(self, elt, buf): + buf.append(u'@@') + self.parseChildren(elt, buf) + buf.append(u'@@') + + def parser_del(self, elt, buf): + buf.append(u'--') + self.parseChildren(elt, buf) + buf.append(u'--') + + def parser_div(self, elt, buf): + if elt.getAttribute('class') == 'footnotes': + self.parserFootnote(elt, buf) + else: + self.parseChildren(elt, buf, block=True) + + def parser_em(self, elt, buf): + buf.append(u"''") + self.parseChildren(elt, buf) + buf.append(u"''") + + def parser_h6(self, elt, buf): + # XXX:

heading is not managed by wiki syntax + # so we handle it with a
+ elt = copy.copy(elt) # we don't want to change to original element + elt.name = 'h5' + self._parse(elt, buf) + + def parser_hr(self, elt, buf): + buf.append(u'\n----\n') + + def parser_img(self, elt, buf): + try: + url = elt['src'] + except KeyError: + log.warning(u"Ignoring without src") + return + + image_data=[url] + + alt = elt.getAttribute('alt') + style = elt.getAttribute('style', '') + desc = elt.getAttribute('longdesc') + + if '0 1em 1em 0' in style: + position = 'L' + elif '0 0 1em 1em' in style: + position = 'R' + elif 'auto' in style: + position = 'C' + else: + position = None + + if alt: + image_data.append(alt) + elif position or desc: + image_data.append(u'') + + if position: + image_data.append(position) + elif desc: + image_data.append(u'') + + if desc: + image_data.append(desc) + + buf.append(u'((') + buf.append(u'|'.join(image_data)) + buf.append(u'))') + + def parser_ins(self, elt, buf): + buf.append(u'++') + self.parseChildren(elt, buf) + buf.append(u'++') + + def parser_li(self, elt, buf): + flag = None + current_flag = None + bullets = [] + for flag in reversed(self.flags): + if flag in (FLAG_UL, FLAG_OL): + if current_flag is None: + current_flag = flag + if flag == current_flag: + bullets.append(u'*' if flag == FLAG_UL else u'#') + else: + break + + if flag != current_flag and buf[-1] == u' ': + # this trick is to avoid a space when we switch + # from (un)ordered to the other type on the same row + # e.g. 
*# unorder + ordered item + del buf[-1] + + buf.extend(bullets) + + buf.append(u' ') + self.parseChildren(elt, buf) + buf.append(u'\n') + + def parser_ol(self, elt, buf): + self.parserList(elt, buf, FLAG_OL) + + def parser_p(self, elt, buf): + self.parseChildren(elt, buf) + buf.append(u'\n\n') + + def parser_pre(self, elt, buf): + pre = u''.join([child.toXml() if domish.IElement.providedBy(child) else unicode(child) for child in elt.children]) + pre = u' ' + u'\n '.join(pre.split('\n')) + buf.append(pre) + + def parser_q(self, elt, buf): + quote_data=[unicode(elt)] + + lang = elt.getAttribute('lang') + cite = elt.getAttribute('url') + + if lang: + quote_data.append(lang) + elif cite: + quote_data.append(u'') + + if cite: + quote_data.append(cite) + + buf.append(u'{{') + buf.append(u'|'.join(quote_data)) + buf.append(u'}}') + + def parser_span(self, elt, buf): + self.parseChildren(elt, buf, block=True) + + def parser_strong(self, elt, buf): + buf.append(u'__') + self.parseChildren(elt, buf) + buf.append(u'__') + + def parser_sup(self, elt, buf): + # sup is mainly used for footnotes, so we check if we have an anchor inside + children = list([child for child in elt.children if unicode(child).strip() not in ('', '\n')]) + if (len(children) == 1 and domish.IElement.providedBy(children[0]) + and children[0].name == 'a' and '#' in children[0].getAttribute('href', '')): + url = children[0]['href'] + note_id = url[url.find('#')+1:] + if not note_id: + log.warning("bad link found in footnote") + self.parserGeneric(elt, buf) + return + # this looks like a footnote + buf.append(u'$$') + buf.append(u' ') # placeholder + self.footnotes[note_id] = len(buf) - 1 + buf.append(u'$$') + else: + self.parserGeneric(elt, buf) + + def parser_ul(self, elt, buf): + self.parserList(elt, buf, FLAG_UL) + + def parserList(self, elt, buf, type_): + self.flags.append(type_) + self.parseChildren(elt, buf, block=True) + idx = 0 + for flag in reversed(self.flags): + idx -= 1 + if flag == type_: + 
del self.flags[idx] + break + + if idx == 0: + raise exceptions.InternalError(u"flag has been removed by an other parser") + + def parserHeading(self, elt, buf, level): + buf.append((6-level) * u'!') + for child in elt.children: + # we ignore other elements for a Hx title + self.parserText(child, buf) + buf.append(u'\n') + + def parserFootnote(self, elt, buf): + for elt in elt.elements(): + # all children other than

are ignored + if elt.name == 'p': + a_elt = elt.a + if a_elt is None: + log.warning(u"

element doesn't contain in footnote, ignoring it") + continue + try: + note_idx = self.footnotes[a_elt['id']] + except KeyError: + log.warning(u"Note id doesn't match any known note, ignoring it") + # we create a dummy element to parse all children after the + dummy_elt = domish.Element((None, 'note')) + a_idx = elt.children.index(a_elt) + dummy_elt.children = elt.children[a_idx+1:] + note_buf = [] + self.parseChildren(dummy_elt, note_buf) + # now we can replace the placeholder + buf[note_idx] = u''.join(note_buf) + + def parserText(self, txt, buf, keep_whitespaces=False): + txt = unicode(txt) + if not keep_whitespaces: + # we get text and only let one inter word space + txt = u' '.join(txt.split()) + txt = re.sub(ESCAPE_CHARS, r'\\\1', txt) + if txt: + buf.append(txt) + return txt + + def parserGeneric(self, elt, buf): + # as dotclear wiki syntax handle arbitrary XHTML code + # we use this feature to add elements that we don't know + buf.append(u"\n\n///html\n{}\n///\n\n".format(elt.toXml())) + + def parseChildren(self, elt, buf, block=False): + first_visible = True + for child in elt.children: + if not block and not first_visible and buf and buf[-1][-1] not in (' ','\n'): + # we add separation if it isn't already there + buf.append(u' ') + if domish.IElement.providedBy(child): + self._parse(child, buf) + first_visible = False + else: + appended = self.parserText(child, buf) + if appended: + first_visible = False + + def _parse(self, elt, buf): + elt_name = elt.name.lower() + style = elt.getAttribute('style') + if style and elt_name not in ELT_WITH_STYLE: + # if we have style we use generic parser to put raw HTML + # to avoid losing it + parser = self.parserGeneric + else: + try: + parser = getattr(self, "parser_{}".format(elt_name)) + except AttributeError: + log.debug("Can't find parser for {} element, using generic one".format(elt.name)) + parser = self.parserGeneric + parser(elt, buf) + + def parse(self, elt): + self.flags = [] + self.footnotes = {} + buf = [] 
+ self._parse(elt, buf) + return u''.join(buf) + + def parseString(self, string): + wrapped_html = u"

".format(string) + try: + div_elt = xml_tools.ElementParser()(wrapped_html) + except domish.ParserError as e: + log.warning(u"Error while parsing HTML content: {}".format(e)) + return + children = list(div_elt.elements()) + if len(children) == 1 and children[0].name == 'div': + div_elt = children[0] + return self.parse(div_elt) + + +class DCWikiSyntax(object): + SYNTAX_NAME = "wiki_dotclear" + + def __init__(self, host): + log.info(_(u"Dotclear wiki syntax plugin initialization")) + self.host = host + self._dc_parser = DCWikiParser() + self._xhtml_parser = XHTMLParser() + self._stx = self.host.plugins["TEXT-SYNTAXES"] + self._stx.addSyntax(self.SYNTAX_NAME, self.parseWiki, self.parseXHTML, [self._stx.OPT_NO_THREAD]) + + def parseWiki(self, wiki_stx): + div_elt = self._dc_parser.parse(wiki_stx) + return div_elt.toXml() + + def parseXHTML(self, xhtml): + return self._xhtml_parser.parseString(xhtml)