comparison src/plugins/plugin_syntax_dc_wiki.py @ 1806:fd788d24277a

plugin syntax dc_wiki: first draft: handle dotclear wiki syntax /!\ only dc_wiki -> XHTML is handled for now
author Goffi <goffi@goffi.org>
date Sun, 17 Jan 2016 20:39:20 +0100
parents
children 0d3110341947
comparison
equal deleted inserted replaced
1805:3c40fa0dcd7a 1806:fd788d24277a
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
4 # SàT plugin for Dotclear Wiki Syntax
5 # Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
16
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 # XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent
21
22 from sat.core.i18n import _
23 from sat.core.log import getLogger
24 log = getLogger(__name__)
25 from sat.core.constants import Const as C
26 from twisted.words.xish import domish
27 from sat.tools import xml_tools
28 import re
29
# Name under which DCWikiSyntax registers this syntax (passed to addSyntax)
SYNTAX_NAME = "dc_wiki"

# Plugin metadata.
# "main" names the entry point class defined in this file (DCWikiSyntax) and
# "dependencies" lists the TEXT-SYNTAXES plugin that DCWikiSyntax.__init__
# fetches from host.plugins.
# NOTE(review): presumably read by the SàT plugin loader -- confirm there
PLUGIN_INFO = {
    "name": "Dotclear Wiki Syntax Plugin",
    "import_name": "SYNT_DC_WIKI",
    "type": C.PLUG_TYPE_SYNTAXE,
    "dependencies": ["TEXT-SYNTAXES"],
    "main": "DCWikiSyntax",
    "handler": "",
    "description": _("""Implementation of Dotclear wiki syntax""")
}
41
# Templates used by DCWikiParser.parser_footnote, all formatted with the
# 1-based index of the footnote:
NOTE_TPL = u'[{}]'  # visible label of a note, used in the text and in the footnote
NOTE_A_REV_TPL = u'rev_note_{}'  # id of the in-text anchor (target of the back link)
NOTE_A_TPL = u'note_{}'  # id of the anchor put in the footnote itself
45
# Inline and line level patterns, joined below into a single alternation.
# Each alternative holds exactly one named group: DCWikiParser._parse uses the
# name of the group which matched to look up the "parser_<group>" method.
# Order matters: the regex engine tries alternatives from left to right, so
# longer markers (e.g. "!!!!!") must come before their prefixes (e.g. "!"),
# and the one-character "text" fallback must stay last.
wiki = [r"\\(?P<escape_char>[][!_+%'|\/*#@{}~?$()-])",  # backslash-escaped marker character
        r"^!!!!!(?P<h1_title>.+?)$",
        r"^!!!!(?P<h2_title>.+?)$",
        r"^!!!(?P<h3_title>.+?)$",
        r"^!!(?P<h4_title>.+?)$",
        r"^!(?P<h5_title>.+?)$",
        r"^----$(?P<horizontal_rule>)",  # empty group: only used to name the match
        r"^\*(?P<list_bullet>.*?)$",
        r"^#(?P<list_ordered>.*?)$",
        r"^ (?P<preformated>.*?)$",
        r"^> +?(?P<quote>.*?)$",
        r"''(?P<emphasis>.+?)''",
        r"__(?P<strong_emphasis>.+?)__",
        r"%%%(?P<line_break>)",  # empty group: only used to name the match
        r"\+\+(?P<insertion>.+?)\+\+",
        r"--(?P<deletion>.+?)--",
        r"\[(?P<link>.+?)\]",
        r"\(\((?P<image>.+?)\)\)",
        r"~(?P<anchor>.+?)~",
        r"\?\?(?P<acronym>.+?\|.+?)\?\?",
        r"{{(?P<inline_quote>.+?)}}",
        r"@@(?P<code>.+?)@@",
        r"\$\$(?P<footnote>.+?)\$\$",
        r"(?P<text>.+?)",  # fallback: lazily matches a single character
        ]

# MULTILINE makes "^" and "$" match at every line boundary, DOTALL lets "."
# match newlines too
wiki_re = re.compile('|'.join(wiki), re.MULTILINE | re.DOTALL)
# Block level split: either a raw "///html ... ///" block followed by an empty
# line, or a paragraph running up to the next run of 2+ newlines (or the end
# of the text).
# NOTE(review): an html block which ends the text without a trailing empty
# line does not match the first alternative and is handled as a paragraph
wiki_block_level_re = re.compile(r"^///html(?P<html>.+?)///\n\n|(?P<paragraph>.+?)(?:\n{2,}|\Z)", re.MULTILINE | re.DOTALL)
74
75
class DCWikiParser(object):
    """Parse Dotclear wiki markup and build the corresponding element tree.

    The text is matched against the module level regular expressions; for
    each match, the method named "parser_<group>" (where <group> is the name
    of the regex group which matched) is called with the matched text and
    the element to fill.
    """

    def __init__(self):
        # list of footnote <p/> elements, (re)initialised by parse()
        self._footnotes = None
        # wiki_re names its title groups h1_title to h5_title, so handlers
        # are registered for levels 1 to 5 (level is bound as a default
        # value to avoid the late binding of closures)
        for level in range(1, 6):
            setattr(self,
                    'parser_h{}_title'.format(level),
                    lambda string, parent, level=level: self._parser_title(string, parent, 'h{}'.format(level)))

    def parser_paragraph(self, string, parent):
        """Add a <p/> to parent and parse the inline markup into it

        @param string(unicode): content of the paragraph
        @param parent: element receiving the paragraph
        """
        p_elt = parent.addElement('p')
        self._parse(string, p_elt)

    def parser_html(self, string, parent):
        """Parse a raw HTML block and add it to parent

        The content is wrapped in a <div/> so it always has a single root.
        If it can't be parsed, a warning is logged and the block is ignored.

        @param string(unicode): raw HTML found between the html markers
        @param parent: element receiving the parsed HTML
        """
        # unicode template: on Python 2, a byte string template would raise
        # UnicodeEncodeError as soon as the content holds non ASCII characters
        wrapped_html = u"<div>{}</div>".format(string)
        try:
            div_elt = xml_tools.ElementParser()(wrapped_html)
        except domish.ParserError as e:
            log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e))
            return
        children = list(div_elt.elements())
        if len(children) == 1 and children[0].name == 'div':
            # the content already had a <div/> as single root,
            # the wrapping one is useless
            div_elt = children[0]
        parent.addChild(div_elt)

    def parser_escape_char(self, string, parent):
        """Add an escaped character as plain content (without the backslash)"""
        parent.addContent(string)

    def _parser_title(self, string, parent, name):
        """Add a title element

        @param string(unicode): text of the title (not parsed for inline markup)
        @param parent: element receiving the title
        @param name(str): name of the element to create (e.g. "h3")
        """
        elt = parent.addElement(name)
        elt.addContent(string)

    def parser_horizontal_rule(self, string, parent):
        """Add a <hr/> to parent (string is ignored)"""
        parent.addElement('hr')

    def _parser_list(self, string, parent, list_type):
        """Add an item to a list, creating the list element(s) if needed

        @param string(unicode): content of the line, after its first marker
        @param parent: element containing the list
        @param list_type(str): name of the list element ("ul" or "ol")
        """
        # each extra leading "*" means one more nesting level
        depth = 0
        while string[depth:depth+1] == '*':
            depth += 1

        string = string[depth:].lstrip()

        # go down depth + 1 levels, reusing the list found at each level
        # or creating it when there is none
        for _ in range(depth+1):
            list_elt = getattr(parent, list_type)
            if not list_elt:
                parent = parent.addElement(list_type)
            else:
                parent = list_elt

        li_elt = parent.addElement('li')
        self._parse(string, li_elt)

    def parser_list_bullet(self, string, parent):
        """Add an item to a bullet list (<ul/>)"""
        self._parser_list(string, parent, 'ul')

    def parser_list_ordered(self, string, parent):
        """Add an item to an ordered list (<ol/>)"""
        self._parser_list(string, parent, 'ol')

    def parser_preformated(self, string, parent):
        """Add a line of preformatted text

        The line is appended to the <pre/> found in parent if there is one,
        else a new <pre/> is created.
        """
        pre_elt = parent.pre
        if pre_elt is None:
            pre_elt = parent.addElement('pre')
        else:
            # we are on a new line, and this is important for <pre/>
            pre_elt.addContent('\n')
        pre_elt.addContent(string)

    def parser_quote(self, string, parent):
        """Add a line of quotation

        The line is parsed into the <p/> of the <blockquote/> found in
        parent, both being created when missing.
        """
        blockquote_elt = parent.blockquote
        if blockquote_elt is None:
            blockquote_elt = parent.addElement('blockquote')
        p_elt = blockquote_elt.p
        if p_elt is None:
            p_elt = blockquote_elt.addElement('p')
        else:
            # the quotation continues: keep the line separation
            string = u'\n' + string

        self._parse(string, p_elt)

    def parser_emphasis(self, string, parent):
        """Add a <em/> and parse the inline markup into it"""
        em_elt = parent.addElement('em')
        self._parse(string, em_elt)

    def parser_strong_emphasis(self, string, parent):
        """Add a <strong/> and parse the inline markup into it"""
        strong_elt = parent.addElement('strong')
        self._parse(string, strong_elt)

    def parser_line_break(self, string, parent):
        """Add a <br/> to parent (string is ignored)"""
        parent.addElement('br')

    def parser_insertion(self, string, parent):
        """Add a <ins/> and parse the inline markup into it"""
        ins_elt = parent.addElement('ins')
        self._parse(string, ins_elt)

    def parser_deletion(self, string, parent):
        """Add a <del/> and parse the inline markup into it"""
        del_elt = parent.addElement('del')
        self._parse(string, del_elt)

    def parser_link(self, string, parent):
        """Add a link (<a/>)

        @param string(unicode): either "url" alone, or "name|url" optionally
            followed by "|lang" then "|title"; extra fields are ignored with
            a warning
        @param parent: element receiving the link
        """
        url_data = string.split(u'|')
        a_elt = parent.addElement('a')
        length = len(url_data)
        # split() always returns at least one item: a lone item is the URL,
        # which is then also used as the text of the link
        if length == 1:
            url = url_data[0]
            a_elt['href'] = url
            a_elt.addContent(url)
        else:
            name = url_data[0]
            url = url_data[1]
            a_elt['href'] = url
            a_elt.addContent(name)
            if length >= 3:
                a_elt['lang'] = url_data[2]
            if length >= 4:
                a_elt['title'] = url_data[3]
            if length > 4:
                log.warning(u"too much data for url, ignoring extra data")

    def parser_image(self, string, parent):
        """Add an image (<img/>)

        @param string(unicode): "src", optionally followed by "|alt",
            "|position" and "|longdesc"; position is translated to a style
            attribute (l/g: float left, r/d: float right, c: centered)
        @param parent: element receiving the image
        """
        image_data = string.split(u'|')
        img_elt = parent.addElement('img')

        # zip stops at the shortest sequence: missing trailing fields
        # are skipped and extra ones are ignored
        for attribute, data in zip(('src', 'alt', 'position', 'longdesc'), image_data):
            if attribute != 'position':
                img_elt[attribute] = data
                continue

            data = data.lower()
            if data in ('l', 'g'):
                img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0"
            elif data in ('r', 'd'):
                img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em"
            elif data == 'c':
                img_elt['style'] = "display:block; margin-left:auto; margin-right:auto"
            else:
                log.warning(u"bad position argument for image, ignoring it")

    def parser_anchor(self, string, parent):
        """Add an anchor: an empty <a/> with string as id"""
        a_elt = parent.addElement('a')
        a_elt['id'] = string

    def parser_acronym(self, string, parent):
        """Add an acronym

        @param string(unicode): "acronym|title" (the regex guarantees that
            the separator is present)
        @param parent: element receiving the <acronym/>
        """
        acronym, title = string.split(u'|', 1)
        acronym_elt = parent.addElement('acronym', content=acronym)
        acronym_elt['title'] = title

    def parser_inline_quote(self, string, parent):
        """Add an inline quotation (<q/>)

        @param string(unicode): "quote", optionally followed by "|lang"
            then "|cite"
        @param parent: element receiving the quotation
        """
        quote_data = string.split(u'|')
        q_elt = parent.addElement('q', content=quote_data[0])
        # optional fields following the quotation itself
        for attribute, data in zip(('lang', 'cite'), quote_data[1:]):
            q_elt[attribute] = data

    def parser_code(self, string, parent):
        """Add a <code/> with string as content (not parsed for inline markup)"""
        parent.addElement('code', content=string)

    def parser_footnote(self, string, parent):
        """Add a footnote

        A numbered reference is added to parent, linking to the note; the
        note itself links back to the reference and is stored until the end
        of the parsing.

        @param string(unicode): text of the note
        @param parent: element receiving the reference
        """
        idx = len(self._footnotes) + 1
        note_txt = NOTE_TPL.format(idx)
        # reference in the text
        sup_elt = parent.addElement('sup')
        sup_elt['class'] = 'note'
        a_elt = sup_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_REV_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx))

        # the note itself
        p_elt = domish.Element((None, 'p'))
        a_elt = p_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx))
        self._parse(string, p_elt)
        # footnotes are actually added at the end of the parsing
        self._footnotes.append(p_elt)

    def parser_text(self, string, parent):
        """Add plain text to parent"""
        parent.addContent(string)

    def _parse(self, string, parent, block_level=False):
        """Match string against the wiki syntax and call the handlers

        @param string(unicode): wiki text to parse
        @param parent: element to fill
        @param block_level(bool): if True split the text in block level
            parts (html blocks and paragraphs), else parse inline markup
        """
        regex = wiki_block_level_re if block_level else wiki_re

        for match in regex.finditer(string):
            if match.lastgroup is None:
                # no named group matched: keep the text as it is
                parent.addContent(string)
                return
            matched = match.group(match.lastgroup)
            try:
                parser = getattr(self, 'parser_{}'.format(match.lastgroup))
            except AttributeError:
                # the matched text is dropped
                log.warning(u"No parser found for {}".format(match.lastgroup))
                continue
            parser(matched, parent)

    def parse(self, string):
        """Parse a text using Dotclear wiki syntax

        @param string(unicode): wiki text
        @return (domish.Element): <div/> containing the result, with the
            footnotes (if any) in a last <div class="footnotes"/>
        """
        self._footnotes = []
        div_elt = domish.Element((None, 'div'))
        self._parse(string, parent=div_elt, block_level=True)
        if self._footnotes:
            foot_div_elt = div_elt.addElement('div')
            foot_div_elt['class'] = 'footnotes'
            # we add a simple horizontal rule which can be customized
            # with footnotes class, instead of a text which would need
            # to be translated
            foot_div_elt.addElement('hr')
            for elt in self._footnotes:
                foot_div_elt.addChild(elt)
        return div_elt
290
291
class DCWikiSyntax(object):
    """Plugin entry point: registers the Dotclear wiki syntax"""

    def __init__(self, host):
        """Register the syntax with the TEXT-SYNTAXES plugin

        @param host: object giving access to the loaded plugins through
            its "plugins" mapping
        """
        log.info(_(u"Dotclear wiki syntax plugin initialization"))
        self.host = host
        self._dc_parser = DCWikiParser()
        text_syntaxes = host.plugins["TEXT-SYNTAXES"]
        self._stx = text_syntaxes
        text_syntaxes.addSyntax(
            SYNTAX_NAME,
            self.parseWiki,
            self.parseXHTML,
            [text_syntaxes.OPT_NO_THREAD])

    def parseWiki(self, wiki_stx):
        """Convert Dotclear wiki text to XHTML

        @param wiki_stx(unicode): text using Dotclear wiki syntax
        @return: serialisation (toXml) of the <div/> built by the parser
        """
        return self._dc_parser.parse(wiki_stx).toXml()

    def parseXHTML(self, xhtml):
        """Convert XHTML to Dotclear wiki text (not handled yet)

        @raise NotImplementedError: always
        """
        raise NotImplementedError