Mercurial > libervia-backend
comparison src/plugins/plugin_syntax_dc_wiki.py @ 1806:fd788d24277a
plugin syntax dc_wiki: first draft:
handle dotclear wiki syntax
/!\ only dc_wiki -> XHTML is handled for now
author | Goffi <goffi@goffi.org> |
---|---|
date | Sun, 17 Jan 2016 20:39:20 +0100 |
parents | |
children | 0d3110341947 |
comparison
equal
deleted
inserted
replaced
1805:3c40fa0dcd7a | 1806:fd788d24277a |
---|---|
1 #!/usr/bin/python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 # SàT plugin for Dotclear Wiki Syntax | |
5 # Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org) | |
6 | |
7 # This program is free software: you can redistribute it and/or modify | |
8 # it under the terms of the GNU Affero General Public License as published by | |
9 # the Free Software Foundation, either version 3 of the License, or | |
10 # (at your option) any later version. | |
11 | |
12 # This program is distributed in the hope that it will be useful, | |
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 # GNU Affero General Public License for more details. | |
16 | |
17 # You should have received a copy of the GNU Affero General Public License | |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | |
20 # XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent | |
21 | |
22 from sat.core.i18n import _ | |
23 from sat.core.log import getLogger | |
24 log = getLogger(__name__) | |
25 from sat.core.constants import Const as C | |
26 from twisted.words.xish import domish | |
27 from sat.tools import xml_tools | |
28 import re | |
29 | |
# Name under which this syntax is registered with the TEXT-SYNTAXES plugin
SYNTAX_NAME = "dc_wiki"

# Plugin metadata: "main" names the class instantiated for this plugin and
# "dependencies" lists the plugins it relies on (see DCWikiSyntax below).
PLUGIN_INFO = dict(
    name="Dotclear Wiki Syntax Plugin",
    import_name="SYNT_DC_WIKI",
    type=C.PLUG_TYPE_SYNTAXE,
    dependencies=["TEXT-SYNTAXES"],
    main="DCWikiSyntax",
    handler="",
    description=_("""Implementation of Dotclear wiki syntax"""),
)
41 | |
# Footnote templates, each formatted with the 1-based footnote index:
# - NOTE_TPL: text displayed for the footnote marker
# - NOTE_A_REV_TPL: id of the marker anchor located in the body
# - NOTE_A_TPL: id of the anchor located in the footnote itself
NOTE_TPL = u"[{}]"
NOTE_A_REV_TPL = u"rev_note_{}"
NOTE_A_TPL = u"note_{}"
45 | |
# Inline-level patterns. They are joined into a single alternation, so order
# matters: the first alternative matching at a position wins, and the
# catch-all "text" pattern must stay last. Each named group <name> is handled
# by the DCWikiParser method called parser_<name>.
wiki = [r"\\(?P<escape_char>[][!_+%'|\/*#@{}~?$()-])",
        r"^!!!!!(?P<h1_title>.+?)$",
        r"^!!!!(?P<h2_title>.+?)$",
        r"^!!!(?P<h3_title>.+?)$",
        r"^!!(?P<h4_title>.+?)$",
        r"^!(?P<h5_title>.+?)$",
        r"^----$(?P<horizontal_rule>)",
        r"^\*(?P<list_bullet>.*?)$",
        r"^#(?P<list_ordered>.*?)$",
        r"^ (?P<preformated>.*?)$",
        r"^> +?(?P<quote>.*?)$",
        r"''(?P<emphasis>.+?)''",
        r"__(?P<strong_emphasis>.+?)__",
        r"%%%(?P<line_break>)",
        r"\+\+(?P<insertion>.+?)\+\+",
        r"--(?P<deletion>.+?)--",
        r"\[(?P<link>.+?)\]",
        r"\(\((?P<image>.+?)\)\)",
        r"~(?P<anchor>.+?)~",
        r"\?\?(?P<acronym>.+?\|.+?)\?\?",
        r"{{(?P<inline_quote>.+?)}}",
        r"@@(?P<code>.+?)@@",
        r"\$\$(?P<footnote>.+?)\$\$",
        r"(?P<text>.+?)",
        ]

wiki_re = re.compile('|'.join(wiki), re.MULTILINE | re.DOTALL)

# Block-level splitter: either a raw "///html ... ///" section or a paragraph.
# Both kinds of block end on a blank line (two or more newlines) or at the end
# of the input; the html alternative used to require a literal "\n\n", so an
# html section closing the document was wrongly parsed as a wiki paragraph.
wiki_block_level_re = re.compile(
    r"^///html(?P<html>.+?)///(?:\n{2,}|\s*\Z)|(?P<paragraph>.+?)(?:\n{2,}|\Z)",
    re.MULTILINE | re.DOTALL)
74 | |
75 | |
class DCWikiParser(object):
    """Translate Dotclear wiki markup into a tree of domish elements.

    Parsing is regex driven: each named group of ``wiki_block_level_re``
    (block level) or ``wiki_re`` (inline level) is dispatched to the method
    called ``parser_<group name>``. Every such method receives the captured
    text (``string``) and the element to fill (``parent``).
    """

    def __init__(self):
        # footnote <p/> elements collected while parsing; reset by parse()
        self._footnotes = None
        # wiki_re names its heading groups h1_title .. h5_title, so handlers
        # must be generated for levels 1 to 5 (the former 0..4 range left
        # h5_title without handler and created a useless h0 one)
        for level in range(1, 6):
            setattr(self,
                    'parser_h{}_title'.format(level),
                    lambda string, parent, level=level: self._parser_title(string, parent, 'h{}'.format(level)))

    def parser_paragraph(self, string, parent):
        """Wrap a block of text in a <p/> and parse its inline markup."""
        p_elt = parent.addElement('p')
        self._parse(string, p_elt)

    def parser_html(self, string, parent):
        """Insert raw (X)HTML found in a ``///html ... ///`` section.

        The content is wrapped in a <div/> to get a single root element;
        content which can't be parsed is logged and ignored.
        """
        # unicode template: a byte string template would try to encode
        # ``string`` to ASCII and fail on non-ASCII content with Python 2
        wrapped_html = u"<div>{}</div>".format(string)
        try:
            div_elt = xml_tools.ElementParser()(wrapped_html)
        except domish.ParserError as e:
            log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e))
            return
        children = list(div_elt.elements())
        if len(children) == 1 and children[0].name == 'div':
            # the content is already a single <div/>, no need for our wrapper
            div_elt = children[0]
        parent.addChild(div_elt)

    def parser_escape_char(self, string, parent):
        """Add a backslash-escaped character as plain text."""
        parent.addContent(string)

    def _parser_title(self, string, parent, name):
        """Add a heading element.

        @param name: tag name of the heading (e.g. 'h3')
        """
        elt = parent.addElement(name)
        elt.addContent(string)

    def parser_horizontal_rule(self, string, parent):
        """Add a <hr/>; ``string`` is always empty and is ignored."""
        parent.addElement('hr')

    def _parser_list(self, string, parent, list_type):
        """Add a list item, creating or reusing the enclosing list(s).

        @param string: line content following the first list marker (the
            marker itself is consumed by the regex)
        @param list_type: 'ul' or 'ol', according to the first marker
        """
        # every additional leading marker opens one more nesting level, whose
        # kind depends on the marker used ('*' for <ul/>, '#' for <ol/>);
        # only '*' used to be counted, so nested ordered lists were missed
        nested = []
        for char in string:
            if char == u'*':
                nested.append('ul')
            elif char == u'#':
                nested.append('ol')
            else:
                break

        string = string[len(nested):].lstrip()

        for tag in [list_type] + nested:
            # reuse the list already opened at this level, if any
            list_elt = getattr(parent, tag)
            if list_elt is None:
                parent = parent.addElement(tag)
            else:
                parent = list_elt

        li_elt = parent.addElement('li')
        self._parse(string, li_elt)

    def parser_list_bullet(self, string, parent):
        """Handle a line starting with '*' (unordered list item)."""
        self._parser_list(string, parent, 'ul')

    def parser_list_ordered(self, string, parent):
        """Handle a line starting with '#' (ordered list item)."""
        self._parser_list(string, parent, 'ol')

    def parser_preformated(self, string, parent):
        """Append a line to the <pre/> child of parent, creating it if needed."""
        pre_elt = parent.pre
        if pre_elt is None:
            pre_elt = parent.addElement('pre')
        else:
            # we are on a new line, and this is important for <pre/>
            pre_elt.addContent('\n')
        pre_elt.addContent(string)

    def parser_quote(self, string, parent):
        """Append a quoted line to the <blockquote/><p/> of parent.

        The <blockquote/> and its <p/> are created on first use; following
        lines are added to the same <p/>, separated by a line feed.
        """
        blockquote_elt = parent.blockquote
        if blockquote_elt is None:
            blockquote_elt = parent.addElement('blockquote')
        p_elt = blockquote_elt.p
        if p_elt is None:
            p_elt = blockquote_elt.addElement('p')
        else:
            string = u'\n' + string

        self._parse(string, p_elt)

    def parser_emphasis(self, string, parent):
        """Handle ''text'' (<em/>)."""
        em_elt = parent.addElement('em')
        self._parse(string, em_elt)

    def parser_strong_emphasis(self, string, parent):
        """Handle __text__ (<strong/>)."""
        strong_elt = parent.addElement('strong')
        self._parse(string, strong_elt)

    def parser_line_break(self, string, parent):
        """Handle %%% (<br/>); ``string`` is always empty and is ignored."""
        parent.addElement('br')

    def parser_insertion(self, string, parent):
        """Handle ++text++ (<ins/>)."""
        ins_elt = parent.addElement('ins')
        self._parse(string, ins_elt)

    def parser_deletion(self, string, parent):
        """Handle --text-- (<del/>)."""
        del_elt = parent.addElement('del')
        self._parse(string, del_elt)

    def parser_link(self, string, parent):
        """Handle [url] and [name|url|lang|title] links.

        With a single field, the URL is also used as link text. Fields after
        the fourth one are ignored with a warning.
        """
        url_data = string.split(u'|')
        a_elt = parent.addElement('a')
        length = len(url_data)
        # split() always returns at least one item: the "URL only" form is
        # length 1 (testing 0 made [url] raise IndexError below)
        if length == 1:
            url = url_data[0]
            a_elt['href'] = url
            a_elt.addContent(url)
        else:
            name = url_data[0]
            url = url_data[1]
            a_elt['href'] = url
            a_elt.addContent(name)
            if length >= 3:
                a_elt['lang'] = url_data[2]
            if length >= 4:
                a_elt['title'] = url_data[3]
            if length > 4:
                log.warning(u"too much data for url, ignoring extra data")

    def parser_image(self, string, parent):
        """Handle ((src|alt|position|longdesc)) images.

        Only the first field is mandatory. Position is translated to an
        inline style: l/g for left, r/d for right, c for centered.
        """
        image_data = string.split(u'|')
        img_elt = parent.addElement('img')

        for idx, attribute in enumerate(('src', 'alt', 'position', 'longdesc')):
            try:
                data = image_data[idx]
            except IndexError:
                # remaining fields were not specified
                break

            if attribute != 'position':
                img_elt[attribute] = data
            else:
                data = data.lower()
                if data in ('l', 'g'):
                    img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0"
                elif data in ('r', 'd'):
                    img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em"
                elif data == 'c':
                    img_elt['style'] = "display:block; margin-left:auto; margin-right:auto"
                else:
                    log.warning(u"bad position argument for image, ignoring it")

    def parser_anchor(self, string, parent):
        """Handle ~name~: add an empty <a/> with ``string`` as id."""
        a_elt = parent.addElement('a')
        a_elt['id'] = string

    def parser_acronym(self, string, parent):
        """Handle ??acronym|title??."""
        # the regex guarantees that at least one '|' is present
        acronym, title = string.split(u'|', 1)
        acronym_elt = parent.addElement('acronym', content=acronym)
        acronym_elt['title'] = title

    def parser_inline_quote(self, string, parent):
        """Handle {{quote|lang|cite}}; lang and cite are optional."""
        quote_data = string.split(u'|')
        quote = quote_data[0]
        q_elt = parent.addElement('q', content=quote)
        for idx, attribute in enumerate(('lang', 'cite'), 1):
            try:
                data = quote_data[idx]
            except IndexError:
                break
            q_elt[attribute] = data

    def parser_code(self, string, parent):
        """Handle @@code@@; the content is not parsed further."""
        parent.addElement('code', content=string)

    def parser_footnote(self, string, parent):
        """Handle $$footnote$$.

        A numbered marker linking to the note is added in place, and the
        note itself is stored in self._footnotes with a link back to the
        marker.
        """
        idx = len(self._footnotes) + 1
        note_txt = NOTE_TPL.format(idx)
        sup_elt = parent.addElement('sup')
        sup_elt['class'] = 'note'
        a_elt = sup_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_REV_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx))

        p_elt = domish.Element((None, 'p'))
        a_elt = p_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx))
        self._parse(string, p_elt)
        # footnotes are actually added at the end of the parsing
        self._footnotes.append(p_elt)

    def parser_text(self, string, parent):
        """Add text which matched no markup."""
        parent.addContent(string)

    def _parse(self, string, parent, block_level=False):
        """Parse string and fill parent with the resulting nodes.

        @param string: wiki markup to parse
        @param parent: element receiving the generated content
        @param block_level: if True, split string in blocks (paragraphs and
            raw HTML sections), else parse inline markup
        """
        regex = wiki_block_level_re if block_level else wiki_re

        for match in regex.finditer(string):
            if match.lastgroup is None:
                # no named group matched: keep the whole string as text
                parent.addContent(string)
                return
            matched = match.group(match.lastgroup)
            try:
                parser = getattr(self, 'parser_{}'.format(match.lastgroup))
            except AttributeError:
                # the matched markup is dropped
                log.warning(u"No parser found for {}".format(match.lastgroup))
                continue
            parser(matched, parent)

    def parse(self, string):
        """Convert a whole wiki document.

        @param string: Dotclear wiki markup
        @return: <div/> element holding the converted content, followed by
            a <div class="footnotes"/> if the document contains footnotes
        """
        self._footnotes = []
        div_elt = domish.Element((None, 'div'))
        self._parse(string, parent=div_elt, block_level=True)
        if self._footnotes:
            foot_div_elt = div_elt.addElement('div')
            foot_div_elt['class'] = 'footnotes'
            # we add a simple horizontal rule which can be customized
            # with footnotes class, instead of a text which would need
            # to be translated
            foot_div_elt.addElement('hr')
            for elt in self._footnotes:
                foot_div_elt.addChild(elt)
        return div_elt
290 | |
291 | |
class DCWikiSyntax(object):
    """Plugin main class: registers the Dotclear wiki syntax on the host."""

    def __init__(self, host):
        """Create the parser and register the syntax.

        @param host: object giving access to the loaded plugins through
            its ``plugins`` mapping
        """
        log.info(_(u"Dotclear wiki syntax plugin initialization"))
        self.host = host
        self._dc_parser = DCWikiParser()
        text_syntaxes = self.host.plugins["TEXT-SYNTAXES"]
        self._stx = text_syntaxes
        text_syntaxes.addSyntax(
            SYNTAX_NAME,
            self.parseWiki,
            self.parseXHTML,
            [text_syntaxes.OPT_NO_THREAD],
        )

    def parseWiki(self, wiki_stx):
        """Convert Dotclear wiki markup to serialised XHTML.

        @param wiki_stx: wiki markup to convert
        @return: result of toXml() on the element built by the parser
        """
        return self._dc_parser.parse(wiki_stx).toXml()

    def parseXHTML(self, xhtml):
        """XHTML to wiki conversion is not handled yet.

        @raise NotImplementedError: always
        """
        raise NotImplementedError