1806
|
1 #!/usr/bin/python |
|
2 # -*- coding: utf-8 -*- |
|
3 |
|
4 # SàT plugin for Dotclear Wiki Syntax |
|
5 # Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org) |
|
6 |
|
7 # This program is free software: you can redistribute it and/or modify |
|
8 # it under the terms of the GNU Affero General Public License as published by |
|
9 # the Free Software Foundation, either version 3 of the License, or |
|
10 # (at your option) any later version. |
|
11 |
|
12 # This program is distributed in the hope that it will be useful, |
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
15 # GNU Affero General Public License for more details. |
|
16 |
|
17 # You should have received a copy of the GNU Affero General Public License |
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
19 |
|
20 # XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent |
|
21 |
|
22 from sat.core.i18n import _ |
|
23 from sat.core.log import getLogger |
|
24 log = getLogger(__name__) |
|
25 from sat.core.constants import Const as C |
|
26 from twisted.words.xish import domish |
|
27 from sat.tools import xml_tools |
|
28 import re |
|
29 |
|
# Name passed to addSyntax() when the plugin registers itself
SYNTAX_NAME = "dc_wiki"

# Plugin metadata
PLUGIN_INFO = {
    "name": "Dotclear Wiki Syntax Plugin",
    "import_name": "SYNT_DC_WIKI",
    "type": C.PLUG_TYPE_SYNTAXE,
    "dependencies": ["TEXT-SYNTAXES"],
    "main": "DCWikiSyntax",
    "handler": "",
    "description": _("""Implementation of Dotclear wiki syntax"""),
}

# Templates used to render footnotes
NOTE_TPL = u'[{}]'  # text of a note marker, formatted with the note index
NOTE_A_REV_TPL = u'rev_note_{}'  # id of the marker placed in the text
NOTE_A_TPL = u'note_{}'  # id of the note itself

# Inline/line level patterns. They are joined into a single alternation, so
# the order matters: the first alternative matching at a position wins, and
# the catch-all "text" pattern must stay last. The name of each group selects
# the ``parser_<group>`` method of DCWikiParser.
wiki = [
    # backslash followed by one markup character
    r"\\(?P<escape_char>[][!_+%'|\/*#@{}~?$()-])",
    # titles: the more "!" at line start, the higher the heading level
    r"^!!!!!(?P<h1_title>.+?)$",
    r"^!!!!(?P<h2_title>.+?)$",
    r"^!!!(?P<h3_title>.+?)$",
    r"^!!(?P<h4_title>.+?)$",
    r"^!(?P<h5_title>.+?)$",
    # line level constructs
    r"^----$(?P<horizontal_rule>)",
    r"^\*(?P<list_bullet>.*?)$",
    r"^#(?P<list_ordered>.*?)$",
    r"^ (?P<preformated>.*?)$",
    r"^> +?(?P<quote>.*?)$",
    # inline constructs
    r"''(?P<emphasis>.+?)''",
    r"__(?P<strong_emphasis>.+?)__",
    r"%%%(?P<line_break>)",
    r"\+\+(?P<insertion>.+?)\+\+",
    r"--(?P<deletion>.+?)--",
    r"\[(?P<link>.+?)\]",
    r"\(\((?P<image>.+?)\)\)",
    r"~(?P<anchor>.+?)~",
    r"\?\?(?P<acronym>.+?\|.+?)\?\?",
    r"{{(?P<inline_quote>.+?)}}",
    r"@@(?P<code>.+?)@@",
    r"\$\$(?P<footnote>.+?)\$\$",
    # anything else, one character at a time
    r"(?P<text>.+?)",
]

wiki_re = re.compile('|'.join(wiki), flags=re.MULTILINE | re.DOTALL)

# Block level pattern: either a raw "///html ... ///" block followed by a
# blank line, or a paragraph running up to the next blank line(s) or the end
# of the text.
wiki_block_level_re = re.compile(
    r"^///html(?P<html>.+?)///\n\n"
    r"|(?P<paragraph>.+?)(?:\n{2,}|\Z)",
    flags=re.MULTILINE | re.DOTALL)
|
74 |
|
75 |
|
class DCWikiParser(object):
    """Convert Dotclear wiki markup into a tree of XML elements.

    ``_parse`` runs one of the module level patterns (``wiki_block_level_re``
    or ``wiki_re``) over the text and, for every match, calls the
    ``parser_<group name>`` method of this class with the captured text and
    the element the result must be attached to.
    """

    def __init__(self):
        # footnote <p/> elements collected during the current parse() call;
        # stays None until parse() is called
        self._footnotes = None
        # The patterns define the groups h1_title .. h5_title, so one
        # parser_h<N>_title method is generated for each N in 1..5.
        # ``level`` is bound as a default argument so that each lambda keeps
        # its own value instead of the last one of the loop.
        for level in range(1, 6):
            setattr(
                self,
                'parser_h{}_title'.format(level),
                lambda string, parent, level=level: self._parser_title(
                    string, parent, 'h{}'.format(level)),
            )

    def parser_paragraph(self, string, parent):
        """Add a <p/> to parent and parse string as its inline content."""
        p_elt = parent.addElement('p')
        self._parse(string, p_elt)

    def parser_html(self, string, parent):
        """Parse a raw HTML block and attach the result to parent.

        The markup is wrapped in a <div/> before being parsed. When the
        parsed wrapper contains nothing but a single <div/>, that inner
        element is attached instead of the wrapper. If parsing fails, a
        warning is logged and the block is ignored.
        """
        # unicode template: with a byte string template, Python 2 would try
        # to encode unicode markup as ASCII and fail on non-ASCII characters
        wrapped_html = u"<div>{}</div>".format(string)
        try:
            div_elt = xml_tools.ElementParser()(wrapped_html)
        except domish.ParserError as e:
            log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e))
            return
        children = list(div_elt.elements())
        if len(children) == 1 and children[0].name == 'div':
            div_elt = children[0]
        parent.addChild(div_elt)

    def parser_escape_char(self, string, parent):
        """Add the escaped character (without its backslash) as text."""
        parent.addContent(string)

    def _parser_title(self, string, parent, name):
        """Add a heading element called name (e.g. 'h3') holding string."""
        elt = parent.addElement(name)
        elt.addContent(string)

    def parser_horizontal_rule(self, string, parent):
        """Add an <hr/> to parent; string is not used."""
        parent.addElement('hr')

    def _parser_list(self, string, parent, list_type):
        """Add a list item to the list_type ('ul' or 'ol') list of parent.

        string is the line without its first marker. Each extra leading '*'
        adds one nesting level. At every level the list_type child already
        present is reused, and a new one is created when there is none.
        """
        # NOTE(review): only '*' is counted here, whatever list_type is; an
        # extra leading '#' is left in the item text handed to _parse below.
        depth = 0
        while string[depth:depth+1] == '*':
            depth += 1

        string = string[depth:].lstrip()

        # one list for the first marker (removed by the pattern) plus one
        # per extra marker counted above
        for _level in range(depth + 1):
            list_elt = getattr(parent, list_type)
            if not list_elt:
                parent = parent.addElement(list_type)
            else:
                parent = list_elt

        li_elt = parent.addElement('li')
        self._parse(string, li_elt)

    def parser_list_bullet(self, string, parent):
        """Handle a line starting with '*' as an item of a <ul/>."""
        self._parser_list(string, parent, 'ul')

    def parser_list_ordered(self, string, parent):
        """Handle a line starting with '#' as an item of an <ol/>."""
        self._parser_list(string, parent, 'ol')

    def parser_preformated(self, string, parent):
        """Append a preformatted line to the <pre/> child of parent.

        The <pre/> element is created on the first line; following lines
        are appended to it, separated by a newline.
        """
        pre_elt = parent.pre
        if pre_elt is None:
            pre_elt = parent.addElement('pre')
        else:
            # we are on a new line, and this is important for <pre/>
            pre_elt.addContent('\n')
        pre_elt.addContent(string)

    def parser_quote(self, string, parent):
        """Append a quoted line to the <blockquote><p/></blockquote> of parent.

        Both elements are created when missing; when the <p/> already
        exists the new line is prefixed with a newline.
        """
        blockquote_elt = parent.blockquote
        if blockquote_elt is None:
            blockquote_elt = parent.addElement('blockquote')
        p_elt = blockquote_elt.p
        if p_elt is None:
            p_elt = blockquote_elt.addElement('p')
        else:
            string = u'\n' + string

        self._parse(string, p_elt)

    def parser_emphasis(self, string, parent):
        """Add an <em/> to parent and parse string inside it."""
        em_elt = parent.addElement('em')
        self._parse(string, em_elt)

    def parser_strong_emphasis(self, string, parent):
        """Add a <strong/> to parent and parse string inside it."""
        strong_elt = parent.addElement('strong')
        self._parse(string, strong_elt)

    def parser_line_break(self, string, parent):
        """Add a <br/> to parent; string is not used."""
        parent.addElement('br')

    def parser_insertion(self, string, parent):
        """Add an <ins/> to parent and parse string inside it."""
        ins_elt = parent.addElement('ins')
        self._parse(string, ins_elt)

    def parser_deletion(self, string, parent):
        """Add a <del/> to parent and parse string inside it."""
        del_elt = parent.addElement('del')
        self._parse(string, del_elt)

    def parser_link(self, string, parent):
        """Add an <a/> to parent from "url" or "name|url[|lang[|title]]".

        With a single field, the URL is used both as target and as link
        text. Fields after the fourth are ignored with a warning.
        """
        url_data = string.split(u'|')
        a_elt = parent.addElement('a')
        length = len(url_data)
        # split() always returns at least one item, so a link without any
        # "|" is the length == 1 case
        if length == 1:
            url = url_data[0]
            a_elt['href'] = url
            a_elt.addContent(url)
        else:
            name = url_data[0]
            url = url_data[1]
            a_elt['href'] = url
            a_elt.addContent(name)
            if length >= 3:
                a_elt['lang'] = url_data[2]
            if length >= 4:
                a_elt['title'] = url_data[3]
            if length > 4:
                log.warning(u"too much data for url, ignoring extra data")

    def parser_image(self, string, parent):
        """Add an <img/> to parent from "src[|alt[|position[|longdesc]]]".

        position is case insensitive and is turned into a style attribute:
        'l'/'g' float left, 'r'/'d' float right, 'c' centered. Any other
        value is ignored with a warning.
        """
        image_data = string.split(u'|')
        img_elt = parent.addElement('img')

        for idx, attribute in enumerate(('src', 'alt', 'position', 'longdesc')):
            try:
                data = image_data[idx]
            except IndexError:
                # no more fields were given
                break

            if attribute != 'position':
                img_elt[attribute] = data
            else:
                data = data.lower()
                if data in ('l', 'g'):
                    img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0"
                elif data in ('r', 'd'):
                    img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em"
                elif data == 'c':
                    img_elt['style'] = "display:block; margin-left:auto; margin-right:auto"
                else:
                    log.warning(u"bad position argument for image, ignoring it")

    def parser_anchor(self, string, parent):
        """Add an empty <a/> whose id is string."""
        a_elt = parent.addElement('a')
        a_elt['id'] = string

    def parser_acronym(self, string, parent):
        """Add an <acronym/> to parent from "acronym|title"."""
        acronym, title = string.split(u'|', 1)
        acronym_elt = parent.addElement('acronym', content=acronym)
        acronym_elt['title'] = title

    def parser_inline_quote(self, string, parent):
        """Add a <q/> to parent from "quote[|lang[|cite]]"."""
        quote_data = string.split(u'|')
        quote = quote_data[0]
        q_elt = parent.addElement('q', content=quote)
        for idx, attribute in enumerate(('lang', 'cite'), 1):
            try:
                data = quote_data[idx]
            except IndexError:
                # no more fields were given
                break
            q_elt[attribute] = data

    def parser_code(self, string, parent):
        """Add a <code/> holding string, which is not parsed further."""
        parent.addElement('code', content=string)

    def parser_footnote(self, string, parent):
        """Add a footnote marker to parent and store the footnote itself.

        The marker is a <sup class="note"/> holding a link to the note. The
        note is a detached <p/> starting with a link back to the marker,
        followed by the parsed text of the note. It is appended to
        self._footnotes, which must have been initialised by parse().
        """
        idx = len(self._footnotes) + 1
        note_txt = NOTE_TPL.format(idx)
        sup_elt = parent.addElement('sup')
        sup_elt['class'] = 'note'
        a_elt = sup_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_REV_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx))

        p_elt = domish.Element((None, 'p'))
        a_elt = p_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx))
        self._parse(string, p_elt)
        # footnotes are actually added at the end of the parsing
        self._footnotes.append(p_elt)

    def parser_text(self, string, parent):
        """Add string to parent as plain text."""
        parent.addContent(string)

    def _parse(self, string, parent, block_level=False):
        """Run the wiki patterns over string and fill parent with the result.

        block_level selects the block pattern (html blocks and paragraphs)
        instead of the inline one. Each match is handed to the
        ``parser_<group name>`` method named after the last group which
        matched. A match without a parser is skipped with a warning.
        """
        regex = wiki_block_level_re if block_level else wiki_re

        for match in regex.finditer(string):
            if match.lastgroup is None:
                # no named group took part in the match: keep the whole
                # string as plain text and stop
                parent.addContent(string)
                return
            matched = match.group(match.lastgroup)
            try:
                parser = getattr(self, 'parser_{}'.format(match.lastgroup))
            except AttributeError:
                log.warning(u"No parser found for {}".format(match.lastgroup))
                continue
            parser(matched, parent)

    def parse(self, string):
        """Parse wiki markup and return the resulting <div/> element.

        Footnotes found while parsing are appended at the end, inside a
        <div class="footnotes"/> which starts with an <hr/>.
        """
        self._footnotes = []
        div_elt = domish.Element((None, 'div'))
        self._parse(string, parent=div_elt, block_level=True)
        if self._footnotes:
            foot_div_elt = div_elt.addElement('div')
            foot_div_elt['class'] = 'footnotes'
            # we add a simple horizontal rule which can be customized
            # with footnotes class, instead of a text which would need
            # to be translated
            foot_div_elt.addElement('hr')
            for elt in self._footnotes:
                foot_div_elt.addChild(elt)
        return div_elt
|
290 |
|
291 |
|
class DCWikiSyntax(object):
    """Plugin class registering the Dotclear wiki syntax on the host."""

    def __init__(self, host):
        """Create the parser and register the syntax.

        The syntax is added to the "TEXT-SYNTAXES" plugin of host under
        SYNTAX_NAME, with parseWiki and parseXHTML as converters and the
        OPT_NO_THREAD option.
        """
        log.info(_(u"Dotclear wiki syntax plugin initialization"))
        self.host = host
        self._dc_parser = DCWikiParser()
        text_syntaxes = host.plugins["TEXT-SYNTAXES"]
        self._stx = text_syntaxes
        text_syntaxes.addSyntax(
            SYNTAX_NAME,
            self.parseWiki,
            self.parseXHTML,
            [text_syntaxes.OPT_NO_THREAD],
        )

    def parseWiki(self, wiki_stx):
        """Return the XML serialisation of the parsed wiki_stx markup."""
        return self._dc_parser.parse(wiki_stx).toXml()

    def parseXHTML(self, xhtml):
        """Conversion from XHTML is not implemented: always raises."""
        raise NotImplementedError