libervia-backend: comparison of sat/plugins/plugin_syntax_wiki_dotclear.py @ 2562:26edcf3a30eb
core, setup: huge cleaning:
- moved directories from src and frontends/src to sat and sat_frontends, which is the recommended naming convention
- moved the twisted directory to the root
- removed all hacks from setup.py and added the missing dependencies; it is now clean
- used an https URL for the website in setup.py
- removed "Environment :: X11 Applications :: GTK", as wix is deprecated and has been removed
- renamed sat.sh to sat and fixed its installation
- added python_requires to specify the required Python version
- replaced glib2reactor, which uses deprecated code, with gtk3reactor (see the sketch below)
sat can now be installed directly from a virtualenv without using --system-site-packages anymore \o/
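A minimal sketch of the reactor swap from the last bullet, assuming the usual Twisted pattern of installing the reactor before twisted.internet.reactor is first imported (the exact place where sat performs this call is not shown here):

    # before: deprecated GLib reactor
    # from twisted.internet import glib2reactor
    # glib2reactor.install()

    # after: GTK3 reactor
    from twisted.internet import gtk3reactor
    gtk3reactor.install()
    from twisted.internet import reactor  # now the gtk3-based reactor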
author:    Goffi <goffi@goffi.org>
date:      Mon, 02 Apr 2018 19:44:50 +0200
parents:   src/plugins/plugin_syntax_wiki_dotclear.py@0046283a285d
children:  56f94936df1e
comparison: 2561:bd30dc3ffe5a vs 2562:26edcf3a30eb
#!/usr/bin/env python2
# -*- coding: utf-8 -*-

# SàT plugin for Dotclear Wiki Syntax
# Copyright (C) 2009-2018 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent

from sat.core.i18n import _
from sat.core.log import getLogger
log = getLogger(__name__)
from sat.core.constants import Const as C
from sat.core import exceptions
from twisted.words.xish import domish
from sat.tools import xml_tools
import copy
import re

PLUGIN_INFO = {
    C.PI_NAME: "Dotclear Wiki Syntax Plugin",
    C.PI_IMPORT_NAME: "SYNT_DC_WIKI",
    C.PI_TYPE: C.PLUG_TYPE_SYNTAXE,
    C.PI_DEPENDENCIES: ["TEXT-SYNTAXES"],
    C.PI_MAIN: "DCWikiSyntax",
    C.PI_HANDLER: "",
    C.PI_DESCRIPTION: _("""Implementation of Dotclear wiki syntax""")
}

NOTE_TPL = u'[{}]'  # Note template
NOTE_A_REV_TPL = u'rev_note_{}'
NOTE_A_TPL = u'note_{}'
ESCAPE_CHARS_BASE = r"(?P<escape_char>[][{}%|\\/*#@{{}}~$-])"
ESCAPE_CHARS_EXTRA = r"!?_+'()"  # These chars are not escaped in XHTML => dc_wiki conversion,
                                 # but are used in the other direction
ESCAPE_CHARS = ESCAPE_CHARS_BASE.format('')
FLAG_UL = 'ul'  # must be the name of the element
FLAG_OL = 'ol'
ELT_WITH_STYLE = ('img', 'div')  # elements where a style attribute is expected

wiki = [r'\\' + ESCAPE_CHARS_BASE.format(ESCAPE_CHARS_EXTRA),
        r"^!!!!!(?P<h1_title>.+?)$",
        r"^!!!!(?P<h2_title>.+?)$",
        r"^!!!(?P<h3_title>.+?)$",
        r"^!!(?P<h4_title>.+?)$",
        r"^!(?P<h5_title>.+?)$",
        r"^----$(?P<horizontal_rule>)",
        r"^\*(?P<list_bullet>.*?)$",
        r"^#(?P<list_ordered>.*?)$",
        r"^ (?P<preformated>.*?)$",
        r"^> +?(?P<quote>.*?)$",
        r"''(?P<emphasis>.+?)''",
        r"__(?P<strong_emphasis>.+?)__",
        r"%%%(?P<line_break>)",
        r"\+\+(?P<insertion>.+?)\+\+",
        r"--(?P<deletion>.+?)--",
        r"\[(?P<link>.+?)\]",
        r"\(\((?P<image>.+?)\)\)",
        r"~(?P<anchor>.+?)~",
        r"\?\?(?P<acronym>.+?\|.+?)\?\?",
        r"{{(?P<inline_quote>.+?)}}",
        r"@@(?P<code>.+?)@@",
        r"\$\$(?P<footnote>.+?)\$\$",
        r"(?P<text>.+?)",
        ]

wiki_re = re.compile('|'.join(wiki), re.MULTILINE | re.DOTALL)
wiki_block_level_re = re.compile(r"^///html(?P<html>.+?)///\n\n|(?P<paragraph>.+?)(?:\n{2,}|\Z)", re.MULTILINE | re.DOTALL)
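
# Each named group in the `wiki` patterns above maps to a DCWikiParser
# parser_<group_name> method, e.g.:
#     u"__important__"        -> group 'strong_emphasis' -> parser_strong_emphasis
#     u"((image.png|my alt))" -> group 'image'           -> parser_image
# wiki_block_level_re is applied first and cuts the input into raw ///html
# blocks and paragraphs; wiki_re then handles the inline markup inside each
# paragraph.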


class DCWikiParser(object):

    def __init__(self):
        self._footnotes = None
        # create parser_h1_title .. parser_h5_title, matching the h<n>_title
        # groups of the wiki regex above
        for i in xrange(1, 6):
            setattr(self,
                    'parser_h{}_title'.format(i),
                    lambda string, parent, i=i: self._parser_title(string, parent, 'h{}'.format(i)))

    def parser_paragraph(self, string, parent):
        p_elt = parent.addElement('p')
        self._parse(string, p_elt)

    def parser_html(self, string, parent):
        wrapped_html = "<div>{}</div>".format(string)
        try:
            div_elt = xml_tools.ElementParser()(wrapped_html)
        except domish.ParserError as e:
            log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e))
            return
        children = list(div_elt.elements())
        if len(children) == 1 and children[0].name == 'div':
            div_elt = children[0]
        parent.addChild(div_elt)

    def parser_escape_char(self, string, parent):
        parent.addContent(string)

    def _parser_title(self, string, parent, name):
        elt = parent.addElement(name)
        elt.addContent(string)

    def parser_horizontal_rule(self, string, parent):
        parent.addElement('hr')

    def _parser_list(self, string, parent, list_type):
        depth = 0
        while string[depth:depth+1] == '*':
            depth += 1

        string = string[depth:].lstrip()

        for i in xrange(depth+1):
            list_elt = getattr(parent, list_type)
            if not list_elt:
                parent = parent.addElement(list_type)
            else:
                parent = list_elt

        li_elt = parent.addElement('li')
        self._parse(string, li_elt)

    def parser_list_bullet(self, string, parent):
        self._parser_list(string, parent, 'ul')

    def parser_list_ordered(self, string, parent):
        self._parser_list(string, parent, 'ol')

    def parser_preformated(self, string, parent):
        pre_elt = parent.pre
        if pre_elt is None:
            pre_elt = parent.addElement('pre')
        else:
            # we are on a new line, and this is important for <pre/>
            pre_elt.addContent('\n')
        pre_elt.addContent(string)

    def parser_quote(self, string, parent):
        blockquote_elt = parent.blockquote
        if blockquote_elt is None:
            blockquote_elt = parent.addElement('blockquote')
        p_elt = blockquote_elt.p
        if p_elt is None:
            p_elt = blockquote_elt.addElement('p')
        else:
            string = u'\n' + string

        self._parse(string, p_elt)

    def parser_emphasis(self, string, parent):
        em_elt = parent.addElement('em')
        self._parse(string, em_elt)

    def parser_strong_emphasis(self, string, parent):
        strong_elt = parent.addElement('strong')
        self._parse(string, strong_elt)

    def parser_line_break(self, string, parent):
        parent.addElement('br')

    def parser_insertion(self, string, parent):
        ins_elt = parent.addElement('ins')
        self._parse(string, ins_elt)

    def parser_deletion(self, string, parent):
        del_elt = parent.addElement('del')
        self._parse(string, del_elt)

    def parser_link(self, string, parent):
        url_data = string.split(u'|')
        a_elt = parent.addElement('a')
        length = len(url_data)
        if length == 1:
            url = url_data[0]
            a_elt['href'] = url
            a_elt.addContent(url)
        else:
            name = url_data[0]
            url = url_data[1]
            a_elt['href'] = url
            a_elt.addContent(name)
            if length >= 3:
                a_elt['lang'] = url_data[2]
            if length >= 4:
                a_elt['title'] = url_data[3]
            if length > 4:
                log.warning(u"too much data for url, ignoring extra data")

    def parser_image(self, string, parent):
        image_data = string.split(u'|')
        img_elt = parent.addElement('img')

        for idx, attribute in enumerate(('src', 'alt', 'position', 'longdesc')):
            try:
                data = image_data[idx]
            except IndexError:
                break

            if attribute != 'position':
                img_elt[attribute] = data
            else:
                data = data.lower()
                if data in ('l', 'g'):
                    img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0"
                elif data in ('r', 'd'):
                    img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em"
                elif data == 'c':
                    img_elt['style'] = "display:block; margin-left:auto; margin-right:auto"
                else:
                    log.warning(u"bad position argument for image, ignoring it")

    def parser_anchor(self, string, parent):
        a_elt = parent.addElement('a')
        a_elt['id'] = string

    def parser_acronym(self, string, parent):
        acronym, title = string.split(u'|', 1)
        acronym_elt = parent.addElement('acronym', content=acronym)
        acronym_elt['title'] = title

    def parser_inline_quote(self, string, parent):
        quote_data = string.split(u'|')
        quote = quote_data[0]
        q_elt = parent.addElement('q', content=quote)
        for idx, attribute in enumerate(('lang', 'cite'), 1):
            try:
                data = quote_data[idx]
            except IndexError:
                break
            q_elt[attribute] = data

    def parser_code(self, string, parent):
        parent.addElement('code', content=string)

    def parser_footnote(self, string, parent):
        idx = len(self._footnotes) + 1
        note_txt = NOTE_TPL.format(idx)
        sup_elt = parent.addElement('sup')
        sup_elt['class'] = 'note'
        a_elt = sup_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_REV_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx))

        p_elt = domish.Element((None, 'p'))
        a_elt = p_elt.addElement('a', content=note_txt)
        a_elt['id'] = NOTE_A_TPL.format(idx)
        a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx))
        self._parse(string, p_elt)
        # footnotes are actually added at the end of the parsing
        self._footnotes.append(p_elt)

    def parser_text(self, string, parent):
        parent.addContent(string)

    def _parse(self, string, parent, block_level=False):
        regex = wiki_block_level_re if block_level else wiki_re

        for match in regex.finditer(string):
            if match.lastgroup is None:
                parent.addContent(string)
                return
            matched = match.group(match.lastgroup)
            try:
                parser = getattr(self, 'parser_{}'.format(match.lastgroup))
            except AttributeError:
                log.warning(u"No parser found for {}".format(match.lastgroup))
                # parent.addContent(string)
                continue
            parser(matched, parent)

    def parse(self, string):
        self._footnotes = []
        div_elt = domish.Element((None, 'div'))
        self._parse(string, parent=div_elt, block_level=True)
        if self._footnotes:
            foot_div_elt = div_elt.addElement('div')
            foot_div_elt['class'] = 'footnotes'
            # we add a simple horizontal rule which can be customized
            # with footnotes class, instead of a text which would need
            # to be translated
            foot_div_elt.addElement('hr')
            for elt in self._footnotes:
                foot_div_elt.addChild(elt)
        return div_elt

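# Usage sketch (illustration only; the variable names here are arbitrary):
#     parser = DCWikiParser()
#     div_elt = parser.parse(u"!!!!!Title\n\nSome __strong__ text")
#     xhtml = div_elt.toXml()
# parse() always wraps the result in a <div/>; if footnotes were collected,
# they are appended at the end in a <div class="footnotes"/> following an
# <hr/>.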

class XHTMLParser(object):

    def __init__(self):
        self.flags = None
        self.toto = 0
        self.footnotes = None  # will hold a map from url to buffer id
        for i in xrange(1, 6):
            setattr(self,
                    'parser_h{}'.format(i),
                    lambda elt, buf, level=i: self.parserHeading(elt, buf, level)
                    )

    def parser_a(self, elt, buf):
        try:
            url = elt['href']
        except KeyError:
            # probably an anchor
            try:
                id_ = elt['id']
                if not id_:
                    # we don't want empty values
                    raise KeyError
            except KeyError:
                self.parserGeneric(elt, buf)
            else:
                buf.append(u'~~{}~~'.format(id_))
            return

        link_data = [url]
        name = unicode(elt)
        if name != url:
            link_data.insert(0, name)

        lang = elt.getAttribute('lang')
        title = elt.getAttribute('title')
        if lang is not None:
            link_data.append(lang)
        elif title is not None:
            link_data.append(u'')
        if title is not None:
            link_data.append(title)
        buf.append(u'[')
        buf.append(u'|'.join(link_data))
        buf.append(u']')

    def parser_acronym(self, elt, buf):
        try:
            title = elt['title']
        except KeyError:
            log.debug(u"Acronym without title, using generic parser")
            self.parserGeneric(elt, buf)
            return
        buf.append(u'??{}|{}??'.format(unicode(elt), title))

    def parser_blockquote(self, elt, buf):
        # we remove wrapping <p> to avoid empty line with "> "
        children = list([child for child in elt.children if unicode(child).strip() not in ('', '\n')])
        if len(children) == 1 and children[0].name == 'p':
            elt = children[0]
        tmp_buf = []
        self.parseChildren(elt, tmp_buf)
        blockquote = u'> ' + u'\n> '.join(u''.join(tmp_buf).split('\n'))
        buf.append(blockquote)

    def parser_br(self, elt, buf):
        buf.append(u'%%%')

    def parser_code(self, elt, buf):
        buf.append(u'@@')
        self.parseChildren(elt, buf)
        buf.append(u'@@')

    def parser_del(self, elt, buf):
        buf.append(u'--')
        self.parseChildren(elt, buf)
        buf.append(u'--')

    def parser_div(self, elt, buf):
        if elt.getAttribute('class') == 'footnotes':
            self.parserFootnote(elt, buf)
        else:
            self.parseChildren(elt, buf, block=True)

    def parser_em(self, elt, buf):
        buf.append(u"''")
        self.parseChildren(elt, buf)
        buf.append(u"''")

    def parser_h6(self, elt, buf):
        # XXX: <h6/> heading is not managed by wiki syntax
        #      so we handle it with a <h5/>
        elt = copy.copy(elt)  # we don't want to change the original element
        elt.name = 'h5'
        self._parse(elt, buf)

    def parser_hr(self, elt, buf):
        buf.append(u'\n----\n')

    def parser_img(self, elt, buf):
        try:
            url = elt['src']
        except KeyError:
            log.warning(u"Ignoring <img/> without src")
            return

        image_data = [url]

        alt = elt.getAttribute('alt')
        style = elt.getAttribute('style', '')
        desc = elt.getAttribute('longdesc')

        if '0 1em 1em 0' in style:
            position = 'L'
        elif '0 0 1em 1em' in style:
            position = 'R'
        elif 'auto' in style:
            position = 'C'
        else:
            position = None

        if alt:
            image_data.append(alt)
        elif position or desc:
            image_data.append(u'')

        if position:
            image_data.append(position)
        elif desc:
            image_data.append(u'')

        if desc:
            image_data.append(desc)

        buf.append(u'((')
        buf.append(u'|'.join(image_data))
        buf.append(u'))')

    def parser_ins(self, elt, buf):
        buf.append(u'++')
        self.parseChildren(elt, buf)
        buf.append(u'++')

    def parser_li(self, elt, buf):
        flag = None
        current_flag = None
        bullets = []
        for flag in reversed(self.flags):
            if flag in (FLAG_UL, FLAG_OL):
                if current_flag is None:
                    current_flag = flag
                if flag == current_flag:
                    bullets.append(u'*' if flag == FLAG_UL else u'#')
                else:
                    break

        if flag != current_flag and buf[-1] == u' ':
            # this trick is to avoid a space when we switch
            # from (un)ordered to the other type on the same row
            # e.g. *# unordered + ordered item
            del buf[-1]

        buf.extend(bullets)

        buf.append(u' ')
        self.parseChildren(elt, buf)
        buf.append(u'\n')

    def parser_ol(self, elt, buf):
        self.parserList(elt, buf, FLAG_OL)

    def parser_p(self, elt, buf):
        self.parseChildren(elt, buf)
        buf.append(u'\n\n')

    def parser_pre(self, elt, buf):
        pre = u''.join([child.toXml() if domish.IElement.providedBy(child) else unicode(child) for child in elt.children])
        pre = u' ' + u'\n '.join(pre.split('\n'))
        buf.append(pre)

    def parser_q(self, elt, buf):
        quote_data = [unicode(elt)]

        lang = elt.getAttribute('lang')
        cite = elt.getAttribute('cite')

        if lang:
            quote_data.append(lang)
        elif cite:
            quote_data.append(u'')

        if cite:
            quote_data.append(cite)

        buf.append(u'{{')
        buf.append(u'|'.join(quote_data))
        buf.append(u'}}')

    def parser_span(self, elt, buf):
        self.parseChildren(elt, buf, block=True)

    def parser_strong(self, elt, buf):
        buf.append(u'__')
        self.parseChildren(elt, buf)
        buf.append(u'__')

    def parser_sup(self, elt, buf):
        # sup is mainly used for footnotes, so we check if we have an anchor inside
        children = list([child for child in elt.children if unicode(child).strip() not in ('', '\n')])
        if (len(children) == 1 and domish.IElement.providedBy(children[0])
            and children[0].name == 'a' and '#' in children[0].getAttribute('href', '')):
            url = children[0]['href']
            note_id = url[url.find('#')+1:]
            if not note_id:
                log.warning("bad link found in footnote")
                self.parserGeneric(elt, buf)
                return
            # this looks like a footnote
            buf.append(u'$$')
            buf.append(u' ')  # placeholder
            self.footnotes[note_id] = len(buf) - 1
            buf.append(u'$$')
        else:
            self.parserGeneric(elt, buf)

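    # Footnote handling: parser_sup() only emits the "$$" markers with a
    # one-space placeholder and records its buffer index in
    # self.footnotes[note_id]; parserFootnote() later replaces that
    # placeholder with the parsed note text, once the final
    # <div class="footnotes"/> is reached.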
    def parser_ul(self, elt, buf):
        self.parserList(elt, buf, FLAG_UL)

    def parserList(self, elt, buf, type_):
        self.flags.append(type_)
        self.parseChildren(elt, buf, block=True)
        idx = 0
        for flag in reversed(self.flags):
            idx -= 1
            if flag == type_:
                del self.flags[idx]
                break

        if idx == 0:
            raise exceptions.InternalError(u"flag has been removed by an other parser")

    def parserHeading(self, elt, buf, level):
        buf.append((6-level) * u'!')
        for child in elt.children:
            # we ignore other elements for a Hx title
            self.parserText(child, buf)
        buf.append(u'\n')

    def parserFootnote(self, elt, buf):
        for elt in elt.elements():
            # all children other than <p/> are ignored
            if elt.name == 'p':
                a_elt = elt.a
                if a_elt is None:
                    log.warning(u"<p/> element doesn't contain <a/> in footnote, ignoring it")
                    continue
                try:
                    note_idx = self.footnotes[a_elt['id']]
                except KeyError:
                    log.warning(u"Note id doesn't match any known note, ignoring it")
                    continue
                # we create a dummy element to parse all children after the <a/>
                dummy_elt = domish.Element((None, 'note'))
                a_idx = elt.children.index(a_elt)
                dummy_elt.children = elt.children[a_idx+1:]
                note_buf = []
                self.parseChildren(dummy_elt, note_buf)
                # now we can replace the placeholder
                buf[note_idx] = u''.join(note_buf)

    def parserText(self, txt, buf, keep_whitespaces=False):
        txt = unicode(txt)
        if not keep_whitespaces:
            # we keep only one space between words
            txt = u' '.join(txt.split())
        txt = re.sub(ESCAPE_CHARS, r'\\\1', txt)
        if txt:
            buf.append(txt)
        return txt

    def parserGeneric(self, elt, buf):
        # as dotclear wiki syntax handles arbitrary XHTML code
        # we use this feature to add elements that we don't know
        buf.append(u"\n\n///html\n{}\n///\n\n".format(elt.toXml()))

    def parseChildren(self, elt, buf, block=False):
        first_visible = True
        for child in elt.children:
            if not block and not first_visible and buf and buf[-1][-1] not in (' ', '\n'):
                # we add separation if it isn't already there
                buf.append(u' ')
            if domish.IElement.providedBy(child):
                self._parse(child, buf)
                first_visible = False
            else:
                appended = self.parserText(child, buf)
                if appended:
                    first_visible = False

    def _parse(self, elt, buf):
        elt_name = elt.name.lower()
        style = elt.getAttribute('style')
        if style and elt_name not in ELT_WITH_STYLE:
            # if we have a style attribute, we use the generic parser
            # to keep the raw HTML and avoid losing it
            parser = self.parserGeneric
        else:
            try:
                parser = getattr(self, "parser_{}".format(elt_name))
            except AttributeError:
                log.debug("Can't find parser for {} element, using generic one".format(elt.name))
                parser = self.parserGeneric
        parser(elt, buf)

    def parse(self, elt):
        self.flags = []
        self.footnotes = {}
        buf = []
        self._parse(elt, buf)
        return u''.join(buf)

    def parseString(self, string):
        wrapped_html = u"<div>{}</div>".format(string)
        try:
            div_elt = xml_tools.ElementParser()(wrapped_html)
        except domish.ParserError as e:
            log.warning(u"Error while parsing HTML content: {}".format(e))
            return
        children = list(div_elt.elements())
        if len(children) == 1 and children[0].name == 'div':
            div_elt = children[0]
        return self.parse(div_elt)

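# Usage sketch (illustration only; the variable names here are arbitrary):
#     parser = XHTMLParser()
#     wiki_text = parser.parseString(u"<p>Some <strong>strong</strong> text</p>")
#     # wiki_text == u"Some __strong__ text\n\n"
# parseString() wraps the input in a <div/>, parses it with
# xml_tools.ElementParser and hands the result to parse(); elements without a
# dedicated parser_<name> method, or carrying a style attribute outside
# ELT_WITH_STYLE, are kept verbatim inside a ///html block.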

class DCWikiSyntax(object):
    SYNTAX_NAME = "wiki_dotclear"

    def __init__(self, host):
        log.info(_(u"Dotclear wiki syntax plugin initialization"))
        self.host = host
        self._dc_parser = DCWikiParser()
        self._xhtml_parser = XHTMLParser()
        self._stx = self.host.plugins["TEXT-SYNTAXES"]
        self._stx.addSyntax(self.SYNTAX_NAME, self.parseWiki, self.parseXHTML, [self._stx.OPT_NO_THREAD])

    def parseWiki(self, wiki_stx):
        div_elt = self._dc_parser.parse(wiki_stx)
        return div_elt.toXml()

    def parseXHTML(self, xhtml):
        return self._xhtml_parser.parseString(xhtml)
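
# Round-trip sketch (illustration only; the host object comes from the SàT
# core, and the "TEXT-SYNTAXES" plugin listed in C.PI_DEPENDENCIES provides
# addSyntax()):
#     plugin = DCWikiSyntax(host)
#     xhtml = plugin.parseWiki(u"''emphasised'' and __strong__ text")
#     wiki = plugin.parseXHTML(xhtml)
# In practice these methods are not called directly: they are registered with
# addSyntax() under the name "wiki_dotclear" and invoked by the TEXT-SYNTAXES
# plugin whenever a conversion from or to this syntax is requested.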