Mercurial > libervia-backend
comparison libervia/backend/plugins/plugin_syntax_wiki_dotclear.py @ 4071:4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 02 Jun 2023 11:49:51 +0200 |
parents | sat/plugins/plugin_syntax_wiki_dotclear.py@524856bd7b19 |
children | 0d7bb4df2343 |
comparison
equal
deleted
inserted
replaced
4070:d10748475025 | 4071:4b842c1fb686 |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 | |
4 # SàT plugin for Dotclear Wiki Syntax | |
5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) | |
6 | |
7 # This program is free software: you can redistribute it and/or modify | |
8 # it under the terms of the GNU Affero General Public License as published by | |
9 # the Free Software Foundation, either version 3 of the License, or | |
10 # (at your option) any later version. | |
11 | |
12 # This program is distributed in the hope that it will be useful, | |
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 # GNU Affero General Public License for more details. | |
16 | |
17 # You should have received a copy of the GNU Affero General Public License | |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | |
20 # XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent | |
21 | |
22 from libervia.backend.core.i18n import _ | |
23 from libervia.backend.core.log import getLogger | |
24 | |
25 log = getLogger(__name__) | |
26 from libervia.backend.core.constants import Const as C | |
27 from libervia.backend.core import exceptions | |
28 from twisted.words.xish import domish | |
29 from libervia.backend.tools import xml_tools | |
30 import copy | |
31 import re | |
32 | |
# Plugin metadata; every key and every non-literal value comes from the
# project's Const class (imported as C).
PLUGIN_INFO = {
    C.PI_NAME: "Dotclear Wiki Syntax Plugin",
    C.PI_IMPORT_NAME: "SYNT_DC_WIKI",
    C.PI_TYPE: C.PLUG_TYPE_SYNTAXE,
    # DCWikiSyntax.__init__ looks this plugin up in host.plugins
    C.PI_DEPENDENCIES: ["TEXT_SYNTAXES"],
    # name of the class instantiated as the plugin entry point
    C.PI_MAIN: "DCWikiSyntax",
    C.PI_HANDLER: "",
    C.PI_DESCRIPTION: _("""Implementation of Dotclear wiki syntax"""),
}
42 | |
NOTE_TPL = "[{}]"  # Note template
NOTE_A_REV_TPL = "rev_note_{}"
NOTE_A_TPL = "note_{}"
# Template of the "escapable character" pattern: the "{}" placeholder receives
# extra characters through str.format, while "{{}}" renders as literal braces
# inside the character class.
ESCAPE_CHARS_BASE = r"(?P<escape_char>[][{}%|\\/*#@{{}}~$-])"
# These chars are not escaped in XHTML => dc_wiki conversion,
# but are used in the other direction
ESCAPE_CHARS_EXTRA = r"!?_+'()"
ESCAPE_CHARS = ESCAPE_CHARS_BASE.format("")
FLAG_UL = "ul"  # must be the name of the element
FLAG_OL = "ol"
ELT_WITH_STYLE = ("img", "div")  # elements where a style attribute is expected

# Inline/line-level grammar. Each alternative exposes exactly one named group
# and DCWikiParser dispatches on that name ("parser_<group>"). Order matters:
# the regex engine tries alternatives from first to last, so escapes come
# first and the one-character "text" fallback comes last.
wiki = [
    r"\\" + ESCAPE_CHARS_BASE.format(ESCAPE_CHARS_EXTRA),
    r"^!!!!!(?P<h1_title>.+?)$",
    r"^!!!!(?P<h2_title>.+?)$",
    r"^!!!(?P<h3_title>.+?)$",
    r"^!!(?P<h4_title>.+?)$",
    r"^!(?P<h5_title>.+?)$",
    r"^----$(?P<horizontal_rule>)",
    r"^\*(?P<list_bullet>.*?)$",
    r"^#(?P<list_ordered>.*?)$",
    r"^ (?P<preformated>.*?)$",
    r"^> +?(?P<quote>.*?)$",
    r"''(?P<emphasis>.+?)''",
    r"__(?P<strong_emphasis>.+?)__",
    r"%%%(?P<line_break>)",
    r"\+\+(?P<insertion>.+?)\+\+",
    r"--(?P<deletion>.+?)--",
    r"\[(?P<link>.+?)\]",
    r"\(\((?P<image>.+?)\)\)",
    r"~(?P<anchor>.+?)~",
    r"\?\?(?P<acronym>.+?\|.+?)\?\?",
    r"{{(?P<inline_quote>.+?)}}",
    r"@@(?P<code>.+?)@@",
    r"\$\$(?P<footnote>.+?)\$\$",
    r"(?P<text>.+?)",
]

wiki_re = re.compile("|".join(wiki), re.MULTILINE | re.DOTALL)
# Block-level grammar: either a raw "///html ... ///" section or a paragraph
# terminated by at least one blank line (or by the end of the input).
wiki_block_level_re = re.compile(
    r"^///html(?P<html>.+?)///\n\n|(?P<paragraph>.+?)(?:\n{2,}|\Z)",
    re.MULTILINE | re.DOTALL,
)
88 | |
89 | |
class DCWikiParser(object):
    """Convert Dotclear wiki syntax into a tree of domish elements.

    Parsing is regex driven: ``_parse`` walks the matches of ``wiki_re`` (or
    ``wiki_block_level_re`` at block level) and dispatches each match to the
    ``parser_<group name>`` method named after the group that matched. Every
    such method receives the matched text and the element to fill.
    """

    def __init__(self):
        # list of footnote <p/> elements, (re)created by each call to parse()
        self._footnotes = None
        # wiki_re names its title groups h1_title .. h5_title, so exactly one
        # parser_hN_title method is generated for each of these five levels
        for level in range(1, 6):
            setattr(
                self,
                "parser_h{}_title".format(level),
                # ``level`` is bound as a default to avoid late binding
                lambda string, parent, level=level: self._parser_title(
                    string, parent, "h{}".format(level)
                ),
            )

    def parser_paragraph(self, string, parent):
        """Add a <p/> to ``parent`` and parse ``string`` inline into it."""
        p_elt = parent.addElement("p")
        self._parse(string, p_elt)

    def parser_html(self, string, parent):
        """Parse a raw HTML section and attach it to ``parent``.

        The content is wrapped in a <div/> before parsing; when it consists
        of a single <div/> already, that element is used directly. Content
        which can't be parsed is logged and ignored.
        """
        wrapped_html = "<div>{}</div>".format(string)
        try:
            div_elt = xml_tools.ElementParser()(wrapped_html)
        except domish.ParserError as e:
            log.warning("Error while parsing HTML content, ignoring it: {}".format(e))
            return
        children = list(div_elt.elements())
        if len(children) == 1 and children[0].name == "div":
            div_elt = children[0]
        parent.addChild(div_elt)

    def parser_escape_char(self, string, parent):
        """Add the escaped character itself (without the backslash)."""
        parent.addContent(string)

    def _parser_title(self, string, parent, name):
        """Add a heading element called ``name`` holding ``string`` as text."""
        elt = parent.addElement(name)
        elt.addContent(string)

    def parser_horizontal_rule(self, string, parent):
        """Add an <hr/> to ``parent``."""
        parent.addElement("hr")

    def _parser_list(self, string, parent, list_type):
        """Add a list item to ``parent``, creating list elements as needed.

        @param string: line content following the first list marker
        @param parent: element where the list lives
        @param list_type: name of the list element ("ul" or "ol")
        """
        # each extra leading "*" adds one nesting level
        depth = 0
        while string[depth : depth + 1] == "*":
            depth += 1

        string = string[depth:].lstrip()

        # go down one list element per level, reusing an existing one when
        # the current parent already has it
        for _level in range(depth + 1):
            list_elt = getattr(parent, list_type)
            if not list_elt:
                parent = parent.addElement(list_type)
            else:
                parent = list_elt

        li_elt = parent.addElement("li")
        self._parse(string, li_elt)

    def parser_list_bullet(self, string, parent):
        """Handle an unordered list line."""
        self._parser_list(string, parent, "ul")

    def parser_list_ordered(self, string, parent):
        """Handle an ordered list line."""
        self._parser_list(string, parent, "ol")

    def parser_preformated(self, string, parent):
        """Append a line to the <pre/> of ``parent``, creating it if needed."""
        pre_elt = parent.pre
        if pre_elt is None:
            pre_elt = parent.addElement("pre")
        else:
            # we are on a new line, and this is important for <pre/>
            pre_elt.addContent("\n")
        pre_elt.addContent(string)

    def parser_quote(self, string, parent):
        """Append a quoted line to the <blockquote><p/></blockquote> of ``parent``."""
        blockquote_elt = parent.blockquote
        if blockquote_elt is None:
            blockquote_elt = parent.addElement("blockquote")
        p_elt = blockquote_elt.p
        if p_elt is None:
            p_elt = blockquote_elt.addElement("p")
        else:
            # continuation of an existing quote: keep the line separation
            string = "\n" + string

        self._parse(string, p_elt)

    def parser_emphasis(self, string, parent):
        """Add an <em/> and parse ``string`` into it."""
        em_elt = parent.addElement("em")
        self._parse(string, em_elt)

    def parser_strong_emphasis(self, string, parent):
        """Add a <strong/> and parse ``string`` into it."""
        strong_elt = parent.addElement("strong")
        self._parse(string, strong_elt)

    def parser_line_break(self, string, parent):
        """Add a <br/> to ``parent``."""
        parent.addElement("br")

    def parser_insertion(self, string, parent):
        """Add an <ins/> and parse ``string`` into it."""
        ins_elt = parent.addElement("ins")
        self._parse(string, ins_elt)

    def parser_deletion(self, string, parent):
        """Add a <del/> and parse ``string`` into it."""
        del_elt = parent.addElement("del")
        self._parse(string, del_elt)

    def parser_link(self, string, parent):
        """Add an <a/> built from ``url`` or ``name|url[|lang[|title]]``."""
        url_data = string.split("|")
        a_elt = parent.addElement("a")
        length = len(url_data)
        if length == 1:
            # a lone URL is also used as the link text
            name = url = url_data[0]
        else:
            name, url = url_data[:2]
        a_elt["href"] = url
        a_elt.addContent(name)
        if length >= 3:
            a_elt["lang"] = url_data[2]
        if length >= 4:
            a_elt["title"] = url_data[3]
        if length > 4:
            log.warning("too much data for url, ignoring extra data")

    def parser_image(self, string, parent):
        """Add an <img/> built from ``src[|alt[|position[|longdesc]]]``.

        The position (L/G, R/D or C, case insensitive) is translated to a
        ``style`` attribute; an unknown position is logged and ignored.
        """
        image_data = string.split("|")
        img_elt = parent.addElement("img")

        # zip stops at the shortest sequence, so missing fields are skipped
        for attribute, data in zip(("src", "alt", "position", "longdesc"), image_data):
            if attribute != "position":
                img_elt[attribute] = data
                continue

            data = data.lower()
            if data in ("l", "g"):
                img_elt["style"] = "display:block; float:left; margin:0 1em 1em 0"
            elif data in ("r", "d"):
                img_elt["style"] = "display:block; float:right; margin:0 0 1em 1em"
            elif data == "c":
                img_elt["style"] = "display:block; margin-left:auto; margin-right:auto"
            else:
                log.warning("bad position argument for image, ignoring it")

    def parser_anchor(self, string, parent):
        """Add an empty <a/> whose id is ``string``."""
        a_elt = parent.addElement("a")
        a_elt["id"] = string

    def parser_acronym(self, string, parent):
        """Add an <acronym/> built from ``acronym|title``."""
        # the "acronym" pattern of wiki_re guarantees that a "|" is present
        acronym, title = string.split("|", 1)
        acronym_elt = parent.addElement("acronym", content=acronym)
        acronym_elt["title"] = title

    def parser_inline_quote(self, string, parent):
        """Add a <q/> built from ``quote[|lang[|cite]]``."""
        quote_data = string.split("|")
        q_elt = parent.addElement("q", content=quote_data[0])
        for attribute, data in zip(("lang", "cite"), quote_data[1:]):
            q_elt[attribute] = data

    def parser_code(self, string, parent):
        """Add a <code/> holding ``string`` verbatim."""
        parent.addElement("code", content=string)

    def parser_footnote(self, string, parent):
        """Add a footnote reference to ``parent`` and store the note itself.

        The reference is a <sup class="note"/> linking to the note, and the
        note is a <p/> linking back to the reference.
        """
        idx = len(self._footnotes) + 1
        note_txt = NOTE_TPL.format(idx)
        sup_elt = parent.addElement("sup")
        sup_elt["class"] = "note"
        a_elt = sup_elt.addElement("a", content=note_txt)
        a_elt["id"] = NOTE_A_REV_TPL.format(idx)
        a_elt["href"] = "#{}".format(NOTE_A_TPL.format(idx))

        p_elt = domish.Element((None, "p"))
        a_elt = p_elt.addElement("a", content=note_txt)
        a_elt["id"] = NOTE_A_TPL.format(idx)
        a_elt["href"] = "#{}".format(NOTE_A_REV_TPL.format(idx))
        self._parse(string, p_elt)
        # footnotes are actually added at the end of the parsing
        self._footnotes.append(p_elt)

    def parser_text(self, string, parent):
        """Add ``string`` as plain text."""
        parent.addContent(string)

    def _parse(self, string, parent, block_level=False):
        """Parse ``string`` and add the result to ``parent``.

        @param string: wiki text to parse
        @param parent: element receiving the generated content
        @param block_level: True to split the text in blocks (paragraphs and
            raw HTML sections), False to parse inline/line-level syntax
        """
        regex = wiki_block_level_re if block_level else wiki_re

        for match in regex.finditer(string):
            if match.lastgroup is None:
                # no named group matched: keep the text untouched
                parent.addContent(string)
                return
            matched = match.group(match.lastgroup)
            try:
                parser = getattr(self, "parser_{}".format(match.lastgroup))
            except AttributeError:
                log.warning("No parser found for {}".format(match.lastgroup))
                continue
            parser(matched, parent)

    def parse(self, string):
        """Convert wiki text to XHTML.

        @param string: text in Dotclear wiki syntax
        @return: <div/> element holding the converted content, followed by a
            <div class="footnotes"/> when the text contains footnotes
        """
        self._footnotes = []
        div_elt = domish.Element((None, "div"))
        self._parse(string, parent=div_elt, block_level=True)
        if self._footnotes:
            foot_div_elt = div_elt.addElement("div")
            foot_div_elt["class"] = "footnotes"
            # we add a simple horizontal rule which can be customized
            # with footnotes class, instead of a text which would need
            # to be translated
            foot_div_elt.addElement("hr")
            for elt in self._footnotes:
                foot_div_elt.addChild(elt)
        return div_elt
309 | |
310 | |
class XHTMLParser(object):
    """Convert an XHTML element tree to Dotclear wiki syntax.

    ``_parse`` dispatches each element to the ``parser_<element name>``
    method; elements without a dedicated parser (or carrying an unexpected
    ``style`` attribute) are emitted as raw HTML by ``parser_generic``.
    Every parser appends string fragments to ``buf``, a list which is joined
    at the end of ``parse``.
    """

    def __init__(self):
        self.flags = None  # stack of the lists being parsed (FLAG_UL/FLAG_OL)
        self.toto = 0  # not read anywhere in this class, kept for compatibility
        self.footnotes = None  # will hold a map from note id to buffer index
        for i in range(1, 6):
            setattr(
                self,
                "parser_h{}".format(i),
                # ``level`` is bound as a default to avoid late binding
                lambda elt, buf, level=i: self.parser_heading(elt, buf, level),
            )

    def parser_a(self, elt, buf):
        """Convert a link to ``[name|url|lang|title]`` or an anchor to ``~id~``.

        An <a/> with neither ``href`` nor a non-empty ``id`` is emitted as
        raw HTML.
        """
        try:
            url = elt["href"]
        except KeyError:
            # probably an anchor; we don't want empty values
            id_ = elt.getAttribute("id")
            if id_:
                # the wiki syntax for an anchor is a single "~" on each side
                buf.append("~{}~".format(id_))
            else:
                self.parser_generic(elt, buf)
            return

        name = str(elt)
        lang = elt.getAttribute("lang")
        title = elt.getAttribute("title")

        link_data = [url]
        # fields are positional (name|url|lang|title): the name can only be
        # left out when nothing follows the URL
        if name != url or lang is not None or title is not None:
            link_data.insert(0, name)

        if lang is not None:
            link_data.append(lang)
        elif title is not None:
            # empty lang, needed to keep the title at its position
            link_data.append("")
        if title is not None:
            link_data.append(title)
        buf.append("[")
        buf.append("|".join(link_data))
        buf.append("]")

    def parser_acronym(self, elt, buf):
        """Convert <acronym title="..."/> to ``??acronym|title??``."""
        try:
            title = elt["title"]
        except KeyError:
            log.debug("Acronyme without title, using generic parser")
            self.parser_generic(elt, buf)
            return
        buf.append("??{}|{}??".format(str(elt), title))

    def parser_blockquote(self, elt, buf):
        """Convert a blockquote, prefixing each of its lines with ``> ``."""
        # we remove wrapping <p> to avoid empty line with "> "
        children = [child for child in elt.children if str(child).strip()]
        if (
            len(children) == 1
            # a text node has no name
            and domish.IElement.providedBy(children[0])
            and children[0].name == "p"
        ):
            elt = children[0]
        tmp_buf = []
        self.parse_children(elt, tmp_buf)
        blockquote = "> " + "\n> ".join("".join(tmp_buf).split("\n"))
        buf.append(blockquote)

    def parser_br(self, elt, buf):
        """Convert <br/> to ``%%%``."""
        buf.append("%%%")

    def parser_code(self, elt, buf):
        """Convert <code/> to ``@@...@@``."""
        buf.append("@@")
        self.parse_children(elt, buf)
        buf.append("@@")

    def parser_del(self, elt, buf):
        """Convert <del/> to ``--...--``."""
        buf.append("--")
        self.parse_children(elt, buf)
        buf.append("--")

    def parser_div(self, elt, buf):
        """Convert a <div/>; the "footnotes" class triggers footnote handling."""
        if elt.getAttribute("class") == "footnotes":
            self.parser_footnote(elt, buf)
        else:
            self.parse_children(elt, buf, block=True)

    def parser_em(self, elt, buf):
        """Convert <em/> to ``''...''``."""
        buf.append("''")
        self.parse_children(elt, buf)
        buf.append("''")

    def parser_h6(self, elt, buf):
        """Convert <h6/> as if it were a <h5/>."""
        # XXX: <h6/> heading is not managed by wiki syntax
        #      so we handle it with a <h5/>
        elt = copy.copy(elt)  # we don't want to change to original element
        elt.name = "h5"
        self._parse(elt, buf)

    def parser_hr(self, elt, buf):
        """Convert <hr/> to a ``----`` line."""
        buf.append("\n----\n")

    def parser_img(self, elt, buf):
        """Convert <img/> to ``((src|alt|position|longdesc))``.

        The position is deduced from the ``style`` attribute; an image
        without ``src`` is logged and ignored.
        """
        try:
            url = elt["src"]
        except KeyError:
            log.warning("Ignoring <img/> without src")
            return

        image_data = [url]

        alt = elt.getAttribute("alt")
        style = elt.getAttribute("style", "")
        desc = elt.getAttribute("longdesc")

        if "0 1em 1em 0" in style:
            position = "L"
        elif "0 0 1em 1em" in style:
            position = "R"
        elif "auto" in style:
            position = "C"
        else:
            position = None

        # fields are positional: an empty value is emitted for a missing
        # field as soon as a later one is set
        if alt:
            image_data.append(alt)
        elif position or desc:
            image_data.append("")

        if position:
            image_data.append(position)
        elif desc:
            image_data.append("")

        if desc:
            image_data.append(desc)

        buf.append("((")
        buf.append("|".join(image_data))
        buf.append("))")

    def parser_ins(self, elt, buf):
        """Convert <ins/> to ``++...++``."""
        buf.append("++")
        self.parse_children(elt, buf)
        buf.append("++")

    def parser_li(self, elt, buf):
        """Convert a list item, prefixed with one marker per nesting level."""
        flag = None
        current_flag = None
        bullets = []
        # innermost lists first: we count consecutive lists of the same type
        for flag in reversed(self.flags):
            if flag in (FLAG_UL, FLAG_OL):
                if current_flag is None:
                    current_flag = flag
                if flag == current_flag:
                    bullets.append("*" if flag == FLAG_UL else "#")
                else:
                    break

        if flag != current_flag and buf and buf[-1] == " ":
            # this trick is to avoid a space when we switch
            # from (un)ordered to the other type on the same row
            # e.g. *# unorder + ordered item
            del buf[-1]

        buf.extend(bullets)

        buf.append(" ")
        self.parse_children(elt, buf)
        buf.append("\n")

    def parser_ol(self, elt, buf):
        """Convert an ordered list."""
        self.parser_list(elt, buf, FLAG_OL)

    def parser_p(self, elt, buf):
        """Convert a paragraph, followed by an empty line."""
        self.parse_children(elt, buf)
        buf.append("\n\n")

    def parser_pre(self, elt, buf):
        """Convert <pre/>: content is kept raw, each line starts with a space."""
        pre = "".join(
            child.toXml() if domish.IElement.providedBy(child) else str(child)
            for child in elt.children
        )
        pre = " " + "\n ".join(pre.split("\n"))
        buf.append(pre)

    def parser_q(self, elt, buf):
        """Convert <q/> to ``{{quote|lang|cite}}``."""
        quote_data = [str(elt)]

        lang = elt.getAttribute("lang")
        # "cite" is the attribute holding the source URL of a <q/>
        cite = elt.getAttribute("cite")

        if lang:
            quote_data.append(lang)
        elif cite:
            # empty lang, needed to keep cite at its position
            quote_data.append("")

        if cite:
            quote_data.append(cite)

        buf.append("{{")
        buf.append("|".join(quote_data))
        buf.append("}}")

    def parser_span(self, elt, buf):
        """Convert the children of a <span/>; the element itself is dropped."""
        self.parse_children(elt, buf, block=True)

    def parser_strong(self, elt, buf):
        """Convert <strong/> to ``__...__``."""
        buf.append("__")
        self.parse_children(elt, buf)
        buf.append("__")

    def parser_sup(self, elt, buf):
        """Convert a footnote reference to ``$$...$$``.

        Only a placeholder is written here: the text of the note is filled
        in by ``parser_footnote``. A <sup/> which doesn't look like a
        footnote reference is emitted as raw HTML.
        """
        # sup is mainly used for footnotes, so we check if we have an anchor inside
        children = [child for child in elt.children if str(child).strip()]
        if (
            len(children) == 1
            and domish.IElement.providedBy(children[0])
            and children[0].name == "a"
            and "#" in children[0].getAttribute("href", "")
        ):
            url = children[0]["href"]
            note_id = url[url.find("#") + 1 :]
            if not note_id:
                log.warning("bad link found in footnote")
                self.parser_generic(elt, buf)
                return
            # this looks like a footnote
            buf.append("$$")
            buf.append(" ")  # placeholder
            self.footnotes[note_id] = len(buf) - 1
            buf.append("$$")
        else:
            self.parser_generic(elt, buf)

    def parser_ul(self, elt, buf):
        """Convert an unordered list."""
        self.parser_list(elt, buf, FLAG_UL)

    def parser_list(self, elt, buf, type_):
        """Convert a list while tracking its type in ``self.flags``.

        @param type_: FLAG_UL or FLAG_OL
        @raise exceptions.InternalError: the flag pushed for this list is not
            in ``self.flags`` anymore once the children are parsed
        """
        self.flags.append(type_)
        self.parse_children(elt, buf, block=True)
        # we remove the last flag of this type
        for idx in range(len(self.flags) - 1, -1, -1):
            if self.flags[idx] == type_:
                del self.flags[idx]
                break
        else:
            raise exceptions.InternalError("flag has been removed by an other parser")

    def parser_heading(self, elt, buf, level):
        """Convert a heading: <h1/> gets five ``!``, <h5/> gets one."""
        buf.append((6 - level) * "!")
        for child in elt.children:
            # we ignore other elements for a Hx title
            self.parser_text(child, buf)
        buf.append("\n")

    def parser_footnote(self, elt, buf):
        """Replace the footnote placeholders of ``buf`` with the notes' text.

        Each <p/> of ``elt`` is expected to start with an <a/> whose id is
        the key stored by ``parser_sup``; paragraphs which don't match a
        known note are logged and ignored.
        """
        for note_elt in elt.elements():
            # all children other than <p/> are ignored
            if note_elt.name != "p":
                continue
            a_elt = note_elt.a
            if a_elt is None:
                log.warning(
                    "<p/> element doesn't contain <a/> in footnote, ignoring it"
                )
                continue
            try:
                note_idx = self.footnotes[a_elt["id"]]
            except KeyError:
                log.warning("Note id doesn't match any known note, ignoring it")
                continue
            # we create a dummy element to parse all children after the <a/>
            dummy_elt = domish.Element((None, "note"))
            a_idx = note_elt.children.index(a_elt)
            dummy_elt.children = note_elt.children[a_idx + 1 :]
            note_buf = []
            self.parse_children(dummy_elt, note_buf)
            # now we can replace the placeholder
            buf[note_idx] = "".join(note_buf)

    def parser_text(self, txt, buf, keep_whitespaces=False):
        """Append ``txt`` to ``buf`` with the wiki special characters escaped.

        @param keep_whitespaces: if False, whitespaces are collapsed to a
            single space between words
        @return: the text appended (may be empty, nothing is appended then)
        """
        txt = str(txt)
        if not keep_whitespaces:
            # we get text and only let one inter word space
            txt = " ".join(txt.split())
        txt = re.sub(ESCAPE_CHARS, r"\\\1", txt)
        if txt:
            buf.append(txt)
        return txt

    def parser_generic(self, elt, buf):
        """Emit ``elt`` as a raw ``///html`` section."""
        # as dotclear wiki syntax handle arbitrary XHTML code
        # we use this feature to add elements that we don't know
        buf.append("\n\n///html\n{}\n///\n\n".format(elt.toXml()))

    def parse_children(self, elt, buf, block=False):
        """Convert every child (element or text) of ``elt``.

        @param block: if False, a space is inserted between two children
            when the buffer doesn't already end with a space or a new line
        """
        first_visible = True
        for child in elt.children:
            if (
                not block
                and not first_visible
                and buf
                and not buf[-1].endswith((" ", "\n"))
            ):
                # we add separation if it isn't already there
                buf.append(" ")
            if domish.IElement.providedBy(child):
                self._parse(child, buf)
                first_visible = False
            else:
                appended = self.parser_text(child, buf)
                if appended:
                    first_visible = False

    def _parse(self, elt, buf):
        """Dispatch ``elt`` to the parser matching its name."""
        elt_name = elt.name.lower()
        style = elt.getAttribute("style")
        if style and elt_name not in ELT_WITH_STYLE:
            # if we have style we use generic parser to put raw HTML
            # to avoid losing it
            parser = self.parser_generic
        else:
            try:
                parser = getattr(self, "parser_{}".format(elt_name))
            except AttributeError:
                log.debug(
                    "Can't find parser for {} element, using generic one".format(elt.name)
                )
                parser = self.parser_generic
        parser(elt, buf)

    def parse(self, elt):
        """Convert ``elt`` and return the resulting wiki text."""
        self.flags = []
        self.footnotes = {}
        buf = []
        self._parse(elt, buf)
        return "".join(buf)

    def parseString(self, string):
        """Convert an XHTML string to wiki text.

        @return: the wiki text, or None if ``string`` can't be parsed
        """
        wrapped_html = "<div>{}</div>".format(string)
        try:
            div_elt = xml_tools.ElementParser()(wrapped_html)
        except domish.ParserError as e:
            log.warning("Error while parsing HTML content: {}".format(e))
            return
        children = list(div_elt.elements())
        if len(children) == 1 and children[0].name == "div":
            div_elt = children[0]
        return self.parse(div_elt)
658 | |
659 | |
class DCWikiSyntax(object):
    """Plugin entry point: registers the Dotclear wiki syntax converters."""

    SYNTAX_NAME = "wiki_dotclear"

    def __init__(self, host):
        """Create both parsers and register them in the TEXT_SYNTAXES plugin.

        @param host: object exposing the loaded plugins in ``host.plugins``
        """
        log.info(_("Dotclear wiki syntax plugin initialization"))
        self.host = host
        self._dc_parser = DCWikiParser()
        self._xhtml_parser = XHTMLParser()
        self._stx = self.host.plugins["TEXT_SYNTAXES"]
        self._stx.add_syntax(
            self.SYNTAX_NAME, self.parse_wiki, self.parse_xhtml, [self._stx.OPT_NO_THREAD]
        )

    def parse_wiki(self, wiki_stx):
        """Convert Dotclear wiki text to a serialised XHTML string."""
        div_elt = self._dc_parser.parse(wiki_stx)
        return div_elt.toXml()

    def parse_xhtml(self, xhtml):
        """Convert an XHTML string to Dotclear wiki text.

        The value comes straight from ``XHTMLParser.parseString``, which
        returns None when the XHTML can't be parsed.
        """
        return self._xhtml_parser.parseString(xhtml)