comparison sat/plugins/plugin_syntax_wiki_dotclear.py @ 3028:ab2696e34d29

Python 3 port:

/!\ this is a huge commit
/!\ starting from this commit, SàT needs Python 3.6+
/!\ SàT may be unstable or some features may not work anymore; this will improve with time

This patch ports the backend, bridge and frontends to Python 3. Roughly, this has been done as follows:

- the 2to3 tool has been applied (with Python 3.7)
- all references to python2 have been replaced with python3 (notably shebangs)
- fixed files not handled by 2to3 (notably the shell script)
- several manual fixes
- fixed issues reported by Python 3 that were not handled in Python 2
- replaced "async" with "async_" where needed (it is a reserved word since Python 3.7)
- replaced zope's "implements" with the @implementer decorator
- temporary hack to handle data pickled in the database, as str or bytes may be returned, to be checked later
- fixed hash comparison for passwords
- removed some code which is no longer needed with Python 3
- deactivated some code which needs to be checked (notably certificate validation)
- tested with jp, fixed reported issues until some basic commands worked
- ported Primitivus (after porting dependencies such as urwid satext)
- more manual fixes

Sketches illustrating a few of these changes follow the changeset header below.
author Goffi <goffi@goffi.org>
date Tue, 13 Aug 2019 19:08:41 +0200
parents 85d3240a400f
children 9d0df638c8b4
comparison
3027:ff5bcb12ae60 3028:ab2696e34d29
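
Replacing zope's "implements" with the @implementer decorator (listed in the description) is mechanical but required: the old class-body implements(...) call relies on a metaclass trick that fails under Python 3. A minimal sketch of the pattern, with placeholder names not taken from this file:

    from zope.interface import Interface, implementer

    class IExampleHandler(Interface):
        """Placeholder interface, for illustration only."""

    # Python 2 style, fails at class creation time under Python 3:
    #     class Handler(object):
    #         implements(IExampleHandler)

    @implementer(IExampleHandler)   # Python 3 compatible class decorator
    class Handler(object):
        pass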
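The "async" to "async_" renaming is needed because async became a reserved keyword in Python 3.7, so a parameter or attribute named async is now a syntax error. A hypothetical before/after, not taken from this plugin:

    # Python 2 / Python < 3.7 signature, now a SyntaxError:
    #     def runCommand(command, async=False):
    #         ...

    def runCommand(command, async_=False):
        """Same behaviour, only the keyword argument is renamed."""
        if async_:
            print("running {} in the background".format(command))
        else:
            print("running {} and waiting for it".format(command))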
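The "temporary hack to handle data pickled in database" concerns values stored by the Python 2 version: unpickling them under Python 3 may return bytes where str is expected. A sketch of the kind of normalisation involved, with hypothetical names (the actual storage code is not part of this file):

    import pickle

    def load_legacy_value(blob):
        # Python 2 str instances are unpickled using the given encoding;
        # anything that still comes back as bytes is decoded explicitly.
        value = pickle.loads(blob, encoding="utf-8")
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        return value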
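The password hash comparison fix points at a classic Python 3 pitfall: hashing needs bytes, and a bytes digest never compares equal to a stored str. A generic sketch of the issue, not the actual SàT password code:

    import hashlib

    def check_password(password, stored_hex_digest):
        # hash the UTF-8 encoded password and compare text with text
        digest = hashlib.sha256(password.encode("utf-8")).hexdigest()
        return digest == stored_hex_digest
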
1 #!/usr/bin/env python2 1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 3
4 # SàT plugin for Dotclear Wiki Syntax 4 # SàT plugin for Dotclear Wiki Syntax
5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) 5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org)
6 6
38 C.PI_MAIN: "DCWikiSyntax", 38 C.PI_MAIN: "DCWikiSyntax",
39 C.PI_HANDLER: "", 39 C.PI_HANDLER: "",
40 C.PI_DESCRIPTION: _("""Implementation of Dotclear wiki syntax"""), 40 C.PI_DESCRIPTION: _("""Implementation of Dotclear wiki syntax"""),
41 } 41 }
42 42
43 NOTE_TPL = u"[{}]" # Note template 43 NOTE_TPL = "[{}]" # Note template
44 NOTE_A_REV_TPL = u"rev_note_{}" 44 NOTE_A_REV_TPL = "rev_note_{}"
45 NOTE_A_TPL = u"note_{}" 45 NOTE_A_TPL = "note_{}"
46 ESCAPE_CHARS_BASE = r"(?P<escape_char>[][{}%|\\/*#@{{}}~$-])" 46 ESCAPE_CHARS_BASE = r"(?P<escape_char>[][{}%|\\/*#@{{}}~$-])"
47 ESCAPE_CHARS_EXTRA = ( 47 ESCAPE_CHARS_EXTRA = (
48 r"!?_+'()" 48 r"!?_+'()"
49 ) # These chars are not escaped in XHTML => dc_wiki conversion, 49 ) # These chars are not escaped in XHTML => dc_wiki conversion,
50 # but are used in the other direction 50 # but are used in the other direction
88 88
89 89
90 class DCWikiParser(object): 90 class DCWikiParser(object):
91 def __init__(self): 91 def __init__(self):
92 self._footnotes = None 92 self._footnotes = None
93 for i in xrange(5): 93 for i in range(5):
94 setattr( 94 setattr(
95 self, 95 self,
96 "parser_h{}_title".format(i), 96 "parser_h{}_title".format(i),
97 lambda string, parent, i=i: self._parser_title( 97 lambda string, parent, i=i: self._parser_title(
98 string, parent, "h{}".format(i) 98 string, parent, "h{}".format(i)
106 def parser_html(self, string, parent): 106 def parser_html(self, string, parent):
107 wrapped_html = "<div>{}</div>".format(string) 107 wrapped_html = "<div>{}</div>".format(string)
108 try: 108 try:
109 div_elt = xml_tools.ElementParser()(wrapped_html) 109 div_elt = xml_tools.ElementParser()(wrapped_html)
110 except domish.ParserError as e: 110 except domish.ParserError as e:
111 log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e)) 111 log.warning("Error while parsing HTML content, ignoring it: {}".format(e))
112 return 112 return
113 children = list(div_elt.elements()) 113 children = list(div_elt.elements())
114 if len(children) == 1 and children[0].name == "div": 114 if len(children) == 1 and children[0].name == "div":
115 div_elt = children[0] 115 div_elt = children[0]
116 parent.addChild(div_elt) 116 parent.addChild(div_elt)
130 while string[depth : depth + 1] == "*": 130 while string[depth : depth + 1] == "*":
131 depth += 1 131 depth += 1
132 132
133 string = string[depth:].lstrip() 133 string = string[depth:].lstrip()
134 134
135 for i in xrange(depth + 1): 135 for i in range(depth + 1):
136 list_elt = getattr(parent, list_type) 136 list_elt = getattr(parent, list_type)
137 if not list_elt: 137 if not list_elt:
138 parent = parent.addElement(list_type) 138 parent = parent.addElement(list_type)
139 else: 139 else:
140 parent = list_elt 140 parent = list_elt
163 blockquote_elt = parent.addElement("blockquote") 163 blockquote_elt = parent.addElement("blockquote")
164 p_elt = blockquote_elt.p 164 p_elt = blockquote_elt.p
165 if p_elt is None: 165 if p_elt is None:
166 p_elt = blockquote_elt.addElement("p") 166 p_elt = blockquote_elt.addElement("p")
167 else: 167 else:
168 string = u"\n" + string 168 string = "\n" + string
169 169
170 self._parse(string, p_elt) 170 self._parse(string, p_elt)
171 171
172 def parser_emphasis(self, string, parent): 172 def parser_emphasis(self, string, parent):
173 em_elt = parent.addElement("em") 173 em_elt = parent.addElement("em")
187 def parser_deletion(self, string, parent): 187 def parser_deletion(self, string, parent):
188 del_elt = parent.addElement("del") 188 del_elt = parent.addElement("del")
189 self._parse(string, del_elt) 189 self._parse(string, del_elt)
190 190
191 def parser_link(self, string, parent): 191 def parser_link(self, string, parent):
192 url_data = string.split(u"|") 192 url_data = string.split("|")
193 a_elt = parent.addElement("a") 193 a_elt = parent.addElement("a")
194 length = len(url_data) 194 length = len(url_data)
195 if length == 1: 195 if length == 1:
196 url = url_data[0] 196 url = url_data[0]
197 a_elt["href"] = url 197 a_elt["href"] = url
204 if length >= 3: 204 if length >= 3:
205 a_elt["lang"] = url_data[2] 205 a_elt["lang"] = url_data[2]
206 if length >= 4: 206 if length >= 4:
207 a_elt["title"] = url_data[3] 207 a_elt["title"] = url_data[3]
208 if length > 4: 208 if length > 4:
209 log.warning(u"too much data for url, ignoring extra data") 209 log.warning("too much data for url, ignoring extra data")
210 210
211 def parser_image(self, string, parent): 211 def parser_image(self, string, parent):
212 image_data = string.split(u"|") 212 image_data = string.split("|")
213 img_elt = parent.addElement("img") 213 img_elt = parent.addElement("img")
214 214
215 for idx, attribute in enumerate(("src", "alt", "position", "longdesc")): 215 for idx, attribute in enumerate(("src", "alt", "position", "longdesc")):
216 try: 216 try:
217 data = image_data[idx] 217 data = image_data[idx]
229 elif data == "c": 229 elif data == "c":
230 img_elt[ 230 img_elt[
231 "style" 231 "style"
232 ] = "display:block; margin-left:auto; margin-right:auto" 232 ] = "display:block; margin-left:auto; margin-right:auto"
233 else: 233 else:
234 log.warning(u"bad position argument for image, ignoring it") 234 log.warning("bad position argument for image, ignoring it")
235 235
236 def parser_anchor(self, string, parent): 236 def parser_anchor(self, string, parent):
237 a_elt = parent.addElement("a") 237 a_elt = parent.addElement("a")
238 a_elt["id"] = string 238 a_elt["id"] = string
239 239
240 def parser_acronym(self, string, parent): 240 def parser_acronym(self, string, parent):
241 acronym, title = string.split(u"|", 1) 241 acronym, title = string.split("|", 1)
242 acronym_elt = parent.addElement("acronym", content=acronym) 242 acronym_elt = parent.addElement("acronym", content=acronym)
243 acronym_elt["title"] = title 243 acronym_elt["title"] = title
244 244
245 def parser_inline_quote(self, string, parent): 245 def parser_inline_quote(self, string, parent):
246 quote_data = string.split(u"|") 246 quote_data = string.split("|")
247 quote = quote_data[0] 247 quote = quote_data[0]
248 q_elt = parent.addElement("q", content=quote) 248 q_elt = parent.addElement("q", content=quote)
249 for idx, attribute in enumerate(("lang", "cite"), 1): 249 for idx, attribute in enumerate(("lang", "cite"), 1):
250 try: 250 try:
251 data = quote_data[idx] 251 data = quote_data[idx]
261 note_txt = NOTE_TPL.format(idx) 261 note_txt = NOTE_TPL.format(idx)
262 sup_elt = parent.addElement("sup") 262 sup_elt = parent.addElement("sup")
263 sup_elt["class"] = "note" 263 sup_elt["class"] = "note"
264 a_elt = sup_elt.addElement("a", content=note_txt) 264 a_elt = sup_elt.addElement("a", content=note_txt)
265 a_elt["id"] = NOTE_A_REV_TPL.format(idx) 265 a_elt["id"] = NOTE_A_REV_TPL.format(idx)
266 a_elt["href"] = u"#{}".format(NOTE_A_TPL.format(idx)) 266 a_elt["href"] = "#{}".format(NOTE_A_TPL.format(idx))
267 267
268 p_elt = domish.Element((None, "p")) 268 p_elt = domish.Element((None, "p"))
269 a_elt = p_elt.addElement("a", content=note_txt) 269 a_elt = p_elt.addElement("a", content=note_txt)
270 a_elt["id"] = NOTE_A_TPL.format(idx) 270 a_elt["id"] = NOTE_A_TPL.format(idx)
271 a_elt["href"] = u"#{}".format(NOTE_A_REV_TPL.format(idx)) 271 a_elt["href"] = "#{}".format(NOTE_A_REV_TPL.format(idx))
272 self._parse(string, p_elt) 272 self._parse(string, p_elt)
273 # footnotes are actually added at the end of the parsing 273 # footnotes are actually added at the end of the parsing
274 self._footnotes.append(p_elt) 274 self._footnotes.append(p_elt)
275 275
276 def parser_text(self, string, parent): 276 def parser_text(self, string, parent):
285 return 285 return
286 matched = match.group(match.lastgroup) 286 matched = match.group(match.lastgroup)
287 try: 287 try:
288 parser = getattr(self, "parser_{}".format(match.lastgroup)) 288 parser = getattr(self, "parser_{}".format(match.lastgroup))
289 except AttributeError: 289 except AttributeError:
290 log.warning(u"No parser found for {}".format(match.lastgroup)) 290 log.warning("No parser found for {}".format(match.lastgroup))
291 # parent.addContent(string) 291 # parent.addContent(string)
292 continue 292 continue
293 parser(matched, parent) 293 parser(matched, parent)
294 294
295 def parse(self, string): 295 def parse(self, string):
311 class XHTMLParser(object): 311 class XHTMLParser(object):
312 def __init__(self): 312 def __init__(self):
313 self.flags = None 313 self.flags = None
314 self.toto = 0 314 self.toto = 0
315 self.footnotes = None # will hold a map from url to buffer id 315 self.footnotes = None # will hold a map from url to buffer id
316 for i in xrange(1, 6): 316 for i in range(1, 6):
317 setattr( 317 setattr(
318 self, 318 self,
319 "parser_h{}".format(i), 319 "parser_h{}".format(i),
320 lambda elt, buf, level=i: self.parserHeading(elt, buf, level), 320 lambda elt, buf, level=i: self.parserHeading(elt, buf, level),
321 ) 321 )
331 # we don't want empty values 331 # we don't want empty values
332 raise KeyError 332 raise KeyError
333 except KeyError: 333 except KeyError:
334 self.parserGeneric(elt, buf) 334 self.parserGeneric(elt, buf)
335 else: 335 else:
336 buf.append(u"~~{}~~".format(id_)) 336 buf.append("~~{}~~".format(id_))
337 return 337 return
338 338
339 link_data = [url] 339 link_data = [url]
340 name = unicode(elt) 340 name = str(elt)
341 if name != url: 341 if name != url:
342 link_data.insert(0, name) 342 link_data.insert(0, name)
343 343
344 lang = elt.getAttribute("lang") 344 lang = elt.getAttribute("lang")
345 title = elt.getAttribute("title") 345 title = elt.getAttribute("title")
346 if lang is not None: 346 if lang is not None:
347 link_data.append(lang) 347 link_data.append(lang)
348 elif title is not None: 348 elif title is not None:
349 link_data.appand(u"") 349 link_data.appand("")
350 if title is not None: 350 if title is not None:
351 link_data.append(title) 351 link_data.append(title)
352 buf.append(u"[") 352 buf.append("[")
353 buf.append(u"|".join(link_data)) 353 buf.append("|".join(link_data))
354 buf.append(u"]") 354 buf.append("]")
355 355
356 def parser_acronym(self, elt, buf): 356 def parser_acronym(self, elt, buf):
357 try: 357 try:
358 title = elt["title"] 358 title = elt["title"]
359 except KeyError: 359 except KeyError:
360 log.debug(u"Acronyme without title, using generic parser") 360 log.debug("Acronyme without title, using generic parser")
361 self.parserGeneric(elt, buf) 361 self.parserGeneric(elt, buf)
362 return 362 return
363 buf.append(u"??{}|{}??".format(unicode(elt), title)) 363 buf.append("??{}|{}??".format(str(elt), title))
364 364
365 def parser_blockquote(self, elt, buf): 365 def parser_blockquote(self, elt, buf):
366 # we remove wrapping <p> to avoid empty line with "> " 366 # we remove wrapping <p> to avoid empty line with "> "
367 children = list( 367 children = list(
368 [child for child in elt.children if unicode(child).strip() not in ("", "\n")] 368 [child for child in elt.children if str(child).strip() not in ("", "\n")]
369 ) 369 )
370 if len(children) == 1 and children[0].name == "p": 370 if len(children) == 1 and children[0].name == "p":
371 elt = children[0] 371 elt = children[0]
372 tmp_buf = [] 372 tmp_buf = []
373 self.parseChildren(elt, tmp_buf) 373 self.parseChildren(elt, tmp_buf)
374 blockquote = u"> " + u"\n> ".join(u"".join(tmp_buf).split("\n")) 374 blockquote = "> " + "\n> ".join("".join(tmp_buf).split("\n"))
375 buf.append(blockquote) 375 buf.append(blockquote)
376 376
377 def parser_br(self, elt, buf): 377 def parser_br(self, elt, buf):
378 buf.append(u"%%%") 378 buf.append("%%%")
379 379
380 def parser_code(self, elt, buf): 380 def parser_code(self, elt, buf):
381 buf.append(u"@@") 381 buf.append("@@")
382 self.parseChildren(elt, buf) 382 self.parseChildren(elt, buf)
383 buf.append(u"@@") 383 buf.append("@@")
384 384
385 def parser_del(self, elt, buf): 385 def parser_del(self, elt, buf):
386 buf.append(u"--") 386 buf.append("--")
387 self.parseChildren(elt, buf) 387 self.parseChildren(elt, buf)
388 buf.append(u"--") 388 buf.append("--")
389 389
390 def parser_div(self, elt, buf): 390 def parser_div(self, elt, buf):
391 if elt.getAttribute("class") == "footnotes": 391 if elt.getAttribute("class") == "footnotes":
392 self.parserFootnote(elt, buf) 392 self.parserFootnote(elt, buf)
393 else: 393 else:
394 self.parseChildren(elt, buf, block=True) 394 self.parseChildren(elt, buf, block=True)
395 395
396 def parser_em(self, elt, buf): 396 def parser_em(self, elt, buf):
397 buf.append(u"''") 397 buf.append("''")
398 self.parseChildren(elt, buf) 398 self.parseChildren(elt, buf)
399 buf.append(u"''") 399 buf.append("''")
400 400
401 def parser_h6(self, elt, buf): 401 def parser_h6(self, elt, buf):
402 # XXX: <h6/> heading is not managed by wiki syntax 402 # XXX: <h6/> heading is not managed by wiki syntax
403 # so we handle it with a <h5/> 403 # so we handle it with a <h5/>
404 elt = copy.copy(elt) # we don't want to change to original element 404 elt = copy.copy(elt) # we don't want to change to original element
405 elt.name = "h5" 405 elt.name = "h5"
406 self._parse(elt, buf) 406 self._parse(elt, buf)
407 407
408 def parser_hr(self, elt, buf): 408 def parser_hr(self, elt, buf):
409 buf.append(u"\n----\n") 409 buf.append("\n----\n")
410 410
411 def parser_img(self, elt, buf): 411 def parser_img(self, elt, buf):
412 try: 412 try:
413 url = elt["src"] 413 url = elt["src"]
414 except KeyError: 414 except KeyError:
415 log.warning(u"Ignoring <img/> without src") 415 log.warning("Ignoring <img/> without src")
416 return 416 return
417 417
418 image_data = [url] 418 image_data = [url]
419 419
420 alt = elt.getAttribute("alt") 420 alt = elt.getAttribute("alt")
431 position = None 431 position = None
432 432
433 if alt: 433 if alt:
434 image_data.append(alt) 434 image_data.append(alt)
435 elif position or desc: 435 elif position or desc:
436 image_data.append(u"") 436 image_data.append("")
437 437
438 if position: 438 if position:
439 image_data.append(position) 439 image_data.append(position)
440 elif desc: 440 elif desc:
441 image_data.append(u"") 441 image_data.append("")
442 442
443 if desc: 443 if desc:
444 image_data.append(desc) 444 image_data.append(desc)
445 445
446 buf.append(u"((") 446 buf.append("((")
447 buf.append(u"|".join(image_data)) 447 buf.append("|".join(image_data))
448 buf.append(u"))") 448 buf.append("))")
449 449
450 def parser_ins(self, elt, buf): 450 def parser_ins(self, elt, buf):
451 buf.append(u"++") 451 buf.append("++")
452 self.parseChildren(elt, buf) 452 self.parseChildren(elt, buf)
453 buf.append(u"++") 453 buf.append("++")
454 454
455 def parser_li(self, elt, buf): 455 def parser_li(self, elt, buf):
456 flag = None 456 flag = None
457 current_flag = None 457 current_flag = None
458 bullets = [] 458 bullets = []
459 for flag in reversed(self.flags): 459 for flag in reversed(self.flags):
460 if flag in (FLAG_UL, FLAG_OL): 460 if flag in (FLAG_UL, FLAG_OL):
461 if current_flag is None: 461 if current_flag is None:
462 current_flag = flag 462 current_flag = flag
463 if flag == current_flag: 463 if flag == current_flag:
464 bullets.append(u"*" if flag == FLAG_UL else u"#") 464 bullets.append("*" if flag == FLAG_UL else "#")
465 else: 465 else:
466 break 466 break
467 467
468 if flag != current_flag and buf[-1] == u" ": 468 if flag != current_flag and buf[-1] == " ":
469 # this trick is to avoid a space when we switch 469 # this trick is to avoid a space when we switch
470 # from (un)ordered to the other type on the same row 470 # from (un)ordered to the other type on the same row
471 # e.g. *# unorder + ordered item 471 # e.g. *# unorder + ordered item
472 del buf[-1] 472 del buf[-1]
473 473
474 buf.extend(bullets) 474 buf.extend(bullets)
475 475
476 buf.append(u" ") 476 buf.append(" ")
477 self.parseChildren(elt, buf) 477 self.parseChildren(elt, buf)
478 buf.append(u"\n") 478 buf.append("\n")
479 479
480 def parser_ol(self, elt, buf): 480 def parser_ol(self, elt, buf):
481 self.parserList(elt, buf, FLAG_OL) 481 self.parserList(elt, buf, FLAG_OL)
482 482
483 def parser_p(self, elt, buf): 483 def parser_p(self, elt, buf):
484 self.parseChildren(elt, buf) 484 self.parseChildren(elt, buf)
485 buf.append(u"\n\n") 485 buf.append("\n\n")
486 486
487 def parser_pre(self, elt, buf): 487 def parser_pre(self, elt, buf):
488 pre = u"".join( 488 pre = "".join(
489 [ 489 [
490 child.toXml() if domish.IElement.providedBy(child) else unicode(child) 490 child.toXml() if domish.IElement.providedBy(child) else str(child)
491 for child in elt.children 491 for child in elt.children
492 ] 492 ]
493 ) 493 )
494 pre = u" " + u"\n ".join(pre.split("\n")) 494 pre = " " + "\n ".join(pre.split("\n"))
495 buf.append(pre) 495 buf.append(pre)
496 496
497 def parser_q(self, elt, buf): 497 def parser_q(self, elt, buf):
498 quote_data = [unicode(elt)] 498 quote_data = [str(elt)]
499 499
500 lang = elt.getAttribute("lang") 500 lang = elt.getAttribute("lang")
501 cite = elt.getAttribute("url") 501 cite = elt.getAttribute("url")
502 502
503 if lang: 503 if lang:
504 quote_data.append(lang) 504 quote_data.append(lang)
505 elif cite: 505 elif cite:
506 quote_data.append(u"") 506 quote_data.append("")
507 507
508 if cite: 508 if cite:
509 quote_data.append(cite) 509 quote_data.append(cite)
510 510
511 buf.append(u"{{") 511 buf.append("{{")
512 buf.append(u"|".join(quote_data)) 512 buf.append("|".join(quote_data))
513 buf.append(u"}}") 513 buf.append("}}")
514 514
515 def parser_span(self, elt, buf): 515 def parser_span(self, elt, buf):
516 self.parseChildren(elt, buf, block=True) 516 self.parseChildren(elt, buf, block=True)
517 517
518 def parser_strong(self, elt, buf): 518 def parser_strong(self, elt, buf):
519 buf.append(u"__") 519 buf.append("__")
520 self.parseChildren(elt, buf) 520 self.parseChildren(elt, buf)
521 buf.append(u"__") 521 buf.append("__")
522 522
523 def parser_sup(self, elt, buf): 523 def parser_sup(self, elt, buf):
524 # sup is mainly used for footnotes, so we check if we have an anchor inside 524 # sup is mainly used for footnotes, so we check if we have an anchor inside
525 children = list( 525 children = list(
526 [child for child in elt.children if unicode(child).strip() not in ("", "\n")] 526 [child for child in elt.children if str(child).strip() not in ("", "\n")]
527 ) 527 )
528 if ( 528 if (
529 len(children) == 1 529 len(children) == 1
530 and domish.IElement.providedBy(children[0]) 530 and domish.IElement.providedBy(children[0])
531 and children[0].name == "a" 531 and children[0].name == "a"
536 if not note_id: 536 if not note_id:
537 log.warning("bad link found in footnote") 537 log.warning("bad link found in footnote")
538 self.parserGeneric(elt, buf) 538 self.parserGeneric(elt, buf)
539 return 539 return
540 # this looks like a footnote 540 # this looks like a footnote
541 buf.append(u"$$") 541 buf.append("$$")
542 buf.append(u" ") # placeholder 542 buf.append(" ") # placeholder
543 self.footnotes[note_id] = len(buf) - 1 543 self.footnotes[note_id] = len(buf) - 1
544 buf.append(u"$$") 544 buf.append("$$")
545 else: 545 else:
546 self.parserGeneric(elt, buf) 546 self.parserGeneric(elt, buf)
547 547
548 def parser_ul(self, elt, buf): 548 def parser_ul(self, elt, buf):
549 self.parserList(elt, buf, FLAG_UL) 549 self.parserList(elt, buf, FLAG_UL)
557 if flag == type_: 557 if flag == type_:
558 del self.flags[idx] 558 del self.flags[idx]
559 break 559 break
560 560
561 if idx == 0: 561 if idx == 0:
562 raise exceptions.InternalError(u"flag has been removed by an other parser") 562 raise exceptions.InternalError("flag has been removed by an other parser")
563 563
564 def parserHeading(self, elt, buf, level): 564 def parserHeading(self, elt, buf, level):
565 buf.append((6 - level) * u"!") 565 buf.append((6 - level) * "!")
566 for child in elt.children: 566 for child in elt.children:
567 # we ignore other elements for a Hx title 567 # we ignore other elements for a Hx title
568 self.parserText(child, buf) 568 self.parserText(child, buf)
569 buf.append(u"\n") 569 buf.append("\n")
570 570
571 def parserFootnote(self, elt, buf): 571 def parserFootnote(self, elt, buf):
572 for elt in elt.elements(): 572 for elt in elt.elements():
573 # all children other than <p/> are ignored 573 # all children other than <p/> are ignored
574 if elt.name == "p": 574 if elt.name == "p":
575 a_elt = elt.a 575 a_elt = elt.a
576 if a_elt is None: 576 if a_elt is None:
577 log.warning( 577 log.warning(
578 u"<p/> element doesn't contain <a/> in footnote, ignoring it" 578 "<p/> element doesn't contain <a/> in footnote, ignoring it"
579 ) 579 )
580 continue 580 continue
581 try: 581 try:
582 note_idx = self.footnotes[a_elt["id"]] 582 note_idx = self.footnotes[a_elt["id"]]
583 except KeyError: 583 except KeyError:
584 log.warning(u"Note id doesn't match any known note, ignoring it") 584 log.warning("Note id doesn't match any known note, ignoring it")
585 # we create a dummy element to parse all children after the <a/> 585 # we create a dummy element to parse all children after the <a/>
586 dummy_elt = domish.Element((None, "note")) 586 dummy_elt = domish.Element((None, "note"))
587 a_idx = elt.children.index(a_elt) 587 a_idx = elt.children.index(a_elt)
588 dummy_elt.children = elt.children[a_idx + 1 :] 588 dummy_elt.children = elt.children[a_idx + 1 :]
589 note_buf = [] 589 note_buf = []
590 self.parseChildren(dummy_elt, note_buf) 590 self.parseChildren(dummy_elt, note_buf)
591 # now we can replace the placeholder 591 # now we can replace the placeholder
592 buf[note_idx] = u"".join(note_buf) 592 buf[note_idx] = "".join(note_buf)
593 593
594 def parserText(self, txt, buf, keep_whitespaces=False): 594 def parserText(self, txt, buf, keep_whitespaces=False):
595 txt = unicode(txt) 595 txt = str(txt)
596 if not keep_whitespaces: 596 if not keep_whitespaces:
597 # we get text and only let one inter word space 597 # we get text and only let one inter word space
598 txt = u" ".join(txt.split()) 598 txt = " ".join(txt.split())
599 txt = re.sub(ESCAPE_CHARS, r"\\\1", txt) 599 txt = re.sub(ESCAPE_CHARS, r"\\\1", txt)
600 if txt: 600 if txt:
601 buf.append(txt) 601 buf.append(txt)
602 return txt 602 return txt
603 603
604 def parserGeneric(self, elt, buf): 604 def parserGeneric(self, elt, buf):
605 # as dotclear wiki syntax handle arbitrary XHTML code 605 # as dotclear wiki syntax handle arbitrary XHTML code
606 # we use this feature to add elements that we don't know 606 # we use this feature to add elements that we don't know
607 buf.append(u"\n\n///html\n{}\n///\n\n".format(elt.toXml())) 607 buf.append("\n\n///html\n{}\n///\n\n".format(elt.toXml()))
608 608
609 def parseChildren(self, elt, buf, block=False): 609 def parseChildren(self, elt, buf, block=False):
610 first_visible = True 610 first_visible = True
611 for child in elt.children: 611 for child in elt.children:
612 if not block and not first_visible and buf and buf[-1][-1] not in (" ", "\n"): 612 if not block and not first_visible and buf and buf[-1][-1] not in (" ", "\n"):
613 # we add separation if it isn't already there 613 # we add separation if it isn't already there
614 buf.append(u" ") 614 buf.append(" ")
615 if domish.IElement.providedBy(child): 615 if domish.IElement.providedBy(child):
616 self._parse(child, buf) 616 self._parse(child, buf)
617 first_visible = False 617 first_visible = False
618 else: 618 else:
619 appended = self.parserText(child, buf) 619 appended = self.parserText(child, buf)
640 def parse(self, elt): 640 def parse(self, elt):
641 self.flags = [] 641 self.flags = []
642 self.footnotes = {} 642 self.footnotes = {}
643 buf = [] 643 buf = []
644 self._parse(elt, buf) 644 self._parse(elt, buf)
645 return u"".join(buf) 645 return "".join(buf)
646 646
647 def parseString(self, string): 647 def parseString(self, string):
648 wrapped_html = u"<div>{}</div>".format(string) 648 wrapped_html = "<div>{}</div>".format(string)
649 try: 649 try:
650 div_elt = xml_tools.ElementParser()(wrapped_html) 650 div_elt = xml_tools.ElementParser()(wrapped_html)
651 except domish.ParserError as e: 651 except domish.ParserError as e:
652 log.warning(u"Error while parsing HTML content: {}".format(e)) 652 log.warning("Error while parsing HTML content: {}".format(e))
653 return 653 return
654 children = list(div_elt.elements()) 654 children = list(div_elt.elements())
655 if len(children) == 1 and children[0].name == "div": 655 if len(children) == 1 and children[0].name == "div":
656 div_elt = children[0] 656 div_elt = children[0]
657 return self.parse(div_elt) 657 return self.parse(div_elt)
659 659
660 class DCWikiSyntax(object): 660 class DCWikiSyntax(object):
661 SYNTAX_NAME = "wiki_dotclear" 661 SYNTAX_NAME = "wiki_dotclear"
662 662
663 def __init__(self, host): 663 def __init__(self, host):
664 log.info(_(u"Dotclear wiki syntax plugin initialization")) 664 log.info(_("Dotclear wiki syntax plugin initialization"))
665 self.host = host 665 self.host = host
666 self._dc_parser = DCWikiParser() 666 self._dc_parser = DCWikiParser()
667 self._xhtml_parser = XHTMLParser() 667 self._xhtml_parser = XHTMLParser()
668 self._stx = self.host.plugins["TEXT_SYNTAXES"] 668 self._stx = self.host.plugins["TEXT_SYNTAXES"]
669 self._stx.addSyntax( 669 self._stx.addSyntax(