comparison sat/plugins/plugin_syntax_wiki_dotclear.py @ 2624:56f94936df1e

code style reformatting using black
author Goffi <goffi@goffi.org>
date Wed, 27 Jun 2018 20:14:46 +0200
parents 26edcf3a30eb
children 003b8b4b56a7
comparison
equal deleted inserted replaced
2623:49533de4540b 2624:56f94936df1e
19 19
20 # XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent 20 # XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent
21 21
22 from sat.core.i18n import _ 22 from sat.core.i18n import _
23 from sat.core.log import getLogger 23 from sat.core.log import getLogger
24
24 log = getLogger(__name__) 25 log = getLogger(__name__)
25 from sat.core.constants import Const as C 26 from sat.core.constants import Const as C
26 from sat.core import exceptions 27 from sat.core import exceptions
27 from twisted.words.xish import domish 28 from twisted.words.xish import domish
28 from sat.tools import xml_tools 29 from sat.tools import xml_tools
34 C.PI_IMPORT_NAME: "SYNT_DC_WIKI", 35 C.PI_IMPORT_NAME: "SYNT_DC_WIKI",
35 C.PI_TYPE: C.PLUG_TYPE_SYNTAXE, 36 C.PI_TYPE: C.PLUG_TYPE_SYNTAXE,
36 C.PI_DEPENDENCIES: ["TEXT-SYNTAXES"], 37 C.PI_DEPENDENCIES: ["TEXT-SYNTAXES"],
37 C.PI_MAIN: "DCWikiSyntax", 38 C.PI_MAIN: "DCWikiSyntax",
38 C.PI_HANDLER: "", 39 C.PI_HANDLER: "",
39 C.PI_DESCRIPTION: _("""Implementation of Dotclear wiki syntax""") 40 C.PI_DESCRIPTION: _("""Implementation of Dotclear wiki syntax"""),
40 } 41 }
41 42
42 NOTE_TPL = u'[{}]' # Note template 43 NOTE_TPL = u"[{}]" # Note template
43 NOTE_A_REV_TPL = u'rev_note_{}' 44 NOTE_A_REV_TPL = u"rev_note_{}"
44 NOTE_A_TPL = u'note_{}' 45 NOTE_A_TPL = u"note_{}"
45 ESCAPE_CHARS_BASE = r"(?P<escape_char>[][{}%|\\/*#@{{}}~$-])" 46 ESCAPE_CHARS_BASE = r"(?P<escape_char>[][{}%|\\/*#@{{}}~$-])"
46 ESCAPE_CHARS_EXTRA = r"!?_+'()" # These chars are not escaped in XHTML => dc_wiki conversion, 47 ESCAPE_CHARS_EXTRA = (
47 # but are used in the other direction 48 r"!?_+'()"
48 ESCAPE_CHARS = ESCAPE_CHARS_BASE.format('') 49 ) # These chars are not escaped in XHTML => dc_wiki conversion,
49 FLAG_UL = 'ul' # must be the name of the element 50 # but are used in the other direction
50 FLAG_OL = 'ol' 51 ESCAPE_CHARS = ESCAPE_CHARS_BASE.format("")
51 ELT_WITH_STYLE = ('img', 'div') # elements where a style attribute is expected 52 FLAG_UL = "ul" # must be the name of the element
52 53 FLAG_OL = "ol"
53 wiki = [r'\\' + ESCAPE_CHARS_BASE.format(ESCAPE_CHARS_EXTRA), 54 ELT_WITH_STYLE = ("img", "div") # elements where a style attribute is expected
54 r"^!!!!!(?P<h1_title>.+?)$", 55
55 r"^!!!!(?P<h2_title>.+?)$", 56 wiki = [
56 r"^!!!(?P<h3_title>.+?)$", 57 r"\\" + ESCAPE_CHARS_BASE.format(ESCAPE_CHARS_EXTRA),
57 r"^!!(?P<h4_title>.+?)$", 58 r"^!!!!!(?P<h1_title>.+?)$",
58 r"^!(?P<h5_title>.+?)$", 59 r"^!!!!(?P<h2_title>.+?)$",
59 r"^----$(?P<horizontal_rule>)", 60 r"^!!!(?P<h3_title>.+?)$",
60 r"^\*(?P<list_bullet>.*?)$", 61 r"^!!(?P<h4_title>.+?)$",
61 r"^#(?P<list_ordered>.*?)$", 62 r"^!(?P<h5_title>.+?)$",
62 r"^ (?P<preformated>.*?)$", 63 r"^----$(?P<horizontal_rule>)",
63 r"^> +?(?P<quote>.*?)$", 64 r"^\*(?P<list_bullet>.*?)$",
64 r"''(?P<emphasis>.+?)''", 65 r"^#(?P<list_ordered>.*?)$",
65 r"__(?P<strong_emphasis>.+?)__", 66 r"^ (?P<preformated>.*?)$",
66 r"%%%(?P<line_break>)", 67 r"^> +?(?P<quote>.*?)$",
67 r"\+\+(?P<insertion>.+?)\+\+", 68 r"''(?P<emphasis>.+?)''",
68 r"--(?P<deletion>.+?)--", 69 r"__(?P<strong_emphasis>.+?)__",
69 r"\[(?P<link>.+?)\]", 70 r"%%%(?P<line_break>)",
70 r"\(\((?P<image>.+?)\)\)", 71 r"\+\+(?P<insertion>.+?)\+\+",
71 r"~(?P<anchor>.+?)~", 72 r"--(?P<deletion>.+?)--",
72 r"\?\?(?P<acronym>.+?\|.+?)\?\?", 73 r"\[(?P<link>.+?)\]",
73 r"{{(?P<inline_quote>.+?)}}", 74 r"\(\((?P<image>.+?)\)\)",
74 r"@@(?P<code>.+?)@@", 75 r"~(?P<anchor>.+?)~",
75 r"\$\$(?P<footnote>.+?)\$\$", 76 r"\?\?(?P<acronym>.+?\|.+?)\?\?",
76 r"(?P<text>.+?)", 77 r"{{(?P<inline_quote>.+?)}}",
77 ] 78 r"@@(?P<code>.+?)@@",
78 79 r"\$\$(?P<footnote>.+?)\$\$",
79 wiki_re = re.compile('|'.join(wiki), re.MULTILINE | re.DOTALL) 80 r"(?P<text>.+?)",
80 wiki_block_level_re = re.compile(r"^///html(?P<html>.+?)///\n\n|(?P<paragraph>.+?)(?:\n{2,}|\Z)", re.MULTILINE | re.DOTALL) 81 ]
82
83 wiki_re = re.compile("|".join(wiki), re.MULTILINE | re.DOTALL)
84 wiki_block_level_re = re.compile(
85 r"^///html(?P<html>.+?)///\n\n|(?P<paragraph>.+?)(?:\n{2,}|\Z)",
86 re.MULTILINE | re.DOTALL,
87 )
81 88
82 89
83 class DCWikiParser(object): 90 class DCWikiParser(object):
84
85 def __init__(self): 91 def __init__(self):
86 self._footnotes = None 92 self._footnotes = None
87 for i in xrange(5): 93 for i in xrange(5):
88 setattr(self, 94 setattr(
89 'parser_h{}_title'.format(i), 95 self,
90 lambda string, parent, i=i: self._parser_title(string, parent, 'h{}'.format(i))) 96 "parser_h{}_title".format(i),
97 lambda string, parent, i=i: self._parser_title(
98 string, parent, "h{}".format(i)
99 ),
100 )
91 101
92 def parser_paragraph(self, string, parent): 102 def parser_paragraph(self, string, parent):
93 p_elt = parent.addElement('p') 103 p_elt = parent.addElement("p")
94 self._parse(string, p_elt) 104 self._parse(string, p_elt)
95 105
96 def parser_html(self, string, parent): 106 def parser_html(self, string, parent):
97 wrapped_html = "<div>{}</div>".format(string) 107 wrapped_html = "<div>{}</div>".format(string)
98 try: 108 try:
99 div_elt = xml_tools.ElementParser()(wrapped_html) 109 div_elt = xml_tools.ElementParser()(wrapped_html)
100 except domish.ParserError as e: 110 except domish.ParserError as e:
101 log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e)) 111 log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e))
102 return 112 return
103 children = list(div_elt.elements()) 113 children = list(div_elt.elements())
104 if len(children) == 1 and children[0].name == 'div': 114 if len(children) == 1 and children[0].name == "div":
105 div_elt = children[0] 115 div_elt = children[0]
106 parent.addChild(div_elt) 116 parent.addChild(div_elt)
107 117
108 def parser_escape_char(self, string, parent): 118 def parser_escape_char(self, string, parent):
109 parent.addContent(string) 119 parent.addContent(string)
111 def _parser_title(self, string, parent, name): 121 def _parser_title(self, string, parent, name):
112 elt = parent.addElement(name) 122 elt = parent.addElement(name)
113 elt.addContent(string) 123 elt.addContent(string)
114 124
115 def parser_horizontal_rule(self, string, parent): 125 def parser_horizontal_rule(self, string, parent):
116 parent.addElement('hr') 126 parent.addElement("hr")
117 127
118 def _parser_list(self, string, parent, list_type): 128 def _parser_list(self, string, parent, list_type):
119 depth = 0 129 depth = 0
120 while string[depth:depth+1] == '*': 130 while string[depth : depth + 1] == "*":
121 depth +=1 131 depth += 1
122 132
123 string = string[depth:].lstrip() 133 string = string[depth:].lstrip()
124 134
125 for i in xrange(depth+1): 135 for i in xrange(depth + 1):
126 list_elt = getattr(parent, list_type) 136 list_elt = getattr(parent, list_type)
127 if not list_elt: 137 if not list_elt:
128 parent = parent.addElement(list_type) 138 parent = parent.addElement(list_type)
129 else: 139 else:
130 parent = list_elt 140 parent = list_elt
131 141
132 li_elt = parent.addElement('li') 142 li_elt = parent.addElement("li")
133 self._parse(string, li_elt) 143 self._parse(string, li_elt)
134 144
135 def parser_list_bullet(self, string, parent): 145 def parser_list_bullet(self, string, parent):
136 self._parser_list(string, parent, 'ul') 146 self._parser_list(string, parent, "ul")
137 147
138 def parser_list_ordered(self, string, parent): 148 def parser_list_ordered(self, string, parent):
139 self._parser_list(string, parent, 'ol') 149 self._parser_list(string, parent, "ol")
140 150
141 def parser_preformated(self, string, parent): 151 def parser_preformated(self, string, parent):
142 pre_elt = parent.pre 152 pre_elt = parent.pre
143 if pre_elt is None: 153 if pre_elt is None:
144 pre_elt = parent.addElement('pre') 154 pre_elt = parent.addElement("pre")
145 else: 155 else:
146 # we are on a new line, and this is important for <pre/> 156 # we are on a new line, and this is important for <pre/>
147 pre_elt.addContent('\n') 157 pre_elt.addContent("\n")
148 pre_elt.addContent(string) 158 pre_elt.addContent(string)
149 159
150 def parser_quote(self, string, parent): 160 def parser_quote(self, string, parent):
151 blockquote_elt = parent.blockquote 161 blockquote_elt = parent.blockquote
152 if blockquote_elt is None: 162 if blockquote_elt is None:
153 blockquote_elt = parent.addElement('blockquote') 163 blockquote_elt = parent.addElement("blockquote")
154 p_elt = blockquote_elt.p 164 p_elt = blockquote_elt.p
155 if p_elt is None: 165 if p_elt is None:
156 p_elt = blockquote_elt.addElement('p') 166 p_elt = blockquote_elt.addElement("p")
157 else: 167 else:
158 string = u'\n' + string 168 string = u"\n" + string
159 169
160 self._parse(string, p_elt) 170 self._parse(string, p_elt)
161 171
162 def parser_emphasis(self, string, parent): 172 def parser_emphasis(self, string, parent):
163 em_elt = parent.addElement('em') 173 em_elt = parent.addElement("em")
164 self._parse(string, em_elt) 174 self._parse(string, em_elt)
165 175
166 def parser_strong_emphasis(self, string, parent): 176 def parser_strong_emphasis(self, string, parent):
167 strong_elt = parent.addElement('strong') 177 strong_elt = parent.addElement("strong")
168 self._parse(string, strong_elt) 178 self._parse(string, strong_elt)
169 179
170 def parser_line_break(self, string, parent): 180 def parser_line_break(self, string, parent):
171 parent.addElement('br') 181 parent.addElement("br")
172 182
173 def parser_insertion(self, string, parent): 183 def parser_insertion(self, string, parent):
174 ins_elt = parent.addElement('ins') 184 ins_elt = parent.addElement("ins")
175 self._parse(string, ins_elt) 185 self._parse(string, ins_elt)
176 186
177 def parser_deletion(self, string, parent): 187 def parser_deletion(self, string, parent):
178 del_elt = parent.addElement('del') 188 del_elt = parent.addElement("del")
179 self._parse(string, del_elt) 189 self._parse(string, del_elt)
180 190
181 def parser_link(self, string, parent): 191 def parser_link(self, string, parent):
182 url_data = string.split(u'|') 192 url_data = string.split(u"|")
183 a_elt = parent.addElement('a') 193 a_elt = parent.addElement("a")
184 length = len(url_data) 194 length = len(url_data)
185 if length == 1: 195 if length == 1:
186 url = url_data[0] 196 url = url_data[0]
187 a_elt['href'] = url 197 a_elt["href"] = url
188 a_elt.addContent(url) 198 a_elt.addContent(url)
189 else: 199 else:
190 name = url_data[0] 200 name = url_data[0]
191 url = url_data[1] 201 url = url_data[1]
192 a_elt['href'] = url 202 a_elt["href"] = url
193 a_elt.addContent(name) 203 a_elt.addContent(name)
194 if length >= 3: 204 if length >= 3:
195 a_elt['lang'] = url_data[2] 205 a_elt["lang"] = url_data[2]
196 if length >= 4: 206 if length >= 4:
197 a_elt['title'] = url_data[3] 207 a_elt["title"] = url_data[3]
198 if length > 4: 208 if length > 4:
199 log.warning(u"too much data for url, ignoring extra data") 209 log.warning(u"too much data for url, ignoring extra data")
200 210
201 def parser_image(self, string, parent): 211 def parser_image(self, string, parent):
202 image_data = string.split(u'|') 212 image_data = string.split(u"|")
203 img_elt = parent.addElement('img') 213 img_elt = parent.addElement("img")
204 214
205 for idx, attribute in enumerate(('src', 'alt', 'position', 'longdesc')): 215 for idx, attribute in enumerate(("src", "alt", "position", "longdesc")):
206 try: 216 try:
207 data = image_data[idx] 217 data = image_data[idx]
208 except IndexError: 218 except IndexError:
209 break 219 break
210 220
211 if attribute != 'position': 221 if attribute != "position":
212 img_elt[attribute] = data 222 img_elt[attribute] = data
213 else: 223 else:
214 data = data.lower() 224 data = data.lower()
215 if data in ('l', 'g'): 225 if data in ("l", "g"):
216 img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0" 226 img_elt["style"] = "display:block; float:left; margin:0 1em 1em 0"
217 elif data in ('r', 'd'): 227 elif data in ("r", "d"):
218 img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em" 228 img_elt["style"] = "display:block; float:right; margin:0 0 1em 1em"
219 elif data == 'c': 229 elif data == "c":
220 img_elt['style'] = "display:block; margin-left:auto; margin-right:auto" 230 img_elt[
231 "style"
232 ] = "display:block; margin-left:auto; margin-right:auto"
221 else: 233 else:
222 log.warning(u"bad position argument for image, ignoring it") 234 log.warning(u"bad position argument for image, ignoring it")
223 235
224 def parser_anchor(self, string, parent): 236 def parser_anchor(self, string, parent):
225 a_elt = parent.addElement('a') 237 a_elt = parent.addElement("a")
226 a_elt['id'] = string 238 a_elt["id"] = string
227 239
228 def parser_acronym(self, string, parent): 240 def parser_acronym(self, string, parent):
229 acronym, title = string.split(u'|',1) 241 acronym, title = string.split(u"|", 1)
230 acronym_elt = parent.addElement('acronym', content=acronym) 242 acronym_elt = parent.addElement("acronym", content=acronym)
231 acronym_elt['title'] = title 243 acronym_elt["title"] = title
232 244
233 def parser_inline_quote(self, string, parent): 245 def parser_inline_quote(self, string, parent):
234 quote_data = string.split(u'|') 246 quote_data = string.split(u"|")
235 quote = quote_data[0] 247 quote = quote_data[0]
236 q_elt = parent.addElement('q', content=quote) 248 q_elt = parent.addElement("q", content=quote)
237 for idx, attribute in enumerate(('lang', 'cite'), 1): 249 for idx, attribute in enumerate(("lang", "cite"), 1):
238 try: 250 try:
239 data = quote_data[idx] 251 data = quote_data[idx]
240 except IndexError: 252 except IndexError:
241 break 253 break
242 q_elt[attribute] = data 254 q_elt[attribute] = data
243 255
244 def parser_code(self, string, parent): 256 def parser_code(self, string, parent):
245 parent.addElement('code', content=string) 257 parent.addElement("code", content=string)
246 258
247 def parser_footnote(self, string, parent): 259 def parser_footnote(self, string, parent):
248 idx = len(self._footnotes) + 1 260 idx = len(self._footnotes) + 1
249 note_txt = NOTE_TPL.format(idx) 261 note_txt = NOTE_TPL.format(idx)
250 sup_elt = parent.addElement('sup') 262 sup_elt = parent.addElement("sup")
251 sup_elt['class'] = 'note' 263 sup_elt["class"] = "note"
252 a_elt = sup_elt.addElement('a', content=note_txt) 264 a_elt = sup_elt.addElement("a", content=note_txt)
253 a_elt['id'] = NOTE_A_REV_TPL.format(idx) 265 a_elt["id"] = NOTE_A_REV_TPL.format(idx)
254 a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx)) 266 a_elt["href"] = u"#{}".format(NOTE_A_TPL.format(idx))
255 267
256 p_elt = domish.Element((None, 'p')) 268 p_elt = domish.Element((None, "p"))
257 a_elt = p_elt.addElement('a', content=note_txt) 269 a_elt = p_elt.addElement("a", content=note_txt)
258 a_elt['id'] = NOTE_A_TPL.format(idx) 270 a_elt["id"] = NOTE_A_TPL.format(idx)
259 a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx)) 271 a_elt["href"] = u"#{}".format(NOTE_A_REV_TPL.format(idx))
260 self._parse(string, p_elt) 272 self._parse(string, p_elt)
261 # footnotes are actually added at the end of the parsing 273 # footnotes are actually added at the end of the parsing
262 self._footnotes.append(p_elt) 274 self._footnotes.append(p_elt)
263 275
264 def parser_text(self, string, parent): 276 def parser_text(self, string, parent):
271 if match.lastgroup is None: 283 if match.lastgroup is None:
272 parent.addContent(string) 284 parent.addContent(string)
273 return 285 return
274 matched = match.group(match.lastgroup) 286 matched = match.group(match.lastgroup)
275 try: 287 try:
276 parser = getattr(self, 'parser_{}'.format(match.lastgroup)) 288 parser = getattr(self, "parser_{}".format(match.lastgroup))
277 except AttributeError: 289 except AttributeError:
278 log.warning(u"No parser found for {}".format(match.lastgroup)) 290 log.warning(u"No parser found for {}".format(match.lastgroup))
279 # parent.addContent(string) 291 # parent.addContent(string)
280 continue 292 continue
281 parser(matched, parent) 293 parser(matched, parent)
282 294
283 def parse(self, string): 295 def parse(self, string):
284 self._footnotes = [] 296 self._footnotes = []
285 div_elt = domish.Element((None, 'div')) 297 div_elt = domish.Element((None, "div"))
286 self._parse(string, parent=div_elt, block_level=True) 298 self._parse(string, parent=div_elt, block_level=True)
287 if self._footnotes: 299 if self._footnotes:
288 foot_div_elt = div_elt.addElement('div') 300 foot_div_elt = div_elt.addElement("div")
289 foot_div_elt['class'] = 'footnotes' 301 foot_div_elt["class"] = "footnotes"
290 # we add a simple horizontal rule which can be customized 302 # we add a simple horizontal rule which can be customized
291 # with footnotes class, instead of a text which would need 303 # with footnotes class, instead of a text which would need
292 # to be translated 304 # to be translated
293 foot_div_elt.addElement('hr') 305 foot_div_elt.addElement("hr")
294 for elt in self._footnotes: 306 for elt in self._footnotes:
295 foot_div_elt.addChild(elt) 307 foot_div_elt.addChild(elt)
296 return div_elt 308 return div_elt
297 309
298 310
299 class XHTMLParser(object): 311 class XHTMLParser(object):
300
301 def __init__(self): 312 def __init__(self):
302 self.flags = None 313 self.flags = None
303 self.toto = 0 314 self.toto = 0
304 self.footnotes = None # will hold a map from url to buffer id 315 self.footnotes = None # will hold a map from url to buffer id
305 for i in xrange(1,6): 316 for i in xrange(1, 6):
306 setattr(self, 317 setattr(
307 'parser_h{}'.format(i), 318 self,
308 lambda elt, buf, level=i: self.parserHeading(elt, buf, level) 319 "parser_h{}".format(i),
309 ) 320 lambda elt, buf, level=i: self.parserHeading(elt, buf, level),
321 )
310 322
311 def parser_a(self, elt, buf): 323 def parser_a(self, elt, buf):
312 try: 324 try:
313 url = elt['href'] 325 url = elt["href"]
314 except KeyError: 326 except KeyError:
315 # probably an anchor 327 # probably an anchor
316 try: 328 try:
317 id_ = elt['id'] 329 id_ = elt["id"]
318 if not id_: 330 if not id_:
319 # we don't want empty values 331 # we don't want empty values
320 raise KeyError 332 raise KeyError
321 except KeyError: 333 except KeyError:
322 self.parserGeneric(elt, buf) 334 self.parserGeneric(elt, buf)
323 else: 335 else:
324 buf.append(u'~~{}~~'.format(id_)) 336 buf.append(u"~~{}~~".format(id_))
325 return 337 return
326 338
327 link_data = [url] 339 link_data = [url]
328 name = unicode(elt) 340 name = unicode(elt)
329 if name != url: 341 if name != url:
330 link_data.insert(0, name) 342 link_data.insert(0, name)
331 343
332 lang = elt.getAttribute('lang') 344 lang = elt.getAttribute("lang")
333 title = elt.getAttribute('title') 345 title = elt.getAttribute("title")
334 if lang is not None: 346 if lang is not None:
335 link_data.append(lang) 347 link_data.append(lang)
336 elif title is not None: 348 elif title is not None:
337 link_data.appand(u'') 349 link_data.appand(u"")
338 if title is not None: 350 if title is not None:
339 link_data.append(title) 351 link_data.append(title)
340 buf.append(u'[') 352 buf.append(u"[")
341 buf.append(u'|'.join(link_data)) 353 buf.append(u"|".join(link_data))
342 buf.append(u']') 354 buf.append(u"]")
343 355
344 def parser_acronym(self, elt, buf): 356 def parser_acronym(self, elt, buf):
345 try: 357 try:
346 title = elt['title'] 358 title = elt["title"]
347 except KeyError: 359 except KeyError:
348 log.debug(u"Acronyme without title, using generic parser") 360 log.debug(u"Acronyme without title, using generic parser")
349 self.parserGeneric(elt, buf) 361 self.parserGeneric(elt, buf)
350 return 362 return
351 buf.append(u'??{}|{}??'.format(unicode(elt), title)) 363 buf.append(u"??{}|{}??".format(unicode(elt), title))
352 364
353 def parser_blockquote(self, elt, buf): 365 def parser_blockquote(self, elt, buf):
354 # we remove wrapping <p> to avoid empty line with "> " 366 # we remove wrapping <p> to avoid empty line with "> "
355 children = list([child for child in elt.children if unicode(child).strip() not in ('', '\n')]) 367 children = list(
356 if len(children) == 1 and children[0].name == 'p': 368 [child for child in elt.children if unicode(child).strip() not in ("", "\n")]
369 )
370 if len(children) == 1 and children[0].name == "p":
357 elt = children[0] 371 elt = children[0]
358 tmp_buf = [] 372 tmp_buf = []
359 self.parseChildren(elt, tmp_buf) 373 self.parseChildren(elt, tmp_buf)
360 blockquote = u'> ' + u'\n> '.join(u''.join(tmp_buf).split('\n')) 374 blockquote = u"> " + u"\n> ".join(u"".join(tmp_buf).split("\n"))
361 buf.append(blockquote) 375 buf.append(blockquote)
362 376
363 def parser_br(self, elt, buf): 377 def parser_br(self, elt, buf):
364 buf.append(u'%%%') 378 buf.append(u"%%%")
365 379
366 def parser_code(self, elt, buf): 380 def parser_code(self, elt, buf):
367 buf.append(u'@@') 381 buf.append(u"@@")
368 self.parseChildren(elt, buf) 382 self.parseChildren(elt, buf)
369 buf.append(u'@@') 383 buf.append(u"@@")
370 384
371 def parser_del(self, elt, buf): 385 def parser_del(self, elt, buf):
372 buf.append(u'--') 386 buf.append(u"--")
373 self.parseChildren(elt, buf) 387 self.parseChildren(elt, buf)
374 buf.append(u'--') 388 buf.append(u"--")
375 389
376 def parser_div(self, elt, buf): 390 def parser_div(self, elt, buf):
377 if elt.getAttribute('class') == 'footnotes': 391 if elt.getAttribute("class") == "footnotes":
378 self.parserFootnote(elt, buf) 392 self.parserFootnote(elt, buf)
379 else: 393 else:
380 self.parseChildren(elt, buf, block=True) 394 self.parseChildren(elt, buf, block=True)
381 395
382 def parser_em(self, elt, buf): 396 def parser_em(self, elt, buf):
385 buf.append(u"''") 399 buf.append(u"''")
386 400
387 def parser_h6(self, elt, buf): 401 def parser_h6(self, elt, buf):
388 # XXX: <h6/> heading is not managed by wiki syntax 402 # XXX: <h6/> heading is not managed by wiki syntax
389 # so we handle it with a <h5/> 403 # so we handle it with a <h5/>
390 elt = copy.copy(elt) # we don't want to change to original element 404 elt = copy.copy(elt) # we don't want to change to original element
391 elt.name = 'h5' 405 elt.name = "h5"
392 self._parse(elt, buf) 406 self._parse(elt, buf)
393 407
394 def parser_hr(self, elt, buf): 408 def parser_hr(self, elt, buf):
395 buf.append(u'\n----\n') 409 buf.append(u"\n----\n")
396 410
397 def parser_img(self, elt, buf): 411 def parser_img(self, elt, buf):
398 try: 412 try:
399 url = elt['src'] 413 url = elt["src"]
400 except KeyError: 414 except KeyError:
401 log.warning(u"Ignoring <img/> without src") 415 log.warning(u"Ignoring <img/> without src")
402 return 416 return
403 417
404 image_data=[url] 418 image_data = [url]
405 419
406 alt = elt.getAttribute('alt') 420 alt = elt.getAttribute("alt")
407 style = elt.getAttribute('style', '') 421 style = elt.getAttribute("style", "")
408 desc = elt.getAttribute('longdesc') 422 desc = elt.getAttribute("longdesc")
409 423
410 if '0 1em 1em 0' in style: 424 if "0 1em 1em 0" in style:
411 position = 'L' 425 position = "L"
412 elif '0 0 1em 1em' in style: 426 elif "0 0 1em 1em" in style:
413 position = 'R' 427 position = "R"
414 elif 'auto' in style: 428 elif "auto" in style:
415 position = 'C' 429 position = "C"
416 else: 430 else:
417 position = None 431 position = None
418 432
419 if alt: 433 if alt:
420 image_data.append(alt) 434 image_data.append(alt)
421 elif position or desc: 435 elif position or desc:
422 image_data.append(u'') 436 image_data.append(u"")
423 437
424 if position: 438 if position:
425 image_data.append(position) 439 image_data.append(position)
426 elif desc: 440 elif desc:
427 image_data.append(u'') 441 image_data.append(u"")
428 442
429 if desc: 443 if desc:
430 image_data.append(desc) 444 image_data.append(desc)
431 445
432 buf.append(u'((') 446 buf.append(u"((")
433 buf.append(u'|'.join(image_data)) 447 buf.append(u"|".join(image_data))
434 buf.append(u'))') 448 buf.append(u"))")
435 449
436 def parser_ins(self, elt, buf): 450 def parser_ins(self, elt, buf):
437 buf.append(u'++') 451 buf.append(u"++")
438 self.parseChildren(elt, buf) 452 self.parseChildren(elt, buf)
439 buf.append(u'++') 453 buf.append(u"++")
440 454
441 def parser_li(self, elt, buf): 455 def parser_li(self, elt, buf):
442 flag = None 456 flag = None
443 current_flag = None 457 current_flag = None
444 bullets = [] 458 bullets = []
445 for flag in reversed(self.flags): 459 for flag in reversed(self.flags):
446 if flag in (FLAG_UL, FLAG_OL): 460 if flag in (FLAG_UL, FLAG_OL):
447 if current_flag is None: 461 if current_flag is None:
448 current_flag = flag 462 current_flag = flag
449 if flag == current_flag: 463 if flag == current_flag:
450 bullets.append(u'*' if flag == FLAG_UL else u'#') 464 bullets.append(u"*" if flag == FLAG_UL else u"#")
451 else: 465 else:
452 break 466 break
453 467
454 if flag != current_flag and buf[-1] == u' ': 468 if flag != current_flag and buf[-1] == u" ":
455 # this trick is to avoid a space when we switch 469 # this trick is to avoid a space when we switch
456 # from (un)ordered to the other type on the same row 470 # from (un)ordered to the other type on the same row
457 # e.g. *# unorder + ordered item 471 # e.g. *# unorder + ordered item
458 del buf[-1] 472 del buf[-1]
459 473
460 buf.extend(bullets) 474 buf.extend(bullets)
461 475
462 buf.append(u' ') 476 buf.append(u" ")
463 self.parseChildren(elt, buf) 477 self.parseChildren(elt, buf)
464 buf.append(u'\n') 478 buf.append(u"\n")
465 479
466 def parser_ol(self, elt, buf): 480 def parser_ol(self, elt, buf):
467 self.parserList(elt, buf, FLAG_OL) 481 self.parserList(elt, buf, FLAG_OL)
468 482
469 def parser_p(self, elt, buf): 483 def parser_p(self, elt, buf):
470 self.parseChildren(elt, buf) 484 self.parseChildren(elt, buf)
471 buf.append(u'\n\n') 485 buf.append(u"\n\n")
472 486
473 def parser_pre(self, elt, buf): 487 def parser_pre(self, elt, buf):
474 pre = u''.join([child.toXml() if domish.IElement.providedBy(child) else unicode(child) for child in elt.children]) 488 pre = u"".join(
475 pre = u' ' + u'\n '.join(pre.split('\n')) 489 [
490 child.toXml() if domish.IElement.providedBy(child) else unicode(child)
491 for child in elt.children
492 ]
493 )
494 pre = u" " + u"\n ".join(pre.split("\n"))
476 buf.append(pre) 495 buf.append(pre)
477 496
478 def parser_q(self, elt, buf): 497 def parser_q(self, elt, buf):
479 quote_data=[unicode(elt)] 498 quote_data = [unicode(elt)]
480 499
481 lang = elt.getAttribute('lang') 500 lang = elt.getAttribute("lang")
482 cite = elt.getAttribute('url') 501 cite = elt.getAttribute("url")
483 502
484 if lang: 503 if lang:
485 quote_data.append(lang) 504 quote_data.append(lang)
486 elif cite: 505 elif cite:
487 quote_data.append(u'') 506 quote_data.append(u"")
488 507
489 if cite: 508 if cite:
490 quote_data.append(cite) 509 quote_data.append(cite)
491 510
492 buf.append(u'{{') 511 buf.append(u"{{")
493 buf.append(u'|'.join(quote_data)) 512 buf.append(u"|".join(quote_data))
494 buf.append(u'}}') 513 buf.append(u"}}")
495 514
496 def parser_span(self, elt, buf): 515 def parser_span(self, elt, buf):
497 self.parseChildren(elt, buf, block=True) 516 self.parseChildren(elt, buf, block=True)
498 517
499 def parser_strong(self, elt, buf): 518 def parser_strong(self, elt, buf):
500 buf.append(u'__') 519 buf.append(u"__")
501 self.parseChildren(elt, buf) 520 self.parseChildren(elt, buf)
502 buf.append(u'__') 521 buf.append(u"__")
503 522
504 def parser_sup(self, elt, buf): 523 def parser_sup(self, elt, buf):
505 # sup is mainly used for footnotes, so we check if we have an anchor inside 524 # sup is mainly used for footnotes, so we check if we have an anchor inside
506 children = list([child for child in elt.children if unicode(child).strip() not in ('', '\n')]) 525 children = list(
507 if (len(children) == 1 and domish.IElement.providedBy(children[0]) 526 [child for child in elt.children if unicode(child).strip() not in ("", "\n")]
508 and children[0].name == 'a' and '#' in children[0].getAttribute('href', '')): 527 )
509 url = children[0]['href'] 528 if (
510 note_id = url[url.find('#')+1:] 529 len(children) == 1
530 and domish.IElement.providedBy(children[0])
531 and children[0].name == "a"
532 and "#" in children[0].getAttribute("href", "")
533 ):
534 url = children[0]["href"]
535 note_id = url[url.find("#") + 1 :]
511 if not note_id: 536 if not note_id:
512 log.warning("bad link found in footnote") 537 log.warning("bad link found in footnote")
513 self.parserGeneric(elt, buf) 538 self.parserGeneric(elt, buf)
514 return 539 return
515 # this looks like a footnote 540 # this looks like a footnote
516 buf.append(u'$$') 541 buf.append(u"$$")
517 buf.append(u' ') # placeholder 542 buf.append(u" ") # placeholder
518 self.footnotes[note_id] = len(buf) - 1 543 self.footnotes[note_id] = len(buf) - 1
519 buf.append(u'$$') 544 buf.append(u"$$")
520 else: 545 else:
521 self.parserGeneric(elt, buf) 546 self.parserGeneric(elt, buf)
522 547
523 def parser_ul(self, elt, buf): 548 def parser_ul(self, elt, buf):
524 self.parserList(elt, buf, FLAG_UL) 549 self.parserList(elt, buf, FLAG_UL)
535 560
536 if idx == 0: 561 if idx == 0:
537 raise exceptions.InternalError(u"flag has been removed by an other parser") 562 raise exceptions.InternalError(u"flag has been removed by an other parser")
538 563
539 def parserHeading(self, elt, buf, level): 564 def parserHeading(self, elt, buf, level):
540 buf.append((6-level) * u'!') 565 buf.append((6 - level) * u"!")
541 for child in elt.children: 566 for child in elt.children:
542 # we ignore other elements for a Hx title 567 # we ignore other elements for a Hx title
543 self.parserText(child, buf) 568 self.parserText(child, buf)
544 buf.append(u'\n') 569 buf.append(u"\n")
545 570
546 def parserFootnote(self, elt, buf): 571 def parserFootnote(self, elt, buf):
547 for elt in elt.elements(): 572 for elt in elt.elements():
548 # all children other than <p/> are ignored 573 # all children other than <p/> are ignored
549 if elt.name == 'p': 574 if elt.name == "p":
550 a_elt = elt.a 575 a_elt = elt.a
551 if a_elt is None: 576 if a_elt is None:
552 log.warning(u"<p/> element doesn't contain <a/> in footnote, ignoring it") 577 log.warning(
578 u"<p/> element doesn't contain <a/> in footnote, ignoring it"
579 )
553 continue 580 continue
554 try: 581 try:
555 note_idx = self.footnotes[a_elt['id']] 582 note_idx = self.footnotes[a_elt["id"]]
556 except KeyError: 583 except KeyError:
557 log.warning(u"Note id doesn't match any known note, ignoring it") 584 log.warning(u"Note id doesn't match any known note, ignoring it")
558 # we create a dummy element to parse all children after the <a/> 585 # we create a dummy element to parse all children after the <a/>
559 dummy_elt = domish.Element((None, 'note')) 586 dummy_elt = domish.Element((None, "note"))
560 a_idx = elt.children.index(a_elt) 587 a_idx = elt.children.index(a_elt)
561 dummy_elt.children = elt.children[a_idx+1:] 588 dummy_elt.children = elt.children[a_idx + 1 :]
562 note_buf = [] 589 note_buf = []
563 self.parseChildren(dummy_elt, note_buf) 590 self.parseChildren(dummy_elt, note_buf)
564 # now we can replace the placeholder 591 # now we can replace the placeholder
565 buf[note_idx] = u''.join(note_buf) 592 buf[note_idx] = u"".join(note_buf)
566 593
def parserText(self, txt, buf, keep_whitespaces=False):
    """Escape a text node and append it to ``buf``.

    The text is converted to unicode. Unless ``keep_whitespaces`` is set,
    every run of whitespace is reduced to a single space and leading and
    trailing whitespace is dropped. Characters matched by ESCAPE_CHARS are
    then prefixed with a backslash.

    Nothing is appended when the resulting text is empty.
    The resulting (possibly empty) text is returned.
    """
    text = unicode(txt)
    if not keep_whitespaces:
        # we get text and only let one inter word space
        words = text.split()
        text = u" ".join(words)
    escaped = re.sub(ESCAPE_CHARS, r"\\\1", text)
    if escaped:
        buf.append(escaped)
    return escaped
576 603
def parserGeneric(self, elt, buf):
    """Append ``elt`` to ``buf`` as raw XML inside a ``///html`` block.

    The element is serialised with its ``toXml`` method and surrounded by
    the ``///html`` and ``///`` marker lines, with blank lines around them.
    """
    raw_xml = elt.toXml()
    html_block = u"\n\n///html\n{}\n///\n\n".format(raw_xml)
    buf.append(html_block)
581 608
def parseChildren(self, elt, buf, block=False):
    """Convert every child of ``elt`` and append the result to ``buf``.

    Element children are dispatched through ``self._parse``, other children
    are handled as text by ``self.parserText``.
    Unless ``block`` is set, a single space is inserted between two visible
    children when the last chunk of ``buf`` doesn't already end with a space
    or a line feed.

    ``buf`` is the list of output chunks, it is modified in place.
    """
    first_visible = True
    for child in elt.children:
        # endswith is used instead of indexing the last character, so an
        # empty chunk at the end of buf doesn't raise an IndexError
        if (
            not block
            and not first_visible
            and buf
            and not buf[-1].endswith((" ", "\n"))
        ):
            # we add separation if it isn't already there
            buf.append(u" ")
        if domish.IElement.providedBy(child):
            self._parse(child, buf)
            first_visible = False
        else:
            appended = self.parserText(child, buf)
            if appended:
                # empty text doesn't count as visible content
                first_visible = False
595 622
def _parse(self, elt, buf):
    """Convert ``elt`` with the parser method matching its name.

    The method ``parser_<lowercased element name>`` is used when it exists.
    ``parserGeneric`` is used instead when there is no such method, or when
    the element has a "style" attribute and its name is not in
    ELT_WITH_STYLE.
    """
    tag = elt.name.lower()
    inline_style = elt.getAttribute("style")
    # if we have style we use generic parser to put raw HTML
    # to avoid losing it
    use_generic = bool(inline_style) and tag not in ELT_WITH_STYLE
    if not use_generic:
        try:
            handler = getattr(self, "parser_{}".format(tag))
        except AttributeError:
            log.debug(
                "Can't find parser for {} element, using generic one".format(elt.name)
            )
            use_generic = True
    if use_generic:
        handler = self.parserGeneric
    handler(elt, buf)
610 639
def parse(self, elt):
    """Convert ``elt`` and return the result as a single unicode string.

    ``self.flags`` and ``self.footnotes`` are reset before the conversion,
    which is then delegated to ``self._parse``.
    """
    # state of a previous conversion must not leak into this one
    self.flags, self.footnotes = [], {}
    chunks = []
    self._parse(elt, chunks)
    return u"".join(chunks)
617 646
def parseString(self, string):
    """Parse an (X)HTML string and convert it with ``self.parse``.

    The string is wrapped in a <div/> before being parsed. When the wrapped
    content consists of exactly one element which is itself a <div/>, this
    inner <div/> is converted instead of the wrapper.

    None is returned (after a warning) if the content can't be parsed.
    """
    wrapped_html = u"<div>{}</div>".format(string)
    try:
        root_elt = xml_tools.ElementParser()(wrapped_html)
    except domish.ParserError as e:
        log.warning(u"Error while parsing HTML content: {}".format(e))
        return None
    inner_elts = list(root_elt.elements())
    if len(inner_elts) == 1:
        only_elt = inner_elts[0]
        if only_elt.name == "div":
            # the content already had its own <div/>, no need for our wrapper
            root_elt = only_elt
    return self.parse(root_elt)
629 658
630 659
631 class DCWikiSyntax(object): 660 class DCWikiSyntax(object):
635 log.info(_(u"Dotclear wiki syntax plugin initialization")) 664 log.info(_(u"Dotclear wiki syntax plugin initialization"))
636 self.host = host 665 self.host = host
637 self._dc_parser = DCWikiParser() 666 self._dc_parser = DCWikiParser()
638 self._xhtml_parser = XHTMLParser() 667 self._xhtml_parser = XHTMLParser()
639 self._stx = self.host.plugins["TEXT-SYNTAXES"] 668 self._stx = self.host.plugins["TEXT-SYNTAXES"]
640 self._stx.addSyntax(self.SYNTAX_NAME, self.parseWiki, self.parseXHTML, [self._stx.OPT_NO_THREAD]) 669 self._stx.addSyntax(
670 self.SYNTAX_NAME, self.parseWiki, self.parseXHTML, [self._stx.OPT_NO_THREAD]
671 )
641 672
def parseWiki(self, wiki_stx):
    """Parse ``wiki_stx`` with ``self._dc_parser`` and return it serialised.

    The element returned by the parser is serialised with its ``toXml``
    method.
    """
    return self._dc_parser.parse(wiki_stx).toXml()
645 676