libervia-backend: comparison of sat/plugins/plugin_syntax_wiki_dotclear.py @ 2624:56f94936df1e
code style reformatting using black
author | Goffi <goffi@goffi.org> |
---|---|
date | Wed, 27 Jun 2018 20:14:46 +0200 |
parents | 26edcf3a30eb |
children | 003b8b4b56a7 |
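
This changeset is the output of running the black code formatter over the plugin: black normalizes string literals to double quotes and re-wraps statements that exceed its line-length limit (88 columns by default), which accounts for every hunk in the comparison below. As a minimal sketch of reproducing that kind of reformatting (black's exact version and options are not recorded in the changeset, so defaults are assumed):

```python
# Hypothetical reproduction of the reformatting shown in this comparison.
# Assumes the `black` formatter is installed and the current directory is
# the repository root; the author's actual black version/options are unknown.
import subprocess

# Raises CalledProcessError if black cannot parse or rewrite the file.
subprocess.check_call(["black", "sat/plugins/plugin_syntax_wiki_dotclear.py"])
```

black verifies that the reformatted file parses to an equivalent AST, so the changes below are purely cosmetic; it is also deterministic, so re-running it over the right-hand column should leave it unchanged.
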
2623:49533de4540b | 2624:56f94936df1e |
---|---|
19 | 19 |
20 # XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent | 20 # XXX: ref used: http://dotclear.org/documentation/2.0/usage/syntaxes#wiki-syntax-and-xhtml-equivalent |
21 | 21 |
22 from sat.core.i18n import _ | 22 from sat.core.i18n import _ |
23 from sat.core.log import getLogger | 23 from sat.core.log import getLogger |
| 24 |
24 log = getLogger(__name__) | 25 log = getLogger(__name__) |
25 from sat.core.constants import Const as C | 26 from sat.core.constants import Const as C |
26 from sat.core import exceptions | 27 from sat.core import exceptions |
27 from twisted.words.xish import domish | 28 from twisted.words.xish import domish |
28 from sat.tools import xml_tools | 29 from sat.tools import xml_tools |
34 C.PI_IMPORT_NAME: "SYNT_DC_WIKI", | 35 C.PI_IMPORT_NAME: "SYNT_DC_WIKI", |
35 C.PI_TYPE: C.PLUG_TYPE_SYNTAXE, | 36 C.PI_TYPE: C.PLUG_TYPE_SYNTAXE, |
36 C.PI_DEPENDENCIES: ["TEXT-SYNTAXES"], | 37 C.PI_DEPENDENCIES: ["TEXT-SYNTAXES"], |
37 C.PI_MAIN: "DCWikiSyntax", | 38 C.PI_MAIN: "DCWikiSyntax", |
38 C.PI_HANDLER: "", | 39 C.PI_HANDLER: "", |
39 C.PI_DESCRIPTION: _("""Implementation of Dotclear wiki syntax""") | 40 C.PI_DESCRIPTION: _("""Implementation of Dotclear wiki syntax"""), |
40 } | 41 } |
41 | 42 |
42 NOTE_TPL = u'[{}]' # Note template | 43 NOTE_TPL = u"[{}]" # Note template |
43 NOTE_A_REV_TPL = u'rev_note_{}' | 44 NOTE_A_REV_TPL = u"rev_note_{}" |
44 NOTE_A_TPL = u'note_{}' | 45 NOTE_A_TPL = u"note_{}" |
45 ESCAPE_CHARS_BASE = r"(?P<escape_char>[][{}%|\\/*#@{{}}~$-])" | 46 ESCAPE_CHARS_BASE = r"(?P<escape_char>[][{}%|\\/*#@{{}}~$-])" |
46 ESCAPE_CHARS_EXTRA = r"!?_+'()" # These chars are not escaped in XHTML => dc_wiki conversion, | 47 ESCAPE_CHARS_EXTRA = ( |
47 # but are used in the other direction | 48 r"!?_+'()" |
48 ESCAPE_CHARS = ESCAPE_CHARS_BASE.format('') | 49 ) # These chars are not escaped in XHTML => dc_wiki conversion, |
49 FLAG_UL = 'ul' # must be the name of the element | 50 # but are used in the other direction |
50 FLAG_OL = 'ol' | 51 ESCAPE_CHARS = ESCAPE_CHARS_BASE.format("") |
51 ELT_WITH_STYLE = ('img', 'div') # elements where a style attribute is expected | 52 FLAG_UL = "ul" # must be the name of the element |
52 | 53 FLAG_OL = "ol" |
53 wiki = [r'\\' + ESCAPE_CHARS_BASE.format(ESCAPE_CHARS_EXTRA), | 54 ELT_WITH_STYLE = ("img", "div") # elements where a style attribute is expected |
54 r"^!!!!!(?P<h1_title>.+?)$", | 55 |
55 r"^!!!!(?P<h2_title>.+?)$", | 56 wiki = [ |
56 r"^!!!(?P<h3_title>.+?)$", | 57 r"\\" + ESCAPE_CHARS_BASE.format(ESCAPE_CHARS_EXTRA), |
57 r"^!!(?P<h4_title>.+?)$", | 58 r"^!!!!!(?P<h1_title>.+?)$", |
58 r"^!(?P<h5_title>.+?)$", | 59 r"^!!!!(?P<h2_title>.+?)$", |
59 r"^----$(?P<horizontal_rule>)", | 60 r"^!!!(?P<h3_title>.+?)$", |
60 r"^\*(?P<list_bullet>.*?)$", | 61 r"^!!(?P<h4_title>.+?)$", |
61 r"^#(?P<list_ordered>.*?)$", | 62 r"^!(?P<h5_title>.+?)$", |
62 r"^ (?P<preformated>.*?)$", | 63 r"^----$(?P<horizontal_rule>)", |
63 r"^> +?(?P<quote>.*?)$", | 64 r"^\*(?P<list_bullet>.*?)$", |
64 r"''(?P<emphasis>.+?)''", | 65 r"^#(?P<list_ordered>.*?)$", |
65 r"__(?P<strong_emphasis>.+?)__", | 66 r"^ (?P<preformated>.*?)$", |
66 r"%%%(?P<line_break>)", | 67 r"^> +?(?P<quote>.*?)$", |
67 r"\+\+(?P<insertion>.+?)\+\+", | 68 r"''(?P<emphasis>.+?)''", |
68 r"--(?P<deletion>.+?)--", | 69 r"__(?P<strong_emphasis>.+?)__", |
69 r"\[(?P<link>.+?)\]", | 70 r"%%%(?P<line_break>)", |
70 r"\(\((?P<image>.+?)\)\)", | 71 r"\+\+(?P<insertion>.+?)\+\+", |
71 r"~(?P<anchor>.+?)~", | 72 r"--(?P<deletion>.+?)--", |
72 r"\?\?(?P<acronym>.+?\|.+?)\?\?", | 73 r"\[(?P<link>.+?)\]", |
73 r"{{(?P<inline_quote>.+?)}}", | 74 r"\(\((?P<image>.+?)\)\)", |
74 r"@@(?P<code>.+?)@@", | 75 r"~(?P<anchor>.+?)~", |
75 r"\$\$(?P<footnote>.+?)\$\$", | 76 r"\?\?(?P<acronym>.+?\|.+?)\?\?", |
76 r"(?P<text>.+?)", | 77 r"{{(?P<inline_quote>.+?)}}", |
77 ] | 78 r"@@(?P<code>.+?)@@", |
78 | 79 r"\$\$(?P<footnote>.+?)\$\$", |
79 wiki_re = re.compile('|'.join(wiki), re.MULTILINE | re.DOTALL) | 80 r"(?P<text>.+?)", |
80 wiki_block_level_re = re.compile(r"^///html(?P<html>.+?)///\n\n|(?P<paragraph>.+?)(?:\n{2,}|\Z)", re.MULTILINE | re.DOTALL) | 81 ] |
| 82 |
| 83 wiki_re = re.compile("|".join(wiki), re.MULTILINE | re.DOTALL) |
| 84 wiki_block_level_re = re.compile( |
| 85 r"^///html(?P<html>.+?)///\n\n|(?P<paragraph>.+?)(?:\n{2,}|\Z)", |
| 86 re.MULTILINE | re.DOTALL, |
| 87 ) |
81 | 88 |
82 | 89 |
83 class DCWikiParser(object): | 90 class DCWikiParser(object): |
84 | |
85 def __init__(self): | 91 def __init__(self): |
86 self._footnotes = None | 92 self._footnotes = None |
87 for i in xrange(5): | 93 for i in xrange(5): |
88 setattr(self, | 94 setattr( |
89 'parser_h{}_title'.format(i), | 95 self, |
90 lambda string, parent, i=i: self._parser_title(string, parent, 'h{}'.format(i))) | 96 "parser_h{}_title".format(i), |
| 97 lambda string, parent, i=i: self._parser_title( |
| 98 string, parent, "h{}".format(i) |
| 99 ), |
| 100 ) |
91 | 101 |
92 def parser_paragraph(self, string, parent): | 102 def parser_paragraph(self, string, parent): |
93 p_elt = parent.addElement('p') | 103 p_elt = parent.addElement("p") |
94 self._parse(string, p_elt) | 104 self._parse(string, p_elt) |
95 | 105 |
96 def parser_html(self, string, parent): | 106 def parser_html(self, string, parent): |
97 wrapped_html = "<div>{}</div>".format(string) | 107 wrapped_html = "<div>{}</div>".format(string) |
98 try: | 108 try: |
99 div_elt = xml_tools.ElementParser()(wrapped_html) | 109 div_elt = xml_tools.ElementParser()(wrapped_html) |
100 except domish.ParserError as e: | 110 except domish.ParserError as e: |
101 log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e)) | 111 log.warning(u"Error while parsing HTML content, ignoring it: {}".format(e)) |
102 return | 112 return |
103 children = list(div_elt.elements()) | 113 children = list(div_elt.elements()) |
104 if len(children) == 1 and children[0].name == 'div': | 114 if len(children) == 1 and children[0].name == "div": |
105 div_elt = children[0] | 115 div_elt = children[0] |
106 parent.addChild(div_elt) | 116 parent.addChild(div_elt) |
107 | 117 |
108 def parser_escape_char(self, string, parent): | 118 def parser_escape_char(self, string, parent): |
109 parent.addContent(string) | 119 parent.addContent(string) |
111 def _parser_title(self, string, parent, name): | 121 def _parser_title(self, string, parent, name): |
112 elt = parent.addElement(name) | 122 elt = parent.addElement(name) |
113 elt.addContent(string) | 123 elt.addContent(string) |
114 | 124 |
115 def parser_horizontal_rule(self, string, parent): | 125 def parser_horizontal_rule(self, string, parent): |
116 parent.addElement('hr') | 126 parent.addElement("hr") |
117 | 127 |
118 def _parser_list(self, string, parent, list_type): | 128 def _parser_list(self, string, parent, list_type): |
119 depth = 0 | 129 depth = 0 |
120 while string[depth:depth+1] == '*': | 130 while string[depth : depth + 1] == "*": |
121 depth +=1 | 131 depth += 1 |
122 | 132 |
123 string = string[depth:].lstrip() | 133 string = string[depth:].lstrip() |
124 | 134 |
125 for i in xrange(depth+1): | 135 for i in xrange(depth + 1): |
126 list_elt = getattr(parent, list_type) | 136 list_elt = getattr(parent, list_type) |
127 if not list_elt: | 137 if not list_elt: |
128 parent = parent.addElement(list_type) | 138 parent = parent.addElement(list_type) |
129 else: | 139 else: |
130 parent = list_elt | 140 parent = list_elt |
131 | 141 |
132 li_elt = parent.addElement('li') | 142 li_elt = parent.addElement("li") |
133 self._parse(string, li_elt) | 143 self._parse(string, li_elt) |
134 | 144 |
135 def parser_list_bullet(self, string, parent): | 145 def parser_list_bullet(self, string, parent): |
136 self._parser_list(string, parent, 'ul') | 146 self._parser_list(string, parent, "ul") |
137 | 147 |
138 def parser_list_ordered(self, string, parent): | 148 def parser_list_ordered(self, string, parent): |
139 self._parser_list(string, parent, 'ol') | 149 self._parser_list(string, parent, "ol") |
140 | 150 |
141 def parser_preformated(self, string, parent): | 151 def parser_preformated(self, string, parent): |
142 pre_elt = parent.pre | 152 pre_elt = parent.pre |
143 if pre_elt is None: | 153 if pre_elt is None: |
144 pre_elt = parent.addElement('pre') | 154 pre_elt = parent.addElement("pre") |
145 else: | 155 else: |
146 # we are on a new line, and this is important for <pre/> | 156 # we are on a new line, and this is important for <pre/> |
147 pre_elt.addContent('\n') | 157 pre_elt.addContent("\n") |
148 pre_elt.addContent(string) | 158 pre_elt.addContent(string) |
149 | 159 |
150 def parser_quote(self, string, parent): | 160 def parser_quote(self, string, parent): |
151 blockquote_elt = parent.blockquote | 161 blockquote_elt = parent.blockquote |
152 if blockquote_elt is None: | 162 if blockquote_elt is None: |
153 blockquote_elt = parent.addElement('blockquote') | 163 blockquote_elt = parent.addElement("blockquote") |
154 p_elt = blockquote_elt.p | 164 p_elt = blockquote_elt.p |
155 if p_elt is None: | 165 if p_elt is None: |
156 p_elt = blockquote_elt.addElement('p') | 166 p_elt = blockquote_elt.addElement("p") |
157 else: | 167 else: |
158 string = u'\n' + string | 168 string = u"\n" + string |
159 | 169 |
160 self._parse(string, p_elt) | 170 self._parse(string, p_elt) |
161 | 171 |
162 def parser_emphasis(self, string, parent): | 172 def parser_emphasis(self, string, parent): |
163 em_elt = parent.addElement('em') | 173 em_elt = parent.addElement("em") |
164 self._parse(string, em_elt) | 174 self._parse(string, em_elt) |
165 | 175 |
166 def parser_strong_emphasis(self, string, parent): | 176 def parser_strong_emphasis(self, string, parent): |
167 strong_elt = parent.addElement('strong') | 177 strong_elt = parent.addElement("strong") |
168 self._parse(string, strong_elt) | 178 self._parse(string, strong_elt) |
169 | 179 |
170 def parser_line_break(self, string, parent): | 180 def parser_line_break(self, string, parent): |
171 parent.addElement('br') | 181 parent.addElement("br") |
172 | 182 |
173 def parser_insertion(self, string, parent): | 183 def parser_insertion(self, string, parent): |
174 ins_elt = parent.addElement('ins') | 184 ins_elt = parent.addElement("ins") |
175 self._parse(string, ins_elt) | 185 self._parse(string, ins_elt) |
176 | 186 |
177 def parser_deletion(self, string, parent): | 187 def parser_deletion(self, string, parent): |
178 del_elt = parent.addElement('del') | 188 del_elt = parent.addElement("del") |
179 self._parse(string, del_elt) | 189 self._parse(string, del_elt) |
180 | 190 |
181 def parser_link(self, string, parent): | 191 def parser_link(self, string, parent): |
182 url_data = string.split(u'|') | 192 url_data = string.split(u"|") |
183 a_elt = parent.addElement('a') | 193 a_elt = parent.addElement("a") |
184 length = len(url_data) | 194 length = len(url_data) |
185 if length == 1: | 195 if length == 1: |
186 url = url_data[0] | 196 url = url_data[0] |
187 a_elt['href'] = url | 197 a_elt["href"] = url |
188 a_elt.addContent(url) | 198 a_elt.addContent(url) |
189 else: | 199 else: |
190 name = url_data[0] | 200 name = url_data[0] |
191 url = url_data[1] | 201 url = url_data[1] |
192 a_elt['href'] = url | 202 a_elt["href"] = url |
193 a_elt.addContent(name) | 203 a_elt.addContent(name) |
194 if length >= 3: | 204 if length >= 3: |
195 a_elt['lang'] = url_data[2] | 205 a_elt["lang"] = url_data[2] |
196 if length >= 4: | 206 if length >= 4: |
197 a_elt['title'] = url_data[3] | 207 a_elt["title"] = url_data[3] |
198 if length > 4: | 208 if length > 4: |
199 log.warning(u"too much data for url, ignoring extra data") | 209 log.warning(u"too much data for url, ignoring extra data") |
200 | 210 |
201 def parser_image(self, string, parent): | 211 def parser_image(self, string, parent): |
202 image_data = string.split(u'|') | 212 image_data = string.split(u"|") |
203 img_elt = parent.addElement('img') | 213 img_elt = parent.addElement("img") |
204 | 214 |
205 for idx, attribute in enumerate(('src', 'alt', 'position', 'longdesc')): | 215 for idx, attribute in enumerate(("src", "alt", "position", "longdesc")): |
206 try: | 216 try: |
207 data = image_data[idx] | 217 data = image_data[idx] |
208 except IndexError: | 218 except IndexError: |
209 break | 219 break |
210 | 220 |
211 if attribute != 'position': | 221 if attribute != "position": |
212 img_elt[attribute] = data | 222 img_elt[attribute] = data |
213 else: | 223 else: |
214 data = data.lower() | 224 data = data.lower() |
215 if data in ('l', 'g'): | 225 if data in ("l", "g"): |
216 img_elt['style'] = "display:block; float:left; margin:0 1em 1em 0" | 226 img_elt["style"] = "display:block; float:left; margin:0 1em 1em 0" |
217 elif data in ('r', 'd'): | 227 elif data in ("r", "d"): |
218 img_elt['style'] = "display:block; float:right; margin:0 0 1em 1em" | 228 img_elt["style"] = "display:block; float:right; margin:0 0 1em 1em" |
219 elif data == 'c': | 229 elif data == "c": |
220 img_elt['style'] = "display:block; margin-left:auto; margin-right:auto" | 230 img_elt[ |
| 231 "style" |
| 232 ] = "display:block; margin-left:auto; margin-right:auto" |
221 else: | 233 else: |
222 log.warning(u"bad position argument for image, ignoring it") | 234 log.warning(u"bad position argument for image, ignoring it") |
223 | 235 |
224 def parser_anchor(self, string, parent): | 236 def parser_anchor(self, string, parent): |
225 a_elt = parent.addElement('a') | 237 a_elt = parent.addElement("a") |
226 a_elt['id'] = string | 238 a_elt["id"] = string |
227 | 239 |
228 def parser_acronym(self, string, parent): | 240 def parser_acronym(self, string, parent): |
229 acronym, title = string.split(u'|',1) | 241 acronym, title = string.split(u"|", 1) |
230 acronym_elt = parent.addElement('acronym', content=acronym) | 242 acronym_elt = parent.addElement("acronym", content=acronym) |
231 acronym_elt['title'] = title | 243 acronym_elt["title"] = title |
232 | 244 |
233 def parser_inline_quote(self, string, parent): | 245 def parser_inline_quote(self, string, parent): |
234 quote_data = string.split(u'|') | 246 quote_data = string.split(u"|") |
235 quote = quote_data[0] | 247 quote = quote_data[0] |
236 q_elt = parent.addElement('q', content=quote) | 248 q_elt = parent.addElement("q", content=quote) |
237 for idx, attribute in enumerate(('lang', 'cite'), 1): | 249 for idx, attribute in enumerate(("lang", "cite"), 1): |
238 try: | 250 try: |
239 data = quote_data[idx] | 251 data = quote_data[idx] |
240 except IndexError: | 252 except IndexError: |
241 break | 253 break |
242 q_elt[attribute] = data | 254 q_elt[attribute] = data |
243 | 255 |
244 def parser_code(self, string, parent): | 256 def parser_code(self, string, parent): |
245 parent.addElement('code', content=string) | 257 parent.addElement("code", content=string) |
246 | 258 |
247 def parser_footnote(self, string, parent): | 259 def parser_footnote(self, string, parent): |
248 idx = len(self._footnotes) + 1 | 260 idx = len(self._footnotes) + 1 |
249 note_txt = NOTE_TPL.format(idx) | 261 note_txt = NOTE_TPL.format(idx) |
250 sup_elt = parent.addElement('sup') | 262 sup_elt = parent.addElement("sup") |
251 sup_elt['class'] = 'note' | 263 sup_elt["class"] = "note" |
252 a_elt = sup_elt.addElement('a', content=note_txt) | 264 a_elt = sup_elt.addElement("a", content=note_txt) |
253 a_elt['id'] = NOTE_A_REV_TPL.format(idx) | 265 a_elt["id"] = NOTE_A_REV_TPL.format(idx) |
254 a_elt['href'] = u'#{}'.format(NOTE_A_TPL.format(idx)) | 266 a_elt["href"] = u"#{}".format(NOTE_A_TPL.format(idx)) |
255 | 267 |
256 p_elt = domish.Element((None, 'p')) | 268 p_elt = domish.Element((None, "p")) |
257 a_elt = p_elt.addElement('a', content=note_txt) | 269 a_elt = p_elt.addElement("a", content=note_txt) |
258 a_elt['id'] = NOTE_A_TPL.format(idx) | 270 a_elt["id"] = NOTE_A_TPL.format(idx) |
259 a_elt['href'] = u'#{}'.format(NOTE_A_REV_TPL.format(idx)) | 271 a_elt["href"] = u"#{}".format(NOTE_A_REV_TPL.format(idx)) |
260 self._parse(string, p_elt) | 272 self._parse(string, p_elt) |
261 # footnotes are actually added at the end of the parsing | 273 # footnotes are actually added at the end of the parsing |
262 self._footnotes.append(p_elt) | 274 self._footnotes.append(p_elt) |
263 | 275 |
264 def parser_text(self, string, parent): | 276 def parser_text(self, string, parent): |
271 if match.lastgroup is None: | 283 if match.lastgroup is None: |
272 parent.addContent(string) | 284 parent.addContent(string) |
273 return | 285 return |
274 matched = match.group(match.lastgroup) | 286 matched = match.group(match.lastgroup) |
275 try: | 287 try: |
276 parser = getattr(self, 'parser_{}'.format(match.lastgroup)) | 288 parser = getattr(self, "parser_{}".format(match.lastgroup)) |
277 except AttributeError: | 289 except AttributeError: |
278 log.warning(u"No parser found for {}".format(match.lastgroup)) | 290 log.warning(u"No parser found for {}".format(match.lastgroup)) |
279 # parent.addContent(string) | 291 # parent.addContent(string) |
280 continue | 292 continue |
281 parser(matched, parent) | 293 parser(matched, parent) |
282 | 294 |
283 def parse(self, string): | 295 def parse(self, string): |
284 self._footnotes = [] | 296 self._footnotes = [] |
285 div_elt = domish.Element((None, 'div')) | 297 div_elt = domish.Element((None, "div")) |
286 self._parse(string, parent=div_elt, block_level=True) | 298 self._parse(string, parent=div_elt, block_level=True) |
287 if self._footnotes: | 299 if self._footnotes: |
288 foot_div_elt = div_elt.addElement('div') | 300 foot_div_elt = div_elt.addElement("div") |
289 foot_div_elt['class'] = 'footnotes' | 301 foot_div_elt["class"] = "footnotes" |
290 # we add a simple horizontal rule which can be customized | 302 # we add a simple horizontal rule which can be customized |
291 # with footnotes class, instead of a text which would need | 303 # with footnotes class, instead of a text which would need |
292 # to be translated | 304 # to be translated |
293 foot_div_elt.addElement('hr') | 305 foot_div_elt.addElement("hr") |
294 for elt in self._footnotes: | 306 for elt in self._footnotes: |
295 foot_div_elt.addChild(elt) | 307 foot_div_elt.addChild(elt) |
296 return div_elt | 308 return div_elt |
297 | 309 |
298 | 310 |
299 class XHTMLParser(object): | 311 class XHTMLParser(object): |
300 | |
301 def __init__(self): | 312 def __init__(self): |
302 self.flags = None | 313 self.flags = None |
303 self.toto = 0 | 314 self.toto = 0 |
304 self.footnotes = None # will hold a map from url to buffer id | 315 self.footnotes = None # will hold a map from url to buffer id |
305 for i in xrange(1,6): | 316 for i in xrange(1, 6): |
306 setattr(self, | 317 setattr( |
307 'parser_h{}'.format(i), | 318 self, |
308 lambda elt, buf, level=i: self.parserHeading(elt, buf, level) | 319 "parser_h{}".format(i), |
309 ) | 320 lambda elt, buf, level=i: self.parserHeading(elt, buf, level), |
| 321 ) |
310 | 322 |
311 def parser_a(self, elt, buf): | 323 def parser_a(self, elt, buf): |
312 try: | 324 try: |
313 url = elt['href'] | 325 url = elt["href"] |
314 except KeyError: | 326 except KeyError: |
315 # probably an anchor | 327 # probably an anchor |
316 try: | 328 try: |
317 id_ = elt['id'] | 329 id_ = elt["id"] |
318 if not id_: | 330 if not id_: |
319 # we don't want empty values | 331 # we don't want empty values |
320 raise KeyError | 332 raise KeyError |
321 except KeyError: | 333 except KeyError: |
322 self.parserGeneric(elt, buf) | 334 self.parserGeneric(elt, buf) |
323 else: | 335 else: |
324 buf.append(u'~~{}~~'.format(id_)) | 336 buf.append(u"~~{}~~".format(id_)) |
325 return | 337 return |
326 | 338 |
327 link_data = [url] | 339 link_data = [url] |
328 name = unicode(elt) | 340 name = unicode(elt) |
329 if name != url: | 341 if name != url: |
330 link_data.insert(0, name) | 342 link_data.insert(0, name) |
331 | 343 |
332 lang = elt.getAttribute('lang') | 344 lang = elt.getAttribute("lang") |
333 title = elt.getAttribute('title') | 345 title = elt.getAttribute("title") |
334 if lang is not None: | 346 if lang is not None: |
335 link_data.append(lang) | 347 link_data.append(lang) |
336 elif title is not None: | 348 elif title is not None: |
337 link_data.appand(u'') | 349 link_data.appand(u"") |
338 if title is not None: | 350 if title is not None: |
339 link_data.append(title) | 351 link_data.append(title) |
340 buf.append(u'[') | 352 buf.append(u"[") |
341 buf.append(u'|'.join(link_data)) | 353 buf.append(u"|".join(link_data)) |
342 buf.append(u']') | 354 buf.append(u"]") |
343 | 355 |
344 def parser_acronym(self, elt, buf): | 356 def parser_acronym(self, elt, buf): |
345 try: | 357 try: |
346 title = elt['title'] | 358 title = elt["title"] |
347 except KeyError: | 359 except KeyError: |
348 log.debug(u"Acronyme without title, using generic parser") | 360 log.debug(u"Acronyme without title, using generic parser") |
349 self.parserGeneric(elt, buf) | 361 self.parserGeneric(elt, buf) |
350 return | 362 return |
351 buf.append(u'??{}|{}??'.format(unicode(elt), title)) | 363 buf.append(u"??{}|{}??".format(unicode(elt), title)) |
352 | 364 |
353 def parser_blockquote(self, elt, buf): | 365 def parser_blockquote(self, elt, buf): |
354 # we remove wrapping <p> to avoid empty line with "> " | 366 # we remove wrapping <p> to avoid empty line with "> " |
355 children = list([child for child in elt.children if unicode(child).strip() not in ('', '\n')]) | 367 children = list( |
356 if len(children) == 1 and children[0].name == 'p': | 368 [child for child in elt.children if unicode(child).strip() not in ("", "\n")] |
| 369 ) |
| 370 if len(children) == 1 and children[0].name == "p": |
357 elt = children[0] | 371 elt = children[0] |
358 tmp_buf = [] | 372 tmp_buf = [] |
359 self.parseChildren(elt, tmp_buf) | 373 self.parseChildren(elt, tmp_buf) |
360 blockquote = u'> ' + u'\n> '.join(u''.join(tmp_buf).split('\n')) | 374 blockquote = u"> " + u"\n> ".join(u"".join(tmp_buf).split("\n")) |
361 buf.append(blockquote) | 375 buf.append(blockquote) |
362 | 376 |
363 def parser_br(self, elt, buf): | 377 def parser_br(self, elt, buf): |
364 buf.append(u'%%%') | 378 buf.append(u"%%%") |
365 | 379 |
366 def parser_code(self, elt, buf): | 380 def parser_code(self, elt, buf): |
367 buf.append(u'@@') | 381 buf.append(u"@@") |
368 self.parseChildren(elt, buf) | 382 self.parseChildren(elt, buf) |
369 buf.append(u'@@') | 383 buf.append(u"@@") |
370 | 384 |
371 def parser_del(self, elt, buf): | 385 def parser_del(self, elt, buf): |
372 buf.append(u'--') | 386 buf.append(u"--") |
373 self.parseChildren(elt, buf) | 387 self.parseChildren(elt, buf) |
374 buf.append(u'--') | 388 buf.append(u"--") |
375 | 389 |
376 def parser_div(self, elt, buf): | 390 def parser_div(self, elt, buf): |
377 if elt.getAttribute('class') == 'footnotes': | 391 if elt.getAttribute("class") == "footnotes": |
378 self.parserFootnote(elt, buf) | 392 self.parserFootnote(elt, buf) |
379 else: | 393 else: |
380 self.parseChildren(elt, buf, block=True) | 394 self.parseChildren(elt, buf, block=True) |
381 | 395 |
382 def parser_em(self, elt, buf): | 396 def parser_em(self, elt, buf): |
385 buf.append(u"''") | 399 buf.append(u"''") |
386 | 400 |
387 def parser_h6(self, elt, buf): | 401 def parser_h6(self, elt, buf): |
388 # XXX: <h6/> heading is not managed by wiki syntax | 402 # XXX: <h6/> heading is not managed by wiki syntax |
389 # so we handle it with a <h5/> | 403 # so we handle it with a <h5/> |
390 elt = copy.copy(elt) # we don't want to change to original element | 404 elt = copy.copy(elt) # we don't want to change to original element |
391 elt.name = 'h5' | 405 elt.name = "h5" |
392 self._parse(elt, buf) | 406 self._parse(elt, buf) |
393 | 407 |
394 def parser_hr(self, elt, buf): | 408 def parser_hr(self, elt, buf): |
395 buf.append(u'\n----\n') | 409 buf.append(u"\n----\n") |
396 | 410 |
397 def parser_img(self, elt, buf): | 411 def parser_img(self, elt, buf): |
398 try: | 412 try: |
399 url = elt['src'] | 413 url = elt["src"] |
400 except KeyError: | 414 except KeyError: |
401 log.warning(u"Ignoring <img/> without src") | 415 log.warning(u"Ignoring <img/> without src") |
402 return | 416 return |
403 | 417 |
404 image_data=[url] | 418 image_data = [url] |
405 | 419 |
406 alt = elt.getAttribute('alt') | 420 alt = elt.getAttribute("alt") |
407 style = elt.getAttribute('style', '') | 421 style = elt.getAttribute("style", "") |
408 desc = elt.getAttribute('longdesc') | 422 desc = elt.getAttribute("longdesc") |
409 | 423 |
410 if '0 1em 1em 0' in style: | 424 if "0 1em 1em 0" in style: |
411 position = 'L' | 425 position = "L" |
412 elif '0 0 1em 1em' in style: | 426 elif "0 0 1em 1em" in style: |
413 position = 'R' | 427 position = "R" |
414 elif 'auto' in style: | 428 elif "auto" in style: |
415 position = 'C' | 429 position = "C" |
416 else: | 430 else: |
417 position = None | 431 position = None |
418 | 432 |
419 if alt: | 433 if alt: |
420 image_data.append(alt) | 434 image_data.append(alt) |
421 elif position or desc: | 435 elif position or desc: |
422 image_data.append(u'') | 436 image_data.append(u"") |
423 | 437 |
424 if position: | 438 if position: |
425 image_data.append(position) | 439 image_data.append(position) |
426 elif desc: | 440 elif desc: |
427 image_data.append(u'') | 441 image_data.append(u"") |
428 | 442 |
429 if desc: | 443 if desc: |
430 image_data.append(desc) | 444 image_data.append(desc) |
431 | 445 |
432 buf.append(u'((') | 446 buf.append(u"((") |
433 buf.append(u'|'.join(image_data)) | 447 buf.append(u"|".join(image_data)) |
434 buf.append(u'))') | 448 buf.append(u"))") |
435 | 449 |
436 def parser_ins(self, elt, buf): | 450 def parser_ins(self, elt, buf): |
437 buf.append(u'++') | 451 buf.append(u"++") |
438 self.parseChildren(elt, buf) | 452 self.parseChildren(elt, buf) |
439 buf.append(u'++') | 453 buf.append(u"++") |
440 | 454 |
441 def parser_li(self, elt, buf): | 455 def parser_li(self, elt, buf): |
442 flag = None | 456 flag = None |
443 current_flag = None | 457 current_flag = None |
444 bullets = [] | 458 bullets = [] |
445 for flag in reversed(self.flags): | 459 for flag in reversed(self.flags): |
446 if flag in (FLAG_UL, FLAG_OL): | 460 if flag in (FLAG_UL, FLAG_OL): |
447 if current_flag is None: | 461 if current_flag is None: |
448 current_flag = flag | 462 current_flag = flag |
449 if flag == current_flag: | 463 if flag == current_flag: |
450 bullets.append(u'*' if flag == FLAG_UL else u'#') | 464 bullets.append(u"*" if flag == FLAG_UL else u"#") |
451 else: | 465 else: |
452 break | 466 break |
453 | 467 |
454 if flag != current_flag and buf[-1] == u' ': | 468 if flag != current_flag and buf[-1] == u" ": |
455 # this trick is to avoid a space when we switch | 469 # this trick is to avoid a space when we switch |
456 # from (un)ordered to the other type on the same row | 470 # from (un)ordered to the other type on the same row |
457 # e.g. *# unorder + ordered item | 471 # e.g. *# unorder + ordered item |
458 del buf[-1] | 472 del buf[-1] |
459 | 473 |
460 buf.extend(bullets) | 474 buf.extend(bullets) |
461 | 475 |
462 buf.append(u' ') | 476 buf.append(u" ") |
463 self.parseChildren(elt, buf) | 477 self.parseChildren(elt, buf) |
464 buf.append(u'\n') | 478 buf.append(u"\n") |
465 | 479 |
466 def parser_ol(self, elt, buf): | 480 def parser_ol(self, elt, buf): |
467 self.parserList(elt, buf, FLAG_OL) | 481 self.parserList(elt, buf, FLAG_OL) |
468 | 482 |
469 def parser_p(self, elt, buf): | 483 def parser_p(self, elt, buf): |
470 self.parseChildren(elt, buf) | 484 self.parseChildren(elt, buf) |
471 buf.append(u'\n\n') | 485 buf.append(u"\n\n") |
472 | 486 |
473 def parser_pre(self, elt, buf): | 487 def parser_pre(self, elt, buf): |
474 pre = u''.join([child.toXml() if domish.IElement.providedBy(child) else unicode(child) for child in elt.children]) | 488 pre = u"".join( |
475 pre = u' ' + u'\n '.join(pre.split('\n')) | 489 [ |
| 490 child.toXml() if domish.IElement.providedBy(child) else unicode(child) |
| 491 for child in elt.children |
| 492 ] |
| 493 ) |
| 494 pre = u" " + u"\n ".join(pre.split("\n")) |
476 buf.append(pre) | 495 buf.append(pre) |
477 | 496 |
478 def parser_q(self, elt, buf): | 497 def parser_q(self, elt, buf): |
479 quote_data=[unicode(elt)] | 498 quote_data = [unicode(elt)] |
480 | 499 |
481 lang = elt.getAttribute('lang') | 500 lang = elt.getAttribute("lang") |
482 cite = elt.getAttribute('url') | 501 cite = elt.getAttribute("url") |
483 | 502 |
484 if lang: | 503 if lang: |
485 quote_data.append(lang) | 504 quote_data.append(lang) |
486 elif cite: | 505 elif cite: |
487 quote_data.append(u'') | 506 quote_data.append(u"") |
488 | 507 |
489 if cite: | 508 if cite: |
490 quote_data.append(cite) | 509 quote_data.append(cite) |
491 | 510 |
492 buf.append(u'{{') | 511 buf.append(u"{{") |
493 buf.append(u'|'.join(quote_data)) | 512 buf.append(u"|".join(quote_data)) |
494 buf.append(u'}}') | 513 buf.append(u"}}") |
495 | 514 |
496 def parser_span(self, elt, buf): | 515 def parser_span(self, elt, buf): |
497 self.parseChildren(elt, buf, block=True) | 516 self.parseChildren(elt, buf, block=True) |
498 | 517 |
499 def parser_strong(self, elt, buf): | 518 def parser_strong(self, elt, buf): |
500 buf.append(u'__') | 519 buf.append(u"__") |
501 self.parseChildren(elt, buf) | 520 self.parseChildren(elt, buf) |
502 buf.append(u'__') | 521 buf.append(u"__") |
503 | 522 |
504 def parser_sup(self, elt, buf): | 523 def parser_sup(self, elt, buf): |
505 # sup is mainly used for footnotes, so we check if we have an anchor inside | 524 # sup is mainly used for footnotes, so we check if we have an anchor inside |
506 children = list([child for child in elt.children if unicode(child).strip() not in ('', '\n')]) | 525 children = list( |
507 if (len(children) == 1 and domish.IElement.providedBy(children[0]) | 526 [child for child in elt.children if unicode(child).strip() not in ("", "\n")] |
508 and children[0].name == 'a' and '#' in children[0].getAttribute('href', '')): | 527 ) |
509 url = children[0]['href'] | 528 if ( |
510 note_id = url[url.find('#')+1:] | 529 len(children) == 1 |
| 530 and domish.IElement.providedBy(children[0]) |
| 531 and children[0].name == "a" |
| 532 and "#" in children[0].getAttribute("href", "") |
| 533 ): |
| 534 url = children[0]["href"] |
| 535 note_id = url[url.find("#") + 1 :] |
511 if not note_id: | 536 if not note_id: |
512 log.warning("bad link found in footnote") | 537 log.warning("bad link found in footnote") |
513 self.parserGeneric(elt, buf) | 538 self.parserGeneric(elt, buf) |
514 return | 539 return |
515 # this looks like a footnote | 540 # this looks like a footnote |
516 buf.append(u'$$') | 541 buf.append(u"$$") |
517 buf.append(u' ') # placeholder | 542 buf.append(u" ") # placeholder |
518 self.footnotes[note_id] = len(buf) - 1 | 543 self.footnotes[note_id] = len(buf) - 1 |
519 buf.append(u'$$') | 544 buf.append(u"$$") |
520 else: | 545 else: |
521 self.parserGeneric(elt, buf) | 546 self.parserGeneric(elt, buf) |
522 | 547 |
523 def parser_ul(self, elt, buf): | 548 def parser_ul(self, elt, buf): |
524 self.parserList(elt, buf, FLAG_UL) | 549 self.parserList(elt, buf, FLAG_UL) |
535 | 560 |
536 if idx == 0: | 561 if idx == 0: |
537 raise exceptions.InternalError(u"flag has been removed by an other parser") | 562 raise exceptions.InternalError(u"flag has been removed by an other parser") |
538 | 563 |
539 def parserHeading(self, elt, buf, level): | 564 def parserHeading(self, elt, buf, level): |
540 buf.append((6-level) * u'!') | 565 buf.append((6 - level) * u"!") |
541 for child in elt.children: | 566 for child in elt.children: |
542 # we ignore other elements for a Hx title | 567 # we ignore other elements for a Hx title |
543 self.parserText(child, buf) | 568 self.parserText(child, buf) |
544 buf.append(u'\n') | 569 buf.append(u"\n") |
545 | 570 |
546 def parserFootnote(self, elt, buf): | 571 def parserFootnote(self, elt, buf): |
547 for elt in elt.elements(): | 572 for elt in elt.elements(): |
548 # all children other than <p/> are ignored | 573 # all children other than <p/> are ignored |
549 if elt.name == 'p': | 574 if elt.name == "p": |
550 a_elt = elt.a | 575 a_elt = elt.a |
551 if a_elt is None: | 576 if a_elt is None: |
552 log.warning(u"<p/> element doesn't contain <a/> in footnote, ignoring it") | 577 log.warning( |
| 578 u"<p/> element doesn't contain <a/> in footnote, ignoring it" |
| 579 ) |
553 continue | 580 continue |
554 try: | 581 try: |
555 note_idx = self.footnotes[a_elt['id']] | 582 note_idx = self.footnotes[a_elt["id"]] |
556 except KeyError: | 583 except KeyError: |
557 log.warning(u"Note id doesn't match any known note, ignoring it") | 584 log.warning(u"Note id doesn't match any known note, ignoring it") |
558 # we create a dummy element to parse all children after the <a/> | 585 # we create a dummy element to parse all children after the <a/> |
559 dummy_elt = domish.Element((None, 'note')) | 586 dummy_elt = domish.Element((None, "note")) |
560 a_idx = elt.children.index(a_elt) | 587 a_idx = elt.children.index(a_elt) |
561 dummy_elt.children = elt.children[a_idx+1:] | 588 dummy_elt.children = elt.children[a_idx + 1 :] |
562 note_buf = [] | 589 note_buf = [] |
563 self.parseChildren(dummy_elt, note_buf) | 590 self.parseChildren(dummy_elt, note_buf) |
564 # now we can replace the placeholder | 591 # now we can replace the placeholder |
565 buf[note_idx] = u''.join(note_buf) | 592 buf[note_idx] = u"".join(note_buf) |
566 | 593 |
567 def parserText(self, txt, buf, keep_whitespaces=False): | 594 def parserText(self, txt, buf, keep_whitespaces=False): |
568 txt = unicode(txt) | 595 txt = unicode(txt) |
569 if not keep_whitespaces: | 596 if not keep_whitespaces: |
570 # we get text and only let one inter word space | 597 # we get text and only let one inter word space |
571 txt = u' '.join(txt.split()) | 598 txt = u" ".join(txt.split()) |
572 txt = re.sub(ESCAPE_CHARS, r'\\\1', txt) | 599 txt = re.sub(ESCAPE_CHARS, r"\\\1", txt) |
573 if txt: | 600 if txt: |
574 buf.append(txt) | 601 buf.append(txt) |
575 return txt | 602 return txt |
576 | 603 |
577 def parserGeneric(self, elt, buf): | 604 def parserGeneric(self, elt, buf): |
580 buf.append(u"\n\n///html\n{}\n///\n\n".format(elt.toXml())) | 607 buf.append(u"\n\n///html\n{}\n///\n\n".format(elt.toXml())) |
581 | 608 |
582 def parseChildren(self, elt, buf, block=False): | 609 def parseChildren(self, elt, buf, block=False): |
583 first_visible = True | 610 first_visible = True |
584 for child in elt.children: | 611 for child in elt.children: |
585 if not block and not first_visible and buf and buf[-1][-1] not in (' ','\n'): | 612 if not block and not first_visible and buf and buf[-1][-1] not in (" ", "\n"): |
586 # we add separation if it isn't already there | 613 # we add separation if it isn't already there |
587 buf.append(u' ') | 614 buf.append(u" ") |
588 if domish.IElement.providedBy(child): | 615 if domish.IElement.providedBy(child): |
589 self._parse(child, buf) | 616 self._parse(child, buf) |
590 first_visible = False | 617 first_visible = False |
591 else: | 618 else: |
592 appended = self.parserText(child, buf) | 619 appended = self.parserText(child, buf) |
593 if appended: | 620 if appended: |
594 first_visible = False | 621 first_visible = False |
595 | 622 |
596 def _parse(self, elt, buf): | 623 def _parse(self, elt, buf): |
597 elt_name = elt.name.lower() | 624 elt_name = elt.name.lower() |
598 style = elt.getAttribute('style') | 625 style = elt.getAttribute("style") |
599 if style and elt_name not in ELT_WITH_STYLE: | 626 if style and elt_name not in ELT_WITH_STYLE: |
600 # if we have style we use generic parser to put raw HTML | 627 # if we have style we use generic parser to put raw HTML |
601 # to avoid losing it | 628 # to avoid losing it |
602 parser = self.parserGeneric | 629 parser = self.parserGeneric |
603 else: | 630 else: |
604 try: | 631 try: |
605 parser = getattr(self, "parser_{}".format(elt_name)) | 632 parser = getattr(self, "parser_{}".format(elt_name)) |
606 except AttributeError: | 633 except AttributeError: |
607 log.debug("Can't find parser for {} element, using generic one".format(elt.name)) | 634 log.debug( |
| 635 "Can't find parser for {} element, using generic one".format(elt.name) |
| 636 ) |
608 parser = self.parserGeneric | 637 parser = self.parserGeneric |
609 parser(elt, buf) | 638 parser(elt, buf) |
610 | 639 |
611 def parse(self, elt): | 640 def parse(self, elt): |
612 self.flags = [] | 641 self.flags = [] |
613 self.footnotes = {} | 642 self.footnotes = {} |
614 buf = [] | 643 buf = [] |
615 self._parse(elt, buf) | 644 self._parse(elt, buf) |
616 return u''.join(buf) | 645 return u"".join(buf) |
617 | 646 |
618 def parseString(self, string): | 647 def parseString(self, string): |
619 wrapped_html = u"<div>{}</div>".format(string) | 648 wrapped_html = u"<div>{}</div>".format(string) |
620 try: | 649 try: |
621 div_elt = xml_tools.ElementParser()(wrapped_html) | 650 div_elt = xml_tools.ElementParser()(wrapped_html) |
622 except domish.ParserError as e: | 651 except domish.ParserError as e: |
623 log.warning(u"Error while parsing HTML content: {}".format(e)) | 652 log.warning(u"Error while parsing HTML content: {}".format(e)) |
624 return | 653 return |
625 children = list(div_elt.elements()) | 654 children = list(div_elt.elements()) |
626 if len(children) == 1 and children[0].name == 'div': | 655 if len(children) == 1 and children[0].name == "div": |
627 div_elt = children[0] | 656 div_elt = children[0] |
628 return self.parse(div_elt) | 657 return self.parse(div_elt) |
629 | 658 |
630 | 659 |
631 class DCWikiSyntax(object): | 660 class DCWikiSyntax(object): |
635 log.info(_(u"Dotclear wiki syntax plugin initialization")) | 664 log.info(_(u"Dotclear wiki syntax plugin initialization")) |
636 self.host = host | 665 self.host = host |
637 self._dc_parser = DCWikiParser() | 666 self._dc_parser = DCWikiParser() |
638 self._xhtml_parser = XHTMLParser() | 667 self._xhtml_parser = XHTMLParser() |
639 self._stx = self.host.plugins["TEXT-SYNTAXES"] | 668 self._stx = self.host.plugins["TEXT-SYNTAXES"] |
640 self._stx.addSyntax(self.SYNTAX_NAME, self.parseWiki, self.parseXHTML, [self._stx.OPT_NO_THREAD]) | 669 self._stx.addSyntax( |
| 670 self.SYNTAX_NAME, self.parseWiki, self.parseXHTML, [self._stx.OPT_NO_THREAD] |
| 671 ) |
641 | 672 |
642 def parseWiki(self, wiki_stx): | 673 def parseWiki(self, wiki_stx): |
643 div_elt = self._dc_parser.parse(wiki_stx) | 674 div_elt = self._dc_parser.parse(wiki_stx) |
644 return div_elt.toXml() | 675 return div_elt.toXml() |
645 | 676 |
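
For orientation, a hypothetical standalone usage sketch of the two converter classes touched by this reformatting. In SàT they are normally reached through the TEXT-SYNTAXES registry set up in DCWikiSyntax rather than instantiated directly; the import path and an installed backend (plus Twisted) are assumptions.

```python
# -*- coding: utf-8 -*-
# Hypothetical, standalone illustration (Python 2, like the module itself).
# Assumes the SàT backend and Twisted are importable and that the plugin
# module can be imported under this path; in practice it is loaded by the
# backend and used through the TEXT-SYNTAXES registry (see DCWikiSyntax).
from sat.plugins.plugin_syntax_wiki_dotclear import DCWikiParser, XHTMLParser

wiki_text = u"!!!!!Title\n\nSome __strong__ text and a [link|http://example.org]"

# Dotclear wiki -> XHTML: parse() returns a domish <div/> element tree.
div_elt = DCWikiParser().parse(wiki_text)
print(div_elt.toXml())

# XHTML -> Dotclear wiki: parseString() takes markup and returns wiki text.
print(XHTMLParser().parseString(div_elt.toXml()))
```
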