Mercurial > libervia-backend
comparison sat/plugins/plugin_blog_import_dokuwiki.py @ 4037:524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Historically, Libervia (formerly SàT) used camelCase, as allowed by PEP8 for
pre-PEP8 code, in order to use the same coding style as Twisted.
However, snake_case is more readable and it's better to follow PEP8 best practices, so it
has been decided to move to full snake_case. Because Libervia has a huge codebase, this
ended up as an ugly mix of camelCase and snake_case.
To fix that, this patch does a big refactoring by renaming every function and method
(including bridge ones) that does not come from Twisted or Wokkel, so that they fully use snake_case.
This is a massive change, and it may result in some bugs.
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 08 Apr 2023 13:54:42 +0200 |
parents | be6d91572633 |
children |
comparison
equal
deleted
inserted
replaced
4036:c4464d7ae97b | 4037:524856bd7b19 |
---|---|
121 self.media_repo = media_repo | 121 self.media_repo = media_repo |
122 self.temp_dir = tempfile.mkdtemp() if self.media_repo else None | 122 self.temp_dir = tempfile.mkdtemp() if self.media_repo else None |
123 self.limit = limit | 123 self.limit = limit |
124 self.posts_data = OrderedDict() | 124 self.posts_data = OrderedDict() |
125 | 125 |
126 def getPostId(self, post): | 126 def get_post_id(self, post): |
127 """Return a unique and constant post id | 127 """Return a unique and constant post id |
128 | 128 |
129 @param post(dict): parsed post data | 129 @param post(dict): parsed post data |
130 @return (unicode): post unique item id | 130 @return (unicode): post unique item id |
131 """ | 131 """ |
132 return str(post["id"]) | 132 return str(post["id"]) |
133 | 133 |
134 def getPostUpdated(self, post): | 134 def get_post_updated(self, post): |
135 """Return the update date. | 135 """Return the update date. |
136 | 136 |
137 @param post(dict): parsed post data | 137 @param post(dict): parsed post data |
138 @return (unicode): update date | 138 @return (unicode): update date |
139 """ | 139 """ |
140 return str(post["mtime"]) | 140 return str(post["mtime"]) |
141 | 141 |
142 def getPostPublished(self, post): | 142 def get_post_published(self, post): |
143 """Try to parse the date from the message ID, else use "mtime". | 143 """Try to parse the date from the message ID, else use "mtime". |
144 | 144 |
145 The date can be extracted if the message ID looks like one of: | 145 The date can be extracted if the message ID looks like one of: |
146 - namespace:YYMMDD_short_title | 146 - namespace:YYMMDD_short_title |
147 - namespace:YYYYMMDD_short_title | 147 - namespace:YYYYMMDD_short_title |
160 time_struct = time.strptime(date, "%Y%m%d") | 160 time_struct = time.strptime(date, "%Y%m%d") |
161 except ValueError: | 161 except ValueError: |
162 return default | 162 return default |
163 return str(calendar.timegm(time_struct)) | 163 return str(calendar.timegm(time_struct)) |
164 | 164 |
165 def processPost(self, post, profile_jid): | 165 def process_post(self, post, profile_jid): |
166 """Process a single page. | 166 """Process a single page. |
167 | 167 |
168 @param post (dict): parsed post data | 168 @param post (dict): parsed post data |
169 @param profile_jid | 169 @param profile_jid |
170 """ | 170 """ |
171 # get main information | 171 # get main information |
172 id_ = self.getPostId(post) | 172 id_ = self.get_post_id(post) |
173 updated = self.getPostUpdated(post) | 173 updated = self.get_post_updated(post) |
174 published = self.getPostPublished(post) | 174 published = self.get_post_published(post) |
175 | 175 |
176 # manage links | 176 # manage links |
177 backlinks = self.pages.backlinks(id_) | 177 backlinks = self.pages.backlinks(id_) |
178 for link in self.pages.links(id_): | 178 for link in self.pages.links(id_): |
179 if link["type"] != "extern": | 179 if link["type"] != "extern": |
180 assert link["type"] == "local" | 180 assert link["type"] == "local" |
181 page = link["page"] | 181 page = link["page"] |
182 backlinks.append(page[1:] if page.startswith(":") else page) | 182 backlinks.append(page[1:] if page.startswith(":") else page) |
183 | 183 |
184 self.pages.get(id_) | 184 self.pages.get(id_) |
185 content_xhtml = self.processContent(self.pages.html(id_), backlinks, profile_jid) | 185 content_xhtml = self.process_content(self.pages.html(id_), backlinks, profile_jid) |
186 | 186 |
187 # XXX: title is already in content_xhtml and difficult to remove, so leave it | 187 # XXX: title is already in content_xhtml and difficult to remove, so leave it |
188 # title = content.split("\n")[0].strip(u"\ufeff= ") | 188 # title = content.split("\n")[0].strip(u"\ufeff= ") |
189 | 189 |
190 # build the extra data dictionary | 190 # build the extra data dictionary |
228 pages_list = [page for page in pages_list if page["id"] == namespace] | 228 pages_list = [page for page in pages_list if page["id"] == namespace] |
229 namespace = real_namespace | 229 namespace = real_namespace |
230 | 230 |
231 count = 0 | 231 count = 0 |
232 for page in pages_list: | 232 for page in pages_list: |
233 self.processPost(page, profile_jid) | 233 self.process_post(page, profile_jid) |
234 count += 1 | 234 count += 1 |
235 if count >= self.limit: | 235 if count >= self.limit: |
236 break | 236 break |
237 | 237 |
238 return (iter(self.posts_data.values()), len(self.posts_data)) | 238 return (iter(self.posts_data.values()), len(self.posts_data)) |
239 | 239 |
240 def processContent(self, text, backlinks, profile_jid): | 240 def process_content(self, text, backlinks, profile_jid): |
241 """Do text substitutions and file copy. | 241 """Do text substitutions and file copy. |
242 | 242 |
243 @param text (unicode): message content | 243 @param text (unicode): message content |
244 @param backlinks (list[unicode]): list of backlinks | 244 @param backlinks (list[unicode]): list of backlinks |
245 """ | 245 """ |
257 type_, attr, link = tag.group(1), tag.group(2), tag.group(3) | 257 type_, attr, link = tag.group(1), tag.group(2), tag.group(3) |
258 assert (type_ == "img" and attr == "src") or (type_ == "a" and attr == "href") | 258 assert (type_ == "img" and attr == "src") or (type_ == "a" and attr == "href") |
259 if re.match(r"^\w*://", link): # absolute URL to link directly | 259 if re.match(r"^\w*://", link): # absolute URL to link directly |
260 continue | 260 continue |
261 if self.media_repo: | 261 if self.media_repo: |
262 self.moveMedia(link, subs) | 262 self.move_media(link, subs) |
263 elif link not in subs: | 263 elif link not in subs: |
264 subs[link] = urllib.parse.urljoin(self.url, link) | 264 subs[link] = urllib.parse.urljoin(self.url, link) |
265 | 265 |
266 for url, new_url in subs.items(): | 266 for url, new_url in subs.items(): |
267 text = text.replace(url, new_url) | 267 text = text.replace(url, new_url) |
268 return text | 268 return text |
269 | 269 |
270 def moveMedia(self, link, subs): | 270 def move_media(self, link, subs): |
271 """Move a media from the DokuWiki host to the new repository. | 271 """Move a media from the DokuWiki host to the new repository. |
272 | 272 |
273 This also updates the hyperlinks to internal media files. | 273 This also updates the hyperlinks to internal media files. |
274 @param link (unicode): media link | 274 @param link (unicode): media link |
275 @param subs (dict): substitutions data | 275 @param subs (dict): substitutions data |
302 filename = link[13:] | 302 filename = link[13:] |
303 else: # fake alert... there's no media (or we don't handle it yet) | 303 else: # fake alert... there's no media (or we don't handle it yet) |
304 return | 304 return |
305 | 305 |
306 filepath = os.path.join(self.temp_dir, filename) | 306 filepath = os.path.join(self.temp_dir, filename) |
307 self.downloadMedia(url, filepath) | 307 self.download_media(url, filepath) |
308 | 308 |
309 if thumb_width: | 309 if thumb_width: |
310 filename = os.path.join("thumbs", thumb_width, filename) | 310 filename = os.path.join("thumbs", thumb_width, filename) |
311 thumbnail = os.path.join(self.temp_dir, filename) | 311 thumbnail = os.path.join(self.temp_dir, filename) |
312 self.createThumbnail(filepath, thumbnail, thumb_width) | 312 self.create_thumbnail(filepath, thumbnail, thumb_width) |
313 | 313 |
314 new_url = os.path.join(self.media_repo, filename) | 314 new_url = os.path.join(self.media_repo, filename) |
315 subs[link] = new_url | 315 subs[link] = new_url |
316 | 316 |
317 def downloadMedia(self, source, dest): | 317 def download_media(self, source, dest): |
318 """Copy media to localhost. | 318 """Copy media to localhost. |
319 | 319 |
320 @param source (unicode): source url | 320 @param source (unicode): source url |
321 @param dest (unicode): target path | 321 @param dest (unicode): target path |
322 """ | 322 """ |
325 if not os.path.exists(dirname): | 325 if not os.path.exists(dirname): |
326 os.makedirs(dirname) | 326 os.makedirs(dirname) |
327 urllib.request.urlretrieve(source, dest) | 327 urllib.request.urlretrieve(source, dest) |
328 log.debug("DokuWiki media file copied to %s" % dest) | 328 log.debug("DokuWiki media file copied to %s" % dest) |
329 | 329 |
330 def createThumbnail(self, source, dest, width): | 330 def create_thumbnail(self, source, dest, width): |
331 """Create a thumbnail. | 331 """Create a thumbnail. |
332 | 332 |
333 @param source (unicode): source file path | 333 @param source (unicode): source file path |
334 @param dest (unicode): destination file path | 334 @param dest (unicode): destination file path |
335 @param width (unicode): thumbnail's width | 335 @param width (unicode): thumbnail's width |
346 log.error("Cannot create DokuWiki media thumbnail %s" % dest) | 346 log.error("Cannot create DokuWiki media thumbnail %s" % dest) |
347 | 347 |
348 | 348 |
349 class DokuwikiImport(object): | 349 class DokuwikiImport(object): |
350 def __init__(self, host): | 350 def __init__(self, host): |
351 log.info(_("plugin Dokuwiki Import initialization")) | 351 log.info(_("plugin Dokuwiki import initialization")) |
352 self.host = host | 352 self.host = host |
353 self._blog_import = host.plugins["BLOG_IMPORT"] | 353 self._blog_import = host.plugins["BLOG_IMPORT"] |
354 self._blog_import.register("dokuwiki", self.DkImport, SHORT_DESC, LONG_DESC) | 354 self._blog_import.register("dokuwiki", self.dk_import, SHORT_DESC, LONG_DESC) |
355 | 355 |
356 def DkImport(self, client, location, options=None): | 356 def dk_import(self, client, location, options=None): |
357 """Import from DokuWiki to PubSub | 357 """import from DokuWiki to PubSub |
358 | 358 |
359 @param location (unicode): DokuWiki site URL | 359 @param location (unicode): DokuWiki site URL |
360 @param options (dict, None): DokuWiki import parameters | 360 @param options (dict, None): DokuWiki import parameters |
361 - user (unicode): DokuWiki admin user | 361 - user (unicode): DokuWiki admin user |
362 - passwd (unicode): DokuWiki admin password | 362 - passwd (unicode): DokuWiki admin password |
405 | 405 |
406 dk_importer = Importer(location, user, passwd, media_repo, limit) | 406 dk_importer = Importer(location, user, passwd, media_repo, limit) |
407 info_msg = info_msg.format( | 407 info_msg = info_msg.format( |
408 temp_dir=dk_importer.temp_dir, media_repo=media_repo, location=location | 408 temp_dir=dk_importer.temp_dir, media_repo=media_repo, location=location |
409 ) | 409 ) |
410 self.host.actionNew( | 410 self.host.action_new( |
411 {"xmlui": xml_tools.note(info_msg).toXml()}, profile=client.profile | 411 {"xmlui": xml_tools.note(info_msg).toXml()}, profile=client.profile |
412 ) | 412 ) |
413 d = threads.deferToThread(dk_importer.process, client, namespace) | 413 d = threads.deferToThread(dk_importer.process, client, namespace) |
414 return d | 414 return d |