comparison sat/plugins/plugin_blog_import_dokuwiki.py @ 4037:524856bd7b19

massive refactoring to switch from camelCase to snake_case: historically, Libervia (SàT before) was using camelCase, as allowed by PEP8 for pre-PEP8 code, to use the same coding style as Twisted. However, snake_case is more readable and it's better to follow PEP8 best practices, so it has been decided to move to full snake_case. Because Libervia has a huge codebase, this ended up as an ugly mix of camelCase and snake_case. To fix that, this patch does a big refactoring by renaming every function and method (including bridge) that does not come from Twisted or Wokkel, so that they fully use snake_case. This is a massive change, and may result in some bugs.
author Goffi <goffi@goffi.org>
date Sat, 08 Apr 2023 13:54:42 +0200
parents be6d91572633
children
comparison
equal deleted inserted replaced
4036:c4464d7ae97b 4037:524856bd7b19
121 self.media_repo = media_repo 121 self.media_repo = media_repo
122 self.temp_dir = tempfile.mkdtemp() if self.media_repo else None 122 self.temp_dir = tempfile.mkdtemp() if self.media_repo else None
123 self.limit = limit 123 self.limit = limit
124 self.posts_data = OrderedDict() 124 self.posts_data = OrderedDict()
125 125
126 def getPostId(self, post): 126 def get_post_id(self, post):
127 """Return a unique and constant post id 127 """Return a unique and constant post id
128 128
129 @param post(dict): parsed post data 129 @param post(dict): parsed post data
130 @return (unicode): post unique item id 130 @return (unicode): post unique item id
131 """ 131 """
132 return str(post["id"]) 132 return str(post["id"])
133 133
134 def getPostUpdated(self, post): 134 def get_post_updated(self, post):
135 """Return the update date. 135 """Return the update date.
136 136
137 @param post(dict): parsed post data 137 @param post(dict): parsed post data
138 @return (unicode): update date 138 @return (unicode): update date
139 """ 139 """
140 return str(post["mtime"]) 140 return str(post["mtime"])
141 141
142 def getPostPublished(self, post): 142 def get_post_published(self, post):
143 """Try to parse the date from the message ID, else use "mtime". 143 """Try to parse the date from the message ID, else use "mtime".
144 144
145 The date can be extracted if the message ID looks like one of: 145 The date can be extracted if the message ID looks like one of:
146 - namespace:YYMMDD_short_title 146 - namespace:YYMMDD_short_title
147 - namespace:YYYYMMDD_short_title 147 - namespace:YYYYMMDD_short_title
160 time_struct = time.strptime(date, "%Y%m%d") 160 time_struct = time.strptime(date, "%Y%m%d")
161 except ValueError: 161 except ValueError:
162 return default 162 return default
163 return str(calendar.timegm(time_struct)) 163 return str(calendar.timegm(time_struct))
164 164
165 def processPost(self, post, profile_jid): 165 def process_post(self, post, profile_jid):
166 """Process a single page. 166 """Process a single page.
167 167
168 @param post (dict): parsed post data 168 @param post (dict): parsed post data
169 @param profile_jid 169 @param profile_jid
170 """ 170 """
171 # get main information 171 # get main information
172 id_ = self.getPostId(post) 172 id_ = self.get_post_id(post)
173 updated = self.getPostUpdated(post) 173 updated = self.get_post_updated(post)
174 published = self.getPostPublished(post) 174 published = self.get_post_published(post)
175 175
176 # manage links 176 # manage links
177 backlinks = self.pages.backlinks(id_) 177 backlinks = self.pages.backlinks(id_)
178 for link in self.pages.links(id_): 178 for link in self.pages.links(id_):
179 if link["type"] != "extern": 179 if link["type"] != "extern":
180 assert link["type"] == "local" 180 assert link["type"] == "local"
181 page = link["page"] 181 page = link["page"]
182 backlinks.append(page[1:] if page.startswith(":") else page) 182 backlinks.append(page[1:] if page.startswith(":") else page)
183 183
184 self.pages.get(id_) 184 self.pages.get(id_)
185 content_xhtml = self.processContent(self.pages.html(id_), backlinks, profile_jid) 185 content_xhtml = self.process_content(self.pages.html(id_), backlinks, profile_jid)
186 186
187 # XXX: title is already in content_xhtml and difficult to remove, so leave it 187 # XXX: title is already in content_xhtml and difficult to remove, so leave it
188 # title = content.split("\n")[0].strip(u"\ufeff= ") 188 # title = content.split("\n")[0].strip(u"\ufeff= ")
189 189
190 # build the extra data dictionary 190 # build the extra data dictionary
228 pages_list = [page for page in pages_list if page["id"] == namespace] 228 pages_list = [page for page in pages_list if page["id"] == namespace]
229 namespace = real_namespace 229 namespace = real_namespace
230 230
231 count = 0 231 count = 0
232 for page in pages_list: 232 for page in pages_list:
233 self.processPost(page, profile_jid) 233 self.process_post(page, profile_jid)
234 count += 1 234 count += 1
235 if count >= self.limit: 235 if count >= self.limit:
236 break 236 break
237 237
238 return (iter(self.posts_data.values()), len(self.posts_data)) 238 return (iter(self.posts_data.values()), len(self.posts_data))
239 239
240 def processContent(self, text, backlinks, profile_jid): 240 def process_content(self, text, backlinks, profile_jid):
241 """Do text substitutions and file copy. 241 """Do text substitutions and file copy.
242 242
243 @param text (unicode): message content 243 @param text (unicode): message content
244 @param backlinks (list[unicode]): list of backlinks 244 @param backlinks (list[unicode]): list of backlinks
245 """ 245 """
257 type_, attr, link = tag.group(1), tag.group(2), tag.group(3) 257 type_, attr, link = tag.group(1), tag.group(2), tag.group(3)
258 assert (type_ == "img" and attr == "src") or (type_ == "a" and attr == "href") 258 assert (type_ == "img" and attr == "src") or (type_ == "a" and attr == "href")
259 if re.match(r"^\w*://", link): # absolute URL to link directly 259 if re.match(r"^\w*://", link): # absolute URL to link directly
260 continue 260 continue
261 if self.media_repo: 261 if self.media_repo:
262 self.moveMedia(link, subs) 262 self.move_media(link, subs)
263 elif link not in subs: 263 elif link not in subs:
264 subs[link] = urllib.parse.urljoin(self.url, link) 264 subs[link] = urllib.parse.urljoin(self.url, link)
265 265
266 for url, new_url in subs.items(): 266 for url, new_url in subs.items():
267 text = text.replace(url, new_url) 267 text = text.replace(url, new_url)
268 return text 268 return text
269 269
270 def moveMedia(self, link, subs): 270 def move_media(self, link, subs):
271 """Move a media from the DokuWiki host to the new repository. 271 """Move a media from the DokuWiki host to the new repository.
272 272
273 This also updates the hyperlinks to internal media files. 273 This also updates the hyperlinks to internal media files.
274 @param link (unicode): media link 274 @param link (unicode): media link
275 @param subs (dict): substitutions data 275 @param subs (dict): substitutions data
302 filename = link[13:] 302 filename = link[13:]
303 else: # fake alert... there's no media (or we don't handle it yet) 303 else: # fake alert... there's no media (or we don't handle it yet)
304 return 304 return
305 305
306 filepath = os.path.join(self.temp_dir, filename) 306 filepath = os.path.join(self.temp_dir, filename)
307 self.downloadMedia(url, filepath) 307 self.download_media(url, filepath)
308 308
309 if thumb_width: 309 if thumb_width:
310 filename = os.path.join("thumbs", thumb_width, filename) 310 filename = os.path.join("thumbs", thumb_width, filename)
311 thumbnail = os.path.join(self.temp_dir, filename) 311 thumbnail = os.path.join(self.temp_dir, filename)
312 self.createThumbnail(filepath, thumbnail, thumb_width) 312 self.create_thumbnail(filepath, thumbnail, thumb_width)
313 313
314 new_url = os.path.join(self.media_repo, filename) 314 new_url = os.path.join(self.media_repo, filename)
315 subs[link] = new_url 315 subs[link] = new_url
316 316
317 def downloadMedia(self, source, dest): 317 def download_media(self, source, dest):
318 """Copy media to localhost. 318 """Copy media to localhost.
319 319
320 @param source (unicode): source url 320 @param source (unicode): source url
321 @param dest (unicode): target path 321 @param dest (unicode): target path
322 """ 322 """
325 if not os.path.exists(dirname): 325 if not os.path.exists(dirname):
326 os.makedirs(dirname) 326 os.makedirs(dirname)
327 urllib.request.urlretrieve(source, dest) 327 urllib.request.urlretrieve(source, dest)
328 log.debug("DokuWiki media file copied to %s" % dest) 328 log.debug("DokuWiki media file copied to %s" % dest)
329 329
330 def createThumbnail(self, source, dest, width): 330 def create_thumbnail(self, source, dest, width):
331 """Create a thumbnail. 331 """Create a thumbnail.
332 332
333 @param source (unicode): source file path 333 @param source (unicode): source file path
334 @param dest (unicode): destination file path 334 @param dest (unicode): destination file path
335 @param width (unicode): thumbnail's width 335 @param width (unicode): thumbnail's width
346 log.error("Cannot create DokuWiki media thumbnail %s" % dest) 346 log.error("Cannot create DokuWiki media thumbnail %s" % dest)
347 347
348 348
349 class DokuwikiImport(object): 349 class DokuwikiImport(object):
350 def __init__(self, host): 350 def __init__(self, host):
351 log.info(_("plugin Dokuwiki Import initialization")) 351 log.info(_("plugin Dokuwiki import initialization"))
352 self.host = host 352 self.host = host
353 self._blog_import = host.plugins["BLOG_IMPORT"] 353 self._blog_import = host.plugins["BLOG_IMPORT"]
354 self._blog_import.register("dokuwiki", self.DkImport, SHORT_DESC, LONG_DESC) 354 self._blog_import.register("dokuwiki", self.dk_import, SHORT_DESC, LONG_DESC)
355 355
356 def DkImport(self, client, location, options=None): 356 def dk_import(self, client, location, options=None):
357 """Import from DokuWiki to PubSub 357 """import from DokuWiki to PubSub
358 358
359 @param location (unicode): DokuWiki site URL 359 @param location (unicode): DokuWiki site URL
360 @param options (dict, None): DokuWiki import parameters 360 @param options (dict, None): DokuWiki import parameters
361 - user (unicode): DokuWiki admin user 361 - user (unicode): DokuWiki admin user
362 - passwd (unicode): DokuWiki admin password 362 - passwd (unicode): DokuWiki admin password
405 405
406 dk_importer = Importer(location, user, passwd, media_repo, limit) 406 dk_importer = Importer(location, user, passwd, media_repo, limit)
407 info_msg = info_msg.format( 407 info_msg = info_msg.format(
408 temp_dir=dk_importer.temp_dir, media_repo=media_repo, location=location 408 temp_dir=dk_importer.temp_dir, media_repo=media_repo, location=location
409 ) 409 )
410 self.host.actionNew( 410 self.host.action_new(
411 {"xmlui": xml_tools.note(info_msg).toXml()}, profile=client.profile 411 {"xmlui": xml_tools.note(info_msg).toXml()}, profile=client.profile
412 ) 412 )
413 d = threads.deferToThread(dk_importer.process, client, namespace) 413 d = threads.deferToThread(dk_importer.process, client, namespace)
414 return d 414 return d