comparison src/plugins/plugin_blog_import_dokuwiki.py @ 1843:a51355982f11

plugin blog_import_dokuwiki: fixes wrong URL when a substitution occurs twice
author souliane <souliane@mailoo.org>
date Thu, 04 Feb 2016 17:37:33 +0100
parents 9fd517248dc8
children 489b968b3723
comparison
equal deleted inserted replaced
1842:9fd517248dc8 1843:a51355982f11
245 for backlink in backlinks: 245 for backlink in backlinks:
246 src = '/doku.php?id=%s"' % backlink 246 src = '/doku.php?id=%s"' % backlink
247 tgt = '/blog/%s/%s" target="#"' % (profile_jid.user, backlink) 247 tgt = '/blog/%s/%s" target="#"' % (profile_jid.user, backlink)
248 text = text.replace(src, tgt) 248 text = text.replace(src, tgt)
249 249
250 subs = [] 250 subs = {}
251 251
252 link_pattern = r"""<(img|a)[^>]* (src|href)="([^"]+)"[^>]*>""" 252 link_pattern = r"""<(img|a)[^>]* (src|href)="([^"]+)"[^>]*>"""
253 for tag in re.finditer(link_pattern, text): 253 for tag in re.finditer(link_pattern, text):
254 type_, attr, link = tag.group(1), tag.group(2), tag.group(3) 254 type_, attr, link = tag.group(1), tag.group(2), tag.group(3)
255 assert (type_ == "img" and attr == "src") or (type_ == "a" and attr == "href") 255 assert (type_ == "img" and attr == "src") or (type_ == "a" and attr == "href")
256 if re.match(r"^\w*://", link): # absolute URL to link directly 256 if re.match(r"^\w*://", link): # absolute URL to link directly
257 continue 257 continue
258 if self.media_repo: 258 if self.media_repo:
259 self.moveMedia(link, subs) 259 self.moveMedia(link, subs)
260 else: 260 elif link not in subs:
261 subs.append((link, urlparse.urljoin(self.url, link))) 261 subs[link] = urlparse.urljoin(self.url, link)
262 262
263 for url, new_url in subs: 263 for url, new_url in subs.iteritems():
264 text = text.replace(url, new_url) 264 text = text.replace(url, new_url)
265 return text 265 return text
266 266
267 def moveMedia(self, link, subs): 267 def moveMedia(self, link, subs):
268 """Move a media from the DokuWiki host to the new repository. 268 """Move a media from the DokuWiki host to the new repository.
269 269
270 @param link (unicode): media link 270 @param link (unicode): media link
271 @param subs (set(couple)): substitutions list 271 @param subs (dict): substitutions data
272 """ 272 """
273 url = urlparse.urljoin(self.url, link) 273 url = urlparse.urljoin(self.url, link)
274 user_media = re.match(r"(/lib/exe/\w+.php\?)(.*)", link) 274 user_media = re.match(r"(/lib/exe/\w+.php\?)(.*)", link)
275 thumb_width = None 275 thumb_width = None
276 276
280 media = params["media"][0] 280 media = params["media"][0]
281 except KeyError: 281 except KeyError:
282 log.warning("No media found in fetch URL: %s" % user_media.group(2)) 282 log.warning("No media found in fetch URL: %s" % user_media.group(2))
283 return 283 return
284 if re.match(r"^\w*://", media): # external URL to link directly 284 if re.match(r"^\w*://", media): # external URL to link directly
285 subs.append((link, media)) 285 subs[link] = media
286 return 286 return
287 try: # create thumbnail 287 try: # create thumbnail
288 thumb_width = params["w"][0] 288 thumb_width = params["w"][0]
289 except KeyError: 289 except KeyError:
290 pass 290 pass
306 filename = os.path.join("thumbs", thumb_width, filename) 306 filename = os.path.join("thumbs", thumb_width, filename)
307 thumbnail = os.path.join(self.temp_dir, filename) 307 thumbnail = os.path.join(self.temp_dir, filename)
308 self.createThumbnail(filepath, thumbnail, thumb_width) 308 self.createThumbnail(filepath, thumbnail, thumb_width)
309 309
310 new_url = os.path.join(self.media_repo, filename) 310 new_url = os.path.join(self.media_repo, filename)
311 subs.append((link, new_url)) 311 subs[link] = new_url
312 312
313 def downloadMedia(self, source, dest): 313 def downloadMedia(self, source, dest):
314 """Copy media to localhost. 314 """Copy media to localhost.
315 315
316 @param source (unicode): source url 316 @param source (unicode): source url