# HG changeset patch # User souliane # Date 1454603782 -3600 # Node ID 9fd517248dc852cf6ef146e8d5401c196ee7c7b8 # Parent 7717975b3ec3b6f5ca5d4d498b80b07ce6b0a864 plugin blog_import_dokuwiki: refactor to make it look more similar to blog_import_dotclear diff -r 7717975b3ec3 -r 9fd517248dc8 src/plugins/plugin_blog_import_dokuwiki.py --- a/src/plugins/plugin_blog_import_dokuwiki.py Thu Feb 04 12:29:43 2016 +0100 +++ b/src/plugins/plugin_blog_import_dokuwiki.py Thu Feb 04 17:36:22 2016 +0100 @@ -119,10 +119,92 @@ self.temp_dir = tempfile.mkdtemp() self.info_msg = _("DokuWiki media files will be downloaded to %s - to finish the import you will need to upload them to %s" % (self.temp_dir, self.media_repo)) else: - self.info_msg = _("DokuWiki media files will stay on %s - some of them may be protected by DokuWiki ACL and will not be accessible from XMPP." % url) + self.info_msg = _("DokuWiki media files will stay on %s - some of them may be protected by DokuWiki ACL and will not be accessible." % url) self.limit = limit self.posts_data = OrderedDict() + def getPostId(self, post): + """Return a unique and constant post id + + @param post(dict): parsed post data + @return (unicode): post unique item id + """ + return unicode(post['id']) + + def getPostUpdated(self, post): + """Return the update date. + + @param post(dict): parsed post data + @return (unicode): update date + """ + return unicode(post['mtime']) + + def getPostPublished(self, post): + """Try to parse the date from the message ID, else use "mtime". + + The date can be extracted if the message ID looks like one of: + - namespace:YYMMDD_short_title + - namespace:YYYYMMDD_short_title + @param post (dict): parsed post data + @return (unicode): publication date + """ + id_, default = unicode(post["id"]), unicode(post["mtime"]) + try: + date = id_.split(":")[-1].split("_")[0] + except KeyError: + return default + try: + time_struct = time.strptime(date, "%y%m%d") + except ValueError: + try: + time_struct = time.strptime(date, "%Y%m%d") + except ValueError: + return default + return unicode(calendar.timegm(time_struct)) + + def processPost(self, post, profile_jid): + """Process a single page. + + @param post (dict): parsed post data + @param profile_jid + """ + # get main information + id_ = self.getPostId(post) + updated = self.getPostUpdated(post) + published = self.getPostPublished(post) + + # manage links + backlinks = self.pages.backlinks(id_) + for link in self.pages.links(id_): + if link["type"] != "extern": + assert link["type"] == "local" + page = link["page"] + backlinks.append(page[1:] if page.startswith(":") else page) + + content = self.pages.get(id_) + content_xhtml = self.processContent(self.pages.html(id_), backlinks, profile_jid) + + # XXX: title is already in content_xhtml and difficult to remove, so leave it + # title = content.split("\n")[0].strip(u"\ufeff= ") + + # build the extra data dictionary + mb_data = {"id": id_, + "published": published, + "updated": updated, + "author": profile_jid.user, + # "content": content, # when passed, it is displayed in Libervia instead of content_xhtml + "content_xhtml": content_xhtml, + # "title": title, + "allow_comments": "true", + } + + # find out if the message access is public or restricted + namespace = id_.split(":")[0] + if namespace and namespace.lower() not in ("public", "/"): + mb_data["group"] = namespace # roster group must exist + + self.posts_data[id_] = {'blog': mb_data, 'comments':[[]]} + def process(self, client, namespace=DEFAULT_NAMESPACE): """Process a namespace or a single page. @@ -145,72 +227,13 @@ count = 0 for page in pages_list: - - # get main information - id_, updated, published = unicode(page['id']), unicode(page['mtime']), self.getOriginalDate(page) - - # manage links - backlinks = self.pages.backlinks(id_) - for link in self.pages.links(id_): - if link["type"] != "extern": - assert link["type"] == "local" - page = link["page"] - backlinks.append(page[1:] if page.startswith(":") else page) - - content = self.pages.get(id_) - content_xhtml = self.processContent(self.pages.html(id_), backlinks, profile_jid) - - # XXX: title is already in content_xhtml and difficult to remove, so leave it - # title = content.split("\n")[0].strip(u"\ufeff= ") - - # build the extra data dictionary - mb_data = {"id": id_, - "published": published, - "updated": updated, - "author": profile_jid.user, - # "content": content, # when passed, it is displayed in Libervia instead of content_xhtml - "content_xhtml": content_xhtml, - # "title": title, - "allow_comments": "true", - } - - # find out if the message access is public or restricted - namespace = id_.split(":")[0] - if namespace and namespace.lower() not in ("public", "/"): - mb_data["group"] = namespace # roster group must exist - - self.posts_data[id_] = {'blog': mb_data, 'comments':[[]]} - + self.processPost(page, profile_jid) count += 1 if count >= self.limit : break return (self.posts_data.itervalues(), len(self.posts_data)) - def getOriginalDate(self, page): - """Try to parse the date from the message ID, else use "mtime". - - The date can be extracted if the message ID looks like one of: - - namespace:YYMMDD_short_title - - namespace:YYYYMMDD_short_title - @param page (dict): message page - @return unicode - """ - id_, default = unicode(page["id"]), unicode(page["mtime"]) - try: - date = id_.split(":")[-1].split("_")[0] - except KeyError: - return default - try: - time_struct = time.strptime(date, "%y%m%d") - except ValueError: - try: - time_struct = time.strptime(date, "%Y%m%d") - except ValueError: - return default - return unicode(calendar.timegm(time_struct)) - - def processContent(self, text, backlinks, profile_jid): """Do text substitutions and file copy.