changeset 1842:9fd517248dc8

plugin blog_import_dokuwiki: refactor to make it look more similar to blog_import_dotclear
author souliane <souliane@mailoo.org>
date Thu, 04 Feb 2016 17:36:22 +0100
parents 7717975b3ec3
children a51355982f11
files src/plugins/plugin_blog_import_dokuwiki.py
diffstat 1 files changed, 84 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/src/plugins/plugin_blog_import_dokuwiki.py	Thu Feb 04 12:29:43 2016 +0100
+++ b/src/plugins/plugin_blog_import_dokuwiki.py	Thu Feb 04 17:36:22 2016 +0100
@@ -119,10 +119,92 @@
             self.temp_dir = tempfile.mkdtemp()
             self.info_msg = _("DokuWiki media files will be downloaded to %s - to finish the import you will need to upload them to %s" % (self.temp_dir, self.media_repo))
         else:
-            self.info_msg = _("DokuWiki media files will stay on %s - some of them may be protected by DokuWiki ACL and will not be accessible from XMPP." % url)
+            self.info_msg = _("DokuWiki media files will stay on %s - some of them may be protected by DokuWiki ACL and will not be accessible." % url)
         self.limit = limit
         self.posts_data = OrderedDict()
 
+    def getPostId(self, post):
+        """Return a unique and constant post id (the unicode form of post["id"]).
+
+        @param post(dict): parsed post data, must contain the key "id"
+        @return (unicode): post unique item id
+        """
+        return unicode(post['id'])
+
+    def getPostUpdated(self, post):
+        """Return the update date, read from the "mtime" field of the post.
+
+        @param post(dict): parsed post data, must contain the key "mtime"
+        @return (unicode): update date (post["mtime"] converted to unicode)
+        """
+        return unicode(post['mtime'])
+
+    def getPostPublished(self, post):
+        """Try to parse the date from the message ID, else use "mtime".
+
+        The date can be extracted if the message ID looks like one of:
+            - namespace:YYMMDD_short_title
+            - namespace:YYYYMMDD_short_title
+        @param post (dict): parsed post data, must contain the keys "id" and "mtime"
+        @return (unicode): calendar.timegm() of the parsed date, or post["mtime"] when no date can be parsed
+        """
+        id_, default = unicode(post["id"]), unicode(post["mtime"])
+        try:
+            date = id_.split(":")[-1].split("_")[0]  # text after the last ":" and before the next "_"
+        except KeyError:  # NOTE(review): split() and these indexes do not raise KeyError - this handler looks unreachable
+            return default
+        try:
+            time_struct = time.strptime(date, "%y%m%d")  # first try the 2-digit year form
+        except ValueError:
+            try:
+                time_struct = time.strptime(date, "%Y%m%d")  # then the 4-digit year form
+            except ValueError:
+                return default  # neither format matched: fall back to "mtime"
+        return unicode(calendar.timegm(time_struct))
+
+    def processPost(self, post, profile_jid):
+        """Process a single page and store the result in self.posts_data, keyed by the post id.
+
+        @param post (dict): parsed post data, read through getPostId, getPostUpdated and getPostPublished
+        @param profile_jid: its "user" attribute is used as author, it is also passed to processContent
+        """
+        # get main information
+        id_ = self.getPostId(post)
+        updated = self.getPostUpdated(post)
+        published = self.getPostPublished(post)
+
+        # manage links
+        backlinks = self.pages.backlinks(id_)  # NOTE(review): presumably a list, it is extended below with the local links
+        for link in self.pages.links(id_):
+            if link["type"] != "extern":
+                assert link["type"] == "local"
+                page = link["page"]
+                backlinks.append(page[1:] if page.startswith(":") else page)  # drop the leading ":" if any
+
+        content = self.pages.get(id_)  # NOTE(review): only referenced by the commented-out lines below
+        content_xhtml = self.processContent(self.pages.html(id_), backlinks, profile_jid)
+
+        # XXX: title is already in content_xhtml and difficult to remove, so leave it
+        # title = content.split("\n")[0].strip(u"\ufeff= ")
+
+        # build the extra data dictionary
+        mb_data = {"id": id_,
+                "published": published,
+                "updated": updated,
+                "author": profile_jid.user,
+                # "content": content,  # when passed, it is displayed in Libervia instead of content_xhtml
+                "content_xhtml": content_xhtml,
+                # "title": title,
+                "allow_comments": "true",
+                }
+
+        # find out if the message access is public or restricted
+        namespace = id_.split(":")[0]  # NOTE(review): for an id without ":" this is the whole id
+        if namespace and namespace.lower() not in ("public", "/"):
+            mb_data["group"] = namespace  # roster group must exist
+
+        self.posts_data[id_] = {'blog': mb_data, 'comments':[[]]}  # 'comments' always holds one empty list here
+
     def process(self, client, namespace=DEFAULT_NAMESPACE):
         """Process a namespace or a single page.
 
@@ -145,72 +227,13 @@
 
         count = 0
         for page in pages_list:
-
-            # get main information
-            id_, updated, published = unicode(page['id']), unicode(page['mtime']), self.getOriginalDate(page)
-
-            # manage links
-            backlinks = self.pages.backlinks(id_)
-            for link in self.pages.links(id_):
-                if link["type"] != "extern":
-                    assert link["type"] == "local"
-                    page = link["page"]
-                    backlinks.append(page[1:] if page.startswith(":") else page)
-
-            content = self.pages.get(id_)
-            content_xhtml = self.processContent(self.pages.html(id_), backlinks, profile_jid)
-
-            # XXX: title is already in content_xhtml and difficult to remove, so leave it
-            # title = content.split("\n")[0].strip(u"\ufeff= ")
-
-            # build the extra data dictionary
-            mb_data = {"id": id_,
-                    "published": published,
-                    "updated": updated,
-                    "author": profile_jid.user,
-                    # "content": content,  # when passed, it is displayed in Libervia instead of content_xhtml
-                    "content_xhtml": content_xhtml,
-                    # "title": title,
-                    "allow_comments": "true",
-                    }
-
-            # find out if the message access is public or restricted
-            namespace = id_.split(":")[0]
-            if namespace and namespace.lower() not in ("public", "/"):
-                mb_data["group"] = namespace  # roster group must exist
-
-            self.posts_data[id_] = {'blog': mb_data, 'comments':[[]]}
-
+            self.processPost(page, profile_jid)
             count += 1
             if count >= self.limit :
                 break
 
         return (self.posts_data.itervalues(), len(self.posts_data))
 
-    def getOriginalDate(self, page):
-        """Try to parse the date from the message ID, else use "mtime".
-
-        The date can be extracted if the message ID looks like one of:
-            - namespace:YYMMDD_short_title
-            - namespace:YYYYMMDD_short_title
-        @param page (dict): message page
-        @return unicode
-        """
-        id_, default = unicode(page["id"]), unicode(page["mtime"])
-        try:
-            date = id_.split(":")[-1].split("_")[0]
-        except KeyError:
-            return default
-        try:
-            time_struct = time.strptime(date, "%y%m%d")
-        except ValueError:
-            try:
-                time_struct = time.strptime(date, "%Y%m%d")
-            except ValueError:
-                return default
-        return unicode(calendar.timegm(time_struct))
-
-
     def processContent(self, text, backlinks, profile_jid):
         """Do text substitutions and file copy.