changeset 1831:68c0dc13d821

plugin blog import, XEP-0277: progress + redirect: - progress is now handled - URL redirections are handled with PubSub URIs, and returned as metadata with progressFinished - tmp_dir is cleaned in a finally clause
author Goffi <goffi@goffi.org>
date Sat, 23 Jan 2016 20:01:28 +0100
parents 74676624ad5d
children 39545dc527a1
files src/plugins/plugin_blog_import.py src/plugins/plugin_xep_0277.py
diffstat 2 files changed, 61 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/src/plugins/plugin_blog_import.py	Sat Jan 23 19:56:25 2016 +0100
+++ b/src/plugins/plugin_blog_import.py	Sat Jan 23 20:01:28 2016 +0100
@@ -38,7 +38,7 @@
     "name": "blog import",
     "import_name": "BLOG_IMPORT",
     "type": C.PLUG_TYPE_BLOG,
-    "dependencies": ["XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
+    "dependencies": ["XEP-0060", "XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
     "main": "BlogImportPlugin",
     "handler": "no",
     "description": _(u"""Blog import management:
@@ -49,6 +49,7 @@
 OPT_UPLOAD_IMAGES = 'upload_images'
 OPT_UPLOAD_IGNORE_HOST = 'upload_ignore_host'
 OPT_IGNORE_TLS = 'ignore_tls_errors'
+URL_REDIRECT_PREFIX = 'url_redirect_'
 BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS)
 
 
@@ -62,12 +63,17 @@
         self.host = host
         self._importers = {}
         self._u = host.plugins['UPLOAD']
+        self._p = host.plugins['XEP-0060']
         self._m = host.plugins['XEP-0277']
         self._s = self.host.plugins['TEXT-SYNTAXES']
-        host.bridge.addMethod("blogImport", ".plugin", in_sign='ssa{ss}ss', out_sign='', method=self._blogImport, async=True)
+        host.bridge.addMethod("blogImport", ".plugin", in_sign='ssa{ss}ss', out_sign='s', method=self._blogImport, async=True)
         host.bridge.addMethod("blogImportList", ".plugin", in_sign='', out_sign='a(ss)', method=self.listImporters)
         host.bridge.addMethod("blogImportDesc", ".plugin", in_sign='s', out_sign='(ss)', method=self.getDescription)
 
+    def getProgress(self, progress_id, profile):
+        client = self.host.getClient(profile)
+        return client._blogImport_progress[progress_id]
+
     def listImporters(self):
         importers = self._importers.keys()
         importers.sort()
@@ -115,6 +121,7 @@
                     Default: False
         @param pubsub_service(jid.JID, None): jid of the PubSub service where blog must be imported
             None to use profile's server
+        @return (unicode): progress id
         """
         if options is None:
             options = {}
@@ -134,22 +141,37 @@
             importer = self._importers[name]
         except KeyError:
             raise exceptions.NotFound(u"Importer [{}] not found".format(name))
-        posts_data = yield importer.callback(client, location, options)
-        url_links = {}
-        yield self._recursiveImport(client, posts_data, options, url_links)
+        posts_data, posts_count = yield importer.callback(client, location, options)
+        url_redirect = {}
+        progress_id = unicode(uuid.uuid4())
+        try:
+            progress_data = client._blogImport_progress
+        except AttributeError:
+            progress_data = client._blogImport_progress = {}
+        progress_data[progress_id] = {u'position': '0'}
+        if posts_count is not None:
+            progress_data[progress_id]['size'] = unicode(posts_count)
+        metadata = {'name': u'{}: {}'.format(name, location),
+                    'direction': 'out',
+                    'type': 'BLOG_IMPORT'
+                   }
+        self.host.registerProgressCb(progress_id, self.getProgress, metadata, profile=client.profile)
+        self.host.bridge.progressStarted(progress_id, metadata, client.profile)
+        self._recursiveImport(client, posts_data, progress_id, options, url_redirect)
+        defer.returnValue(progress_id)
 
     @defer.inlineCallbacks
-    def _recursiveImport(self, client, posts_data, options, url_links, service=None, node=None, depth=0):
+    def _recursiveImport(self, client, posts_data, progress_id, options, url_redirect, service=None, node=None, depth=0):
         """Do the upload recursively
 
         @param posts_data(list): list of data as specified in [register]
         @param options(dict): import options
-        @param url_links(dict): link between former posts and new items
+        @param url_redirect(dict): link between former posts and new items
         @param service(jid.JID, None): PubSub service to use
         @param node(unicode, None): PubSub node to use
         @param depth(int): level of recursion
         """
-        for data in posts_data:
+        for idx, data in enumerate(posts_data):
             # data checks/filters
             mb_data = data['blog']
             try:
@@ -160,15 +182,17 @@
             try:
                 # we keep the link between old url and new blog item
                 # so the user can redirect its former blog urls
-                old_url = data['url']
+                old_uri = data['url']
             except KeyError:
                 pass
             else:
-                url_links[old_url] = (service, node, item_id)
-                log.info(u"url link from {old} to {service}/{node}/{id}".format(
-                    old = old_url, service=service or u'server', node=node or u'', id=item_id))
+                new_uri = url_redirect[old_uri] = self._p.getNodeURI(
+                    service if service is not None else client.jid.userhostJID(),
+                    node or self._m.namespace,
+                    item_id)
+                log.info(u"url link from {old} to {new}".format(
+                    old=old_uri, new=new_uri))
 
-            depth or log.debug(u"Filtering data")
             yield self.blogFilters(client, mb_data, options)
 
             # comments data
@@ -176,7 +200,7 @@
                 raise NotImplementedError(u"can't manage multiple comment links")
             allow_comments = C.bool(mb_data.get('allow_comments', C.BOOL_FALSE))
             if allow_comments:
-                comments_service, comments_node = self._m.getCommentService(client), self._m.getCommentNode(item_id)
+                comments_service, comments_node = self._m.getCommentsService(client), self._m.getCommentsNode(item_id)
                 mb_data['comments_service'] = comments_service
                 mb_data['comments_node'] = comments_node
             else:
@@ -190,7 +214,16 @@
             # comments upload
             depth or log.debug(u"uploading comments")
             if allow_comments:
-                yield self._recursiveImport(client, data['comments'][0], options, url_links, service=comments_service, node=comments_node, depth=depth+1)
+                yield self._recursiveImport(client, data['comments'][0], progress_id, options, url_redirect, service=comments_service, node=comments_node, depth=depth+1)
+            if depth == 0:
+                client._blogImport_progress[progress_id]['position'] = unicode(idx+1)
+
+        if depth == 0:
+            self.host.bridge.progressFinished(progress_id,
+                {u'{}{}'.format(URL_REDIRECT_PREFIX, old): new for old, new in url_redirect.iteritems()},
+                client.profile)
+            self.host.removeProgressCb(progress_id, client.profile)
+            del client._blogImport_progress[progress_id]
 
     @defer.inlineCallbacks
     def blogFilters(self, client, mb_data, options):
@@ -246,9 +279,11 @@
             opt_host = urlparse.urlunsplit((parsed_host.scheme or 'http', parsed_host.netloc or parsed_host.path, '', '', ''))
 
         tmp_dir = tempfile.mkdtemp()
-        for img_elt in xml_tools.findAll(top_elt, ['img']):
-            yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir)
-        os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong
+        try:
+            for img_elt in xml_tools.findAll(top_elt, ['img']):
+                yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir)
+        finally:
+            os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong
 
         # we now replace the content with filtered one
         mb_data['content_xhtml'] = top_elt.toXml()
@@ -322,7 +357,9 @@
         @param name(unicode): unique importer name, should indicate the blogging software it handler and always lowercase
         @param callback(callable): method to call:
             the signature must be (client, location, options) (cf. [blogImport])
-            the importer must return an iterable of dict which must have the following keys:
+            the importer must return a tuple with (posts_data, posts_count)
+
+            posts_data is an iterable of dict which must have the following keys:
                 'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en)
                     the importer MUST NOT create node or call XEP-0277 plugin itself
                     'comments*' key MUST NOT be used in this microblog_data, see bellow for comments
@@ -338,9 +375,9 @@
                 'url' (unicode): former url of the post (only the path, without host part)
                     if present the association to the new path will be displayed to user, so it can make redirections if necessary
 
-            Optionally, the importer plugin can return a tuple with the just described iterator and a post_total
-                where "post_total" (int) indicate the total number of posts (without comments)
+            posts_count (int, None) indicate the total number of posts (without comments)
                 useful to display a progress indicator when the iterator is a generator
+                use None if you can't guess the total number of blog posts
         @param short_desc(unicode): one line description of the importer
         @param long_desc(unicode): long description of the importer, its options, etc.
         """
--- a/src/plugins/plugin_xep_0277.py	Sat Jan 23 19:56:25 2016 +0100
+++ b/src/plugins/plugin_xep_0277.py	Sat Jan 23 20:01:28 2016 +0100
@@ -65,6 +65,7 @@
 
 
 class XEP_0277(object):
+    namespace = NS_MICROBLOG
 
     def __init__(self, host):
         log.info(_(u"Microblogging plugin initialization"))
@@ -454,7 +455,7 @@
 
     ## publish ##
 
-    def getCommentNode(self, item_id):
+    def getCommentsNode(self, item_id):
         """Generate comment node
 
         @param item_id(unicode): id of the parent item
@@ -462,7 +463,7 @@
         """
         return u"{}{}".format(NS_COMMENT_PREFIX, item_id)
 
-    def getCommentService(self, client, parent_service=None):
+    def getCommentsService(self, client, parent_service=None):
         """Get prefered PubSub service to create comment node
 
         @param pubsub_service(jid.JID, None): PubSub service of the parent item