libervia-backend: comparison of src/plugins/plugin_blog_import.py @ 1831:68c0dc13d821
plugin blog import, XEP-0277: progress + redirect:
- progress is now handled
- URL redirections are handled with PubSub URIs, and returned as metadata with progressFinished
- tmp_dir is cleaned in a finally clause
author:   Goffi <goffi@goffi.org>
date:     Sat, 23 Jan 2016 20:01:28 +0100
parents:  4e51f21c687f
children: cdecf553e051
comparing 1830:74676624ad5d (old) with 1831:68c0dc13d821 (new)
@@ -36,21 +36,22 @@
 
 PLUGIN_INFO = {
     "name": "blog import",
     "import_name": "BLOG_IMPORT",
     "type": C.PLUG_TYPE_BLOG,
-    "dependencies": ["XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
+    "dependencies": ["XEP-0060", "XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
     "main": "BlogImportPlugin",
     "handler": "no",
     "description": _(u"""Blog import management:
 This plugin manage the different blog importers which can register to it, and handler generic importing tasks.""")
 }
 
 OPT_HOST = 'host'
 OPT_UPLOAD_IMAGES = 'upload_images'
 OPT_UPLOAD_IGNORE_HOST = 'upload_ignore_host'
 OPT_IGNORE_TLS = 'ignore_tls_errors'
+URL_REDIRECT_PREFIX = 'url_redirect_'
 BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS)
 
 
 BlogImporter = collections.namedtuple('BlogImporter', ('callback', 'short_desc', 'long_desc'))
 
@@ -60,15 +61,20 @@
     def __init__(self, host):
         log.info(_("plugin Blog Import initialization"))
         self.host = host
         self._importers = {}
         self._u = host.plugins['UPLOAD']
+        self._p = host.plugins['XEP-0060']
         self._m = host.plugins['XEP-0277']
         self._s = self.host.plugins['TEXT-SYNTAXES']
-        host.bridge.addMethod("blogImport", ".plugin", in_sign='ssa{ss}ss', out_sign='', method=self._blogImport, async=True)
+        host.bridge.addMethod("blogImport", ".plugin", in_sign='ssa{ss}ss', out_sign='s', method=self._blogImport, async=True)
         host.bridge.addMethod("blogImportList", ".plugin", in_sign='', out_sign='a(ss)', method=self.listImporters)
         host.bridge.addMethod("blogImportDesc", ".plugin", in_sign='s', out_sign='(ss)', method=self.getDescription)
+
+    def getProgress(self, progress_id, profile):
+        client = self.host.getClient(profile)
+        return client._blogImport_progress[progress_id]
 
     def listImporters(self):
         importers = self._importers.keys()
         importers.sort()
         return [(name, self._importers[name].short_desc) for name in self._importers]
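The getProgress() callback added above simply hands back the per-import dict stored on the client; 'position' and 'size' are kept as unicode strings, presumably because progress data crosses the bridge as string-to-string maps. A minimal sketch of that structure (identifiers and values below are illustrative, not taken from the plugin):

    progress_id = u'e3b0c442-98fc-4c14-9af4-6efc21a0d2b8'  # a uuid4, as generated in blogImport
    _blogImport_progress = {                               # attribute set on the client object
        progress_id: {
            u'position': u'12',  # posts already published, updated after each top-level item
            u'size': u'42',      # total post count, only present when the importer gave posts_count
        }
    }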
@@ -113,10 +119,11 @@
                 - OPT_UPLOAD_IGNORE_HOST (unicode): don't upload images from this host
                 - OPT_IGNORE_TLS (bool): ignore TLS error for image upload.
                     Default: False
         @param pubsub_service(jid.JID, None): jid of the PubSub service where blog must be imported
             None to use profile's server
+        @return (unicode): progress id
         """
         if options is None:
             options = {}
         else:
             for opt_name, opt_default in ((OPT_UPLOAD_IMAGES, True),
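To make the options documented above concrete, a caller could pass a dict along these lines (string values, since the bridge signature a{ss} carries them that way; the boolean entries are then converted with C.bool and the host is normalised further down; all values here are invented):

    options = {
        'host': 'http://blog.example.org',            # OPT_HOST: host of the original blog
        'upload_images': 'true',                      # OPT_UPLOAD_IMAGES, parsed with C.bool
        'upload_ignore_host': 'static.example.org',   # OPT_UPLOAD_IGNORE_HOST
        'ignore_tls_errors': 'false',                 # OPT_IGNORE_TLS
    }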
@@ -132,53 +139,70 @@
                     del options[opt_name]
         try:
             importer = self._importers[name]
         except KeyError:
             raise exceptions.NotFound(u"Importer [{}] not found".format(name))
-        posts_data = yield importer.callback(client, location, options)
-        url_links = {}
-        yield self._recursiveImport(client, posts_data, options, url_links)
+        posts_data, posts_count = yield importer.callback(client, location, options)
+        url_redirect = {}
+        progress_id = unicode(uuid.uuid4())
+        try:
+            progress_data = client._blogImport_progress
+        except AttributeError:
+            progress_data = client._blogImport_progress = {}
+        progress_data[progress_id] = {u'position': '0'}
+        if posts_count is not None:
+            progress_data[progress_id]['size'] = unicode(posts_count)
+        metadata = {'name': u'{}: {}'.format(name, location),
+                    'direction': 'out',
+                    'type': 'BLOG_IMPORT'
+                    }
+        self.host.registerProgressCb(progress_id, self.getProgress, metadata, profile=client.profile)
+        self.host.bridge.progressStarted(progress_id, metadata, client.profile)
+        self._recursiveImport(client, posts_data, progress_id, options, url_redirect)
+        defer.returnValue(progress_id)
 
     @defer.inlineCallbacks
-    def _recursiveImport(self, client, posts_data, options, url_links, service=None, node=None, depth=0):
+    def _recursiveImport(self, client, posts_data, progress_id, options, url_redirect, service=None, node=None, depth=0):
         """Do the upload recursively
 
         @param posts_data(list): list of data as specified in [register]
         @param options(dict): import options
-        @param url_links(dict): link between former posts and new items
+        @param url_redirect(dict): link between former posts and new items
         @param service(jid.JID, None): PubSub service to use
         @param node(unicode, None): PubSub node to use
         @param depth(int): level of recursion
         """
-        for data in posts_data:
+        for idx, data in enumerate(posts_data):
             # data checks/filters
             mb_data = data['blog']
             try:
                 item_id = mb_data['id']
             except KeyError:
                 item_id = mb_data['id'] = unicode(uuid.uuid4())
 
             try:
                 # we keep the link between old url and new blog item
                 # so the user can redirect its former blog urls
-                old_url = data['url']
+                old_uri = data['url']
             except KeyError:
                 pass
             else:
-                url_links[old_url] = (service, node, item_id)
-                log.info(u"url link from {old} to {service}/{node}/{id}".format(
-                    old = old_url, service=service or u'server', node=node or u'', id=item_id))
-
-            depth or log.debug(u"Filtering data")
+                new_uri = url_redirect[old_uri] = self._p.getNodeURI(
+                    service if service is not None else client.jid.userhostJID(),
+                    node or self._m.namespace,
+                    item_id)
+                log.info(u"url link from {old} to {new}".format(
+                    old=old_uri, new=new_uri))
+
             yield self.blogFilters(client, mb_data, options)
 
             # comments data
             if len(data['comments']) != 1:
                 raise NotImplementedError(u"can't manage multiple comment links")
             allow_comments = C.bool(mb_data.get('allow_comments', C.BOOL_FALSE))
             if allow_comments:
-                comments_service, comments_node = self._m.getCommentService(client), self._m.getCommentNode(item_id)
+                comments_service, comments_node = self._m.getCommentsService(client), self._m.getCommentsNode(item_id)
                 mb_data['comments_service'] = comments_service
                 mb_data['comments_node'] = comments_node
             else:
                 if data['comments'][0]:
                     raise exceptions.DataError(u"allow_comments set to False, but comments are there")
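The new_uri built with getNodeURI() above is an XMPP URI pointing at the imported item; it should look roughly like the XEP-0060 form below (service, node and item values are invented for illustration):

    new_uri = u'xmpp:pubsub.example.net?;node=urn:xmpp:microblog:0;item=4d3cb2f0-9a51-4eab-8c2d-2f52c771a2a1'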
@@ -188,11 +212,20 @@
             yield self._m.send(mb_data, service, node, profile=client.profile)
 
             # comments upload
             depth or log.debug(u"uploading comments")
             if allow_comments:
-                yield self._recursiveImport(client, data['comments'][0], options, url_links, service=comments_service, node=comments_node, depth=depth+1)
+                yield self._recursiveImport(client, data['comments'][0], progress_id, options, url_redirect, service=comments_service, node=comments_node, depth=depth+1)
+            if depth == 0:
+                client._blogImport_progress[progress_id]['position'] = unicode(idx+1)
+
+        if depth == 0:
+            self.host.bridge.progressFinished(progress_id,
+                {u'{}{}'.format(URL_REDIRECT_PREFIX, old): new for old, new in url_redirect.iteritems()},
+                client.profile)
+            self.host.removeProgressCb(progress_id, client.profile)
+            del client._blogImport_progress[progress_id]
 
     @defer.inlineCallbacks
     def blogFilters(self, client, mb_data, options):
         """Apply filters according to options
 
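At the end of the top-level recursion, progressFinished hands the frontend a mapping whose keys are the old blog paths prefixed with URL_REDIRECT_PREFIX and whose values are the new PubSub item URIs. A sketch of how a frontend or script might post-process that metadata (the helper and sample data below are hypothetical, not part of the plugin):

    URL_REDIRECT_PREFIX = 'url_redirect_'

    def redirects_from_metadata(metadata):
        """Extract a {former path: new XMPP URI} dict from progressFinished metadata."""
        return {key[len(URL_REDIRECT_PREFIX):]: uri
                for key, uri in metadata.items()
                if key.startswith(URL_REDIRECT_PREFIX)}

    metadata = {u'url_redirect_/post/2015/06/01/first-post':
                u'xmpp:pubsub.example.net?;node=urn:xmpp:microblog:0;item=first-post'}
    for old_path, new_uri in redirects_from_metadata(metadata).items():
        print(u'{} -> {}'.format(old_path, new_uri))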
@@ -244,13 +277,15 @@
             # we normalise the domain
             parsed_host = urlparse.urlsplit(opt_host)
             opt_host = urlparse.urlunsplit((parsed_host.scheme or 'http', parsed_host.netloc or parsed_host.path, '', '', ''))
 
         tmp_dir = tempfile.mkdtemp()
-        for img_elt in xml_tools.findAll(top_elt, ['img']):
-            yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir)
-        os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong
+        try:
+            for img_elt in xml_tools.findAll(top_elt, ['img']):
+                yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir)
+        finally:
+            os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong
 
         # we now replace the content with filtered one
         mb_data['content_xhtml'] = top_elt.toXml()
 
     @defer.inlineCallbacks
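The try/finally introduced above guarantees the temporary directory is removed even if an image filter raises, and keeping os.rmdir (rather than shutil.rmtree) doubles as a sanity check: it raises OSError if anything was left behind. A small standalone illustration of that behaviour:

    import os
    import tempfile

    tmp_dir = tempfile.mkdtemp()
    open(os.path.join(tmp_dir, 'leftover'), 'w').close()  # simulate a file that was not cleaned up
    try:
        os.rmdir(tmp_dir)  # raises OSError because the directory is not empty
    except OSError:
        print(u"something went wrong, {} still holds files".format(tmp_dir))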
@@ -320,11 +355,13 @@
         """Register a blogImport method
 
         @param name(unicode): unique importer name, should indicate the blogging software it handler and always lowercase
         @param callback(callable): method to call:
             the signature must be (client, location, options) (cf. [blogImport])
-            the importer must return an iterable of dict which must have the following keys:
+            the importer must return a tuple with (posts_data, posts_count)
+
+            posts_data is an iterable of dict which must have the following keys:
                 'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en)
                     the importer MUST NOT create node or call XEP-0277 plugin itself
                     'comments*' key MUST NOT be used in this microblog_data, see bellow for comments
                 It is recommanded to use a unique id in the "id" key which is constant per blog item,
                 so if the import fail, a new import will overwrite the failed items and avoid duplicates.
@@ -336,13 +373,13 @@
                 If allow_comments is False and some comments are present, a exceptions.DataError will be raised
             the import MAY optionally have the following keys:
                 'url' (unicode): former url of the post (only the path, without host part)
                     if present the association to the new path will be displayed to user, so it can make redirections if necessary
 
-            Optionally, the importer plugin can return a tuple with the just described iterator and a post_total
-            where "post_total" (int) indicate the total number of posts (without comments)
+            posts_count (int, None) indicate the total number of posts (without comments)
                 useful to display a progress indicator when the iterator is a generator
+                use None if you can't guess the total number of blog posts
         @param short_desc(unicode): one line description of the importer
         @param long_desc(unicode): long description of the importer, its options, etc.
         """
         name = name.lower()
         if name in self._importers:
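For illustration, a minimal importer honouring the new (posts_data, posts_count) contract could look like the sketch below; the register() call and the 'BLOG_IMPORT' plugin lookup are assumptions based on this docstring and on PLUGIN_INFO, and the example data is invented:

    def dotclear_import(client, location, options):
        """Hypothetical importer callback: parse `location` and return one dict per post."""
        posts = [{
            'blog': {'id': u'first-post',
                     'title': u'First post',
                     'content_xhtml': u'<div>hello</div>'},
            'comments': [[]],                       # exactly one comments list, empty here
            'url': u'/post/2015/06/01/first-post',  # former path, reused for url_redirect_ metadata
        }]
        return posts, len(posts)                    # (posts_data, posts_count)

    # registration, assuming the register() method described by the docstring above:
    # host.plugins['BLOG_IMPORT'].register(u'dotclear', dotclear_import,
    #                                      short_desc=u'Dotclear import sketch',
    #                                      long_desc=u'illustrative importer, not a real plugin')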