diff src/plugins/plugin_blog_import.py @ 2369:cdaa58e14553

plugin import: generic data import plugin: this plugin handle common task for importers. Specialized importers (e.g. blog import) use it as a basic, and specific importers (e.g. Dotclear) register to the specialized one. Blog importer generic method have been moved to it.
author Goffi <goffi@goffi.org>
date Sun, 01 Oct 2017 12:21:23 +0200
parents 33c8c4973743
children 2c2b826b0bb3
line wrap: on
line diff
--- a/src/plugins/plugin_blog_import.py	Sun Oct 01 12:21:23 2017 +0200
+++ b/src/plugins/plugin_blog_import.py	Sun Oct 01 12:21:23 2017 +0200
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+
 from sat.core.i18n import _
 from sat.core.constants import Const as C
 from sat.core.log import getLogger
@@ -31,18 +32,18 @@
 import os.path
 import tempfile
 import urlparse
-import uuid
+import shortuuid
 
 
 PLUGIN_INFO = {
     C.PI_NAME: "blog import",
     C.PI_IMPORT_NAME: "BLOG_IMPORT",
-    C.PI_TYPE: C.PLUG_TYPE_BLOG,
-    C.PI_DEPENDENCIES: ["XEP-0060", "XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
+    C.PI_TYPE: (C.PLUG_TYPE_BLOG, C.PLUG_TYPE_IMPORT),
+    C.PI_DEPENDENCIES: ["IMPORT", "XEP-0060", "XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
     C.PI_MAIN: "BlogImportPlugin",
     C.PI_HANDLER: "no",
     C.PI_DESCRIPTION: _(u"""Blog import management:
-This plugin manage the different blog importers which can register to it, and handler generic importing tasks.""")
+This plugin manage the different blog importers which can register to it, and handle generic importing tasks.""")
 }
 
 OPT_HOST = 'host'
@@ -50,67 +51,45 @@
 OPT_UPLOAD_IGNORE_HOST = 'upload_ignore_host'
 OPT_IGNORE_TLS = 'ignore_tls_errors'
 URL_REDIRECT_PREFIX = 'url_redirect_'
-BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS)
 
 
 BlogImporter = collections.namedtuple('BlogImporter', ('callback', 'short_desc', 'long_desc'))
 
 
 class BlogImportPlugin(object):
+    BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS)
+    OPT_DEFAULTS = {OPT_UPLOAD_IMAGES: True,
+                    OPT_IGNORE_TLS: False}
 
     def __init__(self, host):
         log.info(_("plugin Blog Import initialization"))
         self.host = host
-        self._importers = {}
         self._u = host.plugins['UPLOAD']
         self._p = host.plugins['XEP-0060']
         self._m = host.plugins['XEP-0277']
         self._s = self.host.plugins['TEXT-SYNTAXES']
-        host.bridge.addMethod("blogImport", ".plugin", in_sign='ssa{ss}ss', out_sign='s', method=self._blogImport, async=True)
-        host.bridge.addMethod("blogImportList", ".plugin", in_sign='', out_sign='a(ss)', method=self.listImporters)
-        host.bridge.addMethod("blogImportDesc", ".plugin", in_sign='s', out_sign='(ss)', method=self.getDescription)
-
-    def getProgress(self, progress_id, profile):
-        client = self.host.getClient(profile)
-        return client._blogImport_progress[progress_id]
+        host.plugins['IMPORT'].initialize(self, u'blog')
 
-    def listImporters(self):
-        importers = self._importers.keys()
-        importers.sort()
-        return [(name, self._importers[name].short_desc) for name in self._importers]
-
-    def getDescription(self, name):
-        """Return import short and long descriptions
+    def importItem(self, client, item_import_data, options, return_data, service, node):
+        """importItem specialized for blog import
 
-        @param name(unicode): blog importer name
-        @return (tuple[unicode,unicode]): short and long description
-        """
-        try:
-            importer = self._importers[name]
-        except KeyError:
-            raise exceptions.NotFound(u"Blog importer not found [{}]".format(name))
-        else:
-            return importer.short_desc, importer.long_desc
+        @param items_import_data(iterable[dict]):
+            * mandatory keys:
+                'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en)
+                    the importer MUST NOT create node or call XEP-0277 plugin itself
+                    'comments*' key MUST NOT be used in this microblog_data, see bellow for comments
+                    It is recommanded to use a unique id in the "id" key which is constant per blog item,
+                    so if the import fail, a new import will overwrite the failed items and avoid duplicates.
 
-    def _blogImport(self, name, location, options, pubsub_service='', profile=C.PROF_KEY_DEFAULT):
-        client = self.host.getClient(profile)
-        options = {key: unicode(value) for key, value in options.iteritems()}
-        for option in BOOL_OPTIONS:
-            try:
-                options[option] = C.bool(options[option])
-            except KeyError:
-                pass
-        return self.blogImport(client, unicode(name), unicode(location), options)
-
-    @defer.inlineCallbacks
-    def blogImport(self, client, name, location, options=None, pubsub_service=None):
-        """Import a blog
-
-        @param name(unicode): name of the blog importer
-        @param location(unicode): location of the blog data to import
-            can be an url, a file path, or anything which make sense
-            check importer description for more details
-        @param options(dict, None): extra options. Below are the generic options,
+                'comments' (list[list[dict]],None): Dictionaries must have the same keys as main item (i.e. 'blog' and 'comments')
+                    a list of list is used because XEP-0277 can handler several comments nodes,
+                    but in most cases, there will we only one item it the first list (something like [[{comment1_data},{comment2_data}, ...]])
+                    blog['allow_comments'] must be True if there is any comment, and False (or not present) if comments are not allowed.
+                    If allow_comments is False and some comments are present, an exceptions.DataError will be raised
+            * optional keys:
+                'url' (unicode): former url of the post (only the path, without host part)
+                    if present the association to the new path will be displayed to user, so it can make redirections if necessary
+        @param options(dict, None): Below are the generic options,
             blog importer can have specific ones. All options have unicode values
             generic options:
                 - OPT_HOST (unicode): original host
@@ -120,114 +99,56 @@
                 - OPT_UPLOAD_IGNORE_HOST (unicode): don't upload images from this host
                 - OPT_IGNORE_TLS (bool): ignore TLS error for image upload.
                     Default: False
-        @param pubsub_service(jid.JID, None): jid of the PubSub service where blog must be imported
-            None to use profile's server
-        @return (unicode): progress id
+        @param return_data(dict): will contain link between former posts and new items
+
         """
-        if options is None:
-            options = {}
-        else:
-            for opt_name, opt_default in ((OPT_UPLOAD_IMAGES, True),
-                                          (OPT_IGNORE_TLS, False)):
-                # we want an filled options dict, with all empty or False values removed
-                try:
-                    value =options[opt_name]
-                except KeyError:
-                    if opt_default:
-                        options[opt_name] = opt_default
-                else:
-                    if not value:
-                        del options[opt_name]
+        mb_data = item_import_data['blog']
+        try:
+            item_id = mb_data['id']
+        except KeyError:
+            item_id = mb_data['id'] = unicode(shortuuid.uuid())
+
         try:
-            importer = self._importers[name]
+            # we keep the link between old url and new blog item
+            # so the user can redirect its former blog urls
+            old_uri = item_import_data['url']
         except KeyError:
-            raise exceptions.NotFound(u"Importer [{}] not found".format(name))
-        posts_data, posts_count = yield importer.callback(client, location, options)
-        url_redirect = {}
-        progress_id = unicode(uuid.uuid4())
-        try:
-            progress_data = client._blogImport_progress
-        except AttributeError:
-            progress_data = client._blogImport_progress = {}
-        progress_data[progress_id] = {u'position': '0'}
-        if posts_count is not None:
-            progress_data[progress_id]['size'] = unicode(posts_count)
-        metadata = {'name': u'{}: {}'.format(name, location),
-                    'direction': 'out',
-                    'type': 'BLOG_IMPORT'
-                   }
-        self.host.registerProgressCb(progress_id, self.getProgress, metadata, profile=client.profile)
-        self.host.bridge.progressStarted(progress_id, metadata, client.profile)
-        self._recursiveImport(client, posts_data, progress_id, options, url_redirect)
-        defer.returnValue(progress_id)
+            pass
+        else:
+            new_uri = return_data[URL_REDIRECT_PREFIX + old_uri] = self._p.getNodeURI(
+                service if service is not None else client.jid.userhostJID(),
+                node or self._m.namespace,
+                item_id)
+            log.info(u"url link from {old} to {new}".format(
+                old=old_uri, new=new_uri))
+
+        return mb_data
+
+    def importSubItems(self, client, item_import_data, mb_data, options):
+        # comments data
+        if len(item_import_data['comments']) != 1:
+            raise NotImplementedError(u"can't manage multiple comment links")
+        allow_comments = C.bool(mb_data.get('allow_comments', C.BOOL_FALSE))
+        if allow_comments:
+            comments_service, comments_node = self._m.getCommentsService(client), self._m.getCommentsNode(mb_data['id'])
+            mb_data['comments_service'] = comments_service.full()
+            mb_data['comments_node'] = comments_node
+            recurse_kwargs = {
+                'items_import_data':item_import_data['comments'][0],
+                'service':comments_service,
+                'node':comments_node}
+            return recurse_kwargs
+        else:
+            if item_import_data['comments'][0]:
+                raise exceptions.DataError(u"allow_comments set to False, but comments are there")
+            return None
+
+    def publishItem(self, client, mb_data, service, node):
+        log.debug(u"uploading item [{id}]: {title}".format(id=mb_data['id'], title=mb_data.get('title','')))
+        return self._m.send(client, mb_data, service, node)
 
     @defer.inlineCallbacks
-    def _recursiveImport(self, client, posts_data, progress_id, options, url_redirect, service=None, node=None, depth=0):
-        """Do the upload recursively
-
-        @param posts_data(list): list of data as specified in [register]
-        @param options(dict): import options
-        @param url_redirect(dict): link between former posts and new items
-        @param service(jid.JID, None): PubSub service to use
-        @param node(unicode, None): PubSub node to use
-        @param depth(int): level of recursion
-        """
-        for idx, data in enumerate(posts_data):
-            # data checks/filters
-            mb_data = data['blog']
-            try:
-                item_id = mb_data['id']
-            except KeyError:
-                item_id = mb_data['id'] = unicode(uuid.uuid4())
-
-            try:
-                # we keep the link between old url and new blog item
-                # so the user can redirect its former blog urls
-                old_uri = data['url']
-            except KeyError:
-                pass
-            else:
-                new_uri = url_redirect[old_uri] = self._p.getNodeURI(
-                    service if service is not None else client.jid.userhostJID(),
-                    node or self._m.namespace,
-                    item_id)
-                log.info(u"url link from {old} to {new}".format(
-                    old=old_uri, new=new_uri))
-
-            yield self.blogFilters(client, mb_data, options)
-
-            # comments data
-            if len(data['comments']) != 1:
-                raise NotImplementedError(u"can't manage multiple comment links")
-            allow_comments = C.bool(mb_data.get('allow_comments', C.BOOL_FALSE))
-            if allow_comments:
-                comments_service, comments_node = self._m.getCommentsService(client), self._m.getCommentsNode(item_id)
-                mb_data['comments_service'] = comments_service.full()
-                mb_data['comments_node'] = comments_node
-            else:
-                if data['comments'][0]:
-                    raise exceptions.DataError(u"allow_comments set to False, but comments are there")
-
-            # post upload
-            depth or log.debug(u"uploading item [{id}]: {title}".format(id=mb_data['id'], title=mb_data.get('title','')))
-            yield self._m.send(mb_data, service, node, profile=client.profile)
-
-            # comments upload
-            depth or log.debug(u"uploading comments")
-            if allow_comments:
-                yield self._recursiveImport(client, data['comments'][0], progress_id, options, url_redirect, service=comments_service, node=comments_node, depth=depth+1)
-            if depth == 0:
-                client._blogImport_progress[progress_id]['position'] = unicode(idx+1)
-
-        if depth == 0:
-            self.host.bridge.progressFinished(progress_id,
-                {u'{}{}'.format(URL_REDIRECT_PREFIX, old): new for old, new in url_redirect.iteritems()},
-                client.profile)
-            self.host.removeProgressCb(progress_id, client.profile)
-            del client._blogImport_progress[progress_id]
-
-    @defer.inlineCallbacks
-    def blogFilters(self, client, mb_data, options):
+    def itemFilters(self, client, mb_data, options):
         """Apply filters according to options
 
         modify mb_data in place
@@ -352,41 +273,3 @@
                 os.unlink(tmp_file)
             except OSError:
                 pass
-
-    def register(self, name, callback, short_desc='', long_desc=''):
-        """Register a blogImport method
-
-        @param name(unicode): unique importer name, should indicate the blogging software it handler and always lowercase
-        @param callback(callable): method to call:
-            the signature must be (client, location, options) (cf. [blogImport])
-            the importer must return a tuple with (posts_data, posts_count)
-
-            posts_data is an iterable of dict which must have the following keys:
-                'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en)
-                    the importer MUST NOT create node or call XEP-0277 plugin itself
-                    'comments*' key MUST NOT be used in this microblog_data, see bellow for comments
-                    It is recommanded to use a unique id in the "id" key which is constant per blog item,
-                    so if the import fail, a new import will overwrite the failed items and avoid duplicates.
-
-                'comments' (list[list[dict]],None): Dictionaries must have the same keys as main item (i.e. 'blog' and 'comments')
-                    a list of list is used because XEP-0277 can handler several comments nodes,
-                    but in most cases, there will we only one item it the first list (something like [[{comment1_data},{comment2_data}, ...]])
-                    blog['allow_comments'] must be True if there is any comment, and False (or not present) if comments are not allowed.
-                    If allow_comments is False and some comments are present, a exceptions.DataError will be raised
-            the import MAY optionally have the following keys:
-                'url' (unicode): former url of the post (only the path, without host part)
-                    if present the association to the new path will be displayed to user, so it can make redirections if necessary
-
-            posts_count (int, None) indicate the total number of posts (without comments)
-                useful to display a progress indicator when the iterator is a generator
-                use None if you can't guess the total number of blog posts
-        @param short_desc(unicode): one line description of the importer
-        @param long_desc(unicode): long description of the importer, its options, etc.
-        """
-        name = name.lower()
-        if name in self._importers:
-            raise exceptions.ConflictError(u"A blog importer with the name {} already exsit".format(name))
-        self._importers[name] = BlogImporter(callback, short_desc, long_desc)
-
-    def unregister(self, name):
-        del self._importers[name]