comparison src/plugins/plugin_import.py @ 2369:cdaa58e14553

plugin import: generic data import plugin. This plugin handles the tasks common to all importers. Specialized importers (e.g. blog import) use it as a base, and specific importers (e.g. Dotclear) register with the specialized one. The generic methods of the blog importer have been moved to it.
author Goffi <goffi@goffi.org>
date Sun, 01 Oct 2017 12:21:23 +0200
parents
children 2c2b826b0bb3
comparison
equal deleted inserted replaced
2368:3865a772c360 2369:cdaa58e14553
1 #!/usr/bin/env python2
2 # -*- coding: utf-8 -*-
3
4 # SàT plugin for generic data import handling
5 # Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
16
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 from sat.core.i18n import _
21 from sat.core.constants import Const as C
22 from sat.core.log import getLogger
23 log = getLogger(__name__)
24 from twisted.internet import defer
25 from sat.core import exceptions
26 from functools import partial
27 import collections
28 import uuid
29
30
# Plugin metadata; the keys are constants taken from sat.core.constants.Const
# (imported as C). C.PI_MAIN names the class defined in this module.
PLUGIN_INFO = {
    C.PI_NAME: "import",
    C.PI_IMPORT_NAME: "IMPORT",
    C.PI_TYPE: C.PLUG_TYPE_IMPORT,
    C.PI_DEPENDENCIES: [],
    C.PI_MAIN: "ImportPlugin",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _(u"""Generic import plugin, base for specialized importers""")
}

# Record stored for each registered importer: the callable performing the
# import, plus its one-line and long descriptions (see ImportPlugin.register).
Importer = collections.namedtuple('Importer', ('callback', 'short_desc', 'long_desc'))
42
43
44 class ImportPlugin(object):
45
46 def __init__(self, host):
47 log.info(_("plugin Import initialization"))
48 self.host = host
49
50 def initialize(self, import_handler, name):
51 """Initialize a specialized import handler
52
53 @param import_handler(object): specialized import handler instance
54 must have the following methods:
55 - importItem: import a single main item (i.e. prepare data for publishing)
56 - importSubitems: import sub items (i.e. items linked to main item, e.g. comments). Must return a dict with kwargs for recursiveImport if items are to be imported. At east "items_import_data", "service" and "node" keys must be provided.
57 if None is returned, no subitems will be imported
58 - publishItem: actualy publish an item
59 - itemFilters: modify item according to options
60 @param name(unicode): import handler name
61 """
62 assert name == name.lower().strip()
63 log.info(_(u'initializing {name} import handler').format(name=name))
64 import_handler.name = name
65 import_handler.register = partial(self.register, import_handler)
66 import_handler.unregister = partial(self.unregister, import_handler)
67 import_handler.importers = {}
68 def _import(name, location, options, pubsub_service, profile):
69 return self._doImport(import_handler, name, location, options, pubsub_service, profile)
70 def _importList():
71 return self.listImporters(import_handler)
72 def _importDesc(name):
73 return self.getDescription(import_handler, name)
74
75 self.host.bridge.addMethod(name + "Import", ".plugin", in_sign='ssa{ss}ss', out_sign='s', method=_import, async=True)
76 self.host.bridge.addMethod(name + "ImportList", ".plugin", in_sign='', out_sign='a(ss)', method=_importList)
77 self.host.bridge.addMethod(name + "ImportDesc", ".plugin", in_sign='s', out_sign='(ss)', method=_importDesc)
78
79 def getProgress(self, import_handler, progress_id, profile):
80 client = self.host.getClient(profile)
81 return client._import[import_handler.name][progress_id]
82
83 def listImporters(self, import_handler):
84 importers = import_handler.importers.keys()
85 importers.sort()
86 return [(name, import_handler.importers[name].short_desc) for name in import_handler.importers]
87
88 def getDescription(self, import_handler, name):
89 """Return import short and long descriptions
90
91 @param name(unicode): importer name
92 @return (tuple[unicode,unicode]): short and long description
93 """
94 try:
95 importer = import_handler.importers[name]
96 except KeyError:
97 raise exceptions.NotFound(u"{handler_name} importer not found [{name}]".format(
98 handler_name = import_handler.name,
99 name = name))
100 else:
101 return importer.short_desc, importer.long_desc
102
103 def _doImport(self, import_handler, name, location, options, pubsub_service='', profile=C.PROF_KEY_NONE):
104 client = self.host.getClient(profile)
105 options = {key: unicode(value) for key, value in options.iteritems()}
106 for option in import_handler.BOOL_OPTIONS:
107 try:
108 options[option] = C.bool(options[option])
109 except KeyError:
110 pass
111 return self.doImport(client, import_handler, unicode(name), unicode(location), options)
112
113 @defer.inlineCallbacks
114 def doImport(self, client, import_handler, name, location, options=None, pubsub_service=None):
115 """Import data
116
117 @param import_handler(object): instance of the import handler
118 @param name(unicode): name of the importer
119 @param location(unicode): location of the data to import
120 can be an url, a file path, or anything which make sense
121 check importer description for more details
122 @param options(dict, None): extra options.
123 @param pubsub_service(jid.JID, None): jid of the PubSub service where data must be imported
124 None to use profile's server
125 @return (unicode): progress id
126 """
127 if options is None:
128 options = {}
129 else:
130 for opt_name, opt_default in import_handler.OPT_DEFAULTS.iteritems():
131 # we want a filled options dict, with all empty or False values removed
132 try:
133 value =options[opt_name]
134 except KeyError:
135 if opt_default:
136 options[opt_name] = opt_default
137 else:
138 if not value:
139 del options[opt_name]
140 try:
141 importer = import_handler.importers[name]
142 except KeyError:
143 raise exceptions.NotFound(u"Importer [{}] not found".format(name))
144 items_import_data, items_count = yield importer.callback(client, location, options)
145 progress_id = unicode(uuid.uuid4())
146 try:
147 _import = client._import
148 except AttributeError:
149 _import = client._import = {}
150 progress_data = _import.setdefault(import_handler.name, {})
151 progress_data[progress_id] = {u'position': '0'}
152 if items_count is not None:
153 progress_data[progress_id]['size'] = unicode(items_count)
154 metadata = {'name': u'{}: {}'.format(name, location),
155 'direction': 'out',
156 'type': import_handler.name.upper() + '_IMPORT'
157 }
158 self.host.registerProgressCb(progress_id, partial(self.getProgress, import_handler), metadata, profile=client.profile)
159 self.host.bridge.progressStarted(progress_id, metadata, client.profile)
160 url_redirect = {}
161 self.recursiveImport(client, import_handler, items_import_data, progress_id, options, url_redirect)
162 defer.returnValue(progress_id)
163
164 @defer.inlineCallbacks
165 def recursiveImport(self, client, import_handler, items_import_data, progress_id, options, return_data=None, service=None, node=None, depth=0):
166 """Do the import recursively
167
168 @param import_handler(object): instance of the import handler
169 @param items_import_data(iterable): iterable of data as specified in [register]
170 @param progress_id(unicode): id of progression
171 @param options(dict): import options
172 @param return_data(dict): data to return on progressFinished
173 @param service(jid.JID, None): PubSub service to use
174 @param node(unicode, None): PubSub node to use
175 @param depth(int): level of recursion
176 """
177 if return_data is None:
178 return_data = {}
179 for idx, item_import_data in enumerate(items_import_data):
180 item_data = yield import_handler.importItem(client, item_import_data, options, return_data, service, node)
181 yield import_handler.itemFilters(client, item_data, options)
182 recurse_kwargs = yield import_handler.importSubItems(client, item_import_data, item_data, options)
183 yield import_handler.publishItem(client, item_data, service, node)
184
185 if recurse_kwargs is not None:
186 recurse_kwargs['client'] = client
187 recurse_kwargs['import_handler'] = import_handler
188 recurse_kwargs['progress_id'] = progress_id
189 recurse_kwargs.setdefault('options', options)
190 recurse_kwargs['return_data'] = return_data
191 recurse_kwargs['depth'] = depth + 1
192 log.debug(_(u"uploading subitems"))
193 yield self.recursiveImport(**recurse_kwargs)
194
195 if depth == 0:
196 client._import[import_handler.name][progress_id]['position'] = unicode(idx+1)
197
198 if depth == 0:
199 self.host.bridge.progressFinished(progress_id,
200 return_data,
201 client.profile)
202 self.host.removeProgressCb(progress_id, client.profile)
203 del client._import[import_handler.name][progress_id]
204
205 def register(self, import_handler, name, callback, short_desc='', long_desc=''):
206 """Register an Importer method
207
208 @param name(unicode): unique importer name, should indicate the software it can import and always lowercase
209 @param callback(callable): method to call:
210 the signature must be (client, location, options) (cf. [doImport])
211 the importer must return a tuple with (items_import_data, items_count)
212 items_import_data(iterable[dict]) data specific to specialized importer
213 cf. importItem docstring of specialized importer for details
214 items_count (int, None) indicate the total number of items (without subitems)
215 useful to display a progress indicator when the iterator is a generator
216 use None if you can't guess the total number of items
217 @param short_desc(unicode): one line description of the importer
218 @param long_desc(unicode): long description of the importer, its options, etc.
219 """
220 name = name.lower()
221 if name in import_handler.importers:
222 raise exceptions.ConflictError(_(u"An {handler_name} importer with the name {name} already exist").format(
223 handler_name = import_handler.name,
224 name = name))
225 import_handler.importers[name] = Importer(callback, short_desc, long_desc)
226
227 def unregister(self, import_handler, name):
228 del import_handler.importers[name]