libervia-backend: comparison of src/plugins/plugin_blog_import.py @ 1831:68c0dc13d821
plugin blog import, XEP-0277: progress + redirect:
- progress is now handled
- URL redirections are handled with PubSub URIs, and returned as metadata with progressFinished
- tmp_dir is cleaned in a finally clause
author:   Goffi <goffi@goffi.org>
date:     Sat, 23 Jan 2016 20:01:28 +0100
parents:  4e51f21c687f
children: cdecf553e051
comparing 1830:74676624ad5d (old) with 1831:68c0dc13d821 (new)
@@ -36,21 +36,22 @@
 
 PLUGIN_INFO = {
     "name": "blog import",
     "import_name": "BLOG_IMPORT",
     "type": C.PLUG_TYPE_BLOG,
-    "dependencies": ["XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
+    "dependencies": ["XEP-0060", "XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
     "main": "BlogImportPlugin",
     "handler": "no",
     "description": _(u"""Blog import management:
 This plugin manage the different blog importers which can register to it, and handler generic importing tasks.""")
 }
 
 OPT_HOST = 'host'
 OPT_UPLOAD_IMAGES = 'upload_images'
 OPT_UPLOAD_IGNORE_HOST = 'upload_ignore_host'
 OPT_IGNORE_TLS = 'ignore_tls_errors'
+URL_REDIRECT_PREFIX = 'url_redirect_'
 BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS)
 
 
 BlogImporter = collections.namedtuple('BlogImporter', ('callback', 'short_desc', 'long_desc'))
 
@@ -60,15 +61,20 @@
     def __init__(self, host):
         log.info(_("plugin Blog Import initialization"))
         self.host = host
         self._importers = {}
         self._u = host.plugins['UPLOAD']
+        self._p = host.plugins['XEP-0060']
         self._m = host.plugins['XEP-0277']
         self._s = self.host.plugins['TEXT-SYNTAXES']
-        host.bridge.addMethod("blogImport", ".plugin", in_sign='ssa{ss}ss', out_sign='', method=self._blogImport, async=True)
+        host.bridge.addMethod("blogImport", ".plugin", in_sign='ssa{ss}ss', out_sign='s', method=self._blogImport, async=True)
         host.bridge.addMethod("blogImportList", ".plugin", in_sign='', out_sign='a(ss)', method=self.listImporters)
         host.bridge.addMethod("blogImportDesc", ".plugin", in_sign='s', out_sign='(ss)', method=self.getDescription)
+
+    def getProgress(self, progress_id, profile):
+        client = self.host.getClient(profile)
+        return client._blogImport_progress[progress_id]
 
     def listImporters(self):
         importers = self._importers.keys()
         importers.sort()
         return [(name, self._importers[name].short_desc) for name in self._importers]
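The getProgress() callback added above simply hands back the per-import dict stored on the client; 'position' and 'size' are kept as unicode strings, presumably because progress data crosses the bridge as string-to-string maps. A minimal sketch of that structure (identifiers and values below are illustrative, not taken from the plugin):

    progress_id = u'e3b0c442-98fc-4c14-9af4-6efc21a0d2b8'  # a uuid4, as generated in blogImport
    _blogImport_progress = {                               # attribute set on the client object
        progress_id: {
            u'position': u'12',  # posts already published, updated after each top-level item
            u'size': u'42',      # total post count, only present when the importer gave posts_count
        }
    }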
@@ -113,10 +119,11 @@
                 - OPT_UPLOAD_IGNORE_HOST (unicode): don't upload images from this host
                 - OPT_IGNORE_TLS (bool): ignore TLS error for image upload.
                     Default: False
         @param pubsub_service(jid.JID, None): jid of the PubSub service where blog must be imported
             None to use profile's server
+        @return (unicode): progress id
         """
         if options is None:
             options = {}
         else:
             for opt_name, opt_default in ((OPT_UPLOAD_IMAGES, True),
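To make the options documented above concrete, a caller could pass a dict along these lines (string values, since the bridge signature a{ss} carries them that way; the boolean entries are then converted with C.bool and the host is normalised further down; all values here are invented):

    options = {
        'host': 'http://blog.example.org',            # OPT_HOST: host of the original blog
        'upload_images': 'true',                      # OPT_UPLOAD_IMAGES, parsed with C.bool
        'upload_ignore_host': 'static.example.org',   # OPT_UPLOAD_IGNORE_HOST
        'ignore_tls_errors': 'false',                 # OPT_IGNORE_TLS
    }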
@@ -132,53 +139,70 @@
                     del options[opt_name]
         try:
             importer = self._importers[name]
         except KeyError:
             raise exceptions.NotFound(u"Importer [{}] not found".format(name))
-        posts_data = yield importer.callback(client, location, options)
-        url_links = {}
-        yield self._recursiveImport(client, posts_data, options, url_links)
+        posts_data, posts_count = yield importer.callback(client, location, options)
+        url_redirect = {}
+        progress_id = unicode(uuid.uuid4())
+        try:
+            progress_data = client._blogImport_progress
+        except AttributeError:
+            progress_data = client._blogImport_progress = {}
+        progress_data[progress_id] = {u'position': '0'}
+        if posts_count is not None:
+            progress_data[progress_id]['size'] = unicode(posts_count)
+        metadata = {'name': u'{}: {}'.format(name, location),
+                    'direction': 'out',
+                    'type': 'BLOG_IMPORT'
+                    }
+        self.host.registerProgressCb(progress_id, self.getProgress, metadata, profile=client.profile)
+        self.host.bridge.progressStarted(progress_id, metadata, client.profile)
+        self._recursiveImport(client, posts_data, progress_id, options, url_redirect)
+        defer.returnValue(progress_id)
 
     @defer.inlineCallbacks
-    def _recursiveImport(self, client, posts_data, options, url_links, service=None, node=None, depth=0):
+    def _recursiveImport(self, client, posts_data, progress_id, options, url_redirect, service=None, node=None, depth=0):
         """Do the upload recursively
 
         @param posts_data(list): list of data as specified in [register]
         @param options(dict): import options
-        @param url_links(dict): link between former posts and new items
+        @param url_redirect(dict): link between former posts and new items
         @param service(jid.JID, None): PubSub service to use
         @param node(unicode, None): PubSub node to use
         @param depth(int): level of recursion
         """
-        for data in posts_data:
+        for idx, data in enumerate(posts_data):
             # data checks/filters
             mb_data = data['blog']
             try:
                 item_id = mb_data['id']
             except KeyError:
                 item_id = mb_data['id'] = unicode(uuid.uuid4())
 
             try:
                 # we keep the link between old url and new blog item
                 # so the user can redirect its former blog urls
-                old_url = data['url']
+                old_uri = data['url']
             except KeyError:
                 pass
             else:
-                url_links[old_url] = (service, node, item_id)
-                log.info(u"url link from {old} to {service}/{node}/{id}".format(
-                    old = old_url, service=service or u'server', node=node or u'', id=item_id))
-
-            depth or log.debug(u"Filtering data")
+                new_uri = url_redirect[old_uri] = self._p.getNodeURI(
+                    service if service is not None else client.jid.userhostJID(),
+                    node or self._m.namespace,
+                    item_id)
+                log.info(u"url link from {old} to {new}".format(
+                    old=old_uri, new=new_uri))
+
             yield self.blogFilters(client, mb_data, options)
 
             # comments data
             if len(data['comments']) != 1:
                 raise NotImplementedError(u"can't manage multiple comment links")
             allow_comments = C.bool(mb_data.get('allow_comments', C.BOOL_FALSE))
             if allow_comments:
-                comments_service, comments_node = self._m.getCommentService(client), self._m.getCommentNode(item_id)
+                comments_service, comments_node = self._m.getCommentsService(client), self._m.getCommentsNode(item_id)
                 mb_data['comments_service'] = comments_service
                 mb_data['comments_node'] = comments_node
             else:
                 if data['comments'][0]:
                     raise exceptions.DataError(u"allow_comments set to False, but comments are there")
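The new_uri built with getNodeURI() above is an XMPP URI pointing at the imported item; it should look roughly like the XEP-0060 form below (service, node and item values are invented for illustration):

    new_uri = u'xmpp:pubsub.example.net?;node=urn:xmpp:microblog:0;item=4d3cb2f0-9a51-4eab-8c2d-2f52c771a2a1'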
@@ -188,11 +212,20 @@
             yield self._m.send(mb_data, service, node, profile=client.profile)
 
             # comments upload
             depth or log.debug(u"uploading comments")
             if allow_comments:
-                yield self._recursiveImport(client, data['comments'][0], options, url_links, service=comments_service, node=comments_node, depth=depth+1)
+                yield self._recursiveImport(client, data['comments'][0], progress_id, options, url_redirect, service=comments_service, node=comments_node, depth=depth+1)
+            if depth == 0:
+                client._blogImport_progress[progress_id]['position'] = unicode(idx+1)
+
+        if depth == 0:
+            self.host.bridge.progressFinished(progress_id,
+                {u'{}{}'.format(URL_REDIRECT_PREFIX, old): new for old, new in url_redirect.iteritems()},
+                client.profile)
+            self.host.removeProgressCb(progress_id, client.profile)
+            del client._blogImport_progress[progress_id]
 
     @defer.inlineCallbacks
     def blogFilters(self, client, mb_data, options):
         """Apply filters according to options
 
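At the end of the top-level recursion, progressFinished hands the frontend a mapping whose keys are the old blog paths prefixed with URL_REDIRECT_PREFIX and whose values are the new PubSub item URIs. A sketch of how a frontend or script might post-process that metadata (the helper and sample data below are hypothetical, not part of the plugin):

    URL_REDIRECT_PREFIX = 'url_redirect_'

    def redirects_from_metadata(metadata):
        """Extract a {former path: new XMPP URI} dict from progressFinished metadata."""
        return {key[len(URL_REDIRECT_PREFIX):]: uri
                for key, uri in metadata.items()
                if key.startswith(URL_REDIRECT_PREFIX)}

    metadata = {u'url_redirect_/post/2015/06/01/first-post':
                u'xmpp:pubsub.example.net?;node=urn:xmpp:microblog:0;item=first-post'}
    for old_path, new_uri in redirects_from_metadata(metadata).items():
        print(u'{} -> {}'.format(old_path, new_uri))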
@@ -244,13 +277,15 @@
             # we normalise the domain
             parsed_host = urlparse.urlsplit(opt_host)
             opt_host = urlparse.urlunsplit((parsed_host.scheme or 'http', parsed_host.netloc or parsed_host.path, '', '', ''))
 
         tmp_dir = tempfile.mkdtemp()
-        for img_elt in xml_tools.findAll(top_elt, ['img']):
-            yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir)
-        os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong
+        try:
+            for img_elt in xml_tools.findAll(top_elt, ['img']):
+                yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir)
+        finally:
+            os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong
 
         # we now replace the content with filtered one
         mb_data['content_xhtml'] = top_elt.toXml()
 
     @defer.inlineCallbacks
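The try/finally introduced above guarantees the temporary directory is removed even if an image filter raises, and keeping os.rmdir (rather than shutil.rmtree) doubles as a sanity check: it raises OSError if anything was left behind. A small standalone illustration of that behaviour:

    import os
    import tempfile

    tmp_dir = tempfile.mkdtemp()
    open(os.path.join(tmp_dir, 'leftover'), 'w').close()  # simulate a file that was not cleaned up
    try:
        os.rmdir(tmp_dir)  # raises OSError because the directory is not empty
    except OSError:
        print(u"something went wrong, {} still holds files".format(tmp_dir))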
@@ -320,11 +355,13 @@
         """Register a blogImport method
 
         @param name(unicode): unique importer name, should indicate the blogging software it handler and always lowercase
         @param callback(callable): method to call:
             the signature must be (client, location, options) (cf. [blogImport])
-            the importer must return an iterable of dict which must have the following keys:
+            the importer must return a tuple with (posts_data, posts_count)
+
+            posts_data is an iterable of dict which must have the following keys:
                 'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en)
                     the importer MUST NOT create node or call XEP-0277 plugin itself
                     'comments*' key MUST NOT be used in this microblog_data, see bellow for comments
                 It is recommanded to use a unique id in the "id" key which is constant per blog item,
                 so if the import fail, a new import will overwrite the failed items and avoid duplicates.
@@ -336,13 +373,13 @@
                 If allow_comments is False and some comments are present, a exceptions.DataError will be raised
             the import MAY optionally have the following keys:
                 'url' (unicode): former url of the post (only the path, without host part)
                     if present the association to the new path will be displayed to user, so it can make redirections if necessary
 
-            Optionally, the importer plugin can return a tuple with the just described iterator and a post_total
-            where "post_total" (int) indicate the total number of posts (without comments)
+            posts_count (int, None) indicate the total number of posts (without comments)
                 useful to display a progress indicator when the iterator is a generator
+                use None if you can't guess the total number of blog posts
         @param short_desc(unicode): one line description of the importer
         @param long_desc(unicode): long description of the importer, its options, etc.
         """
         name = name.lower()
         if name in self._importers:
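For illustration, a minimal importer honouring the new (posts_data, posts_count) contract could look like the sketch below; the register() call and the 'BLOG_IMPORT' plugin lookup are assumptions based on this docstring and on PLUGIN_INFO, and the example data is invented:

    def dotclear_import(client, location, options):
        """Hypothetical importer callback: parse `location` and return one dict per post."""
        posts = [{
            'blog': {'id': u'first-post',
                     'title': u'First post',
                     'content_xhtml': u'<div>hello</div>'},
            'comments': [[]],                       # exactly one comments list, empty here
            'url': u'/post/2015/06/01/first-post',  # former path, reused for url_redirect_ metadata
        }]
        return posts, len(posts)                    # (posts_data, posts_count)

    # registration, assuming the register() method described by the docstring above:
    # host.plugins['BLOG_IMPORT'].register(u'dotclear', dotclear_import,
    #                                      short_desc=u'Dotclear import sketch',
    #                                      long_desc=u'illustrative importer, not a real plugin')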