Mercurial > libervia-backend
comparison src/plugins/plugin_blog_import.py @ 2369:cdaa58e14553
plugin import: generic data import plugin:
This plugin handles common tasks for importers. Specialized importers (e.g. blog import) use it as a base, and specific importers (e.g. Dotclear) register with the specialized one.
The blog importer's generic methods have been moved to it.
author | Goffi <goffi@goffi.org> |
---|---|
date | Sun, 01 Oct 2017 12:21:23 +0200 |
parents | 33c8c4973743 |
children | 2c2b826b0bb3 |
comparison
equal
deleted
inserted
replaced
2368:3865a772c360 | 2369:cdaa58e14553 |
---|---|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 # GNU Affero General Public License for more details. | 15 # GNU Affero General Public License for more details. |
16 | 16 |
17 # You should have received a copy of the GNU Affero General Public License | 17 # You should have received a copy of the GNU Affero General Public License |
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | |
19 | 20 |
20 from sat.core.i18n import _ | 21 from sat.core.i18n import _ |
21 from sat.core.constants import Const as C | 22 from sat.core.constants import Const as C |
22 from sat.core.log import getLogger | 23 from sat.core.log import getLogger |
23 log = getLogger(__name__) | 24 log = getLogger(__name__) |
29 import collections | 30 import collections |
30 import os | 31 import os |
31 import os.path | 32 import os.path |
32 import tempfile | 33 import tempfile |
33 import urlparse | 34 import urlparse |
34 import uuid | 35 import shortuuid |
35 | 36 |
36 | 37 |
37 PLUGIN_INFO = { | 38 PLUGIN_INFO = { |
38 C.PI_NAME: "blog import", | 39 C.PI_NAME: "blog import", |
39 C.PI_IMPORT_NAME: "BLOG_IMPORT", | 40 C.PI_IMPORT_NAME: "BLOG_IMPORT", |
40 C.PI_TYPE: C.PLUG_TYPE_BLOG, | 41 C.PI_TYPE: (C.PLUG_TYPE_BLOG, C.PLUG_TYPE_IMPORT), |
41 C.PI_DEPENDENCIES: ["XEP-0060", "XEP-0277", "TEXT-SYNTAXES", "UPLOAD"], | 42 C.PI_DEPENDENCIES: ["IMPORT", "XEP-0060", "XEP-0277", "TEXT-SYNTAXES", "UPLOAD"], |
42 C.PI_MAIN: "BlogImportPlugin", | 43 C.PI_MAIN: "BlogImportPlugin", |
43 C.PI_HANDLER: "no", | 44 C.PI_HANDLER: "no", |
44 C.PI_DESCRIPTION: _(u"""Blog import management: | 45 C.PI_DESCRIPTION: _(u"""Blog import management: |
45 This plugin manage the different blog importers which can register to it, and handler generic importing tasks.""") | 46 This plugin manage the different blog importers which can register to it, and handle generic importing tasks.""") |
46 } | 47 } |
47 | 48 |
48 OPT_HOST = 'host' | 49 OPT_HOST = 'host' |
49 OPT_UPLOAD_IMAGES = 'upload_images' | 50 OPT_UPLOAD_IMAGES = 'upload_images' |
50 OPT_UPLOAD_IGNORE_HOST = 'upload_ignore_host' | 51 OPT_UPLOAD_IGNORE_HOST = 'upload_ignore_host' |
51 OPT_IGNORE_TLS = 'ignore_tls_errors' | 52 OPT_IGNORE_TLS = 'ignore_tls_errors' |
52 URL_REDIRECT_PREFIX = 'url_redirect_' | 53 URL_REDIRECT_PREFIX = 'url_redirect_' |
53 BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS) | |
54 | 54 |
55 | 55 |
56 BlogImporter = collections.namedtuple('BlogImporter', ('callback', 'short_desc', 'long_desc')) | 56 BlogImporter = collections.namedtuple('BlogImporter', ('callback', 'short_desc', 'long_desc')) |
57 | 57 |
58 | 58 |
59 class BlogImportPlugin(object): | 59 class BlogImportPlugin(object): |
60 BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS) | |
61 OPT_DEFAULTS = {OPT_UPLOAD_IMAGES: True, | |
62 OPT_IGNORE_TLS: False} | |
60 | 63 |
61 def __init__(self, host): | 64 def __init__(self, host): |
62 log.info(_("plugin Blog Import initialization")) | 65 log.info(_("plugin Blog Import initialization")) |
63 self.host = host | 66 self.host = host |
64 self._importers = {} | |
65 self._u = host.plugins['UPLOAD'] | 67 self._u = host.plugins['UPLOAD'] |
66 self._p = host.plugins['XEP-0060'] | 68 self._p = host.plugins['XEP-0060'] |
67 self._m = host.plugins['XEP-0277'] | 69 self._m = host.plugins['XEP-0277'] |
68 self._s = self.host.plugins['TEXT-SYNTAXES'] | 70 self._s = self.host.plugins['TEXT-SYNTAXES'] |
69 host.bridge.addMethod("blogImport", ".plugin", in_sign='ssa{ss}ss', out_sign='s', method=self._blogImport, async=True) | 71 host.plugins['IMPORT'].initialize(self, u'blog') |
70 host.bridge.addMethod("blogImportList", ".plugin", in_sign='', out_sign='a(ss)', method=self.listImporters) | 72 |
71 host.bridge.addMethod("blogImportDesc", ".plugin", in_sign='s', out_sign='(ss)', method=self.getDescription) | 73 def importItem(self, client, item_import_data, options, return_data, service, node): |
72 | 74 """importItem specialized for blog import |
73 def getProgress(self, progress_id, profile): | 75 |
74 client = self.host.getClient(profile) | 76 @param items_import_data(iterable[dict]): |
75 return client._blogImport_progress[progress_id] | 77 * mandatory keys: |
76 | 78 'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en) |
77 def listImporters(self): | 79 the importer MUST NOT create node or call XEP-0277 plugin itself |
78 importers = self._importers.keys() | 80 'comments*' key MUST NOT be used in this microblog_data, see bellow for comments |
79 importers.sort() | 81 It is recommanded to use a unique id in the "id" key which is constant per blog item, |
80 return [(name, self._importers[name].short_desc) for name in self._importers] | 82 so if the import fail, a new import will overwrite the failed items and avoid duplicates. |
81 | 83 |
82 def getDescription(self, name): | 84 'comments' (list[list[dict]],None): Dictionaries must have the same keys as main item (i.e. 'blog' and 'comments') |
83 """Return import short and long descriptions | 85 a list of list is used because XEP-0277 can handler several comments nodes, |
84 | 86 but in most cases, there will we only one item it the first list (something like [[{comment1_data},{comment2_data}, ...]]) |
85 @param name(unicode): blog importer name | 87 blog['allow_comments'] must be True if there is any comment, and False (or not present) if comments are not allowed. |
86 @return (tuple[unicode,unicode]): short and long description | 88 If allow_comments is False and some comments are present, an exceptions.DataError will be raised |
87 """ | 89 * optional keys: |
88 try: | 90 'url' (unicode): former url of the post (only the path, without host part) |
89 importer = self._importers[name] | 91 if present the association to the new path will be displayed to user, so it can make redirections if necessary |
90 except KeyError: | 92 @param options(dict, None): Below are the generic options, |
91 raise exceptions.NotFound(u"Blog importer not found [{}]".format(name)) | |
92 else: | |
93 return importer.short_desc, importer.long_desc | |
94 | |
95 def _blogImport(self, name, location, options, pubsub_service='', profile=C.PROF_KEY_DEFAULT): | |
96 client = self.host.getClient(profile) | |
97 options = {key: unicode(value) for key, value in options.iteritems()} | |
98 for option in BOOL_OPTIONS: | |
99 try: | |
100 options[option] = C.bool(options[option]) | |
101 except KeyError: | |
102 pass | |
103 return self.blogImport(client, unicode(name), unicode(location), options) | |
104 | |
105 @defer.inlineCallbacks | |
106 def blogImport(self, client, name, location, options=None, pubsub_service=None): | |
107 """Import a blog | |
108 | |
109 @param name(unicode): name of the blog importer | |
110 @param location(unicode): location of the blog data to import | |
111 can be an url, a file path, or anything which make sense | |
112 check importer description for more details | |
113 @param options(dict, None): extra options. Below are the generic options, | |
114 blog importer can have specific ones. All options have unicode values | 93 blog importer can have specific ones. All options have unicode values |
115 generic options: | 94 generic options: |
116 - OPT_HOST (unicode): original host | 95 - OPT_HOST (unicode): original host |
117 - OPT_UPLOAD_IMAGES (bool): upload images to XMPP server if True | 96 - OPT_UPLOAD_IMAGES (bool): upload images to XMPP server if True |
118 see OPT_UPLOAD_IGNORE_HOST. | 97 see OPT_UPLOAD_IGNORE_HOST. |
119 Default: True | 98 Default: True |
120 - OPT_UPLOAD_IGNORE_HOST (unicode): don't upload images from this host | 99 - OPT_UPLOAD_IGNORE_HOST (unicode): don't upload images from this host |
121 - OPT_IGNORE_TLS (bool): ignore TLS error for image upload. | 100 - OPT_IGNORE_TLS (bool): ignore TLS error for image upload. |
122 Default: False | 101 Default: False |
123 @param pubsub_service(jid.JID, None): jid of the PubSub service where blog must be imported | 102 @param return_data(dict): will contain link between former posts and new items |
124 None to use profile's server | 103 |
125 @return (unicode): progress id | |
126 """ | 104 """ |
127 if options is None: | 105 mb_data = item_import_data['blog'] |
128 options = {} | 106 try: |
107 item_id = mb_data['id'] | |
108 except KeyError: | |
109 item_id = mb_data['id'] = unicode(shortuuid.uuid()) | |
110 | |
111 try: | |
112 # we keep the link between old url and new blog item | |
113 # so the user can redirect its former blog urls | |
114 old_uri = item_import_data['url'] | |
115 except KeyError: | |
116 pass | |
129 else: | 117 else: |
130 for opt_name, opt_default in ((OPT_UPLOAD_IMAGES, True), | 118 new_uri = return_data[URL_REDIRECT_PREFIX + old_uri] = self._p.getNodeURI( |
131 (OPT_IGNORE_TLS, False)): | 119 service if service is not None else client.jid.userhostJID(), |
132 # we want an filled options dict, with all empty or False values removed | 120 node or self._m.namespace, |
133 try: | 121 item_id) |
134 value =options[opt_name] | 122 log.info(u"url link from {old} to {new}".format( |
135 except KeyError: | 123 old=old_uri, new=new_uri)) |
136 if opt_default: | 124 |
137 options[opt_name] = opt_default | 125 return mb_data |
138 else: | 126 |
139 if not value: | 127 def importSubItems(self, client, item_import_data, mb_data, options): |
140 del options[opt_name] | 128 # comments data |
141 try: | 129 if len(item_import_data['comments']) != 1: |
142 importer = self._importers[name] | 130 raise NotImplementedError(u"can't manage multiple comment links") |
143 except KeyError: | 131 allow_comments = C.bool(mb_data.get('allow_comments', C.BOOL_FALSE)) |
144 raise exceptions.NotFound(u"Importer [{}] not found".format(name)) | 132 if allow_comments: |
145 posts_data, posts_count = yield importer.callback(client, location, options) | 133 comments_service, comments_node = self._m.getCommentsService(client), self._m.getCommentsNode(mb_data['id']) |
146 url_redirect = {} | 134 mb_data['comments_service'] = comments_service.full() |
147 progress_id = unicode(uuid.uuid4()) | 135 mb_data['comments_node'] = comments_node |
148 try: | 136 recurse_kwargs = { |
149 progress_data = client._blogImport_progress | 137 'items_import_data':item_import_data['comments'][0], |
150 except AttributeError: | 138 'service':comments_service, |
151 progress_data = client._blogImport_progress = {} | 139 'node':comments_node} |
152 progress_data[progress_id] = {u'position': '0'} | 140 return recurse_kwargs |
153 if posts_count is not None: | 141 else: |
154 progress_data[progress_id]['size'] = unicode(posts_count) | 142 if item_import_data['comments'][0]: |
155 metadata = {'name': u'{}: {}'.format(name, location), | 143 raise exceptions.DataError(u"allow_comments set to False, but comments are there") |
156 'direction': 'out', | 144 return None |
157 'type': 'BLOG_IMPORT' | 145 |
158 } | 146 def publishItem(self, client, mb_data, service, node): |
159 self.host.registerProgressCb(progress_id, self.getProgress, metadata, profile=client.profile) | 147 log.debug(u"uploading item [{id}]: {title}".format(id=mb_data['id'], title=mb_data.get('title',''))) |
160 self.host.bridge.progressStarted(progress_id, metadata, client.profile) | 148 return self._m.send(client, mb_data, service, node) |
161 self._recursiveImport(client, posts_data, progress_id, options, url_redirect) | |
162 defer.returnValue(progress_id) | |
163 | 149 |
164 @defer.inlineCallbacks | 150 @defer.inlineCallbacks |
165 def _recursiveImport(self, client, posts_data, progress_id, options, url_redirect, service=None, node=None, depth=0): | 151 def itemFilters(self, client, mb_data, options): |
166 """Do the upload recursively | |
167 | |
168 @param posts_data(list): list of data as specified in [register] | |
169 @param options(dict): import options | |
170 @param url_redirect(dict): link between former posts and new items | |
171 @param service(jid.JID, None): PubSub service to use | |
172 @param node(unicode, None): PubSub node to use | |
173 @param depth(int): level of recursion | |
174 """ | |
175 for idx, data in enumerate(posts_data): | |
176 # data checks/filters | |
177 mb_data = data['blog'] | |
178 try: | |
179 item_id = mb_data['id'] | |
180 except KeyError: | |
181 item_id = mb_data['id'] = unicode(uuid.uuid4()) | |
182 | |
183 try: | |
184 # we keep the link between old url and new blog item | |
185 # so the user can redirect its former blog urls | |
186 old_uri = data['url'] | |
187 except KeyError: | |
188 pass | |
189 else: | |
190 new_uri = url_redirect[old_uri] = self._p.getNodeURI( | |
191 service if service is not None else client.jid.userhostJID(), | |
192 node or self._m.namespace, | |
193 item_id) | |
194 log.info(u"url link from {old} to {new}".format( | |
195 old=old_uri, new=new_uri)) | |
196 | |
197 yield self.blogFilters(client, mb_data, options) | |
198 | |
199 # comments data | |
200 if len(data['comments']) != 1: | |
201 raise NotImplementedError(u"can't manage multiple comment links") | |
202 allow_comments = C.bool(mb_data.get('allow_comments', C.BOOL_FALSE)) | |
203 if allow_comments: | |
204 comments_service, comments_node = self._m.getCommentsService(client), self._m.getCommentsNode(item_id) | |
205 mb_data['comments_service'] = comments_service.full() | |
206 mb_data['comments_node'] = comments_node | |
207 else: | |
208 if data['comments'][0]: | |
209 raise exceptions.DataError(u"allow_comments set to False, but comments are there") | |
210 | |
211 # post upload | |
212 depth or log.debug(u"uploading item [{id}]: {title}".format(id=mb_data['id'], title=mb_data.get('title',''))) | |
213 yield self._m.send(mb_data, service, node, profile=client.profile) | |
214 | |
215 # comments upload | |
216 depth or log.debug(u"uploading comments") | |
217 if allow_comments: | |
218 yield self._recursiveImport(client, data['comments'][0], progress_id, options, url_redirect, service=comments_service, node=comments_node, depth=depth+1) | |
219 if depth == 0: | |
220 client._blogImport_progress[progress_id]['position'] = unicode(idx+1) | |
221 | |
222 if depth == 0: | |
223 self.host.bridge.progressFinished(progress_id, | |
224 {u'{}{}'.format(URL_REDIRECT_PREFIX, old): new for old, new in url_redirect.iteritems()}, | |
225 client.profile) | |
226 self.host.removeProgressCb(progress_id, client.profile) | |
227 del client._blogImport_progress[progress_id] | |
228 | |
229 @defer.inlineCallbacks | |
230 def blogFilters(self, client, mb_data, options): | |
231 """Apply filters according to options | 152 """Apply filters according to options |
232 | 153 |
233 modify mb_data in place | 154 modify mb_data in place |
234 @param posts_data(list[dict]): data as returned by importer callback | 155 @param posts_data(list[dict]): data as returned by importer callback |
235 @param options(dict): dict as given in [blogImport] | 156 @param options(dict): dict as given in [blogImport] |
350 | 271 |
351 try: | 272 try: |
352 os.unlink(tmp_file) | 273 os.unlink(tmp_file) |
353 except OSError: | 274 except OSError: |
354 pass | 275 pass |
355 | |
356 def register(self, name, callback, short_desc='', long_desc=''): | |
357 """Register a blogImport method | |
358 | |
359 @param name(unicode): unique importer name, should indicate the blogging software it handler and always lowercase | |
360 @param callback(callable): method to call: | |
361 the signature must be (client, location, options) (cf. [blogImport]) | |
362 the importer must return a tuple with (posts_data, posts_count) | |
363 | |
364 posts_data is an iterable of dict which must have the following keys: | |
365 'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en) | |
366 the importer MUST NOT create node or call XEP-0277 plugin itself | |
367 'comments*' key MUST NOT be used in this microblog_data, see bellow for comments | |
368 It is recommanded to use a unique id in the "id" key which is constant per blog item, | |
369 so if the import fail, a new import will overwrite the failed items and avoid duplicates. | |
370 | |
371 'comments' (list[list[dict]],None): Dictionaries must have the same keys as main item (i.e. 'blog' and 'comments') | |
372 a list of list is used because XEP-0277 can handler several comments nodes, | |
373 but in most cases, there will we only one item it the first list (something like [[{comment1_data},{comment2_data}, ...]]) | |
374 blog['allow_comments'] must be True if there is any comment, and False (or not present) if comments are not allowed. | |
375 If allow_comments is False and some comments are present, a exceptions.DataError will be raised | |
376 the import MAY optionally have the following keys: | |
377 'url' (unicode): former url of the post (only the path, without host part) | |
378 if present the association to the new path will be displayed to user, so it can make redirections if necessary | |
379 | |
380 posts_count (int, None) indicate the total number of posts (without comments) | |
381 useful to display a progress indicator when the iterator is a generator | |
382 use None if you can't guess the total number of blog posts | |
383 @param short_desc(unicode): one line description of the importer | |
384 @param long_desc(unicode): long description of the importer, its options, etc. | |
385 """ | |
386 name = name.lower() | |
387 if name in self._importers: | |
388 raise exceptions.ConflictError(u"A blog importer with the name {} already exsit".format(name)) | |
389 self._importers[name] = BlogImporter(callback, short_desc, long_desc) | |
390 | |
391 def unregister(self, name): | |
392 del self._importers[name] |