comparison sat/plugins/plugin_blog_import.py @ 2624:56f94936df1e

code style reformatting using black

author    Goffi <goffi@goffi.org>
date      Wed, 27 Jun 2018 20:14:46 +0200
parents   26edcf3a30eb
children  378188abe941
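The changeset below is a pure formatting pass. As a rough illustration, the same kind of result can be reproduced with black's Python API; the following is a minimal sketch assuming a current black release (the `format_str`/`Mode` interface may differ from the black version used for this 2018 commit):

```python
# Minimal sketch, assuming a current black release: reformat the plugin
# source in memory and check whether anything would change. The
# format_str/Mode API may differ from the black version used in 2018.
import black

with open("sat/plugins/plugin_blog_import.py") as f:
    source = f.read()

# black's default line length is 88, which matches the wrapping visible in
# the diff below (long calls exploded onto several lines, single quotes
# normalised to double quotes)
formatted = black.format_str(source, mode=black.Mode(line_length=88))

if formatted != source:
    print("black would reformat this file")
```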
diff -r 49533de4540b -r 56f94936df1e sat/plugins/plugin_blog_import.py
--- a/sat/plugins/plugin_blog_import.py
+++ b/sat/plugins/plugin_blog_import.py
@@ -19,10 +19,11 @@
 
 
 from sat.core.i18n import _
 from sat.core.constants import Const as C
 from sat.core.log import getLogger
+
 log = getLogger(__name__)
 from twisted.internet import defer
 from twisted.web import client as web_client
 from twisted.words.xish import domish
 from sat.core import exceptions
@@ -39,37 +40,40 @@
     C.PI_IMPORT_NAME: "BLOG_IMPORT",
     C.PI_TYPE: (C.PLUG_TYPE_BLOG, C.PLUG_TYPE_IMPORT),
     C.PI_DEPENDENCIES: ["IMPORT", "XEP-0060", "XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
     C.PI_MAIN: "BlogImportPlugin",
     C.PI_HANDLER: "no",
-    C.PI_DESCRIPTION: _(u"""Blog import management:
-This plugin manage the different blog importers which can register to it, and handle generic importing tasks.""")
+    C.PI_DESCRIPTION: _(
+        u"""Blog import management:
+This plugin manage the different blog importers which can register to it, and handle generic importing tasks."""
+    ),
 }
 
-OPT_HOST = 'host'
-OPT_UPLOAD_IMAGES = 'upload_images'
-OPT_UPLOAD_IGNORE_HOST = 'upload_ignore_host'
-OPT_IGNORE_TLS = 'ignore_tls_errors'
-URL_REDIRECT_PREFIX = 'url_redirect_'
+OPT_HOST = "host"
+OPT_UPLOAD_IMAGES = "upload_images"
+OPT_UPLOAD_IGNORE_HOST = "upload_ignore_host"
+OPT_IGNORE_TLS = "ignore_tls_errors"
+URL_REDIRECT_PREFIX = "url_redirect_"
 
 
 class BlogImportPlugin(object):
     BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS)
     JSON_OPTIONS = ()
-    OPT_DEFAULTS = {OPT_UPLOAD_IMAGES: True,
-                    OPT_IGNORE_TLS: False}
+    OPT_DEFAULTS = {OPT_UPLOAD_IMAGES: True, OPT_IGNORE_TLS: False}
 
     def __init__(self, host):
         log.info(_("plugin Blog Import initialization"))
         self.host = host
-        self._u = host.plugins['UPLOAD']
-        self._p = host.plugins['XEP-0060']
-        self._m = host.plugins['XEP-0277']
-        self._s = self.host.plugins['TEXT-SYNTAXES']
-        host.plugins['IMPORT'].initialize(self, u'blog')
+        self._u = host.plugins["UPLOAD"]
+        self._p = host.plugins["XEP-0060"]
+        self._m = host.plugins["XEP-0277"]
+        self._s = self.host.plugins["TEXT-SYNTAXES"]
+        host.plugins["IMPORT"].initialize(self, u"blog")
 
-    def importItem(self, client, item_import_data, session, options, return_data, service, node):
+    def importItem(
+        self, client, item_import_data, session, options, return_data, service, node
+    ):
         """importItem specialized for blog import
 
         @param item_import_data(dict):
             * mandatory keys:
                 'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en)
@@ -97,55 +101,62 @@
             - OPT_IGNORE_TLS (bool): ignore TLS error for image upload.
                 Default: False
         @param return_data(dict): will contain link between former posts and new items
 
         """
-        mb_data = item_import_data['blog']
+        mb_data = item_import_data["blog"]
         try:
-            item_id = mb_data['id']
+            item_id = mb_data["id"]
         except KeyError:
-            item_id = mb_data['id'] = unicode(shortuuid.uuid())
+            item_id = mb_data["id"] = unicode(shortuuid.uuid())
 
         try:
             # we keep the link between old url and new blog item
             # so the user can redirect its former blog urls
-            old_uri = item_import_data['url']
+            old_uri = item_import_data["url"]
         except KeyError:
             pass
         else:
             new_uri = return_data[URL_REDIRECT_PREFIX + old_uri] = self._p.getNodeURI(
                 service if service is not None else client.jid.userhostJID(),
                 node or self._m.namespace,
-                item_id)
-            log.info(u"url link from {old} to {new}".format(
-                old=old_uri, new=new_uri))
+                item_id,
+            )
+            log.info(u"url link from {old} to {new}".format(old=old_uri, new=new_uri))
 
         return mb_data
 
     @defer.inlineCallbacks
     def importSubItems(self, client, item_import_data, mb_data, session, options):
         # comments data
-        if len(item_import_data['comments']) != 1:
+        if len(item_import_data["comments"]) != 1:
             raise NotImplementedError(u"can't manage multiple comment links")
-        allow_comments = C.bool(mb_data.get('allow_comments', C.BOOL_FALSE))
+        allow_comments = C.bool(mb_data.get("allow_comments", C.BOOL_FALSE))
         if allow_comments:
             comments_service = yield self._m.getCommentsService(client)
-            comments_node = self._m.getCommentsNode(mb_data['id'])
-            mb_data['comments_service'] = comments_service.full()
-            mb_data['comments_node'] = comments_node
+            comments_node = self._m.getCommentsNode(mb_data["id"])
+            mb_data["comments_service"] = comments_service.full()
+            mb_data["comments_node"] = comments_node
             recurse_kwargs = {
-                'items_import_data':item_import_data['comments'][0],
-                'service':comments_service,
-                'node':comments_node}
+                "items_import_data": item_import_data["comments"][0],
+                "service": comments_service,
+                "node": comments_node,
+            }
             defer.returnValue(recurse_kwargs)
         else:
-            if item_import_data['comments'][0]:
-                raise exceptions.DataError(u"allow_comments set to False, but comments are there")
+            if item_import_data["comments"][0]:
+                raise exceptions.DataError(
+                    u"allow_comments set to False, but comments are there"
+                )
             defer.returnValue(None)
 
     def publishItem(self, client, mb_data, service, node, session):
-        log.debug(u"uploading item [{id}]: {title}".format(id=mb_data['id'], title=mb_data.get('title','')))
+        log.debug(
+            u"uploading item [{id}]: {title}".format(
+                id=mb_data["id"], title=mb_data.get("title", "")
+            )
+        )
         return self._m.send(client, mb_data, service, node)
 
     @defer.inlineCallbacks
     def itemFilters(self, client, mb_data, session, options):
         """Apply filters according to options
@@ -159,58 +170,84 @@
         # filters can then be used by converting text to XHTML
         if not options:
             return
 
         # we want only XHTML content
-        for prefix in ('content',): # a tuple is use, if title need to be added in the future
+        for prefix in (
+            "content",
+        ):  # a tuple is use, if title need to be added in the future
             try:
-                rich = mb_data['{}_rich'.format(prefix)]
+                rich = mb_data["{}_rich".format(prefix)]
             except KeyError:
                 pass
             else:
-                if '{}_xhtml'.format(prefix) in mb_data:
-                    raise exceptions.DataError(u"importer gave {prefix}_rich and {prefix}_xhtml at the same time, this is not allowed".format(prefix=prefix))
+                if "{}_xhtml".format(prefix) in mb_data:
+                    raise exceptions.DataError(
+                        u"importer gave {prefix}_rich and {prefix}_xhtml at the same time, this is not allowed".format(
+                            prefix=prefix
+                        )
+                    )
                 # we convert rich syntax to XHTML here, so we can handle filters easily
-                converted = yield self._s.convert(rich, self._s.getCurrentSyntax(client.profile), safe=False)
-                mb_data['{}_xhtml'.format(prefix)] = converted
-                del mb_data['{}_rich'.format(prefix)]
+                converted = yield self._s.convert(
+                    rich, self._s.getCurrentSyntax(client.profile), safe=False
+                )
+                mb_data["{}_xhtml".format(prefix)] = converted
+                del mb_data["{}_rich".format(prefix)]
 
             try:
-                mb_data['txt']
+                mb_data["txt"]
             except KeyError:
                 pass
             else:
-                if '{}_xhtml'.format(prefix) in mb_data:
-                    log.warning(u"{prefix}_text will be replaced by converted {prefix}_xhtml, so filters can be handled".format(prefix=prefix))
-                    del mb_data['{}_text'.format(prefix)]
+                if "{}_xhtml".format(prefix) in mb_data:
+                    log.warning(
+                        u"{prefix}_text will be replaced by converted {prefix}_xhtml, so filters can be handled".format(
+                            prefix=prefix
+                        )
+                    )
+                    del mb_data["{}_text".format(prefix)]
                 else:
-                    log.warning(u"importer gave a text {prefix}, blog filters don't work on text {prefix}".format(prefix=prefix))
+                    log.warning(
+                        u"importer gave a text {prefix}, blog filters don't work on text {prefix}".format(
+                            prefix=prefix
+                        )
+                    )
                     return
 
         # at this point, we have only XHTML version of content
         try:
-            top_elt = xml_tools.ElementParser()(mb_data['content_xhtml'], namespace=C.NS_XHTML)
+            top_elt = xml_tools.ElementParser()(
+                mb_data["content_xhtml"], namespace=C.NS_XHTML
+            )
         except domish.ParserError:
             # we clean the xml and try again our luck
-            cleaned = yield self._s.cleanXHTML(mb_data['content_xhtml'])
+            cleaned = yield self._s.cleanXHTML(mb_data["content_xhtml"])
             top_elt = xml_tools.ElementParser()(cleaned, namespace=C.NS_XHTML)
         opt_host = options.get(OPT_HOST)
         if opt_host:
             # we normalise the domain
             parsed_host = urlparse.urlsplit(opt_host)
-            opt_host = urlparse.urlunsplit((parsed_host.scheme or 'http', parsed_host.netloc or parsed_host.path, '', '', ''))
+            opt_host = urlparse.urlunsplit(
+                (
+                    parsed_host.scheme or "http",
+                    parsed_host.netloc or parsed_host.path,
+                    "",
+                    "",
+                    "",
+                )
+            )
 
         tmp_dir = tempfile.mkdtemp()
         try:
             # TODO: would be nice to also update the hyperlinks to these images, e.g. when you have <a href="{url}"><img src="{url}"></a>
-            for img_elt in xml_tools.findAll(top_elt, names=[u'img']):
+            for img_elt in xml_tools.findAll(top_elt, names=[u"img"]):
                 yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir)
         finally:
-            os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong
+            os.rmdir(tmp_dir)  # XXX: tmp_dir should be empty, or something went wrong
 
         # we now replace the content with filtered one
-        mb_data['content_xhtml'] = top_elt.toXml()
+        mb_data["content_xhtml"] = top_elt.toXml()
 
     @defer.inlineCallbacks
     def imgFilters(self, client, img_elt, options, opt_host, tmp_dir):
         """Filters handling images
 
@@ -220,27 +257,30 @@
         @param options(dict): filters options
         @param opt_host(unicode): normalised host given in options
         @param tmp_dir(str): path to temp directory
         """
         try:
-            url = img_elt['src']
-            if url[0] == u'/':
+            url = img_elt["src"]
+            if url[0] == u"/":
                 if not opt_host:
-                    log.warning(u"host was not specified, we can't deal with src without host ({url}) and have to ignore the following <img/>:\n{xml}"
-                                .format(url=url, xml=img_elt.toXml()))
+                    log.warning(
+                        u"host was not specified, we can't deal with src without host ({url}) and have to ignore the following <img/>:\n{xml}".format(
+                            url=url, xml=img_elt.toXml()
+                        )
+                    )
                     return
                 else:
                     url = urlparse.urljoin(opt_host, url)
-            filename = url.rsplit('/',1)[-1].strip()
+            filename = url.rsplit("/", 1)[-1].strip()
             if not filename:
                 raise KeyError
         except (KeyError, IndexError):
             log.warning(u"ignoring invalid img element: {}".format(img_elt.toXml()))
             return
 
         # we change the url for the normalized one
-        img_elt['src'] = url
+        img_elt["src"] = url
 
         if options.get(OPT_UPLOAD_IMAGES, False):
             # upload is requested
             try:
                 ignore_host = options[OPT_UPLOAD_IGNORE_HOST]
@@ -248,27 +288,36 @@
                 pass
             else:
                 # host is the ignored one, we skip
                 parsed_url = urlparse.urlsplit(url)
                 if ignore_host in parsed_url.hostname:
-                    log.info(u"Don't upload image at {url} because of {opt} option".format(
-                        url=url, opt=OPT_UPLOAD_IGNORE_HOST))
+                    log.info(
+                        u"Don't upload image at {url} because of {opt} option".format(
+                            url=url, opt=OPT_UPLOAD_IGNORE_HOST
+                        )
+                    )
                     return
 
             # we download images and re-upload them via XMPP
-            tmp_file = os.path.join(tmp_dir, filename).encode('utf-8')
-            upload_options = {'ignore_tls_errors': options.get(OPT_IGNORE_TLS, False)}
+            tmp_file = os.path.join(tmp_dir, filename).encode("utf-8")
+            upload_options = {"ignore_tls_errors": options.get(OPT_IGNORE_TLS, False)}
 
             try:
-                yield web_client.downloadPage(url.encode('utf-8'), tmp_file)
-                filename = filename.replace(u'%', u'_') # FIXME: tmp workaround for a bug in prosody http upload
-                dummy, download_d = yield self._u.upload(client, tmp_file, filename, options=upload_options)
+                yield web_client.downloadPage(url.encode("utf-8"), tmp_file)
+                filename = filename.replace(
+                    u"%", u"_"
+                )  # FIXME: tmp workaround for a bug in prosody http upload
+                dummy, download_d = yield self._u.upload(
+                    client, tmp_file, filename, options=upload_options
+                )
                 download_url = yield download_d
             except Exception as e:
-                log.warning(u"can't download image at {url}: {reason}".format(url=url, reason=e))
+                log.warning(
+                    u"can't download image at {url}: {reason}".format(url=url, reason=e)
+                )
             else:
-                img_elt['src'] = download_url
+                img_elt["src"] = download_url
 
             try:
                 os.unlink(tmp_file)
             except OSError:
                 pass