Mercurial > libervia-backend
comparison sat/plugins/plugin_blog_import.py @ 3028:ab2696e34d29
Python 3 port:
/!\ this is a huge commit
/!\ starting from this commit, SàT is needs Python 3.6+
/!\ SàT may be unstable or some features may not work anymore; this will improve with time
This patch port backend, bridge and frontends to Python 3.
Roughly this has been done this way:
- the 2to3 tool has been applied (with Python 3.7)
- all references to python2 have been replaced with python3 (notably shebangs)
- fixed files not handled by 2to3 (notably the shell script)
- several manual fixes
- fixed issues reported by Python 3 that were not handled in Python 2
- replaced "async" with "async_" when needed (it's a reserved word from Python 3.7)
- replaced zope's "implements" with @implementer decorator
- temporary hack to handle data pickled in database, as str or bytes may be returned,
to be checked later
- fixed hash comparison for password
- removed some code which is not needed anymore with Python 3
- deactivated some code which needs to be checked (notably certificate validation)
- tested with jp, fixed reported issues until some basic commands worked
- ported Primitivus (after porting dependencies like urwid satext)
- more manual fixes
author | Goffi <goffi@goffi.org> |
---|---|
date | Tue, 13 Aug 2019 19:08:41 +0200 |
parents | 85d3240a400f |
children | 9d0df638c8b4 |
comparison
equal
deleted
inserted
replaced
3027:ff5bcb12ae60 | 3028:ab2696e34d29 |
---|---|
1 #!/usr/bin/env python2 | 1 #!/usr/bin/env python3 |
2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
3 | 3 |
4 # SàT plugin for import external blogs | 4 # SàT plugin for import external blogs |
5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) | 5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) |
6 | 6 |
29 from sat.core import exceptions | 29 from sat.core import exceptions |
30 from sat.tools import xml_tools | 30 from sat.tools import xml_tools |
31 import os | 31 import os |
32 import os.path | 32 import os.path |
33 import tempfile | 33 import tempfile |
34 import urlparse | 34 import urllib.parse |
35 import shortuuid | 35 import shortuuid |
36 | 36 |
37 | 37 |
38 PLUGIN_INFO = { | 38 PLUGIN_INFO = { |
39 C.PI_NAME: "blog import", | 39 C.PI_NAME: "blog import", |
41 C.PI_TYPE: (C.PLUG_TYPE_BLOG, C.PLUG_TYPE_IMPORT), | 41 C.PI_TYPE: (C.PLUG_TYPE_BLOG, C.PLUG_TYPE_IMPORT), |
42 C.PI_DEPENDENCIES: ["IMPORT", "XEP-0060", "XEP-0277", "TEXT_SYNTAXES", "UPLOAD"], | 42 C.PI_DEPENDENCIES: ["IMPORT", "XEP-0060", "XEP-0277", "TEXT_SYNTAXES", "UPLOAD"], |
43 C.PI_MAIN: "BlogImportPlugin", | 43 C.PI_MAIN: "BlogImportPlugin", |
44 C.PI_HANDLER: "no", | 44 C.PI_HANDLER: "no", |
45 C.PI_DESCRIPTION: _( | 45 C.PI_DESCRIPTION: _( |
46 u"""Blog import management: | 46 """Blog import management: |
47 This plugin manage the different blog importers which can register to it, and handle generic importing tasks.""" | 47 This plugin manage the different blog importers which can register to it, and handle generic importing tasks.""" |
48 ), | 48 ), |
49 } | 49 } |
50 | 50 |
51 OPT_HOST = "host" | 51 OPT_HOST = "host" |
65 self.host = host | 65 self.host = host |
66 self._u = host.plugins["UPLOAD"] | 66 self._u = host.plugins["UPLOAD"] |
67 self._p = host.plugins["XEP-0060"] | 67 self._p = host.plugins["XEP-0060"] |
68 self._m = host.plugins["XEP-0277"] | 68 self._m = host.plugins["XEP-0277"] |
69 self._s = self.host.plugins["TEXT_SYNTAXES"] | 69 self._s = self.host.plugins["TEXT_SYNTAXES"] |
70 host.plugins["IMPORT"].initialize(self, u"blog") | 70 host.plugins["IMPORT"].initialize(self, "blog") |
71 | 71 |
72 def importItem( | 72 def importItem( |
73 self, client, item_import_data, session, options, return_data, service, node | 73 self, client, item_import_data, session, options, return_data, service, node |
74 ): | 74 ): |
75 """importItem specialized for blog import | 75 """importItem specialized for blog import |
105 """ | 105 """ |
106 mb_data = item_import_data["blog"] | 106 mb_data = item_import_data["blog"] |
107 try: | 107 try: |
108 item_id = mb_data["id"] | 108 item_id = mb_data["id"] |
109 except KeyError: | 109 except KeyError: |
110 item_id = mb_data["id"] = unicode(shortuuid.uuid()) | 110 item_id = mb_data["id"] = str(shortuuid.uuid()) |
111 | 111 |
112 try: | 112 try: |
113 # we keep the link between old url and new blog item | 113 # we keep the link between old url and new blog item |
114 # so the user can redirect its former blog urls | 114 # so the user can redirect its former blog urls |
115 old_uri = item_import_data["url"] | 115 old_uri = item_import_data["url"] |
119 new_uri = return_data[URL_REDIRECT_PREFIX + old_uri] = self._p.getNodeURI( | 119 new_uri = return_data[URL_REDIRECT_PREFIX + old_uri] = self._p.getNodeURI( |
120 service if service is not None else client.jid.userhostJID(), | 120 service if service is not None else client.jid.userhostJID(), |
121 node or self._m.namespace, | 121 node or self._m.namespace, |
122 item_id, | 122 item_id, |
123 ) | 123 ) |
124 log.info(u"url link from {old} to {new}".format(old=old_uri, new=new_uri)) | 124 log.info("url link from {old} to {new}".format(old=old_uri, new=new_uri)) |
125 | 125 |
126 return mb_data | 126 return mb_data |
127 | 127 |
128 @defer.inlineCallbacks | 128 @defer.inlineCallbacks |
129 def importSubItems(self, client, item_import_data, mb_data, session, options): | 129 def importSubItems(self, client, item_import_data, mb_data, session, options): |
130 # comments data | 130 # comments data |
131 if len(item_import_data["comments"]) != 1: | 131 if len(item_import_data["comments"]) != 1: |
132 raise NotImplementedError(u"can't manage multiple comment links") | 132 raise NotImplementedError("can't manage multiple comment links") |
133 allow_comments = C.bool(mb_data.get("allow_comments", C.BOOL_FALSE)) | 133 allow_comments = C.bool(mb_data.get("allow_comments", C.BOOL_FALSE)) |
134 if allow_comments: | 134 if allow_comments: |
135 comments_service = yield self._m.getCommentsService(client) | 135 comments_service = yield self._m.getCommentsService(client) |
136 comments_node = self._m.getCommentsNode(mb_data["id"]) | 136 comments_node = self._m.getCommentsNode(mb_data["id"]) |
137 mb_data["comments_service"] = comments_service.full() | 137 mb_data["comments_service"] = comments_service.full() |
143 } | 143 } |
144 defer.returnValue(recurse_kwargs) | 144 defer.returnValue(recurse_kwargs) |
145 else: | 145 else: |
146 if item_import_data["comments"][0]: | 146 if item_import_data["comments"][0]: |
147 raise exceptions.DataError( | 147 raise exceptions.DataError( |
148 u"allow_comments set to False, but comments are there" | 148 "allow_comments set to False, but comments are there" |
149 ) | 149 ) |
150 defer.returnValue(None) | 150 defer.returnValue(None) |
151 | 151 |
152 def publishItem(self, client, mb_data, service, node, session): | 152 def publishItem(self, client, mb_data, service, node, session): |
153 log.debug( | 153 log.debug( |
154 u"uploading item [{id}]: {title}".format( | 154 "uploading item [{id}]: {title}".format( |
155 id=mb_data["id"], title=mb_data.get("title", "") | 155 id=mb_data["id"], title=mb_data.get("title", "") |
156 ) | 156 ) |
157 ) | 157 ) |
158 return self._m.send(client, mb_data, service, node) | 158 return self._m.send(client, mb_data, service, node) |
159 | 159 |
180 except KeyError: | 180 except KeyError: |
181 pass | 181 pass |
182 else: | 182 else: |
183 if "{}_xhtml".format(prefix) in mb_data: | 183 if "{}_xhtml".format(prefix) in mb_data: |
184 raise exceptions.DataError( | 184 raise exceptions.DataError( |
185 u"importer gave {prefix}_rich and {prefix}_xhtml at the same time, this is not allowed".format( | 185 "importer gave {prefix}_rich and {prefix}_xhtml at the same time, this is not allowed".format( |
186 prefix=prefix | 186 prefix=prefix |
187 ) | 187 ) |
188 ) | 188 ) |
189 # we convert rich syntax to XHTML here, so we can handle filters easily | 189 # we convert rich syntax to XHTML here, so we can handle filters easily |
190 converted = yield self._s.convert( | 190 converted = yield self._s.convert( |
198 except KeyError: | 198 except KeyError: |
199 pass | 199 pass |
200 else: | 200 else: |
201 if "{}_xhtml".format(prefix) in mb_data: | 201 if "{}_xhtml".format(prefix) in mb_data: |
202 log.warning( | 202 log.warning( |
203 u"{prefix}_text will be replaced by converted {prefix}_xhtml, so filters can be handled".format( | 203 "{prefix}_text will be replaced by converted {prefix}_xhtml, so filters can be handled".format( |
204 prefix=prefix | 204 prefix=prefix |
205 ) | 205 ) |
206 ) | 206 ) |
207 del mb_data["{}_text".format(prefix)] | 207 del mb_data["{}_text".format(prefix)] |
208 else: | 208 else: |
209 log.warning( | 209 log.warning( |
210 u"importer gave a text {prefix}, blog filters don't work on text {prefix}".format( | 210 "importer gave a text {prefix}, blog filters don't work on text {prefix}".format( |
211 prefix=prefix | 211 prefix=prefix |
212 ) | 212 ) |
213 ) | 213 ) |
214 return | 214 return |
215 | 215 |
223 cleaned = yield self._s.cleanXHTML(mb_data["content_xhtml"]) | 223 cleaned = yield self._s.cleanXHTML(mb_data["content_xhtml"]) |
224 top_elt = xml_tools.ElementParser()(cleaned, namespace=C.NS_XHTML) | 224 top_elt = xml_tools.ElementParser()(cleaned, namespace=C.NS_XHTML) |
225 opt_host = options.get(OPT_HOST) | 225 opt_host = options.get(OPT_HOST) |
226 if opt_host: | 226 if opt_host: |
227 # we normalise the domain | 227 # we normalise the domain |
228 parsed_host = urlparse.urlsplit(opt_host) | 228 parsed_host = urllib.parse.urlsplit(opt_host) |
229 opt_host = urlparse.urlunsplit( | 229 opt_host = urllib.parse.urlunsplit( |
230 ( | 230 ( |
231 parsed_host.scheme or "http", | 231 parsed_host.scheme or "http", |
232 parsed_host.netloc or parsed_host.path, | 232 parsed_host.netloc or parsed_host.path, |
233 "", | 233 "", |
234 "", | 234 "", |
237 ) | 237 ) |
238 | 238 |
239 tmp_dir = tempfile.mkdtemp() | 239 tmp_dir = tempfile.mkdtemp() |
240 try: | 240 try: |
241 # TODO: would be nice to also update the hyperlinks to these images, e.g. when you have <a href="{url}"><img src="{url}"></a> | 241 # TODO: would be nice to also update the hyperlinks to these images, e.g. when you have <a href="{url}"><img src="{url}"></a> |
242 for img_elt in xml_tools.findAll(top_elt, names=[u"img"]): | 242 for img_elt in xml_tools.findAll(top_elt, names=["img"]): |
243 yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir) | 243 yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir) |
244 finally: | 244 finally: |
245 os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong | 245 os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong |
246 | 246 |
247 # we now replace the content with filtered one | 247 # we now replace the content with filtered one |
258 @param opt_host(unicode): normalised host given in options | 258 @param opt_host(unicode): normalised host given in options |
259 @param tmp_dir(str): path to temp directory | 259 @param tmp_dir(str): path to temp directory |
260 """ | 260 """ |
261 try: | 261 try: |
262 url = img_elt["src"] | 262 url = img_elt["src"] |
263 if url[0] == u"/": | 263 if url[0] == "/": |
264 if not opt_host: | 264 if not opt_host: |
265 log.warning( | 265 log.warning( |
266 u"host was not specified, we can't deal with src without host ({url}) and have to ignore the following <img/>:\n{xml}".format( | 266 "host was not specified, we can't deal with src without host ({url}) and have to ignore the following <img/>:\n{xml}".format( |
267 url=url, xml=img_elt.toXml() | 267 url=url, xml=img_elt.toXml() |
268 ) | 268 ) |
269 ) | 269 ) |
270 return | 270 return |
271 else: | 271 else: |
272 url = urlparse.urljoin(opt_host, url) | 272 url = urllib.parse.urljoin(opt_host, url) |
273 filename = url.rsplit("/", 1)[-1].strip() | 273 filename = url.rsplit("/", 1)[-1].strip() |
274 if not filename: | 274 if not filename: |
275 raise KeyError | 275 raise KeyError |
276 except (KeyError, IndexError): | 276 except (KeyError, IndexError): |
277 log.warning(u"ignoring invalid img element: {}".format(img_elt.toXml())) | 277 log.warning("ignoring invalid img element: {}".format(img_elt.toXml())) |
278 return | 278 return |
279 | 279 |
280 # we change the url for the normalized one | 280 # we change the url for the normalized one |
281 img_elt["src"] = url | 281 img_elt["src"] = url |
282 | 282 |
286 ignore_host = options[OPT_UPLOAD_IGNORE_HOST] | 286 ignore_host = options[OPT_UPLOAD_IGNORE_HOST] |
287 except KeyError: | 287 except KeyError: |
288 pass | 288 pass |
289 else: | 289 else: |
290 # host is the ignored one, we skip | 290 # host is the ignored one, we skip |
291 parsed_url = urlparse.urlsplit(url) | 291 parsed_url = urllib.parse.urlsplit(url) |
292 if ignore_host in parsed_url.hostname: | 292 if ignore_host in parsed_url.hostname: |
293 log.info( | 293 log.info( |
294 u"Don't upload image at {url} because of {opt} option".format( | 294 "Don't upload image at {url} because of {opt} option".format( |
295 url=url, opt=OPT_UPLOAD_IGNORE_HOST | 295 url=url, opt=OPT_UPLOAD_IGNORE_HOST |
296 ) | 296 ) |
297 ) | 297 ) |
298 return | 298 return |
299 | 299 |
302 upload_options = {"ignore_tls_errors": options.get(OPT_IGNORE_TLS, False)} | 302 upload_options = {"ignore_tls_errors": options.get(OPT_IGNORE_TLS, False)} |
303 | 303 |
304 try: | 304 try: |
305 yield web_client.downloadPage(url.encode("utf-8"), tmp_file) | 305 yield web_client.downloadPage(url.encode("utf-8"), tmp_file) |
306 filename = filename.replace( | 306 filename = filename.replace( |
307 u"%", u"_" | 307 "%", "_" |
308 ) # FIXME: tmp workaround for a bug in prosody http upload | 308 ) # FIXME: tmp workaround for a bug in prosody http upload |
309 __, download_d = yield self._u.upload( | 309 __, download_d = yield self._u.upload( |
310 client, tmp_file, filename, options=upload_options | 310 client, tmp_file, filename, options=upload_options |
311 ) | 311 ) |
312 download_url = yield download_d | 312 download_url = yield download_d |
313 except Exception as e: | 313 except Exception as e: |
314 log.warning( | 314 log.warning( |
315 u"can't download image at {url}: {reason}".format(url=url, reason=e) | 315 "can't download image at {url}: {reason}".format(url=url, reason=e) |
316 ) | 316 ) |
317 else: | 317 else: |
318 img_elt["src"] = download_url | 318 img_elt["src"] = download_url |
319 | 319 |
320 try: | 320 try: |