comparison sat/plugins/plugin_blog_import.py @ 3028:ab2696e34d29

Python 3 port: /!\ this is a huge commit /!\ Starting from this commit, SàT needs Python 3.6+. /!\ SàT may be unstable or some features may not work anymore; this will improve with time.

This patch ports the backend, bridge and frontends to Python 3. Roughly, this has been done this way:
- the 2to3 tool has been applied (with Python 3.7)
- all references to Python 2 have been replaced with Python 3 (notably shebangs)
- fixed files not handled by 2to3 (notably the shell script)
- several manual fixes
- fixed issues reported by Python 3 that were not handled in Python 2
- replaced "async" with "async_" where needed (it is a reserved word since Python 3.7; see the first sketch after the changeset metadata below)
- replaced zope's "implements" with the @implementer decorator (also shown in the first sketch below)
- temporary hack to handle data pickled in the database, as str or bytes may be returned, to be checked later (see the second sketch below)
- fixed hash comparison for passwords (see the third sketch below)
- removed some code which is not needed anymore with Python 3
- deactivated some code which needs to be checked (notably certificate validation)
- tested with jp, fixed reported issues until some basic commands worked
- ported Primitivus (after porting dependencies like urwid satext)
- more manual fixes
author Goffi <goffi@goffi.org>
date Tue, 13 Aug 2019 19:08:41 +0200
parents 85d3240a400f
children 9d0df638c8b4
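The "async" renaming and the zope interface change mentioned in the commit message look roughly like the sketch below. This is a minimal illustration, not actual SàT code; IImporter and BlogImporter are hypothetical names:

    from zope.interface import Interface, implementer

    class IImporter(Interface):
        """Hypothetical marker interface for importers."""

    # Python 2 version (no longer valid):
    #     class BlogImporter(object):
    #         implements(IImporter)             # zope class advice, Python 2 only
    #         def doImport(self, async=False):  # "async" is a keyword in 3.7+
    #             ...

    @implementer(IImporter)                     # decorator replaces implements()
    class BlogImporter:
        def doImport(self, async_=False):       # trailing underscore avoids the keyword
            pass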
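The temporary hack for pickled database values could look like the following; a sketch under assumptions (the helper name and the utf-8 encoding choice are mine), since values written by the Python 2 version may come back as either str or bytes:

    import pickle

    def load_pickled(raw):
        # Values pickled by the Python 2 backend stored text as bytes; after
        # the port the database layer may hand back str or bytes. Normalize
        # to bytes, then let pickle decode legacy byte strings as utf-8.
        if isinstance(raw, str):
            raw = raw.encode("utf-8")
        return pickle.loads(raw, encoding="utf-8")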
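The password hash comparison fix is the classic str/bytes bug that Python 3 surfaces: b"..." == "..." is always False, so a digest read back as bytes never matches a freshly computed str digest. A hedged illustration using hashlib; the actual SàT code may hash differently:

    import hashlib

    def check_password(given, stored_digest):
        # hexdigest() returns str under Python 3; if the stored value comes
        # back from the database as bytes, decode it before comparing,
        # otherwise every password check silently fails.
        computed = hashlib.sha256(given.encode("utf-8")).hexdigest()
        if isinstance(stored_digest, bytes):
            stored_digest = stored_digest.decode("utf-8")
        return computed == stored_digest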
comparison
equal deleted inserted replaced
3027:ff5bcb12ae60 3028:ab2696e34d29
1 #!/usr/bin/env python2 1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 3
4 # SàT plugin for import external blogs 4 # SàT plugin for import external blogs
5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) 5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org)
6 6
29 from sat.core import exceptions 29 from sat.core import exceptions
30 from sat.tools import xml_tools 30 from sat.tools import xml_tools
31 import os 31 import os
32 import os.path 32 import os.path
33 import tempfile 33 import tempfile
34 import urlparse 34 import urllib.parse
35 import shortuuid 35 import shortuuid
36 36
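The import change above is one of the mechanical renames 2to3 performs: Python 2's urlparse module became urllib.parse in Python 3, with the same function names. For reference (example.org is a placeholder):

    import urllib.parse  # Python 3 home of the former urlparse module

    url = "https://example.org/blog/post"
    parts = urllib.parse.urlsplit(url)              # was urlparse.urlsplit()
    base = urllib.parse.urlunsplit(parts)           # was urlparse.urlunsplit()
    img = urllib.parse.urljoin(base, "/img/a.png")  # was urlparse.urljoin()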
37 37
38 PLUGIN_INFO = { 38 PLUGIN_INFO = {
39 C.PI_NAME: "blog import", 39 C.PI_NAME: "blog import",
41 C.PI_TYPE: (C.PLUG_TYPE_BLOG, C.PLUG_TYPE_IMPORT), 41 C.PI_TYPE: (C.PLUG_TYPE_BLOG, C.PLUG_TYPE_IMPORT),
42 C.PI_DEPENDENCIES: ["IMPORT", "XEP-0060", "XEP-0277", "TEXT_SYNTAXES", "UPLOAD"], 42 C.PI_DEPENDENCIES: ["IMPORT", "XEP-0060", "XEP-0277", "TEXT_SYNTAXES", "UPLOAD"],
43 C.PI_MAIN: "BlogImportPlugin", 43 C.PI_MAIN: "BlogImportPlugin",
44 C.PI_HANDLER: "no", 44 C.PI_HANDLER: "no",
45 C.PI_DESCRIPTION: _( 45 C.PI_DESCRIPTION: _(
46 u"""Blog import management: 46 """Blog import management:
47 This plugin manages the different blog importers which can register to it, and handles generic importing tasks.""" 47 This plugin manages the different blog importers which can register to it, and handles generic importing tasks."""
48 ), 48 ),
49 } 49 }
50 50
51 OPT_HOST = "host" 51 OPT_HOST = "host"
65 self.host = host 65 self.host = host
66 self._u = host.plugins["UPLOAD"] 66 self._u = host.plugins["UPLOAD"]
67 self._p = host.plugins["XEP-0060"] 67 self._p = host.plugins["XEP-0060"]
68 self._m = host.plugins["XEP-0277"] 68 self._m = host.plugins["XEP-0277"]
69 self._s = self.host.plugins["TEXT_SYNTAXES"] 69 self._s = self.host.plugins["TEXT_SYNTAXES"]
70 host.plugins["IMPORT"].initialize(self, u"blog") 70 host.plugins["IMPORT"].initialize(self, "blog")
71 71
72 def importItem( 72 def importItem(
73 self, client, item_import_data, session, options, return_data, service, node 73 self, client, item_import_data, session, options, return_data, service, node
74 ): 74 ):
75 """importItem specialized for blog import 75 """importItem specialized for blog import
105 """ 105 """
106 mb_data = item_import_data["blog"] 106 mb_data = item_import_data["blog"]
107 try: 107 try:
108 item_id = mb_data["id"] 108 item_id = mb_data["id"]
109 except KeyError: 109 except KeyError:
110 item_id = mb_data["id"] = unicode(shortuuid.uuid()) 110 item_id = mb_data["id"] = str(shortuuid.uuid())
111 111
112 try: 112 try:
113 # we keep the link between old url and new blog item 113 # we keep the link between old url and new blog item
114 # so the user can redirect its former blog urls 114 # so the user can redirect its former blog urls
115 old_uri = item_import_data["url"] 115 old_uri = item_import_data["url"]
119 new_uri = return_data[URL_REDIRECT_PREFIX + old_uri] = self._p.getNodeURI( 119 new_uri = return_data[URL_REDIRECT_PREFIX + old_uri] = self._p.getNodeURI(
120 service if service is not None else client.jid.userhostJID(), 120 service if service is not None else client.jid.userhostJID(),
121 node or self._m.namespace, 121 node or self._m.namespace,
122 item_id, 122 item_id,
123 ) 123 )
124 log.info(u"url link from {old} to {new}".format(old=old_uri, new=new_uri)) 124 log.info("url link from {old} to {new}".format(old=old_uri, new=new_uri))
125 125
126 return mb_data 126 return mb_data
127 127
128 @defer.inlineCallbacks 128 @defer.inlineCallbacks
129 def importSubItems(self, client, item_import_data, mb_data, session, options): 129 def importSubItems(self, client, item_import_data, mb_data, session, options):
130 # comments data 130 # comments data
131 if len(item_import_data["comments"]) != 1: 131 if len(item_import_data["comments"]) != 1:
132 raise NotImplementedError(u"can't manage multiple comment links") 132 raise NotImplementedError("can't manage multiple comment links")
133 allow_comments = C.bool(mb_data.get("allow_comments", C.BOOL_FALSE)) 133 allow_comments = C.bool(mb_data.get("allow_comments", C.BOOL_FALSE))
134 if allow_comments: 134 if allow_comments:
135 comments_service = yield self._m.getCommentsService(client) 135 comments_service = yield self._m.getCommentsService(client)
136 comments_node = self._m.getCommentsNode(mb_data["id"]) 136 comments_node = self._m.getCommentsNode(mb_data["id"])
137 mb_data["comments_service"] = comments_service.full() 137 mb_data["comments_service"] = comments_service.full()
143 } 143 }
144 defer.returnValue(recurse_kwargs) 144 defer.returnValue(recurse_kwargs)
145 else: 145 else:
146 if item_import_data["comments"][0]: 146 if item_import_data["comments"][0]:
147 raise exceptions.DataError( 147 raise exceptions.DataError(
148 u"allow_comments set to False, but comments are there" 148 "allow_comments set to False, but comments are there"
149 ) 149 )
150 defer.returnValue(None) 150 defer.returnValue(None)
151 151
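An aside on the defer.returnValue calls kept in the hunk above: they still work after the port, but on Python 3 generators may return a value, so an @inlineCallbacks function can simply use return. A minimal sketch of the equivalent pattern (the function and its logic are condensed, hypothetical stand-ins):

    from twisted.internet import defer

    @defer.inlineCallbacks
    def import_sub_items_sketch(allow_comments):
        yield defer.succeed(None)       # stand-in for the real asynchronous calls
        if allow_comments:
            return {"items_import_data": iter(())}  # Python 3: plain return works
        return None                     # same as defer.returnValue(None)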
152 def publishItem(self, client, mb_data, service, node, session): 152 def publishItem(self, client, mb_data, service, node, session):
153 log.debug( 153 log.debug(
154 u"uploading item [{id}]: {title}".format( 154 "uploading item [{id}]: {title}".format(
155 id=mb_data["id"], title=mb_data.get("title", "") 155 id=mb_data["id"], title=mb_data.get("title", "")
156 ) 156 )
157 ) 157 )
158 return self._m.send(client, mb_data, service, node) 158 return self._m.send(client, mb_data, service, node)
159 159
180 except KeyError: 180 except KeyError:
181 pass 181 pass
182 else: 182 else:
183 if "{}_xhtml".format(prefix) in mb_data: 183 if "{}_xhtml".format(prefix) in mb_data:
184 raise exceptions.DataError( 184 raise exceptions.DataError(
185 u"importer gave {prefix}_rich and {prefix}_xhtml at the same time, this is not allowed".format( 185 "importer gave {prefix}_rich and {prefix}_xhtml at the same time, this is not allowed".format(
186 prefix=prefix 186 prefix=prefix
187 ) 187 )
188 ) 188 )
189 # we convert rich syntax to XHTML here, so we can handle filters easily 189 # we convert rich syntax to XHTML here, so we can handle filters easily
190 converted = yield self._s.convert( 190 converted = yield self._s.convert(
198 except KeyError: 198 except KeyError:
199 pass 199 pass
200 else: 200 else:
201 if "{}_xhtml".format(prefix) in mb_data: 201 if "{}_xhtml".format(prefix) in mb_data:
202 log.warning( 202 log.warning(
203 u"{prefix}_text will be replaced by converted {prefix}_xhtml, so filters can be handled".format( 203 "{prefix}_text will be replaced by converted {prefix}_xhtml, so filters can be handled".format(
204 prefix=prefix 204 prefix=prefix
205 ) 205 )
206 ) 206 )
207 del mb_data["{}_text".format(prefix)] 207 del mb_data["{}_text".format(prefix)]
208 else: 208 else:
209 log.warning( 209 log.warning(
210 u"importer gave a text {prefix}, blog filters don't work on text {prefix}".format( 210 "importer gave a text {prefix}, blog filters don't work on text {prefix}".format(
211 prefix=prefix 211 prefix=prefix
212 ) 212 )
213 ) 213 )
214 return 214 return
215 215
223 cleaned = yield self._s.cleanXHTML(mb_data["content_xhtml"]) 223 cleaned = yield self._s.cleanXHTML(mb_data["content_xhtml"])
224 top_elt = xml_tools.ElementParser()(cleaned, namespace=C.NS_XHTML) 224 top_elt = xml_tools.ElementParser()(cleaned, namespace=C.NS_XHTML)
225 opt_host = options.get(OPT_HOST) 225 opt_host = options.get(OPT_HOST)
226 if opt_host: 226 if opt_host:
227 # we normalise the domain 227 # we normalise the domain
228 parsed_host = urlparse.urlsplit(opt_host) 228 parsed_host = urllib.parse.urlsplit(opt_host)
229 opt_host = urlparse.urlunsplit( 229 opt_host = urllib.parse.urlunsplit(
230 ( 230 (
231 parsed_host.scheme or "http", 231 parsed_host.scheme or "http",
232 parsed_host.netloc or parsed_host.path, 232 parsed_host.netloc or parsed_host.path,
233 "", 233 "",
234 "", 234 "",
237 ) 237 )
238 238
239 tmp_dir = tempfile.mkdtemp() 239 tmp_dir = tempfile.mkdtemp()
240 try: 240 try:
241 # TODO: would be nice to also update the hyperlinks to these images, e.g. when you have <a href="{url}"><img src="{url}"></a> 241 # TODO: would be nice to also update the hyperlinks to these images, e.g. when you have <a href="{url}"><img src="{url}"></a>
242 for img_elt in xml_tools.findAll(top_elt, names=[u"img"]): 242 for img_elt in xml_tools.findAll(top_elt, names=["img"]):
243 yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir) 243 yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir)
244 finally: 244 finally:
245 os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong 245 os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong
246 246
247 # we now replace the content with filtered one 247 # we now replace the content with filtered one
258 @param opt_host(unicode): normalised host given in options 258 @param opt_host(unicode): normalised host given in options
259 @param tmp_dir(str): path to temp directory 259 @param tmp_dir(str): path to temp directory
260 """ 260 """
261 try: 261 try:
262 url = img_elt["src"] 262 url = img_elt["src"]
263 if url[0] == u"/": 263 if url[0] == "/":
264 if not opt_host: 264 if not opt_host:
265 log.warning( 265 log.warning(
266 u"host was not specified, we can't deal with src without host ({url}) and have to ignore the following <img/>:\n{xml}".format( 266 "host was not specified, we can't deal with src without host ({url}) and have to ignore the following <img/>:\n{xml}".format(
267 url=url, xml=img_elt.toXml() 267 url=url, xml=img_elt.toXml()
268 ) 268 )
269 ) 269 )
270 return 270 return
271 else: 271 else:
272 url = urlparse.urljoin(opt_host, url) 272 url = urllib.parse.urljoin(opt_host, url)
273 filename = url.rsplit("/", 1)[-1].strip() 273 filename = url.rsplit("/", 1)[-1].strip()
274 if not filename: 274 if not filename:
275 raise KeyError 275 raise KeyError
276 except (KeyError, IndexError): 276 except (KeyError, IndexError):
277 log.warning(u"ignoring invalid img element: {}".format(img_elt.toXml())) 277 log.warning("ignoring invalid img element: {}".format(img_elt.toXml()))
278 return 278 return
279 279
280 # we change the url for the normalized one 280 # we change the url for the normalized one
281 img_elt["src"] = url 281 img_elt["src"] = url
282 282
286 ignore_host = options[OPT_UPLOAD_IGNORE_HOST] 286 ignore_host = options[OPT_UPLOAD_IGNORE_HOST]
287 except KeyError: 287 except KeyError:
288 pass 288 pass
289 else: 289 else:
290 # host is the ignored one, we skip 290 # host is the ignored one, we skip
291 parsed_url = urlparse.urlsplit(url) 291 parsed_url = urllib.parse.urlsplit(url)
292 if ignore_host in parsed_url.hostname: 292 if ignore_host in parsed_url.hostname:
293 log.info( 293 log.info(
294 u"Don't upload image at {url} because of {opt} option".format( 294 "Don't upload image at {url} because of {opt} option".format(
295 url=url, opt=OPT_UPLOAD_IGNORE_HOST 295 url=url, opt=OPT_UPLOAD_IGNORE_HOST
296 ) 296 )
297 ) 297 )
298 return 298 return
299 299
302 upload_options = {"ignore_tls_errors": options.get(OPT_IGNORE_TLS, False)} 302 upload_options = {"ignore_tls_errors": options.get(OPT_IGNORE_TLS, False)}
303 303
304 try: 304 try:
305 yield web_client.downloadPage(url.encode("utf-8"), tmp_file) 305 yield web_client.downloadPage(url.encode("utf-8"), tmp_file)
306 filename = filename.replace( 306 filename = filename.replace(
307 u"%", u"_" 307 "%", "_"
308 ) # FIXME: tmp workaround for a bug in prosody http upload 308 ) # FIXME: tmp workaround for a bug in prosody http upload
309 __, download_d = yield self._u.upload( 309 __, download_d = yield self._u.upload(
310 client, tmp_file, filename, options=upload_options 310 client, tmp_file, filename, options=upload_options
311 ) 311 )
312 download_url = yield download_d 312 download_url = yield download_d
313 except Exception as e: 313 except Exception as e:
314 log.warning( 314 log.warning(
315 u"can't download image at {url}: {reason}".format(url=url, reason=e) 315 "can't download image at {url}: {reason}".format(url=url, reason=e)
316 ) 316 )
317 else: 317 else:
318 img_elt["src"] = download_url 318 img_elt["src"] = download_url
319 319
320 try: 320 try: