comparison sat/plugins/plugin_blog_import.py @ 2624:56f94936df1e

code style reformatting using black
author Goffi <goffi@goffi.org>
date Wed, 27 Jun 2018 20:14:46 +0200
parents 26edcf3a30eb
children 378188abe941
comparing 2623:49533de4540b with 2624:56f94936df1e
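Note: the change is purely mechanical. black normalises string quotes to double quotes and packs or wraps statements against its default 88-column limit; a minimal before/after sketch using constants from this file (assuming black's default settings):

    # before (left column style)
    OPT_UPLOAD_IMAGES = 'upload_images'
    OPT_IGNORE_TLS = 'ignore_tls_errors'
    OPT_DEFAULTS = {OPT_UPLOAD_IMAGES: True,
                    OPT_IGNORE_TLS: False}

    # after black (right column): double quotes, and the dict collapsed onto
    # one line because it fits within 88 columns
    OPT_UPLOAD_IMAGES = "upload_images"
    OPT_IGNORE_TLS = "ignore_tls_errors"
    OPT_DEFAULTS = {OPT_UPLOAD_IMAGES: True, OPT_IGNORE_TLS: False}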
--- a/sat/plugins/plugin_blog_import.py
+++ b/sat/plugins/plugin_blog_import.py
@@ -19,10 +19,11 @@
 
 
 from sat.core.i18n import _
 from sat.core.constants import Const as C
 from sat.core.log import getLogger
+
 log = getLogger(__name__)
 from twisted.internet import defer
 from twisted.web import client as web_client
 from twisted.words.xish import domish
 from sat.core import exceptions
@@ -39,37 +40,40 @@
     C.PI_IMPORT_NAME: "BLOG_IMPORT",
     C.PI_TYPE: (C.PLUG_TYPE_BLOG, C.PLUG_TYPE_IMPORT),
     C.PI_DEPENDENCIES: ["IMPORT", "XEP-0060", "XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
     C.PI_MAIN: "BlogImportPlugin",
     C.PI_HANDLER: "no",
-    C.PI_DESCRIPTION: _(u"""Blog import management:
-This plugin manage the different blog importers which can register to it, and handle generic importing tasks.""")
+    C.PI_DESCRIPTION: _(
+        u"""Blog import management:
+This plugin manage the different blog importers which can register to it, and handle generic importing tasks."""
+    ),
 }
 
-OPT_HOST = 'host'
-OPT_UPLOAD_IMAGES = 'upload_images'
-OPT_UPLOAD_IGNORE_HOST = 'upload_ignore_host'
-OPT_IGNORE_TLS = 'ignore_tls_errors'
-URL_REDIRECT_PREFIX = 'url_redirect_'
+OPT_HOST = "host"
+OPT_UPLOAD_IMAGES = "upload_images"
+OPT_UPLOAD_IGNORE_HOST = "upload_ignore_host"
+OPT_IGNORE_TLS = "ignore_tls_errors"
+URL_REDIRECT_PREFIX = "url_redirect_"
 
 
 class BlogImportPlugin(object):
     BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS)
     JSON_OPTIONS = ()
-    OPT_DEFAULTS = {OPT_UPLOAD_IMAGES: True,
-                    OPT_IGNORE_TLS: False}
+    OPT_DEFAULTS = {OPT_UPLOAD_IMAGES: True, OPT_IGNORE_TLS: False}
 
     def __init__(self, host):
         log.info(_("plugin Blog Import initialization"))
         self.host = host
-        self._u = host.plugins['UPLOAD']
-        self._p = host.plugins['XEP-0060']
-        self._m = host.plugins['XEP-0277']
-        self._s = self.host.plugins['TEXT-SYNTAXES']
-        host.plugins['IMPORT'].initialize(self, u'blog')
+        self._u = host.plugins["UPLOAD"]
+        self._p = host.plugins["XEP-0060"]
+        self._m = host.plugins["XEP-0277"]
+        self._s = self.host.plugins["TEXT-SYNTAXES"]
+        host.plugins["IMPORT"].initialize(self, u"blog")
 
-    def importItem(self, client, item_import_data, session, options, return_data, service, node):
+    def importItem(
+        self, client, item_import_data, session, options, return_data, service, node
+    ):
         """importItem specialized for blog import
 
         @param item_import_data(dict):
             * mandatory keys:
                 'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en)
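Note: the elided docstring lines document the supported options, which map to the module constants above. An illustrative options dict (values hypothetical, defaults per OPT_DEFAULTS):

    options = {
        OPT_HOST: "http://myblog.example",      # base URL of the former blog (hypothetical)
        OPT_UPLOAD_IMAGES: True,                # default True, listed in BOOL_OPTIONS
        OPT_UPLOAD_IGNORE_HOST: "example.net",  # images hosted there are not re-uploaded
        OPT_IGNORE_TLS: False,                  # default False
    }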
@@ -97,55 +101,62 @@
             - OPT_IGNORE_TLS (bool): ignore TLS error for image upload.
                 Default: False
         @param return_data(dict): will contain link between former posts and new items
 
         """
-        mb_data = item_import_data['blog']
+        mb_data = item_import_data["blog"]
         try:
-            item_id = mb_data['id']
+            item_id = mb_data["id"]
         except KeyError:
-            item_id = mb_data['id'] = unicode(shortuuid.uuid())
+            item_id = mb_data["id"] = unicode(shortuuid.uuid())
 
         try:
             # we keep the link between old url and new blog item
             # so the user can redirect its former blog urls
-            old_uri = item_import_data['url']
+            old_uri = item_import_data["url"]
         except KeyError:
             pass
         else:
             new_uri = return_data[URL_REDIRECT_PREFIX + old_uri] = self._p.getNodeURI(
                 service if service is not None else client.jid.userhostJID(),
                 node or self._m.namespace,
-                item_id)
-            log.info(u"url link from {old} to {new}".format(
-                old=old_uri, new=new_uri))
+                item_id,
+            )
+            log.info(u"url link from {old} to {new}".format(old=old_uri, new=new_uri))
 
         return mb_data
 
     @defer.inlineCallbacks
     def importSubItems(self, client, item_import_data, mb_data, session, options):
         # comments data
-        if len(item_import_data['comments']) != 1:
+        if len(item_import_data["comments"]) != 1:
             raise NotImplementedError(u"can't manage multiple comment links")
-        allow_comments = C.bool(mb_data.get('allow_comments', C.BOOL_FALSE))
+        allow_comments = C.bool(mb_data.get("allow_comments", C.BOOL_FALSE))
         if allow_comments:
             comments_service = yield self._m.getCommentsService(client)
-            comments_node = self._m.getCommentsNode(mb_data['id'])
-            mb_data['comments_service'] = comments_service.full()
-            mb_data['comments_node'] = comments_node
+            comments_node = self._m.getCommentsNode(mb_data["id"])
+            mb_data["comments_service"] = comments_service.full()
+            mb_data["comments_node"] = comments_node
             recurse_kwargs = {
-                'items_import_data':item_import_data['comments'][0],
-                'service':comments_service,
-                'node':comments_node}
+                "items_import_data": item_import_data["comments"][0],
+                "service": comments_service,
+                "node": comments_node,
+            }
             defer.returnValue(recurse_kwargs)
         else:
-            if item_import_data['comments'][0]:
-                raise exceptions.DataError(u"allow_comments set to False, but comments are there")
+            if item_import_data["comments"][0]:
+                raise exceptions.DataError(
+                    u"allow_comments set to False, but comments are there"
+                )
             defer.returnValue(None)
 
     def publishItem(self, client, mb_data, service, node, session):
-        log.debug(u"uploading item [{id}]: {title}".format(id=mb_data['id'], title=mb_data.get('title','')))
+        log.debug(
+            u"uploading item [{id}]: {title}".format(
+                id=mb_data["id"], title=mb_data.get("title", "")
+            )
+        )
         return self._m.send(client, mb_data, service, node)
 
     @defer.inlineCallbacks
     def itemFilters(self, client, mb_data, session, options):
         """Apply filters according to options
@@ -159,58 +170,84 @@
         # filters can then be used by converting text to XHTML
         if not options:
             return
 
         # we want only XHTML content
-        for prefix in ('content',): # a tuple is use, if title need to be added in the future
+        for prefix in (
+            "content",
+        ):  # a tuple is use, if title need to be added in the future
             try:
-                rich = mb_data['{}_rich'.format(prefix)]
+                rich = mb_data["{}_rich".format(prefix)]
             except KeyError:
                 pass
             else:
-                if '{}_xhtml'.format(prefix) in mb_data:
-                    raise exceptions.DataError(u"importer gave {prefix}_rich and {prefix}_xhtml at the same time, this is not allowed".format(prefix=prefix))
+                if "{}_xhtml".format(prefix) in mb_data:
+                    raise exceptions.DataError(
+                        u"importer gave {prefix}_rich and {prefix}_xhtml at the same time, this is not allowed".format(
+                            prefix=prefix
+                        )
+                    )
                 # we convert rich syntax to XHTML here, so we can handle filters easily
-                converted = yield self._s.convert(rich, self._s.getCurrentSyntax(client.profile), safe=False)
-                mb_data['{}_xhtml'.format(prefix)] = converted
-                del mb_data['{}_rich'.format(prefix)]
+                converted = yield self._s.convert(
+                    rich, self._s.getCurrentSyntax(client.profile), safe=False
+                )
+                mb_data["{}_xhtml".format(prefix)] = converted
+                del mb_data["{}_rich".format(prefix)]
 
             try:
-                mb_data['txt']
+                mb_data["txt"]
             except KeyError:
                 pass
             else:
-                if '{}_xhtml'.format(prefix) in mb_data:
-                    log.warning(u"{prefix}_text will be replaced by converted {prefix}_xhtml, so filters can be handled".format(prefix=prefix))
-                    del mb_data['{}_text'.format(prefix)]
+                if "{}_xhtml".format(prefix) in mb_data:
+                    log.warning(
+                        u"{prefix}_text will be replaced by converted {prefix}_xhtml, so filters can be handled".format(
+                            prefix=prefix
+                        )
+                    )
+                    del mb_data["{}_text".format(prefix)]
                 else:
-                    log.warning(u"importer gave a text {prefix}, blog filters don't work on text {prefix}".format(prefix=prefix))
+                    log.warning(
+                        u"importer gave a text {prefix}, blog filters don't work on text {prefix}".format(
+                            prefix=prefix
+                        )
+                    )
                     return
 
         # at this point, we have only XHTML version of content
         try:
-            top_elt = xml_tools.ElementParser()(mb_data['content_xhtml'], namespace=C.NS_XHTML)
+            top_elt = xml_tools.ElementParser()(
+                mb_data["content_xhtml"], namespace=C.NS_XHTML
+            )
         except domish.ParserError:
             # we clean the xml and try again our luck
-            cleaned = yield self._s.cleanXHTML(mb_data['content_xhtml'])
+            cleaned = yield self._s.cleanXHTML(mb_data["content_xhtml"])
             top_elt = xml_tools.ElementParser()(cleaned, namespace=C.NS_XHTML)
         opt_host = options.get(OPT_HOST)
         if opt_host:
             # we normalise the domain
             parsed_host = urlparse.urlsplit(opt_host)
-            opt_host = urlparse.urlunsplit((parsed_host.scheme or 'http', parsed_host.netloc or parsed_host.path, '', '', ''))
+            opt_host = urlparse.urlunsplit(
+                (
+                    parsed_host.scheme or "http",
+                    parsed_host.netloc or parsed_host.path,
+                    "",
+                    "",
+                    "",
+                )
+            )
 
         tmp_dir = tempfile.mkdtemp()
         try:
             # TODO: would be nice to also update the hyperlinks to these images, e.g. when you have <a href="{url}"><img src="{url}"></a>
-            for img_elt in xml_tools.findAll(top_elt, names=[u'img']):
+            for img_elt in xml_tools.findAll(top_elt, names=[u"img"]):
                 yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir)
         finally:
             os.rmdir(tmp_dir)  # XXX: tmp_dir should be empty, or something went wrong
 
         # we now replace the content with filtered one
-        mb_data['content_xhtml'] = top_elt.toXml()
+        mb_data["content_xhtml"] = top_elt.toXml()
 
     @defer.inlineCallbacks
     def imgFilters(self, client, img_elt, options, opt_host, tmp_dir):
         """Filters handling images
 
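Note: itemFilters reduces the post to a single content_xhtml version before the image pass. Assuming the profile's current syntax is markdown, a hypothetical run (output markup illustrative):

    # before: the importer provided rich content only
    mb_data = {"id": "abc123", "content_rich": "**hello**"}

    # after itemFilters: the rich version was converted via TEXT-SYNTAXES and
    # deleted, leaving only the XHTML version
    mb_data = {"id": "abc123", "content_xhtml": "<div><strong>hello</strong></div>"}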
@@ -220,55 +257,67 @@
         @param options(dict): filters options
         @param opt_host(unicode): normalised host given in options
         @param tmp_dir(str): path to temp directory
         """
         try:
-            url = img_elt['src']
-            if url[0] == u'/':
+            url = img_elt["src"]
+            if url[0] == u"/":
                 if not opt_host:
-                    log.warning(u"host was not specified, we can't deal with src without host ({url}) and have to ignore the following <img/>:\n{xml}"
-                                .format(url=url, xml=img_elt.toXml()))
+                    log.warning(
+                        u"host was not specified, we can't deal with src without host ({url}) and have to ignore the following <img/>:\n{xml}".format(
+                            url=url, xml=img_elt.toXml()
+                        )
+                    )
                     return
                 else:
                     url = urlparse.urljoin(opt_host, url)
-            filename = url.rsplit('/',1)[-1].strip()
+            filename = url.rsplit("/", 1)[-1].strip()
             if not filename:
                 raise KeyError
         except (KeyError, IndexError):
             log.warning(u"ignoring invalid img element: {}".format(img_elt.toXml()))
             return
 
         # we change the url for the normalized one
-        img_elt['src'] = url
+        img_elt["src"] = url
 
         if options.get(OPT_UPLOAD_IMAGES, False):
             # upload is requested
             try:
                 ignore_host = options[OPT_UPLOAD_IGNORE_HOST]
             except KeyError:
                 pass
             else:
                 # host is the ignored one, we skip
                 parsed_url = urlparse.urlsplit(url)
                 if ignore_host in parsed_url.hostname:
-                    log.info(u"Don't upload image at {url} because of {opt} option".format(
-                        url=url, opt=OPT_UPLOAD_IGNORE_HOST))
+                    log.info(
+                        u"Don't upload image at {url} because of {opt} option".format(
+                            url=url, opt=OPT_UPLOAD_IGNORE_HOST
+                        )
+                    )
                     return
 
             # we download images and re-upload them via XMPP
-            tmp_file = os.path.join(tmp_dir, filename).encode('utf-8')
-            upload_options = {'ignore_tls_errors': options.get(OPT_IGNORE_TLS, False)}
+            tmp_file = os.path.join(tmp_dir, filename).encode("utf-8")
+            upload_options = {"ignore_tls_errors": options.get(OPT_IGNORE_TLS, False)}
 
             try:
-                yield web_client.downloadPage(url.encode('utf-8'), tmp_file)
-                filename = filename.replace(u'%', u'_') # FIXME: tmp workaround for a bug in prosody http upload
-                dummy, download_d = yield self._u.upload(client, tmp_file, filename, options=upload_options)
+                yield web_client.downloadPage(url.encode("utf-8"), tmp_file)
+                filename = filename.replace(
+                    u"%", u"_"
+                )  # FIXME: tmp workaround for a bug in prosody http upload
+                dummy, download_d = yield self._u.upload(
+                    client, tmp_file, filename, options=upload_options
+                )
                 download_url = yield download_d
             except Exception as e:
-                log.warning(u"can't download image at {url}: {reason}".format(url=url, reason=e))
+                log.warning(
+                    u"can't download image at {url}: {reason}".format(url=url, reason=e)
+                )
             else:
-                img_elt['src'] = download_url
+                img_elt["src"] = download_url
 
             try:
                 os.unlink(tmp_file)
             except OSError:
                 pass
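Note: taken together, the imgFilters steps normalise a relative src against opt_host and optionally replace it with an XMPP-uploaded copy. A hypothetical element over the three stages (all URLs illustrative):

    # options = {OPT_HOST: "http://myblog.example", OPT_UPLOAD_IMAGES: True}
    # 1. <img src="/images/cat.png"/>                         relative src from the old blog
    # 2. <img src="http://myblog.example/images/cat.png"/>    normalised with opt_host
    # 3. <img src="https://upload.example.net/xyz/cat.png"/>  downloaded, then re-uploaded via XMPP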