Mercurial > libervia-backend
annotate src/plugins/plugin_blog_import.py @ 1825:4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
- blog importers are plugins which register to this one with the "register" method
- blog import manages uploading the given blog data to a PubSub server
- filters can be used.
- current filters fix the <img> src without host, and upload images to XMPP server
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 22 Jan 2016 20:24:17 +0100 |
parents | |
children | 68c0dc13d821 |
rev | line source |
---|---|
1825
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
1 #!/usr/bin/python |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SàT plugin for import external blogs |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
5 # Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org) |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
20 from sat.core.i18n import _ |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
21 from sat.core.constants import Const as C |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
22 from sat.core.log import getLogger |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
23 log = getLogger(__name__) |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
24 from twisted.internet import defer |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
25 from twisted.web import client as web_client |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
26 from twisted.words.xish import domish |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
27 from sat.core import exceptions |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
28 from sat.tools import xml_tools |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
29 import collections |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
30 import os |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
31 import os.path |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
32 import tempfile |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
33 import urlparse |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 import uuid |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
36 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
# Plugin metadata. "dependencies" lists the plugins looked up in
# BlogImportPlugin.__init__ and "main" names the plugin class.
# NOTE(review): presumably consumed by the host's plugin loader -- confirm.
PLUGIN_INFO = {
    "name": "blog import",
    "import_name": "BLOG_IMPORT",
    "type": C.PLUG_TYPE_BLOG,
    "dependencies": ["XEP-0277", "TEXT-SYNTAXES", "UPLOAD"],
    "main": "BlogImportPlugin",
    "handler": "no",
    "description": _(u"""Blog import management:
This plugin manage the different blog importers which can register to it, and handler generic importing tasks.""")
}
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
47 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
# Names of the generic import options, i.e. keys of the ``options`` dict
# handled by BlogImportPlugin.blogImport.
OPT_HOST = 'host'
OPT_UPLOAD_IMAGES = 'upload_images'
OPT_UPLOAD_IGNORE_HOST = 'upload_ignore_host'
OPT_IGNORE_TLS = 'ignore_tls_errors'
# Options whose value is converted with C.bool in BlogImportPlugin._blogImport
BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS)
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
53 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
54 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
# Record describing one blog importer: the callable doing the actual import
# (called as callback(client, location, options) by blogImport) and its short
# and long descriptions (returned by getDescription).
BlogImporter = collections.namedtuple('BlogImporter', ('callback', 'short_desc', 'long_desc'))
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
56 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
57 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
58 class BlogImportPlugin(object): |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
59 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
def __init__(self, host):
    """Keep references to the needed plugins and register the bridge methods

    @param host: backend host object; only its ``plugins`` mapping and its
        ``bridge`` are used here
    """
    log.info(_("plugin Blog Import initialization"))
    self.host = host
    # importer name => importer record (presumably BlogImporter instances,
    # filled by the "register" method -- not visible in this part of the file)
    self._importers = {}
    self._u = host.plugins['UPLOAD']
    self._m = host.plugins['XEP-0277']
    self._s = self.host.plugins['TEXT-SYNTAXES']
    # "async" is a reserved word from Python 3.7 on: passing it through a dict
    # sends the very same keyword argument while keeping this call valid there
    host.bridge.addMethod("blogImport", ".plugin", in_sign='ssa{ss}ss', out_sign='', method=self._blogImport, **{'async': True})
    host.bridge.addMethod("blogImportList", ".plugin", in_sign='', out_sign='a(ss)', method=self.listImporters)
    host.bridge.addMethod("blogImportDesc", ".plugin", in_sign='s', out_sign='(ss)', method=self.getDescription)
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
70 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
def listImporters(self):
    """List the registered blog importers

    @return (list[tuple[unicode, unicode]]): (name, short description) of
        every registered importer, sorted by importer name
    """
    # sorted() gives a stable, name-ordered listing whatever the dict order is
    return [(name, self._importers[name].short_desc) for name in sorted(self._importers)]
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
75 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
def getDescription(self, name):
    """Return import short and long descriptions

    @param name(unicode): blog importer name
    @return (tuple[unicode,unicode]): short and long description
    @raise exceptions.NotFound: no importer is registered under this name
    """
    try:
        importer = self._importers[name]
    except KeyError:
        raise exceptions.NotFound(u"Blog importer not found [{}]".format(name))
    else:
        return importer.short_desc, importer.long_desc
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
88 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
def _blogImport(self, name, location, options, pubsub_service='', profile=C.PROF_KEY_DEFAULT):
    """Bridge front-end of blogImport

    Resolve the profile to a client, convert the known boolean options
    with C.bool, then delegate to blogImport.

    @param name(unicode): name of the blog importer
    @param location(unicode): location of the blog data to import
    @param options(dict): import options; the BOOL_OPTIONS entries which
        are present are converted in place
    @param pubsub_service(unicode): jid of the PubSub service to use,
        empty to let blogImport use its default (None)
    @param profile: profile key used to get the client
    @return: whatever blogImport returns
    """
    # function-scope import: jid is not among the imports visible in this file
    from twisted.words.protocols.jabber import jid
    client = self.host.getClient(profile)
    for option in BOOL_OPTIONS:
        try:
            options[option] = C.bool(options[option])
        except KeyError:
            # option not given: blogImport handles the missing entry
            pass
    # the service used to be silently dropped here; forward it as a JID
    service = jid.JID(pubsub_service) if pubsub_service else None
    return self.blogImport(client, name, location, options, service)
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
97 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
@defer.inlineCallbacks
def blogImport(self, client, name, location, options=None, pubsub_service=None):
    """Import a blog

    @param client: client of the profile doing the import
    @param name(unicode): name of the blog importer
    @param location(unicode): location of the blog data to import
        can be an url, a file path, or anything which make sense
        check importer description for more details
    @param options(dict, None): extra options. Below are the generic options,
        blog importer can have specific ones. All options have unicode values
        generic options:
            - OPT_HOST (unicode): original host
            - OPT_UPLOAD_IMAGES (bool): upload images to XMPP server if True
                see OPT_UPLOAD_IGNORE_HOST.
                Default: True
            - OPT_UPLOAD_IGNORE_HOST (unicode): don't upload images from this host
            - OPT_IGNORE_TLS (bool): ignore TLS error for image upload.
                Default: False
        The given dict is modified in place: defaults are added and the
        empty/False values of the two boolean options are removed.
    @param pubsub_service(jid.JID, None): jid of the PubSub service where blog must be imported
        None to use profile's server
    @raise exceptions.NotFound: no importer is registered under this name
    """
    if options is None:
        options = {}
    # defaults are applied in every case, including options=None, so that the
    # documented "Default: True" of OPT_UPLOAD_IMAGES really holds
    for opt_name, opt_default in ((OPT_UPLOAD_IMAGES, True),
                                  (OPT_IGNORE_TLS, False)):
        # we want a filled options dict, with all empty or False values removed
        try:
            value = options[opt_name]
        except KeyError:
            if opt_default:
                options[opt_name] = opt_default
        else:
            if not value:
                del options[opt_name]
    try:
        importer = self._importers[name]
    except KeyError:
        raise exceptions.NotFound(u"Importer [{}] not found".format(name))
    posts_data = yield importer.callback(client, location, options)
    url_links = {}
    # the requested service is forwarded (it used to be ignored); None keeps
    # the previous behaviour
    yield self._recursiveImport(client, posts_data, options, url_links, service=pubsub_service)
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
140 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
@defer.inlineCallbacks
def _recursiveImport(self, client, posts_data, options, url_links, service=None, node=None, depth=0):
    """Do the upload recursively

    Each post is filtered, sent to (service, node), then its comments are
    imported the same way on the comments node of the post.

    @param client: client of the profile doing the import
    @param posts_data(list): list of data as specified in [register]
        each item is read here through its 'blog' and 'comments' keys,
        and its optional 'url' key
    @param options(dict): import options
    @param url_links(dict): link between former posts and new items
        filled here: old url => (service, node, item id)
    @param service(jid.JID, None): PubSub service to use
    @param node(unicode, None): PubSub node to use
    @param depth(int): level of recursion
    @raise NotImplementedError: data['comments'] has not exactly one element
    @raise exceptions.DataError: comments are present while 'allow_comments'
        is not set
    """
    for data in posts_data:
        # data checks/filters
        mb_data = data['blog']
        try:
            item_id = mb_data['id']
        except KeyError:
            # no id given: generate one, it is needed for the comments node
            item_id = mb_data['id'] = unicode(uuid.uuid4())

        try:
            # we keep the link between old url and new blog item
            # so the user can redirect its former blog urls
            old_url = data['url']
        except KeyError:
            pass
        else:
            url_links[old_url] = (service, node, item_id)
            log.info(u"url link from {old} to {service}/{node}/{id}".format(
                old=old_url, service=service or u'server', node=node or u'', id=item_id))

        # debug logs are only emitted for top level posts, not for comments
        if not depth:
            log.debug(u"Filtering data")
        yield self.blogFilters(client, mb_data, options)

        # comments data
        if len(data['comments']) != 1:
            raise NotImplementedError(u"can't manage multiple comment links")
        allow_comments = C.bool(mb_data.get('allow_comments', C.BOOL_FALSE))
        if allow_comments:
            comments_service, comments_node = self._m.getCommentService(client), self._m.getCommentNode(item_id)
            mb_data['comments_service'] = comments_service
            mb_data['comments_node'] = comments_node
        else:
            if data['comments'][0]:
                raise exceptions.DataError(u"allow_comments set to False, but comments are there")

        # post upload
        if not depth:
            log.debug(u"uploading item [{id}]: {title}".format(id=mb_data['id'], title=mb_data.get('title', '')))
        yield self._m.send(mb_data, service, node, profile=client.profile)

        # comments upload
        if not depth:
            log.debug(u"uploading comments")
        if allow_comments:
            # the comments are themselves posts data, sent to the comments node
            yield self._recursiveImport(client, data['comments'][0], options, url_links, service=comments_service, node=comments_node, depth=depth+1)
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
194 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
@defer.inlineCallbacks
def blogFilters(self, client, mb_data, options):
    """Apply filters according to options

    mb_data is modified in place: rich content is converted to XHTML, then
    every <img/> element of the XHTML content is passed to [imgFilters].
    Nothing is done if options is empty, or if only a plain text content is
    available (filters can't work on text).

    @param client: client session, its profile is used to get the current rich syntax
    @param mb_data(dict): microblog data of the item to filter
        the content_rich, content_xhtml and content_text keys are read and updated
    @param options(dict): dict as given in [blogImport]
    @raise exceptions.DataError: content_rich and content_xhtml are both present
    """
    # FIXME: blog filters don't work on text content
    # TODO: text => XHTML conversion should handle links with <a/>
    #       filters can then be used by converting text to XHTML
    if not options:
        return

    # we want only XHTML content
    for prefix in ('content',):  # a tuple is used, in case title needs to be added in the future
        rich_key = '{}_rich'.format(prefix)
        xhtml_key = '{}_xhtml'.format(prefix)
        text_key = '{}_text'.format(prefix)

        try:
            rich = mb_data[rich_key]
        except KeyError:
            pass
        else:
            if xhtml_key in mb_data:
                raise exceptions.DataError(u"importer gave {prefix}_rich and {prefix}_xhtml at the same time, this is not allowed".format(prefix=prefix))
            # we convert rich syntax to XHTML here, so we can handle filters easily
            converted = yield self._s.convert(rich, self._s.getCurrentSyntax(client.profile), safe=False)
            mb_data[xhtml_key] = converted
            del mb_data[rich_key]

        # the key tested here must be the one which is deleted below
        # (a 'txt' key was tested before, which never matched the text content)
        if text_key in mb_data:
            if xhtml_key in mb_data:
                log.warning(u"{prefix}_text will be replaced by converted {prefix}_xhtml, so filters can be handled".format(prefix=prefix))
                del mb_data[text_key]
            else:
                log.warning(u"importer gave a text {prefix}, blog filters don't work on text {prefix}".format(prefix=prefix))
                return

    # at this point, we have only XHTML version of content
    try:
        top_elt = xml_tools.ElementParser()(mb_data['content_xhtml'], namespace=C.NS_XHTML)
    except domish.ParserError:
        # we clean the xml and try again our luck
        cleaned = yield self._s.cleanXHTML(mb_data['content_xhtml'])
        top_elt = xml_tools.ElementParser()(cleaned, namespace=C.NS_XHTML)

    opt_host = options.get(OPT_HOST)
    if opt_host:
        # we normalise the domain: only scheme (http by default) and host are kept
        parsed_host = urlparse.urlsplit(opt_host)
        opt_host = urlparse.urlunsplit((parsed_host.scheme or 'http', parsed_host.netloc or parsed_host.path, '', '', ''))

    tmp_dir = tempfile.mkdtemp()
    try:
        for img_elt in xml_tools.findAll(top_elt, ['img']):
            yield self.imgFilters(client, img_elt, options, opt_host, tmp_dir)
    finally:
        # the directory is removed even if a filter failed, so it is not leaked
        os.rmdir(tmp_dir)  # XXX: tmp_dir should be empty, or something went wrong

    # we now replace the content with filtered one
    mb_data['content_xhtml'] = top_elt.toXml()
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
255 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
@defer.inlineCallbacks
def imgFilters(self, client, img_elt, options, opt_host, tmp_dir):
    """Filters handling images

    url without host are fixed (if possible)
    according to options, images are uploaded to XMPP server
    The "src" attribute of img_elt is modified in place. An element without
    usable "src" is ignored with a warning, and a failing download/upload
    only logs a warning (the normalised url is then kept).

    @param client: client session, given to the upload plugin
    @param img_elt(domish.Element): <img/> element to handle
    @param options(dict): filters options
    @param opt_host(unicode): normalised host given in options
    @param tmp_dir(str): path to temp directory
    """
    try:
        url = img_elt['src']
        if url[0] == u'/':
            if not opt_host:
                log.warning(u"host was not specified, we can't deal with src without host ({url}) and have to ignore the following <img/>:\n{xml}"
                    .format(url=url, xml=img_elt.toXml()))
                return
            else:
                url = urlparse.urljoin(opt_host, url)
        filename = url.rsplit('/',1)[-1].strip()
        if not filename:
            # an url ending with "/" can't be used to name the downloaded file
            raise KeyError
    except (KeyError, IndexError):
        # no "src" attribute, empty "src", or no filename in it
        log.warning(u"ignoring invalid img element: {}".format(img_elt.toXml()))
        return

    # we change the url for the normalized one
    img_elt['src'] = url

    if options.get(OPT_UPLOAD_IMAGES, False):
        # upload is requested
        ignore_host = options.get(OPT_UPLOAD_IGNORE_HOST)
        if ignore_host:
            # hostname is None when the url has no host part (e.g. relative path
            # without leading "/"), it must not be used directly with "in"
            hostname = urlparse.urlsplit(url).hostname or u''
            if ignore_host in hostname:
                # host is the ignored one, we skip
                log.info(u"Don't upload image at {url} because of {opt} option".format(
                    url=url, opt=OPT_UPLOAD_IGNORE_HOST))
                return

        # we download images and re-upload them via XMPP
        tmp_file = os.path.join(tmp_dir, filename).encode('utf-8')
        upload_options = {'ignore_tls_errors': options.get(OPT_IGNORE_TLS, False)}

        try:
            yield web_client.downloadPage(url.encode('utf-8'), tmp_file)
            filename = filename.replace(u'%', u'_')  # FIXME: tmp workaround for a bug in prosody http upload
            dummy, download_d = yield self._u.upload(client, tmp_file, filename, options=upload_options)
            download_url = yield download_d
        except Exception as e:
            # best effort: a failing image must not stop the whole import
            log.warning(u"can't download image at {url}: {reason}".format(url=url, reason=e))
        else:
            img_elt['src'] = download_url

        # the temporary file may not exist if the download failed
        try:
            os.unlink(tmp_file)
        except OSError:
            pass
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
318 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
def register(self, name, callback, short_desc='', long_desc=''):
    """Register a blogImport method

    @param name(unicode): unique importer name, it should indicate the blogging software
        which is handled; it is stored in lowercase
    @param callback(callable): method to call:
        the signature must be (client, location, options) (cf. [blogImport])
        the importer must return an iterable of dict which must have the following keys:
            'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en)
                the importer MUST NOT create node or call XEP-0277 plugin itself
                'comments*' keys MUST NOT be used in this microblog_data, see below for comments
                It is recommended to use a unique id in the "id" key which is constant per blog item,
                so if the import fails, a new import will overwrite the failed items and avoid duplicates.

            'comments' (list[list[dict]],None): Dictionaries must have the same keys as main item (i.e. 'blog' and 'comments')
                a list of list is used because XEP-0277 can handle several comments nodes,
                but in most cases, there will be only one item in the first list (something like [[{comment1_data},{comment2_data}, ...]])
                blog['allow_comments'] must be True if there is any comment, and False (or not present) if comments are not allowed.
                If allow_comments is False and some comments are present, an exceptions.DataError will be raised
        the importer MAY optionally have the following keys:
            'url' (unicode): former url of the post (only the path, without host part)
                if present the association to the new path will be displayed to user, so it can make redirections if necessary

        Optionally, the importer plugin can return a tuple with the just described iterator and a post_total
        where "post_total" (int) indicates the total number of posts (without comments)
        useful to display a progress indicator when the iterator is a generator
    @param short_desc(unicode): one line description of the importer
    @param long_desc(unicode): long description of the importer, its options, etc.
    @raise exceptions.ConflictError: an importer is already registered with this name
    """
    importer_key = name.lower()
    registered = self._importers
    if importer_key in registered:
        raise exceptions.ConflictError(u"A blog importer with the name {} already exsit".format(importer_key))
    registered[importer_key] = BlogImporter(callback, short_desc, long_desc)
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
351 |
4e51f21c687f
plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
def unregister(self, name):
    """Remove a registered blog importer

    @param name(unicode): name of the importer, as given to [register]
        the case is ignored, because [register] stores names in lowercase
    @raise KeyError: no importer is registered with this name
    """
    del self._importers[name.lower()]