annotate libervia/backend/plugins/plugin_blog_import.py @ 4258:ba28ca268f4a

plugin text syntaxes: fix empty string handling in `_remove_markups`.
author Goffi <goffi@goffi.org>
date Wed, 05 Jun 2024 22:33:37 +0200
parents 4b842c1fb686
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2780
diff changeset
1 #!/usr/bin/env python3
3137
559a625a236b fixed shebangs
Goffi <goffi@goffi.org>
parents: 3136
diff changeset
2
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
3
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # SàT plugin for import external blogs
3479
be6d91572633 date update
Goffi <goffi@goffi.org>
parents: 3137
diff changeset
5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
6
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
11
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
16
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
19
2369
cdaa58e14553 plugin import: generic data import plugin:
Goffi <goffi@goffi.org>
parents: 2145
diff changeset
20
4071
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
21 from libervia.backend.core.i18n import _
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
22 from libervia.backend.core.constants import Const as C
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
23 from libervia.backend.core.log import getLogger
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
24
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
25 log = getLogger(__name__)
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
26 from twisted.internet import defer
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
27 from twisted.web import client as web_client
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
28 from twisted.words.xish import domish
4071
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
29 from libervia.backend.core import exceptions
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
30 from libervia.backend.tools import xml_tools
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
31 import os
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
32 import os.path
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
33 import tempfile
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2780
diff changeset
34 import urllib.parse
2369
cdaa58e14553 plugin import: generic data import plugin:
Goffi <goffi@goffi.org>
parents: 2145
diff changeset
35 import shortuuid
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
36
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
37
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
# Plugin metadata, keyed by the plugin-info constants of ``Const``.
# Every plugin named in C.PI_DEPENDENCIES is looked up in ``host.plugins`` by
# BlogImportPlugin.__init__, and C.PI_MAIN names that class.
# NOTE(review): the description is a translatable message id; the leading
# whitespace of its second line is assumed to be empty -- confirm against the
# repository before applying, as any change would alter the msgid.
PLUGIN_INFO = {
    C.PI_NAME: "blog import",
    C.PI_IMPORT_NAME: "BLOG_IMPORT",
    C.PI_TYPE: (C.PLUG_TYPE_BLOG, C.PLUG_TYPE_IMPORT),
    C.PI_DEPENDENCIES: ["IMPORT", "XEP-0060", "XEP-0277", "TEXT_SYNTAXES", "UPLOAD"],
    C.PI_MAIN: "BlogImportPlugin",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _(
        """Blog import management:
This plugin manage the different blog importers which can register to it, and handle generic importing tasks."""
    ),
}
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
50
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
# Names of the generic import options (their meaning is documented in
# BlogImportPlugin.import_item).
OPT_HOST = "host"
OPT_UPLOAD_IMAGES = "upload_images"
OPT_UPLOAD_IGNORE_HOST = "upload_ignore_host"
OPT_IGNORE_TLS = "ignore_tls_errors"
# Prefix of the ``return_data`` keys under which import_item stores the new
# node URI of a post; the former URL of the post is appended to it.
URL_REDIRECT_PREFIX = "url_redirect_"
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
56
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
57
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
58 class BlogImportPlugin(object):
2369
cdaa58e14553 plugin import: generic data import plugin:
Goffi <goffi@goffi.org>
parents: 2145
diff changeset
59 BOOL_OPTIONS = (OPT_UPLOAD_IMAGES, OPT_IGNORE_TLS)
2396
66baa687c682 plugins tickets import, jp (ticket/import): implemented mapping:
Goffi <goffi@goffi.org>
parents: 2370
diff changeset
60 JSON_OPTIONS = ()
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
61 OPT_DEFAULTS = {OPT_UPLOAD_IMAGES: True, OPT_IGNORE_TLS: False}
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
62
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
def __init__(self, host):
    """Store the plugins needed for blog import and register with "IMPORT"

    @param host: object exposing the loaded plugins through its ``plugins``
        mapping; the "UPLOAD", "XEP-0060", "XEP-0277", "TEXT_SYNTAXES" and
        "IMPORT" entries must be present
    """
    log.info(_("plugin Blog import initialization"))
    self.host = host
    # short aliases to the plugins used by the other methods
    self._u = host.plugins["UPLOAD"]
    self._p = host.plugins["XEP-0060"]
    self._m = host.plugins["XEP-0277"]
    self._s = host.plugins["TEXT_SYNTAXES"]
    # hand this instance to the generic import plugin under the "blog" name
    host.plugins["IMPORT"].initialize(self, "blog")
1831
68c0dc13d821 plugin blog import, XEP-0277: progress + redirect:
Goffi <goffi@goffi.org>
parents: 1825
diff changeset
71
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3922
diff changeset
def import_item(
    self, client, item_import_data, session, options, return_data, service, node
):
    """import_item specialized for blog import

    @param client: client session; its ``jid`` is only used to build the node
        URI when ``service`` is None and a former url is given
    @param item_import_data(dict):
        * mandatory keys:
            'blog' (dict): microblog data of the blog post (cf. http://wiki.goffi.org/wiki/Bridge_API_-_Microblogging/en)
                the importer MUST NOT create node or call XEP-0277 plugin itself
                'comments*' key MUST NOT be used in this microblog_data, see below for comments
                It is recommended to use a unique id in the "id" key which is constant per blog item,
                so if the import fails, a new import will overwrite the failed items and avoid duplicates.

            'comments' (list[list[dict]],None): Dictionaries must have the same keys as main item (i.e. 'blog' and 'comments')
                a list of list is used because XEP-0277 can handle several comments nodes,
                but in most cases, there will be only one item in the first list (something like [[{comment1_data},{comment2_data}, ...]])
                blog['allow_comments'] must be True if there is any comment, and False (or not present) if comments are not allowed.
                If allow_comments is False and some comments are present, an exceptions.DataError will be raised
        * optional keys:
            'url' (unicode): former url of the post (only the path, without host part)
                if present the association to the new path will be displayed to user, so it can make redirections if necessary
    @param session: import session data (not used by this method)
    @param options(dict, None): Below are the generic options,
        blog importer can have specific ones. All options have unicode values
        generic options:
            - OPT_HOST (unicode): original host
            - OPT_UPLOAD_IMAGES (bool): upload images to XMPP server if True
                see OPT_UPLOAD_IGNORE_HOST.
                Default: True
            - OPT_UPLOAD_IGNORE_HOST (unicode): don't upload images from this host
            - OPT_IGNORE_TLS (bool): ignore TLS error for image upload.
                Default: False
        (not used by this method)
    @param return_data(dict): will contain link between former posts and new items
    @param service: pubsub service of the new item; when None, the bare jid of
        the client is used to build the node URI
    @param node: pubsub node of the new item; when empty, the XEP-0277 plugin
        namespace is used to build the node URI
    @return (dict): the microblog data (``item_import_data["blog"]``), with an
        "id" key added if the importer did not set one
    """
    mb_data = item_import_data["blog"]
    try:
        item_id = mb_data["id"]
    except KeyError:
        # the importer gave no id: generate one and store it in the item
        item_id = mb_data["id"] = str(shortuuid.uuid())

    try:
        # we keep the link between old url and new blog item
        # so the user can redirect its former blog urls
        old_uri = item_import_data["url"]
    except KeyError:
        pass
    else:
        new_uri = return_data[URL_REDIRECT_PREFIX + old_uri] = self._p.get_node_uri(
            service if service is not None else client.jid.userhostJID(),
            node or self._m.namespace,
            item_id,
        )
        log.info("url link from {old} to {new}".format(old=old_uri, new=new_uri))

    return mb_data
cdaa58e14553 plugin import: generic data import plugin:
Goffi <goffi@goffi.org>
parents: 2145
diff changeset
127
2437
91bbad17fd53 plugins blog import, tickets import, XEP-0277: fixed call to getCommentsService (it is now returning a Deferred)
Goffi <goffi@goffi.org>
parents: 2414
diff changeset
@defer.inlineCallbacks
def import_sub_items(self, client, item_import_data, mb_data, session, options):
    """Prepare the import of the comments attached to a blog item

    When comments are allowed, the comments service and node are stored in
    ``mb_data`` under "comments_service" and "comments_node".

    @param client: client session, given to the XEP-0277 plugin to get the
        comments service
    @param item_import_data(dict): data of the item; its "comments" key must
        hold exactly one list of comments
    @param mb_data(dict): microblog data of the item, modified in place;
        "allow_comments" is read (default: C.BOOL_FALSE) as well as "id"
    @param session: import session data (not used by this method)
    @param options: import options (not used by this method)
    @return (dict, None): "items_import_data", "service" and "node" of the
        comments to import, or None if comments are not allowed
    @raise NotImplementedError: "comments" doesn't hold exactly one list
    @raise exceptions.DataError: comments are present while "allow_comments"
        is false
    """
    # NOTE(review): import_item documents that "comments" may be None, which
    # would make len() fail here -- confirm that callers never pass None
    comments = item_import_data["comments"]
    if len(comments) != 1:
        raise NotImplementedError("can't manage multiple comment links")

    allow_comments = C.bool(mb_data.get("allow_comments", C.BOOL_FALSE))
    if not allow_comments:
        if comments[0]:
            raise exceptions.DataError(
                "allow_comments set to False, but comments are there"
            )
        return None

    comments_service = yield self._m.get_comments_service(client)
    comments_node = self._m.get_comments_node(mb_data["id"])
    mb_data["comments_service"] = comments_service.full()
    mb_data["comments_node"] = comments_node
    # with Python 3 a plain return is enough inside inlineCallbacks,
    # defer.returnValue is not needed
    return {
        "items_import_data": comments[0],
        "service": comments_service,
        "node": comments_node,
    }
2369
cdaa58e14553 plugin import: generic data import plugin:
Goffi <goffi@goffi.org>
parents: 2145
diff changeset
151
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3922
diff changeset
def publish_item(self, client, mb_data, service, node, session):
    """Send a blog item through the XEP-0277 plugin

    @param client: client session, passed to the XEP-0277 plugin
    @param mb_data(dict): microblog data of the item; "id" is mandatory,
        "title" is only used for the debug log
    @param service: pubsub service, passed as is to the XEP-0277 plugin
    @param node: pubsub node, passed as is to the XEP-0277 plugin
    @param session: import session data (not used by this method)
    @return: whatever the ``send`` method of the XEP-0277 plugin returns
    """
    log.debug(
        "uploading item [{id}]: {title}".format(
            id=mb_data["id"], title=mb_data.get("title", "")
        )
    )
    return self._m.send(client, mb_data, service, node)
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
159
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
160 @defer.inlineCallbacks
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3922
diff changeset
161 def item_filters(self, client, mb_data, session, options):
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
162 """Apply filters according to options
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
163
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
164 modify mb_data in place
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
165 @param posts_data(list[dict]): data as returned by importer callback
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
166 @param options(dict): dict as given in [blogImport]
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
167 """
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
168 # FIXME: blog filters don't work on text content
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
169 # TODO: text => XHTML conversion should handler links with <a/>
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
170 # filters can then be used by converting text to XHTML
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
171 if not options:
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
172 return
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
173
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
174 # we want only XHTML content
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
175 for prefix in (
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
176 "content",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
177 ): # a tuple is use, if title need to be added in the future
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
178 try:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
179 rich = mb_data["{}_rich".format(prefix)]
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
180 except KeyError:
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
181 pass
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
182 else:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
183 if "{}_xhtml".format(prefix) in mb_data:
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
184 raise exceptions.DataError(
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2780
diff changeset
185 "importer gave {prefix}_rich and {prefix}_xhtml at the same time, this is not allowed".format(
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
186 prefix=prefix
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
187 )
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
188 )
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
189 # we convert rich syntax to XHTML here, so we can handle filters easily
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
190 converted = yield self._s.convert(
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3922
diff changeset
191 rich, self._s.get_current_syntax(client.profile), safe=False
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
192 )
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
193 mb_data["{}_xhtml".format(prefix)] = converted
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
194 del mb_data["{}_rich".format(prefix)]
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
195
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
196 try:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
197 mb_data["txt"]
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
198 except KeyError:
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
199 pass
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
200 else:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
201 if "{}_xhtml".format(prefix) in mb_data:
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
202 log.warning(
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2780
diff changeset
203 "{prefix}_text will be replaced by converted {prefix}_xhtml, so filters can be handled".format(
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
204 prefix=prefix
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
205 )
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
206 )
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
207 del mb_data["{}_text".format(prefix)]
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
208 else:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
209 log.warning(
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2780
diff changeset
210 "importer gave a text {prefix}, blog filters don't work on text {prefix}".format(
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
211 prefix=prefix
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
212 )
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
213 )
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
214 return
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
215
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
216 # at this point, we have only XHTML version of content
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
217 try:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
218 top_elt = xml_tools.ElementParser()(
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
219 mb_data["content_xhtml"], namespace=C.NS_XHTML
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
220 )
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
221 except domish.ParserError:
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
222 # we clean the xml and try again our luck
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3922
diff changeset
223 cleaned = yield self._s.clean_xhtml(mb_data["content_xhtml"])
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
224 top_elt = xml_tools.ElementParser()(cleaned, namespace=C.NS_XHTML)
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
225 opt_host = options.get(OPT_HOST)
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
226 if opt_host:
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
227 # we normalise the domain
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2780
diff changeset
228 parsed_host = urllib.parse.urlsplit(opt_host)
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2780
diff changeset
229 opt_host = urllib.parse.urlunsplit(
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
230 (
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
231 parsed_host.scheme or "http",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
232 parsed_host.netloc or parsed_host.path,
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
233 "",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
234 "",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
235 "",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
236 )
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
237 )
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
238
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
239 tmp_dir = tempfile.mkdtemp()
1831
68c0dc13d821 plugin blog import, XEP-0277: progress + redirect:
Goffi <goffi@goffi.org>
parents: 1825
diff changeset
240 try:
1844
489b968b3723 plugin blog_import_dokuwiki: also uses the generic image uploader from blog_import (when media_repo is empty and OPT_UPLOAD_IMAGES is True)
souliane <souliane@mailoo.org>
parents: 1839
diff changeset
241 # TODO: would be nice to also update the hyperlinks to these images, e.g. when you have <a href="{url}"><img src="{url}"></a>
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3922
diff changeset
242 for img_elt in xml_tools.find_all(top_elt, names=["img"]):
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3922
diff changeset
243 yield self.img_filters(client, img_elt, options, opt_host, tmp_dir)
1831
68c0dc13d821 plugin blog import, XEP-0277: progress + redirect:
Goffi <goffi@goffi.org>
parents: 1825
diff changeset
244 finally:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
245 os.rmdir(tmp_dir) # XXX: tmp_dir should be empty, or something went wrong
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
246
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
247 # we now replace the content with filtered one
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
248 mb_data["content_xhtml"] = top_elt.toXml()
1825
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
249
4e51f21c687f plugin blog import: this plugin is the base handling blog importers:
Goffi <goffi@goffi.org>
parents:
diff changeset
@defer.inlineCallbacks
def img_filters(self, client, img_elt, options, opt_host, tmp_dir):
    """Filters handling images

    url without host are fixed (if possible)
    according to options, images are uploaded to XMPP server
    On any failure the element is left with its (normalised) source URL and
    a warning is logged; no exception is propagated for an invalid <img/>
    or a failed download/upload.

    @param client: client session, passed through to the upload plugin
    @param img_elt(domish.Element): <img/> element to handle
        its "src" attribute is modified in place
    @param options(dict): filters options
    @param opt_host(unicode): normalised host given in options
    @param tmp_dir(str): path to temp directory
        the downloaded file is stored there, and removed before returning
    @return (Deferred): fired once the element has been handled
    """
    try:
        url = img_elt["src"]
        # str.startswith is safe on an empty string, where url[0] would raise
        if url.startswith("/"):
            if not opt_host:
                log.warning(
                    "host was not specified, we can't deal with src without host ({url}) and have to ignore the following <img/>:\n{xml}".format(
                        url=url, xml=img_elt.toXml()
                    )
                )
                return
            url = urllib.parse.urljoin(opt_host, url)
        filename = url.rsplit("/", 1)[-1].strip()
        if not filename:
            # empty "src", or URL ending with "/": nothing usable as a file name
            raise KeyError
    except KeyError:
        log.warning("ignoring invalid img element: {}".format(img_elt.toXml()))
        return

    # we change the url for the normalized one
    img_elt["src"] = url

    if not options.get(OPT_UPLOAD_IMAGES, False):
        # upload is not requested, fixing the URL was all we had to do
        return

    try:
        ignore_host = options[OPT_UPLOAD_IGNORE_HOST]
    except KeyError:
        pass
    else:
        # hostname is None when the URL has no network location (e.g. a
        # relative "images/a.png"): such an URL can't match the ignored host,
        # and a substring test against None would raise TypeError
        hostname = urllib.parse.urlsplit(url).hostname
        if hostname is not None and ignore_host in hostname:
            # host is the ignored one, we skip
            log.info(
                "Don't upload image at {url} because of {opt} option".format(
                    url=url, opt=OPT_UPLOAD_IGNORE_HOST
                )
            )
            return

    # we download images and re-upload them via XMPP
    tmp_file = os.path.join(tmp_dir, filename).encode("utf-8")
    upload_options = {"ignore_tls_errors": options.get(OPT_IGNORE_TLS, False)}

    try:
        yield web_client.downloadPage(url.encode("utf-8"), tmp_file)
        filename = filename.replace(
            "%", "_"
        )  # FIXME: tmp workaround for a bug in prosody http upload
        __, download_d = yield self._u.upload(
            client, tmp_file, filename, extra=upload_options
        )
        download_url = yield download_d
    except Exception as e:
        # best effort: the image keeps its normalised source URL
        log.warning(
            "can't download image at {url}: {reason}".format(url=url, reason=e)
        )
    else:
        img_elt["src"] = download_url
    finally:
        # always clean up, the caller expects tmp_dir to be empty afterwards
        try:
            os.unlink(tmp_file)
        except OSError:
            # the file may never have been created if the download failed
            pass