annotate libervia/backend/plugins/plugin_blog_import_dotclear.py @ 4149:c36295487082

core: introduce Pydantic based models in `libervia.backend.models.core`
author Goffi <goffi@goffi.org>
date Wed, 22 Nov 2023 14:31:05 +0100
parents 4b842c1fb686
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
1 #!/usr/bin/env python3
3137
559a625a236b fixed shebangs
Goffi <goffi@goffi.org>
parents: 3136
diff changeset
2
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
3
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # SàT plugin for import external blogs
3479
be6d91572633 date update
Goffi <goffi@goffi.org>
parents: 3137
diff changeset
5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org)
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
6
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
11
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
16
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
19
4071
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
20 from libervia.backend.core.i18n import _, D_
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
21 from libervia.backend.core.constants import Const as C
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
22 from libervia.backend.core.log import getLogger
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
23
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
24 log = getLogger(__name__)
4071
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
25 from libervia.backend.core import exceptions
4b842c1fb686 refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents: 4037
diff changeset
26 from libervia.backend.tools.common import data_format
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
27 from twisted.internet import threads
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
28 from collections import OrderedDict
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
29 import itertools
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
30 import time
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
31 import cgi
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
32 import os.path
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
33
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
34
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
35 PLUGIN_INFO = {
2145
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
36 C.PI_NAME: "Dotclear import",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
37 C.PI_IMPORT_NAME: "IMPORT_DOTCLEAR",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
38 C.PI_TYPE: C.PLUG_TYPE_BLOG,
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
39 C.PI_DEPENDENCIES: ["BLOG_IMPORT"],
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
40 C.PI_MAIN: "DotclearImport",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
41 C.PI_HANDLER: "no",
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
42 C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine."""),
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
43 }
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
44
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
45 SHORT_DESC = D_("import posts from Dotclear blog engine")
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
46
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
47 LONG_DESC = D_(
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
48 """This importer handle Dotclear blog engine.
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
49
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
50 To use it, you'll need to export your blog to a flat file.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
51 You must go in your admin interface and select Plugins/Maintenance then Backup.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
52 Export only one blog if you have many, i.e. select "Download database of current blog"
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
53 Depending on your configuration, your may need to use import/Export plugin and export as a flat file.
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
54
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
55 location: you must use the absolute path to your backup for the location parameter
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
56 """
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
57 )
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
58 POST_ID_PREFIX = "sat_dc_"
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
59 KNOWN_DATA_TYPES = (
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
60 "link",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
61 "setting",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
62 "post",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
63 "meta",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
64 "media",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
65 "post_media",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
66 "comment",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
67 "captcha",
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
68 )
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
69 ESCAPE_MAP = {"r": "\r", "n": "\n", '"': '"', "\\": "\\"}
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
70
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
71
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
72 class DotclearParser(object):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
73 # XXX: we have to parse all file to build data
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
74 # this can be ressource intensive on huge blogs
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
75
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
76 def __init__(self):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
77 self.posts_data = OrderedDict()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
78 self.tags = {}
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
79
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
80 def get_post_id(self, post):
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
81 """Return a unique and constant post id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
82
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
83 @param post(dict): parsed post data
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
84 @return (unicode): post unique item id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
85 """
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
86 return "{}_{}_{}_{}:{}".format(
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
87 POST_ID_PREFIX,
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
88 post["blog_id"],
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
89 post["user_id"],
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
90 post["post_id"],
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
91 post["post_url"],
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
92 )
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
93
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
94 def get_comment_id(self, comment):
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
95 """Return a unique and constant comment id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
96
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
97 @param comment(dict): parsed comment
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
98 @return (unicode): comment unique comment id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
99 """
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
100 post_id = comment["post_id"]
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
101 parent_item_id = self.posts_data[post_id]["blog"]["id"]
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
102 return "{}_comment_{}".format(parent_item_id, comment["comment_id"])
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
103
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
104 def getTime(self, data, key):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
105 """Parse time as given by dotclear, with timezone handling
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
106
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
107 @param data(dict): dotclear data (post or comment)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
108 @param key(unicode): key to get (e.g. "post_creadt")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
109 @return (float): Unix time
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
110 """
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
111 return time.mktime(time.strptime(data[key], "%Y-%m-%d %H:%M:%S"))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
112
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
113 def read_fields(self, fields_data):
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
114 buf = []
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
115 idx = 0
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
116 while True:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
117 if fields_data[idx] != '"':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
118 raise exceptions.ParsingError
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
119 while True:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
120 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
121 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
122 char = fields_data[idx]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
123 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
124 raise exceptions.ParsingError("Data was expected")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
125 if char == '"':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
126 # we have reached the end of this field,
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
127 # we try to parse a new one
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
128 yield "".join(buf)
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
129 buf = []
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
130 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
131 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
132 separator = fields_data[idx]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
133 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
134 return
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
135 if separator != ",":
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
136 raise exceptions.ParsingError("Field separator was expeceted")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
137 idx += 1
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
138 break # we have a new field
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
139 elif char == "\\":
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
140 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
141 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
142 char = ESCAPE_MAP[fields_data[idx]]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
143 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
144 raise exceptions.ParsingError("Escaped char was expected")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
145 except KeyError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
146 char = fields_data[idx]
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
147 log.warning("Unknown key to escape: {}".format(char))
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
148 buf.append(char)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
149
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
150 def parseFields(self, headers, data):
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
151 return dict(zip(headers, self.read_fields(data)))
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
152
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
153 def post_handler(self, headers, data, index):
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
154 post = self.parseFields(headers, data)
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
155 log.debug("({}) post found: {}".format(index, post["post_title"]))
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
156 mb_data = {
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
157 "id": self.get_post_id(post),
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
158 "published": self.getTime(post, "post_creadt"),
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
159 "updated": self.getTime(post, "post_upddt"),
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
160 "author": post["user_id"], # there use info are not in the archive
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
161 # TODO: option to specify user info
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
162 "content_xhtml": "{}{}".format(
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
163 post["post_content_xhtml"], post["post_excerpt_xhtml"]
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
164 ),
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
165 "title": post["post_title"],
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
166 "allow_comments": C.bool_const(bool(int(post["post_open_comment"]))),
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
167 }
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
168 self.posts_data[post["post_id"]] = {
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
169 "blog": mb_data,
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
170 "comments": [[]],
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
171 "url": "/post/{}".format(post["post_url"]),
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
172 }
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
173
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
174 def meta_handler(self, headers, data, index):
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
175 meta = self.parseFields(headers, data)
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
176 if meta["meta_type"] == "tag":
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
177 tags = self.tags.setdefault(meta["post_id"], set())
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
178 tags.add(meta["meta_id"])
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
179
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
180 def meta_finished_handler(self):
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
181 for post_id, tags in self.tags.items():
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
182 data_format.iter2dict("tag", tags, self.posts_data[post_id]["blog"])
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
183 del self.tags
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
184
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
185 def comment_handler(self, headers, data, index):
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
186 comment = self.parseFields(headers, data)
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
187 if comment["comment_site"]:
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
188 # we don't use atom:uri because it's used for jid in XMPP
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
189 content = '{}\n<hr>\n<a href="{}">author website</a>'.format(
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
190 comment["comment_content"],
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
191 cgi.escape(comment["comment_site"]).replace('"', "%22"),
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
192 )
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
193 else:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
194 content = comment["comment_content"]
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
195 mb_data = {
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
196 "id": self.get_comment_id(comment),
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
197 "published": self.getTime(comment, "comment_dt"),
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
198 "updated": self.getTime(comment, "comment_upddt"),
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
199 "author": comment["comment_author"],
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
200 # we don't keep email addresses to avoid the author to be spammed
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
201 # (they would be available publicly else)
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
202 # 'author_email': comment['comment_email'],
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
203 "content_xhtml": content,
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
204 }
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
205 self.posts_data[comment["post_id"]]["comments"][0].append(
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
206 {"blog": mb_data, "comments": [[]]}
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
207 )
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
208
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
209 def parse(self, db_path):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
210 with open(db_path) as f:
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
211 signature = f.readline()
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
212 try:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
213 version = signature.split("|")[1]
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
214 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
215 version = None
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
216 log.debug("Dotclear version: {}".format(version))
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
217 data_type = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
218 data_headers = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
219 index = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
220 while True:
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
221 buf = f.readline()
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
222 if not buf:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
223 break
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
224 if buf.startswith("["):
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
225 header = buf.split(" ", 1)
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
226 data_type = header[0][1:]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
227 if data_type not in KNOWN_DATA_TYPES:
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
228 log.warning("unkown data type: {}".format(data_type))
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
229 index = 0
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
230 try:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
231 data_headers = header[1].split(",")
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
232 # we need to remove the ']' from the last header
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
233 last_header = data_headers[-1]
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
234 data_headers[-1] = last_header[: last_header.rfind("]")]
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
235 except IndexError:
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
236 log.warning("Can't read data)")
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
237 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
238 if data_type is None:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
239 continue
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
240 buf = buf.strip()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
241 if not buf and data_type in KNOWN_DATA_TYPES:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
242 try:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
243 finished_handler = getattr(
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
244 self, "{}FinishedHandler".format(data_type)
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
245 )
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
246 except AttributeError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
247 pass
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
248 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
249 finished_handler()
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
250 log.debug("{} data finished".format(data_type))
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
251 data_type = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
252 continue
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
253 assert data_type
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
254 try:
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
255 fields_handler = getattr(self, "{}Handler".format(data_type))
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
256 except AttributeError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
257 pass
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
258 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
259 fields_handler(data_headers, buf, index)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
260 index += 1
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
261 return (iter(self.posts_data.values()), len(self.posts_data))
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
262
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
263
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
264 class DotclearImport(object):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
265 def __init__(self, host):
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
266 log.info(_("plugin Dotclear import initialization"))
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
267 self.host = host
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
268 host.plugins["BLOG_IMPORT"].register(
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
269 "dotclear", self.dc_import, SHORT_DESC, LONG_DESC
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
270 )
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
271
4037
524856bd7b19 massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents: 3479
diff changeset
272 def dc_import(self, client, location, options=None):
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
273 if not os.path.isabs(location):
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
274 raise exceptions.DataError(
3028
ab2696e34d29 Python 3 port:
Goffi <goffi@goffi.org>
parents: 2771
diff changeset
275 "An absolute path to backup data need to be given as location"
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
276 )
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
277 dc_parser = DotclearParser()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
278 d = threads.deferToThread(dc_parser.parse, location)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
279 return d