Mercurial > libervia-backend
annotate libervia/backend/plugins/plugin_blog_import_dotclear.py @ 4197:9cda0347e0ac
plugin XEP-0359: if no origin ID is found, use <message> ID instead:
This way, we always have the right ID to check for non groupchat message in
`History.origin_id`.
author | Goffi <goffi@goffi.org> |
---|---|
date | Wed, 13 Dec 2023 22:00:25 +0100 |
parents | 4b842c1fb686 |
children |
rev | line source |
---|---|
3028 | 1 #!/usr/bin/env python3 |
3137 | 2 |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SàT plugin for import external blogs |
3479 | 5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
4071
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
20 from libervia.backend.core.i18n import _, D_ |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
21 from libervia.backend.core.constants import Const as C |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
22 from libervia.backend.core.log import getLogger |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
23 |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
24 log = getLogger(__name__) |
4071
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
25 from libervia.backend.core import exceptions |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
26 from libervia.backend.tools.common import data_format |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
27 from twisted.internet import threads |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
28 from collections import OrderedDict |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
29 import itertools |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
30 import time |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
31 import cgi |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
32 import os.path |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
33 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 PLUGIN_INFO = { |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
36 C.PI_NAME: "Dotclear import", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
37 C.PI_IMPORT_NAME: "IMPORT_DOTCLEAR", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
38 C.PI_TYPE: C.PLUG_TYPE_BLOG, |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
39 C.PI_DEPENDENCIES: ["BLOG_IMPORT"], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
40 C.PI_MAIN: "DotclearImport", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
41 C.PI_HANDLER: "no", |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
42 C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine."""), |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
43 } |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
44 |
3028 | 45 SHORT_DESC = D_("import posts from Dotclear blog engine") |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
46 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
47 LONG_DESC = D_( |
3028 | 48 """This importer handle Dotclear blog engine. |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
49 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
50 To use it, you'll need to export your blog to a flat file. |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
51 You must go in your admin interface and select Plugins/Maintenance then Backup. |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
52 Export only one blog if you have many, i.e. select "Download database of current blog" |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
53 Depending on your configuration, your may need to use import/Export plugin and export as a flat file. |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
54 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
55 location: you must use the absolute path to your backup for the location parameter |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
56 """ |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
57 ) |
3028 | 58 POST_ID_PREFIX = "sat_dc_" |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
59 KNOWN_DATA_TYPES = ( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
60 "link", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
61 "setting", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
62 "post", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
63 "meta", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
64 "media", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
65 "post_media", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
66 "comment", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
67 "captcha", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
68 ) |
3028 | 69 ESCAPE_MAP = {"r": "\r", "n": "\n", '"': '"', "\\": "\\"} |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
70 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
71 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
72 class DotclearParser(object): |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
73 # XXX: we have to parse all file to build data |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
74 # this can be ressource intensive on huge blogs |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
75 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
76 def __init__(self): |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
77 self.posts_data = OrderedDict() |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
78 self.tags = {} |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
79 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
80 def get_post_id(self, post): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
81 """Return a unique and constant post id |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
82 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
83 @param post(dict): parsed post data |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
84 @return (unicode): post unique item id |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
85 """ |
3028 | 86 return "{}_{}_{}_{}:{}".format( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
87 POST_ID_PREFIX, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
88 post["blog_id"], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
89 post["user_id"], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
90 post["post_id"], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
91 post["post_url"], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
92 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
93 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
94 def get_comment_id(self, comment): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
95 """Return a unique and constant comment id |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
96 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
97 @param comment(dict): parsed comment |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
98 @return (unicode): comment unique comment id |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
99 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
100 post_id = comment["post_id"] |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
101 parent_item_id = self.posts_data[post_id]["blog"]["id"] |
3028 | 102 return "{}_comment_{}".format(parent_item_id, comment["comment_id"]) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
103 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
104 def getTime(self, data, key): |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
105 """Parse time as given by dotclear, with timezone handling |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
106 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
107 @param data(dict): dotclear data (post or comment) |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
108 @param key(unicode): key to get (e.g. "post_creadt") |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
109 @return (float): Unix time |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
110 """ |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
111 return time.mktime(time.strptime(data[key], "%Y-%m-%d %H:%M:%S")) |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
112 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
113 def read_fields(self, fields_data): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
114 buf = [] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
115 idx = 0 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
116 while True: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
117 if fields_data[idx] != '"': |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
118 raise exceptions.ParsingError |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
119 while True: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
120 idx += 1 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
121 try: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
122 char = fields_data[idx] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
123 except IndexError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
124 raise exceptions.ParsingError("Data was expected") |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
125 if char == '"': |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
126 # we have reached the end of this field, |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
127 # we try to parse a new one |
3028 | 128 yield "".join(buf) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
129 buf = [] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
130 idx += 1 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
131 try: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
132 separator = fields_data[idx] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
133 except IndexError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
134 return |
3028 | 135 if separator != ",": |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
136 raise exceptions.ParsingError("Field separator was expeceted") |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
137 idx += 1 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
138 break # we have a new field |
3028 | 139 elif char == "\\": |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
140 idx += 1 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
141 try: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
142 char = ESCAPE_MAP[fields_data[idx]] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
143 except IndexError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
144 raise exceptions.ParsingError("Escaped char was expected") |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
145 except KeyError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
146 char = fields_data[idx] |
3028 | 147 log.warning("Unknown key to escape: {}".format(char)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
148 buf.append(char) |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
149 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
150 def parseFields(self, headers, data): |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
151 return dict(zip(headers, self.read_fields(data))) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
152 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
153 def post_handler(self, headers, data, index): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
154 post = self.parseFields(headers, data) |
3028 | 155 log.debug("({}) post found: {}".format(index, post["post_title"])) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
156 mb_data = { |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
157 "id": self.get_post_id(post), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
158 "published": self.getTime(post, "post_creadt"), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
159 "updated": self.getTime(post, "post_upddt"), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
160 "author": post["user_id"], # there use info are not in the archive |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
161 # TODO: option to specify user info |
3028 | 162 "content_xhtml": "{}{}".format( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
163 post["post_content_xhtml"], post["post_excerpt_xhtml"] |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
164 ), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
165 "title": post["post_title"], |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
166 "allow_comments": C.bool_const(bool(int(post["post_open_comment"]))), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
167 } |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
168 self.posts_data[post["post_id"]] = { |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
169 "blog": mb_data, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
170 "comments": [[]], |
3028 | 171 "url": "/post/{}".format(post["post_url"]), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
172 } |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
173 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
174 def meta_handler(self, headers, data, index): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
175 meta = self.parseFields(headers, data) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
176 if meta["meta_type"] == "tag": |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
177 tags = self.tags.setdefault(meta["post_id"], set()) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
178 tags.add(meta["meta_id"]) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
179 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
180 def meta_finished_handler(self): |
3028 | 181 for post_id, tags in self.tags.items(): |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
182 data_format.iter2dict("tag", tags, self.posts_data[post_id]["blog"]) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
183 del self.tags |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
184 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
185 def comment_handler(self, headers, data, index): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
186 comment = self.parseFields(headers, data) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
187 if comment["comment_site"]: |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
188 # we don't use atom:uri because it's used for jid in XMPP |
3028 | 189 content = '{}\n<hr>\n<a href="{}">author website</a>'.format( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
190 comment["comment_content"], |
3028 | 191 cgi.escape(comment["comment_site"]).replace('"', "%22"), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
192 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
193 else: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
194 content = comment["comment_content"] |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
195 mb_data = { |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
196 "id": self.get_comment_id(comment), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
197 "published": self.getTime(comment, "comment_dt"), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
198 "updated": self.getTime(comment, "comment_upddt"), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
199 "author": comment["comment_author"], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
200 # we don't keep email addresses to avoid the author to be spammed |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
201 # (they would be available publicly else) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
202 # 'author_email': comment['comment_email'], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
203 "content_xhtml": content, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
204 } |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
205 self.posts_data[comment["post_id"]]["comments"][0].append( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
206 {"blog": mb_data, "comments": [[]]} |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
207 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
208 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
209 def parse(self, db_path): |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
210 with open(db_path) as f: |
3028 | 211 signature = f.readline() |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
212 try: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
213 version = signature.split("|")[1] |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
214 except IndexError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
215 version = None |
3028 | 216 log.debug("Dotclear version: {}".format(version)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
217 data_type = None |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
218 data_headers = None |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
219 index = None |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
220 while True: |
3028 | 221 buf = f.readline() |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
222 if not buf: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
223 break |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
224 if buf.startswith("["): |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
225 header = buf.split(" ", 1) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
226 data_type = header[0][1:] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
227 if data_type not in KNOWN_DATA_TYPES: |
3028 | 228 log.warning("unkown data type: {}".format(data_type)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
229 index = 0 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
230 try: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
231 data_headers = header[1].split(",") |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
232 # we need to remove the ']' from the last header |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
233 last_header = data_headers[-1] |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
234 data_headers[-1] = last_header[: last_header.rfind("]")] |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
235 except IndexError: |
3028 | 236 log.warning("Can't read data)") |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
237 else: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
238 if data_type is None: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
239 continue |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
240 buf = buf.strip() |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
241 if not buf and data_type in KNOWN_DATA_TYPES: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
242 try: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
243 finished_handler = getattr( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
244 self, "{}FinishedHandler".format(data_type) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
245 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
246 except AttributeError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
247 pass |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
248 else: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
249 finished_handler() |
3028 | 250 log.debug("{} data finished".format(data_type)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
251 data_type = None |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
252 continue |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
253 assert data_type |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
254 try: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
255 fields_handler = getattr(self, "{}Handler".format(data_type)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
256 except AttributeError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
257 pass |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
258 else: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
259 fields_handler(data_headers, buf, index) |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
260 index += 1 |
3028 | 261 return (iter(self.posts_data.values()), len(self.posts_data)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
262 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
263 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
264 class DotclearImport(object): |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
265 def __init__(self, host): |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
266 log.info(_("plugin Dotclear import initialization")) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
267 self.host = host |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
268 host.plugins["BLOG_IMPORT"].register( |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
269 "dotclear", self.dc_import, SHORT_DESC, LONG_DESC |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
270 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
271 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
272 def dc_import(self, client, location, options=None): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
273 if not os.path.isabs(location): |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
274 raise exceptions.DataError( |
3028 | 275 "An absolute path to backup data need to be given as location" |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
276 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
277 dc_parser = DotclearParser() |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
278 d = threads.deferToThread(dc_parser.parse, location) |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
279 return d |