Mercurial > libervia-backend
annotate libervia/backend/plugins/plugin_blog_import_dotclear.py @ 4095:684ba556a617
core (memory/sqla_mapping): fix legacy pickled values:
folloing packages refactoring, legacy pickled values could not be unpickled (due to use of
old classes). This temporary workaround fix it, but the right thing to do will be to move
from pickle to JSON at some point.
author | Goffi <goffi@goffi.org> |
---|---|
date | Mon, 12 Jun 2023 14:57:27 +0200 |
parents | 4b842c1fb686 |
children | 73d83cb53673 |
rev | line source |
---|---|
3028 | 1 #!/usr/bin/env python3 |
3137 | 2 |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
3 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
4 # SàT plugin for import external blogs |
3479 | 5 # Copyright (C) 2009-2021 Jérôme Poisson (goffi@goffi.org) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
6 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
7 # This program is free software: you can redistribute it and/or modify |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
8 # it under the terms of the GNU Affero General Public License as published by |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
9 # the Free Software Foundation, either version 3 of the License, or |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
10 # (at your option) any later version. |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
11 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
12 # This program is distributed in the hope that it will be useful, |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
15 # GNU Affero General Public License for more details. |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
16 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
17 # You should have received a copy of the GNU Affero General Public License |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
19 |
4071
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
20 from libervia.backend.core.i18n import _, D_ |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
21 from libervia.backend.core.constants import Const as C |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
22 from libervia.backend.core.log import getLogger |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
23 |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
24 log = getLogger(__name__) |
4071
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
25 from libervia.backend.core import exceptions |
4b842c1fb686
refactoring: renamed `sat` package to `libervia.backend`
Goffi <goffi@goffi.org>
parents:
4037
diff
changeset
|
26 from libervia.backend.tools.common import data_format |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
27 from twisted.internet import threads |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
28 from collections import OrderedDict |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
29 import itertools |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
30 import time |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
31 import cgi |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
32 import os.path |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
33 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
34 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
35 PLUGIN_INFO = { |
2145
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
36 C.PI_NAME: "Dotclear import", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
37 C.PI_IMPORT_NAME: "IMPORT_DOTCLEAR", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
38 C.PI_TYPE: C.PLUG_TYPE_BLOG, |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
39 C.PI_DEPENDENCIES: ["BLOG_IMPORT"], |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
40 C.PI_MAIN: "DotclearImport", |
33c8c4973743
core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents:
1934
diff
changeset
|
41 C.PI_HANDLER: "no", |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
42 C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine."""), |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
43 } |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
44 |
3028 | 45 SHORT_DESC = D_("import posts from Dotclear blog engine") |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
46 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
47 LONG_DESC = D_( |
3028 | 48 """This importer handle Dotclear blog engine. |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
49 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
50 To use it, you'll need to export your blog to a flat file. |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
51 You must go in your admin interface and select Plugins/Maintenance then Backup. |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
52 Export only one blog if you have many, i.e. select "Download database of current blog" |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
53 Depending on your configuration, your may need to use import/Export plugin and export as a flat file. |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
54 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
55 location: you must use the absolute path to your backup for the location parameter |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
56 """ |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
57 ) |
3028 | 58 POST_ID_PREFIX = "sat_dc_" |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
59 KNOWN_DATA_TYPES = ( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
60 "link", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
61 "setting", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
62 "post", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
63 "meta", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
64 "media", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
65 "post_media", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
66 "comment", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
67 "captcha", |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
68 ) |
3028 | 69 ESCAPE_MAP = {"r": "\r", "n": "\n", '"': '"', "\\": "\\"} |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
70 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
71 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
72 class DotclearParser(object): |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
73 # XXX: we have to parse all file to build data |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
74 # this can be ressource intensive on huge blogs |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
75 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
76 def __init__(self): |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
77 self.posts_data = OrderedDict() |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
78 self.tags = {} |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
79 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
80 def get_post_id(self, post): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
81 """Return a unique and constant post id |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
82 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
83 @param post(dict): parsed post data |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
84 @return (unicode): post unique item id |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
85 """ |
3028 | 86 return "{}_{}_{}_{}:{}".format( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
87 POST_ID_PREFIX, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
88 post["blog_id"], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
89 post["user_id"], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
90 post["post_id"], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
91 post["post_url"], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
92 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
93 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
94 def get_comment_id(self, comment): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
95 """Return a unique and constant comment id |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
96 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
97 @param comment(dict): parsed comment |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
98 @return (unicode): comment unique comment id |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
99 """ |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
100 post_id = comment["post_id"] |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
101 parent_item_id = self.posts_data[post_id]["blog"]["id"] |
3028 | 102 return "{}_comment_{}".format(parent_item_id, comment["comment_id"]) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
103 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
104 def getTime(self, data, key): |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
105 """Parse time as given by dotclear, with timezone handling |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
106 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
107 @param data(dict): dotclear data (post or comment) |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
108 @param key(unicode): key to get (e.g. "post_creadt") |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
109 @return (float): Unix time |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
110 """ |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
111 return time.mktime(time.strptime(data[key], "%Y-%m-%d %H:%M:%S")) |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
112 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
113 def read_fields(self, fields_data): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
114 buf = [] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
115 idx = 0 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
116 while True: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
117 if fields_data[idx] != '"': |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
118 raise exceptions.ParsingError |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
119 while True: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
120 idx += 1 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
121 try: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
122 char = fields_data[idx] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
123 except IndexError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
124 raise exceptions.ParsingError("Data was expected") |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
125 if char == '"': |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
126 # we have reached the end of this field, |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
127 # we try to parse a new one |
3028 | 128 yield "".join(buf) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
129 buf = [] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
130 idx += 1 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
131 try: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
132 separator = fields_data[idx] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
133 except IndexError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
134 return |
3028 | 135 if separator != ",": |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
136 raise exceptions.ParsingError("Field separator was expeceted") |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
137 idx += 1 |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
138 break # we have a new field |
3028 | 139 elif char == "\\": |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
140 idx += 1 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
141 try: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
142 char = ESCAPE_MAP[fields_data[idx]] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
143 except IndexError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
144 raise exceptions.ParsingError("Escaped char was expected") |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
145 except KeyError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
146 char = fields_data[idx] |
3028 | 147 log.warning("Unknown key to escape: {}".format(char)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
148 buf.append(char) |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
149 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
150 def parseFields(self, headers, data): |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
151 return dict(zip(headers, self.read_fields(data))) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
152 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
153 def post_handler(self, headers, data, index): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
154 post = self.parseFields(headers, data) |
3028 | 155 log.debug("({}) post found: {}".format(index, post["post_title"])) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
156 mb_data = { |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
157 "id": self.get_post_id(post), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
158 "published": self.getTime(post, "post_creadt"), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
159 "updated": self.getTime(post, "post_upddt"), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
160 "author": post["user_id"], # there use info are not in the archive |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
161 # TODO: option to specify user info |
3028 | 162 "content_xhtml": "{}{}".format( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
163 post["post_content_xhtml"], post["post_excerpt_xhtml"] |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
164 ), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
165 "title": post["post_title"], |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
166 "allow_comments": C.bool_const(bool(int(post["post_open_comment"]))), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
167 } |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
168 self.posts_data[post["post_id"]] = { |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
169 "blog": mb_data, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
170 "comments": [[]], |
3028 | 171 "url": "/post/{}".format(post["post_url"]), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
172 } |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
173 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
174 def meta_handler(self, headers, data, index): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
175 meta = self.parseFields(headers, data) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
176 if meta["meta_type"] == "tag": |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
177 tags = self.tags.setdefault(meta["post_id"], set()) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
178 tags.add(meta["meta_id"]) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
179 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
180 def meta_finished_handler(self): |
3028 | 181 for post_id, tags in self.tags.items(): |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
182 data_format.iter2dict("tag", tags, self.posts_data[post_id]["blog"]) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
183 del self.tags |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
184 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
185 def comment_handler(self, headers, data, index): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
186 comment = self.parseFields(headers, data) |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
187 if comment["comment_site"]: |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
188 # we don't use atom:uri because it's used for jid in XMPP |
3028 | 189 content = '{}\n<hr>\n<a href="{}">author website</a>'.format( |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
190 comment["comment_content"], |
3028 | 191 cgi.escape(comment["comment_site"]).replace('"', "%22"), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
192 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
193 else: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
194 content = comment["comment_content"] |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
195 mb_data = { |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
196 "id": self.get_comment_id(comment), |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
197 "published": self.getTime(comment, "comment_dt"), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
198 "updated": self.getTime(comment, "comment_upddt"), |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
199 "author": comment["comment_author"], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
200 # we don't keep email addresses to avoid the author to be spammed |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
201 # (they would be available publicly else) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
202 # 'author_email': comment['comment_email'], |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
203 "content_xhtml": content, |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
204 } |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
205 self.posts_data[comment["post_id"]]["comments"][0].append( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
206 {"blog": mb_data, "comments": [[]]} |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
207 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
208 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
209 def parse(self, db_path): |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
210 with open(db_path) as f: |
3028 | 211 signature = f.readline() |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
212 try: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
213 version = signature.split("|")[1] |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
214 except IndexError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
215 version = None |
3028 | 216 log.debug("Dotclear version: {}".format(version)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
217 data_type = None |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
218 data_headers = None |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
219 index = None |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
220 while True: |
3028 | 221 buf = f.readline() |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
222 if not buf: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
223 break |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
224 if buf.startswith("["): |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
225 header = buf.split(" ", 1) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
226 data_type = header[0][1:] |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
227 if data_type not in KNOWN_DATA_TYPES: |
3028 | 228 log.warning("unkown data type: {}".format(data_type)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
229 index = 0 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
230 try: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
231 data_headers = header[1].split(",") |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
232 # we need to remove the ']' from the last header |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
233 last_header = data_headers[-1] |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
234 data_headers[-1] = last_header[: last_header.rfind("]")] |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
235 except IndexError: |
3028 | 236 log.warning("Can't read data)") |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
237 else: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
238 if data_type is None: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
239 continue |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
240 buf = buf.strip() |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
241 if not buf and data_type in KNOWN_DATA_TYPES: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
242 try: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
243 finished_handler = getattr( |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
244 self, "{}FinishedHandler".format(data_type) |
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
245 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
246 except AttributeError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
247 pass |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
248 else: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
249 finished_handler() |
3028 | 250 log.debug("{} data finished".format(data_type)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
251 data_type = None |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
252 continue |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
253 assert data_type |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
254 try: |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
255 fields_handler = getattr(self, "{}Handler".format(data_type)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
256 except AttributeError: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
257 pass |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
258 else: |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
259 fields_handler(data_headers, buf, index) |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
260 index += 1 |
3028 | 261 return (iter(self.posts_data.values()), len(self.posts_data)) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
262 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
263 |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
264 class DotclearImport(object): |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
265 def __init__(self, host): |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
266 log.info(_("plugin Dotclear import initialization")) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
267 self.host = host |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
268 host.plugins["BLOG_IMPORT"].register( |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
269 "dotclear", self.dc_import, SHORT_DESC, LONG_DESC |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
270 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
271 |
4037
524856bd7b19
massive refactoring to switch from camelCase to snake_case:
Goffi <goffi@goffi.org>
parents:
3479
diff
changeset
|
272 def dc_import(self, client, location, options=None): |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
273 if not os.path.isabs(location): |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
274 raise exceptions.DataError( |
3028 | 275 "An absolute path to backup data need to be given as location" |
2624
56f94936df1e
code style reformatting using black
Goffi <goffi@goffi.org>
parents:
2562
diff
changeset
|
276 ) |
1826
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
277 dc_parser = DotclearParser() |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
278 d = threads.deferToThread(dc_parser.parse, location) |
d80ccf4bf201
plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff
changeset
|
279 return d |