annotate sat/plugins/plugin_blog_import_dotclear.py @ 2831:cd81e9cdeaac

misc (CHANGELOG): cap hash disco extensions update
author Goffi <goffi@goffi.org>
date Fri, 01 Mar 2019 19:36:51 +0100
parents 003b8b4b56a7
children ab2696e34d29
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1934
2daf7b4c6756 use of /usr/bin/env instead of /usr/bin/python in shebang
Goffi <goffi@goffi.org>
parents: 1919
diff changeset
1 #!/usr/bin/env python2
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
3
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # SàT plugin for import external blogs
2771
003b8b4b56a7 date update
Goffi <goffi@goffi.org>
parents: 2624
diff changeset
5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org)
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
6
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
11
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
16
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
19
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
20 from sat.core.i18n import _, D_
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
21 from sat.core.constants import Const as C
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
22 from sat.core.log import getLogger
2624
56f94936df1e code style reformatting using black
Goffi <goffi@goffi.org>
parents: 2562
diff changeset
23
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
24 log = getLogger(__name__)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
25 from sat.core import exceptions
1919
d3354c80bd1f core (tools): moved common to a separate package, and put data method in a data_format module
Goffi <goffi@goffi.org>
parents: 1826
diff changeset
26 from sat.tools.common import data_format
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
27 from twisted.internet import threads
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
28 from collections import OrderedDict
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
29 import itertools
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
30 import time
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
31 import cgi
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
32 import os.path
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
33
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
34
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
# Metadata describing this plugin, keyed by the C.PI_* constants of
# sat.core.constants.
# NOTE(review): presumably read by the plugin loader, with C.PI_MAIN naming
# the class to instantiate (not visible in this part of the file) -- confirm.
PLUGIN_INFO = {
    C.PI_NAME: "Dotclear import",
    C.PI_IMPORT_NAME: "IMPORT_DOTCLEAR",
    C.PI_TYPE: C.PLUG_TYPE_BLOG,
    # the generic blog import plugin is declared as a dependency
    C.PI_DEPENDENCIES: ["BLOG_IMPORT"],
    C.PI_MAIN: "DotclearImport",
    C.PI_HANDLER: "no",
    C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine."""),
}
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
44
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
# One-line description of this importer, wrapped in D_ from sat.core.i18n.
# NOTE(review): not referenced in the visible part of the file; presumably
# given to the blog import plugin when the importer is registered -- confirm.
SHORT_DESC = D_(u"import posts from Dotclear blog engine")

# Detailed help: how to produce the flat-file backup from the Dotclear admin
# interface, and what the "location" parameter must contain.
LONG_DESC = D_(
    u"""This importer handle Dotclear blog engine.

To use it, you'll need to export your blog to a flat file.
You must go in your admin interface and select Plugins/Maintenance then Backup.
Export only one blog if you have many, i.e. select "Download database of current blog"
Depending on your configuration, your may need to use Import/Export plugin and export as a flat file.

location: you must use the absolute path to your backup for the location parameter
"""
)
2481
11ac6157fc73 plugin blog import dotclear: use a smaller and lower case prefix to generate ids
Goffi <goffi@goffi.org>
parents: 2414
diff changeset
# prefix put at the beginning of every post id built by
# DotclearParser.getPostId
POST_ID_PREFIX = u"sat_dc_"

# NOTE(review): not used in the visible part of the file; presumably the
# record types which may be found in a backup -- confirm in parse()
KNOWN_DATA_TYPES = (
    "link", "setting", "post", "meta",
    "media", "post_media", "comment", "captcha",
)

# char found right after a backslash in a field => char it stands for
# (used by DotclearParser.readFields)
ESCAPE_MAP = {
    "r": u"\r",
    "n": u"\n",
    '"': u'"',
    "\\": u"\\",
}
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
70
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
71
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
72 class DotclearParser(object):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
73 # XXX: we have to parse the whole file to build data
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
74 # this can be resource intensive on huge blogs
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
75
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def __init__(self):
    """Create the empty containers filled while a backup is parsed"""
    # post id => set of tags of this post; filled by metaHandler and
    # deleted by metaFinishedHandler
    self.tags = dict()
    # post id => data of the post (see postHandler), kept in insertion order
    self.posts_data = OrderedDict()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
79
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def getPostId(self, post):
    """Build the item id of an imported post

    The id is only made of POST_ID_PREFIX and of values read from the post
    itself, so the same post data always gives the same id.
    @param post(dict): parsed post data
    @return (unicode): post unique item id
    """
    id_template = u"{prefix}_{blog}_{user}_{post}:{url}"
    return id_template.format(
        prefix=POST_ID_PREFIX,
        blog=post["blog_id"],
        user=post["user_id"],
        post=post["post_id"],
        url=post["post_url"],
    )
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
93
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def getCommentId(self, comment):
    """Build the item id of an imported comment

    The id is derived from the id of the post the comment belongs to,
    which must already be present in self.posts_data.
    @param comment(dict): parsed comment
    @return (unicode): comment unique comment id
    """
    parent_post = self.posts_data[comment["post_id"]]
    return u"{parent}_comment_{number}".format(
        parent=parent_post["blog"]["id"], number=comment["comment_id"]
    )
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
103
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def getTime(self, data, key):
    """Convert a date found in Dotclear data to Unix time

    The value must look like "2019-03-01 19:36:51". No explicit timezone
    conversion is done here: the parsed date is given to time.mktime, which
    interprets it as local time.
    @param data(dict): dotclear data (post or comment)
    @param key(unicode): key to get (e.g. "post_creadt")
    @return (float): Unix time
    """
    parsed_date = time.strptime(data[key], "%Y-%m-%d %H:%M:%S")
    return time.mktime(parsed_date)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
112
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def readFields(self, fields_data):
    """Iterate over the fields of one record of the backup

    Each field must be enclosed in double quotes, and fields must be
    separated by a single comma (e.g. "12","some title","some text").
    Inside a field, a backslash escapes the following char, which is
    translated with ESCAPE_MAP; an unknown escaped char is kept as is and
    a warning is logged.
    @param fields_data(unicode): raw fields of the record
    @return (generator[unicode]): unescaped value of each field
    @raise exceptions.ParsingError: fields_data doesn't follow this format
        (this includes empty data, and data ending with a separator)
    """
    length = len(fields_data)
    idx = 0
    while True:
        # a field must start with a quote; the bound check avoids leaking
        # an IndexError when data is empty or ends with a separator
        if idx >= length or fields_data[idx] != '"':
            raise exceptions.ParsingError("Opening quote was expected")

        buf = []
        while True:
            idx += 1
            try:
                char = fields_data[idx]
            except IndexError:
                # data ended before the closing quote
                raise exceptions.ParsingError("Data was expected")
            if char == '"':
                # unescaped quote: we have reached the end of this field
                break
            if char == u"\\":
                idx += 1
                try:
                    escaped = fields_data[idx]
                except IndexError:
                    raise exceptions.ParsingError("Escaped char was expected")
                try:
                    char = ESCAPE_MAP[escaped]
                except KeyError:
                    char = escaped
                    log.warning(u"Unknown key to escape: {}".format(char))
            buf.append(char)

        yield u"".join(buf)

        # after a field, there is either the end of data or a separator
        idx += 1
        if idx >= length:
            return
        if fields_data[idx] != u",":
            raise exceptions.ParsingError("Field separator was expeceted")
        idx += 1  # we have a new field
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
149
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def parseFields(self, headers, data):
    """Associate each header with the matching field of a record

    @param headers(iterable): names of the fields, in the same order as the
        values in data
    @param data(unicode): raw fields of the record, parsed with readFields
    @return (dict): header => value of the field
        pairing stops as soon as headers or fields are exhausted
    """
    # builtin zip gives the same pairs as itertools.izip, and unlike izip
    # it is available on Python 2 as well as on Python 3
    return dict(zip(headers, self.readFields(data)))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
152
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def postHandler(self, headers, data, index):
    """Parse a "post" record and store the result in self.posts_data

    @param headers(iterable): names of the fields of the record
    @param data(unicode): raw fields of the record (see readFields)
    @param index: position of the record, only used in the debug log
    """
    post = self.parseFields(headers, data)
    title = post["post_title"]
    log.debug(u"({}) post found: {}".format(index, title))

    item_id = self.getPostId(post)
    published = self.getTime(post, "post_creadt")
    updated = self.getTime(post, "post_upddt")
    # the user info is not in the archive, so the user id is used as author
    # TODO: option to specify user info
    author = post["user_id"]
    content = u"{}{}".format(post["post_content_xhtml"], post["post_excerpt_xhtml"])
    # post_open_comment holds an integer flag as text (e.g. "0" => False)
    allow_comments = C.boolConst(bool(int(post["post_open_comment"])))

    mb_data = {
        "id": item_id,
        "published": published,
        "updated": updated,
        "author": author,
        "content_xhtml": content,
        "title": title,
        "allow_comments": allow_comments,
    }
    url = u"/post/{}".format(post["post_url"])
    # "comments" starts as a list holding one empty list, which
    # commentHandler fills with the comments of this post
    self.posts_data[post["post_id"]] = {
        "blog": mb_data,
        "comments": [[]],
        "url": url,
    }
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
173
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def metaHandler(self, headers, data, index):
    """Parse a "meta" record, and remember it if it is a tag

    Tags are collected per post in self.tags; other kinds of metadata
    are ignored.
    @param headers(iterable): names of the fields of the record
    @param data(unicode): raw fields of the record (see readFields)
    @param index: position of the record (unused here)
    """
    meta = self.parseFields(headers, data)
    if meta["meta_type"] != "tag":
        return
    # meta_id holds the tag itself, post_id the post it is attached to
    self.tags.setdefault(meta["post_id"], set()).add(meta["meta_id"])
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
179
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def metaFinishedHandler(self):
    """Attach the collected tags to their posts

    Tags of each post are given to data_format.iter2dict together with the
    "blog" data of the post, then self.tags is deleted.
    NOTE(review): iter2dict presumably stores the tags in the given dict
    under keys derived from "tag" -- confirm in sat.tools.common.data_format
    """
    collected = self.tags
    for post_id in collected:
        post_tags = collected[post_id]
        data_format.iter2dict("tag", post_tags, self.posts_data[post_id]["blog"])
    # the mapping is not needed anymore once merged in posts data
    del self.tags
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
184
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def commentHandler(self, headers, data, index):
    """Parse a "comment" record and attach it to its post

    The post of the comment must already be in self.posts_data.
    @param headers(iterable): names of the fields of the record
    @param data(unicode): raw fields of the record (see readFields)
    @param index: position of the record (unused here)
    """
    comment = self.parseFields(headers, data)
    site = comment["comment_site"]
    content = comment["comment_content"]
    if site:
        # we don't use atom:uri because it's used for jid in XMPP, so the
        # website of the author is added as a link at the end of the content
        site_href = cgi.escape(site).replace('"', u"%22")
        content = u'{}\n<hr>\n<a href="{}">author website</a>'.format(
            content, site_href
        )

    # comment_email is deliberately not imported: it would be publicly
    # available, and the author could then be spammed
    mb_data = {
        "id": self.getCommentId(comment),
        "published": self.getTime(comment, "comment_dt"),
        "updated": self.getTime(comment, "comment_upddt"),
        "author": comment["comment_author"],
        "content_xhtml": content,
    }
    post_comments = self.posts_data[comment["post_id"]]["comments"][0]
    post_comments.append({"blog": mb_data, "comments": [[]]})
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
208
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
def parse(self, db_path):
    """Parse a Dotclear backup file and collect the posts it describes.

    The first line of the file is read as a signature; the text after its
    first "|" is logged as the Dotclear version. The remainder is read
    line by line as a sequence of sections:

    - a line starting with "[" opens a section: the text between "[" and
      the first space is the data type, the rest is a comma separated
      list of headers closed by "]";
    - every other line of an open section is passed, stripped, to
      ``self.<data_type>Handler(data_headers, line, index)`` when such a
      method exists, ``index`` being the 0-based row number in the section;
    - an empty line closes a section of a known data type and triggers
      the optional ``self.<data_type>FinishedHandler()`` hook.

    A known section still open when the end of the file is reached is
    closed too, so its finished hook is not skipped when the backup does
    not end with a blank line.

    @param db_path: path of the backup file to read
    @return (tuple): iterator on the values of ``self.posts_data`` and
        number of entries in ``self.posts_data``
    """
    with open(db_path) as f:
        signature = f.readline().decode("utf-8")
        try:
            version = signature.split("|")[1]
        except IndexError:
            version = None
        log.debug(u"Dotclear version: {}".format(version))
        data_type = None
        data_headers = None
        index = None
        while True:
            buf = f.readline().decode("utf-8")
            if not buf:
                # readline() returns an empty string only at end of file
                break
            if buf.startswith("["):
                header = buf.split(" ", 1)
                data_type = header[0][1:]
                if data_type not in KNOWN_DATA_TYPES:
                    log.warning(u"unknown data type: {}".format(data_type))
                index = 0
                try:
                    data_headers = header[1].split(",")
                    # we need to remove the ']' from the last header
                    last_header = data_headers[-1]
                    data_headers[-1] = last_header[: last_header.rfind("]")]
                except IndexError:
                    # no space in the header line: there is no headers list
                    log.warning(u"Can't read data")
            else:
                if data_type is None:
                    # we are outside of any section
                    continue
                buf = buf.strip()
                if not buf and data_type in KNOWN_DATA_TYPES:
                    self._endSection(data_type)
                    data_type = None
                    continue
                assert data_type
                try:
                    fields_handler = getattr(self, "{}Handler".format(data_type))
                except AttributeError:
                    # no handler for this data type, the row is ignored
                    pass
                else:
                    fields_handler(data_headers, buf, index)
                index += 1
        if data_type is not None and data_type in KNOWN_DATA_TYPES:
            # end of file reached without the blank line closing the section
            self._endSection(data_type)
    return (self.posts_data.itervalues(), len(self.posts_data))

def _endSection(self, data_type):
    """Close a section: call its optional finished hook, then log it.

    @param data_type: data type of the section which has just ended
    """
    try:
        finished_handler = getattr(self, "{}FinishedHandler".format(data_type))
    except AttributeError:
        # the finished hook is optional
        pass
    else:
        finished_handler()
    log.debug(u"{} data finished".format(data_type))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
262
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
263
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
class DotclearImport(object):
    """Plugin class making the Dotclear importer available to BLOG_IMPORT."""

    def __init__(self, host):
        """Keep a reference to the host and register the "dotclear" importer.

        @param host: object whose ``plugins`` mapping holds the "BLOG_IMPORT"
            plugin used for the registration
        """
        log.info(_("plugin Dotclear Import initialization"))
        self.host = host
        blog_import = host.plugins["BLOG_IMPORT"]
        blog_import.register("dotclear", self.DcImport, SHORT_DESC, LONG_DESC)

    def DcImport(self, client, location, options=None):
        """Start the parsing of a Dotclear backup.

        @param client: not used by this method
        @param location: absolute path of the backup file
        @param options: not used by this method
        @return: result of ``threads.deferToThread`` running
            ``DotclearParser.parse`` on location
        @raise exceptions.DataError: location is not an absolute path
        """
        if os.path.isabs(location):
            # the parsing is not done in the calling thread
            return threads.deferToThread(DotclearParser().parse, location)
        raise exceptions.DataError(
            u"An absolute path to backup data need to be given as location"
        )