comparison sat/plugins/plugin_blog_import_dotclear.py @ 2624:56f94936df1e

code style reformatting using black
author Goffi <goffi@goffi.org>
date Wed, 27 Jun 2018 20:14:46 +0200
parents 26edcf3a30eb
children 003b8b4b56a7
comparison
2623:49533de4540b → 2624:56f94936df1e
--- a/sat/plugins/plugin_blog_import_dotclear.py   (2623:49533de4540b)
+++ b/sat/plugins/plugin_blog_import_dotclear.py   (2624:56f94936df1e)
@@ -18,10 +18,11 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 from sat.core.i18n import _, D_
 from sat.core.constants import Const as C
 from sat.core.log import getLogger
+
 log = getLogger(__name__)
 from sat.core import exceptions
 from sat.tools.common import data_format
 from twisted.internet import threads
 from collections import OrderedDict
@@ -36,32 +37,38 @@
     C.PI_IMPORT_NAME: "IMPORT_DOTCLEAR",
     C.PI_TYPE: C.PLUG_TYPE_BLOG,
     C.PI_DEPENDENCIES: ["BLOG_IMPORT"],
     C.PI_MAIN: "DotclearImport",
     C.PI_HANDLER: "no",
-    C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine.""")
+    C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine."""),
 }
 
 SHORT_DESC = D_(u"import posts from Dotclear blog engine")
 
-LONG_DESC = D_(u"""This importer handle Dotclear blog engine.
+LONG_DESC = D_(
+    u"""This importer handle Dotclear blog engine.
 
 To use it, you'll need to export your blog to a flat file.
 You must go in your admin interface and select Plugins/Maintenance then Backup.
 Export only one blog if you have many, i.e. select "Download database of current blog"
 Depending on your configuration, your may need to use Import/Export plugin and export as a flat file.
 
 location: you must use the absolute path to your backup for the location parameter
-""")
+"""
+)
 POST_ID_PREFIX = u"sat_dc_"
-KNOWN_DATA_TYPES = ('link', 'setting', 'post', 'meta', 'media', 'post_media', 'comment', 'captcha')
-ESCAPE_MAP = {
-    'r': u'\r',
-    'n': u'\n',
-    '"': u'"',
-    '\\': u'\\',
-}
+KNOWN_DATA_TYPES = (
+    "link",
+    "setting",
+    "post",
+    "meta",
+    "media",
+    "post_media",
+    "comment",
+    "captcha",
+)
+ESCAPE_MAP = {"r": u"\r", "n": u"\n", '"': u'"', "\\": u"\\"}
 
 
 class DotclearParser(object):
     # XXX: we have to parse all file to build data
     # this can be ressource intensive on huge blogs
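As a point of reference for the constants above (KNOWN_DATA_TYPES, ESCAPE_MAP) and for the parse() loop further down, here is a hypothetical, heavily trimmed sketch of the flat backup layout the importer expects. The signature text and the column list are assumptions for illustration; a real Dotclear export carries many more sections and columns.

# -*- coding: utf-8 -*-
# Hypothetical, trimmed excerpt of a Dotclear flat backup, reconstructed only
# from what parse() and the handlers in this plugin read; real exports differ.
SAMPLE_BACKUP = u"\n".join([
    # signature line: parse() keeps the token after the first "|" as the version
    u"///DOTCLEAR|2.12|single",
    u"",
    # section header: "[<data_type> <comma separated column names>]"
    u"[post post_id,blog_id,user_id,post_url,post_creadt,post_upddt,"
    u"post_title,post_content_xhtml,post_excerpt_xhtml,post_open_comment]",
    # one record: double-quoted fields separated by commas, backslash-escaped
    u'"42","default","goffi","2018/06/27/hello","2018-06-27 20:14:46",'
    u'"2018-06-27 20:14:46","Hello","<p>first \\"post\\"</p>","","1"',
    u"",  # an empty line closes the section (and triggers <type>FinishedHandler if defined)
])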
74 """Return a unique and constant post id 81 """Return a unique and constant post id
75 82
76 @param post(dict): parsed post data 83 @param post(dict): parsed post data
77 @return (unicode): post unique item id 84 @return (unicode): post unique item id
78 """ 85 """
79 return u"{}_{}_{}_{}:{}".format(POST_ID_PREFIX, post['blog_id'], post['user_id'], post['post_id'], post['post_url']) 86 return u"{}_{}_{}_{}:{}".format(
87 POST_ID_PREFIX,
88 post["blog_id"],
89 post["user_id"],
90 post["post_id"],
91 post["post_url"],
92 )
80 93
81 def getCommentId(self, comment): 94 def getCommentId(self, comment):
82 """Return a unique and constant comment id 95 """Return a unique and constant comment id
83 96
84 @param comment(dict): parsed comment 97 @param comment(dict): parsed comment
85 @return (unicode): comment unique comment id 98 @return (unicode): comment unique comment id
86 """ 99 """
87 post_id = comment['post_id'] 100 post_id = comment["post_id"]
88 parent_item_id = self.posts_data[post_id]['blog']['id'] 101 parent_item_id = self.posts_data[post_id]["blog"]["id"]
89 return u"{}_comment_{}".format(parent_item_id, comment['comment_id']) 102 return u"{}_comment_{}".format(parent_item_id, comment["comment_id"])
90 103
91 def getTime(self, data, key): 104 def getTime(self, data, key):
92 """Parse time as given by dotclear, with timezone handling 105 """Parse time as given by dotclear, with timezone handling
93 106
94 @param data(dict): dotclear data (post or comment) 107 @param data(dict): dotclear data (post or comment)
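A quick illustration of the identifiers built by getPostId() and getCommentId(), using invented values (note the double underscore: POST_ID_PREFIX already ends with one).

# -*- coding: utf-8 -*-
# Illustration only: the same format strings as above, applied to made-up values.
POST_ID_PREFIX = u"sat_dc_"

post = {"blog_id": u"default", "user_id": u"goffi",
        "post_id": u"42", "post_url": u"2018/06/27/hello"}
post_item_id = u"{}_{}_{}_{}:{}".format(
    POST_ID_PREFIX, post["blog_id"], post["user_id"],
    post["post_id"], post["post_url"])
# -> u'sat_dc__default_goffi_42:2018/06/27/hello'

comment = {"comment_id": u"7"}
comment_item_id = u"{}_comment_{}".format(post_item_id, comment["comment_id"])
# -> u'sat_dc__default_goffi_42:2018/06/27/hello_comment_7'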
@@ -110,22 +123,22 @@
                 except IndexError:
                     raise exceptions.ParsingError("Data was expected")
                 if char == '"':
                     # we have reached the end of this field,
                     # we try to parse a new one
-                    yield u''.join(buf)
+                    yield u"".join(buf)
                     buf = []
                     idx += 1
                     try:
                         separator = fields_data[idx]
                     except IndexError:
                         return
-                    if separator != u',':
+                    if separator != u",":
                         raise exceptions.ParsingError("Field separator was expeceted")
                     idx += 1
                     break  # we have a new field
-                elif char == u'\\':
+                elif char == u"\\":
                     idx += 1
                     try:
                         char = ESCAPE_MAP[fields_data[idx]]
                     except IndexError:
                         raise exceptions.ParsingError("Escaped char was expected")
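The escaping logic above is easier to follow on a toy record. Below is a self-contained, simplified sketch of the same field format; read_fields() here is an illustrative stand-in inferred from this hunk, not the plugin's readFields() generator.

# -*- coding: utf-8 -*-
# Simplified sketch of the record format handled above: double-quoted fields
# separated by commas, with backslash escapes mapped through ESCAPE_MAP.
ESCAPE_MAP = {"r": u"\r", "n": u"\n", '"': u'"', "\\": u"\\"}


def read_fields(record):
    """Yield unescaped field values from one flat-file record."""
    idx = 0
    while idx < len(record):
        if record[idx] != u'"':
            raise ValueError(u"field must start with a double quote")
        idx += 1
        buf = []
        while record[idx] != u'"':  # an unescaped quote closes the field
            char = record[idx]
            if char == u"\\":  # escaped character: translate through ESCAPE_MAP
                idx += 1
                char = ESCAPE_MAP.get(record[idx], record[idx])
            buf.append(char)
            idx += 1
        yield u"".join(buf)
        idx += 2  # skip the closing quote and the "," separator


print(list(read_fields(u'"42","two\\nlines","a \\"quoted\\" word"')))
# -> [u'42', u'two\nlines', u'a "quoted" word']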
@@ -137,115 +150,130 @@
     def parseFields(self, headers, data):
         return dict(itertools.izip(headers, self.readFields(data)))
 
     def postHandler(self, headers, data, index):
         post = self.parseFields(headers, data)
-        log.debug(u'({}) post found: {}'.format(index, post['post_title']))
-        mb_data = {'id': self.getPostId(post),
-                   'published': self.getTime(post, 'post_creadt'),
-                   'updated': self.getTime(post, 'post_upddt'),
-                   'author': post['user_id'],  # there use info are not in the archive
-                   # TODO: option to specify user info
-                   'content_xhtml': u"{}{}".format(post['post_content_xhtml'], post['post_excerpt_xhtml']),
-                   'title': post['post_title'],
-                   'allow_comments': C.boolConst(bool(int(post['post_open_comment']))),
-                  }
-        self.posts_data[post['post_id']] = {'blog': mb_data, 'comments':[[]], 'url': u'/post/{}'.format(post['post_url'])}
+        log.debug(u"({}) post found: {}".format(index, post["post_title"]))
+        mb_data = {
+            "id": self.getPostId(post),
+            "published": self.getTime(post, "post_creadt"),
+            "updated": self.getTime(post, "post_upddt"),
+            "author": post["user_id"],  # there use info are not in the archive
+            # TODO: option to specify user info
+            "content_xhtml": u"{}{}".format(
+                post["post_content_xhtml"], post["post_excerpt_xhtml"]
+            ),
+            "title": post["post_title"],
+            "allow_comments": C.boolConst(bool(int(post["post_open_comment"]))),
+        }
+        self.posts_data[post["post_id"]] = {
+            "blog": mb_data,
+            "comments": [[]],
+            "url": u"/post/{}".format(post["post_url"]),
+        }
 
     def metaHandler(self, headers, data, index):
         meta = self.parseFields(headers, data)
-        if meta['meta_type'] == 'tag':
-            tags = self.tags.setdefault(meta['post_id'], set())
-            tags.add(meta['meta_id'])
+        if meta["meta_type"] == "tag":
+            tags = self.tags.setdefault(meta["post_id"], set())
+            tags.add(meta["meta_id"])
 
     def metaFinishedHandler(self):
         for post_id, tags in self.tags.iteritems():
-            data_format.iter2dict('tag', tags, self.posts_data[post_id]['blog'])
+            data_format.iter2dict("tag", tags, self.posts_data[post_id]["blog"])
         del self.tags
 
     def commentHandler(self, headers, data, index):
         comment = self.parseFields(headers, data)
-        if comment['comment_site']:
+        if comment["comment_site"]:
             # we don't use atom:uri because it's used for jid in XMPP
             content = u'{}\n<hr>\n<a href="{}">author website</a>'.format(
-                comment['comment_content'],
-                cgi.escape(comment['comment_site']).replace('"', u'%22'))
+                comment["comment_content"],
+                cgi.escape(comment["comment_site"]).replace('"', u"%22"),
+            )
         else:
-            content = comment['comment_content']
-        mb_data = {'id': self.getCommentId(comment),
-                   'published': self.getTime(comment, 'comment_dt'),
-                   'updated': self.getTime(comment, 'comment_upddt'),
-                   'author': comment['comment_author'],
-                   # we don't keep email addresses to avoid the author to be spammed
-                   # (they would be available publicly else)
-                   # 'author_email': comment['comment_email'],
-                   'content_xhtml': content,
-                  }
-        self.posts_data[comment['post_id']]['comments'][0].append(
-            {'blog': mb_data, 'comments': [[]]})
+            content = comment["comment_content"]
+        mb_data = {
+            "id": self.getCommentId(comment),
+            "published": self.getTime(comment, "comment_dt"),
+            "updated": self.getTime(comment, "comment_upddt"),
+            "author": comment["comment_author"],
+            # we don't keep email addresses to avoid the author to be spammed
+            # (they would be available publicly else)
+            # 'author_email': comment['comment_email'],
+            "content_xhtml": content,
+        }
+        self.posts_data[comment["post_id"]]["comments"][0].append(
+            {"blog": mb_data, "comments": [[]]}
+        )
 
     def parse(self, db_path):
         with open(db_path) as f:
-            signature = f.readline().decode('utf-8')
+            signature = f.readline().decode("utf-8")
             try:
-                version = signature.split('|')[1]
+                version = signature.split("|")[1]
             except IndexError:
                 version = None
             log.debug(u"Dotclear version: {}".format(version))
             data_type = None
             data_headers = None
             index = None
             while True:
-                buf = f.readline().decode('utf-8')
+                buf = f.readline().decode("utf-8")
                 if not buf:
                     break
-                if buf.startswith('['):
-                    header = buf.split(' ', 1)
+                if buf.startswith("["):
+                    header = buf.split(" ", 1)
                     data_type = header[0][1:]
                     if data_type not in KNOWN_DATA_TYPES:
                         log.warning(u"unkown data type: {}".format(data_type))
                     index = 0
                     try:
-                        data_headers = header[1].split(',')
+                        data_headers = header[1].split(",")
                         # we need to remove the ']' from the last header
                         last_header = data_headers[-1]
-                        data_headers[-1] = last_header[:last_header.rfind(']')]
+                        data_headers[-1] = last_header[: last_header.rfind("]")]
                     except IndexError:
                         log.warning(u"Can't read data)")
                 else:
                     if data_type is None:
                         continue
                     buf = buf.strip()
                     if not buf and data_type in KNOWN_DATA_TYPES:
                         try:
-                            finished_handler = getattr(self, '{}FinishedHandler'.format(data_type))
+                            finished_handler = getattr(
+                                self, "{}FinishedHandler".format(data_type)
+                            )
                         except AttributeError:
                             pass
                         else:
                             finished_handler()
                         log.debug(u"{} data finished".format(data_type))
                         data_type = None
                         continue
                     assert data_type
                     try:
-                        fields_handler = getattr(self, '{}Handler'.format(data_type))
+                        fields_handler = getattr(self, "{}Handler".format(data_type))
                     except AttributeError:
                         pass
                     else:
                         fields_handler(data_headers, buf, index)
                     index += 1
         return (self.posts_data.itervalues(), len(self.posts_data))
 
 
 class DotclearImport(object):
-
     def __init__(self, host):
         log.info(_("plugin Dotclear Import initialization"))
         self.host = host
-        host.plugins['BLOG_IMPORT'].register('dotclear', self.DcImport, SHORT_DESC, LONG_DESC)
+        host.plugins["BLOG_IMPORT"].register(
+            "dotclear", self.DcImport, SHORT_DESC, LONG_DESC
+        )
 
     def DcImport(self, client, location, options=None):
         if not os.path.isabs(location):
-            raise exceptions.DataError(u"An absolute path to backup data need to be given as location")
+            raise exceptions.DataError(
+                u"An absolute path to backup data need to be given as location"
+            )
         dc_parser = DotclearParser()
         d = threads.deferToThread(dc_parser.parse, location)
         return d
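To keep the data flow in view: a rough, made-up sketch of one entry built by postHandler()/commentHandler() and yielded by parse(), i.e. what DcImport() eventually hands to the BLOG_IMPORT plugin. Exact value types (the timestamp from getTime(), the serialised allow_comments flag from C.boolConst()) are assumptions, since those helpers are not fully shown in this changeset.

# -*- coding: utf-8 -*-
# Made-up example of one element yielded by DotclearParser.parse(); values and
# some types are illustrative only. Tags, if any, are merged into the "blog"
# dict by metaFinishedHandler() through data_format.iter2dict().
sample_entry = {
    "blog": {  # microblog data of the post itself (trimmed)
        "id": u"sat_dc__default_goffi_42:2018/06/27/hello",
        "published": 1530123286,  # assumed: a Unix timestamp from getTime()
        "updated": 1530123286,
        "author": u"goffi",
        "title": u"Hello",
        "content_xhtml": u"<p>first post</p>",
        "allow_comments": u"true",  # assumed serialisation of C.boolConst(True)
    },
    "comments": [[  # first (and only) imported level of comments
        {
            "blog": {
                "id": u"sat_dc__default_goffi_42:2018/06/27/hello_comment_7",
                "author": u"visitor",
                "content_xhtml": u"nice post!",
            },
            "comments": [[]],
        },
    ]],
    "url": u"/post/2018/06/27/hello",
}

# parse() returns a 2-tuple: an iterator over such entries and the post count,
# computed in a thread by DcImport() via threads.deferToThread().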