Mercurial > libervia-backend
comparison sat/plugins/plugin_blog_import_dotclear.py @ 2624:56f94936df1e
code style reformatting using black
author | Goffi <goffi@goffi.org> |
---|---|
date | Wed, 27 Jun 2018 20:14:46 +0200 |
parents | 26edcf3a30eb |
children | 003b8b4b56a7 |
comparison
equal
deleted
inserted
replaced
2623:49533de4540b | 2624:56f94936df1e |
---|---|
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 |
20 from sat.core.i18n import _, D_ | 20 from sat.core.i18n import _, D_ |
21 from sat.core.constants import Const as C | 21 from sat.core.constants import Const as C |
22 from sat.core.log import getLogger | 22 from sat.core.log import getLogger |
23 | |
23 log = getLogger(__name__) | 24 log = getLogger(__name__) |
24 from sat.core import exceptions | 25 from sat.core import exceptions |
25 from sat.tools.common import data_format | 26 from sat.tools.common import data_format |
26 from twisted.internet import threads | 27 from twisted.internet import threads |
27 from collections import OrderedDict | 28 from collections import OrderedDict |
36 C.PI_IMPORT_NAME: "IMPORT_DOTCLEAR", | 37 C.PI_IMPORT_NAME: "IMPORT_DOTCLEAR", |
37 C.PI_TYPE: C.PLUG_TYPE_BLOG, | 38 C.PI_TYPE: C.PLUG_TYPE_BLOG, |
38 C.PI_DEPENDENCIES: ["BLOG_IMPORT"], | 39 C.PI_DEPENDENCIES: ["BLOG_IMPORT"], |
39 C.PI_MAIN: "DotclearImport", | 40 C.PI_MAIN: "DotclearImport", |
40 C.PI_HANDLER: "no", | 41 C.PI_HANDLER: "no", |
41 C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine.""") | 42 C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine."""), |
42 } | 43 } |
43 | 44 |
44 SHORT_DESC = D_(u"import posts from Dotclear blog engine") | 45 SHORT_DESC = D_(u"import posts from Dotclear blog engine") |
45 | 46 |
46 LONG_DESC = D_(u"""This importer handle Dotclear blog engine. | 47 LONG_DESC = D_( |
48 u"""This importer handle Dotclear blog engine. | |
47 | 49 |
48 To use it, you'll need to export your blog to a flat file. | 50 To use it, you'll need to export your blog to a flat file. |
49 You must go in your admin interface and select Plugins/Maintenance then Backup. | 51 You must go in your admin interface and select Plugins/Maintenance then Backup. |
50 Export only one blog if you have many, i.e. select "Download database of current blog" | 52 Export only one blog if you have many, i.e. select "Download database of current blog" |
51 Depending on your configuration, your may need to use Import/Export plugin and export as a flat file. | 53 Depending on your configuration, your may need to use Import/Export plugin and export as a flat file. |
52 | 54 |
53 location: you must use the absolute path to your backup for the location parameter | 55 location: you must use the absolute path to your backup for the location parameter |
54 """) | 56 """ |
57 ) | |
55 POST_ID_PREFIX = u"sat_dc_" | 58 POST_ID_PREFIX = u"sat_dc_" |
56 KNOWN_DATA_TYPES = ('link', 'setting', 'post', 'meta', 'media', 'post_media', 'comment', 'captcha') | 59 KNOWN_DATA_TYPES = ( |
57 ESCAPE_MAP = { | 60 "link", |
58 'r': u'\r', | 61 "setting", |
59 'n': u'\n', | 62 "post", |
60 '"': u'"', | 63 "meta", |
61 '\\': u'\\', | 64 "media", |
62 } | 65 "post_media", |
66 "comment", | |
67 "captcha", | |
68 ) | |
69 ESCAPE_MAP = {"r": u"\r", "n": u"\n", '"': u'"', "\\": u"\\"} | |
63 | 70 |
64 | 71 |
65 class DotclearParser(object): | 72 class DotclearParser(object): |
66 # XXX: we have to parse the whole file to build data | 73 # XXX: we have to parse the whole file to build data |
67 # this can be resource intensive on huge blogs | 74 # this can be resource intensive on huge blogs |
74 """Return a unique and constant post id | 81 """Return a unique and constant post id |
75 | 82 |
76 @param post(dict): parsed post data | 83 @param post(dict): parsed post data |
77 @return (unicode): post unique item id | 84 @return (unicode): post unique item id |
78 """ | 85 """ |
79 return u"{}_{}_{}_{}:{}".format(POST_ID_PREFIX, post['blog_id'], post['user_id'], post['post_id'], post['post_url']) | 86 return u"{}_{}_{}_{}:{}".format( |
87 POST_ID_PREFIX, | |
88 post["blog_id"], | |
89 post["user_id"], | |
90 post["post_id"], | |
91 post["post_url"], | |
92 ) | |
80 | 93 |
81 def getCommentId(self, comment): | 94 def getCommentId(self, comment): |
82 """Return a unique and constant comment id | 95 """Return a unique and constant comment id |
83 | 96 |
84 @param comment(dict): parsed comment | 97 @param comment(dict): parsed comment |
85 @return (unicode): comment unique comment id | 98 @return (unicode): comment unique comment id |
86 """ | 99 """ |
87 post_id = comment['post_id'] | 100 post_id = comment["post_id"] |
88 parent_item_id = self.posts_data[post_id]['blog']['id'] | 101 parent_item_id = self.posts_data[post_id]["blog"]["id"] |
89 return u"{}_comment_{}".format(parent_item_id, comment['comment_id']) | 102 return u"{}_comment_{}".format(parent_item_id, comment["comment_id"]) |
90 | 103 |
91 def getTime(self, data, key): | 104 def getTime(self, data, key): |
92 """Parse time as given by dotclear, with timezone handling | 105 """Parse time as given by dotclear, with timezone handling |
93 | 106 |
94 @param data(dict): dotclear data (post or comment) | 107 @param data(dict): dotclear data (post or comment) |
110 except IndexError: | 123 except IndexError: |
111 raise exceptions.ParsingError("Data was expected") | 124 raise exceptions.ParsingError("Data was expected") |
112 if char == '"': | 125 if char == '"': |
113 # we have reached the end of this field, | 126 # we have reached the end of this field, |
114 # we try to parse a new one | 127 # we try to parse a new one |
115 yield u''.join(buf) | 128 yield u"".join(buf) |
116 buf = [] | 129 buf = [] |
117 idx += 1 | 130 idx += 1 |
118 try: | 131 try: |
119 separator = fields_data[idx] | 132 separator = fields_data[idx] |
120 except IndexError: | 133 except IndexError: |
121 return | 134 return |
122 if separator != u',': | 135 if separator != u",": |
123 raise exceptions.ParsingError("Field separator was expeceted") | 136 raise exceptions.ParsingError("Field separator was expeceted") |
124 idx += 1 | 137 idx += 1 |
125 break # we have a new field | 138 break # we have a new field |
126 elif char == u'\\': | 139 elif char == u"\\": |
127 idx += 1 | 140 idx += 1 |
128 try: | 141 try: |
129 char = ESCAPE_MAP[fields_data[idx]] | 142 char = ESCAPE_MAP[fields_data[idx]] |
130 except IndexError: | 143 except IndexError: |
131 raise exceptions.ParsingError("Escaped char was expected") | 144 raise exceptions.ParsingError("Escaped char was expected") |
137 def parseFields(self, headers, data): | 150 def parseFields(self, headers, data): |
138 return dict(itertools.izip(headers, self.readFields(data))) | 151 return dict(itertools.izip(headers, self.readFields(data))) |
139 | 152 |
140 def postHandler(self, headers, data, index): | 153 def postHandler(self, headers, data, index): |
141 post = self.parseFields(headers, data) | 154 post = self.parseFields(headers, data) |
142 log.debug(u'({}) post found: {}'.format(index, post['post_title'])) | 155 log.debug(u"({}) post found: {}".format(index, post["post_title"])) |
143 mb_data = {'id': self.getPostId(post), | 156 mb_data = { |
144 'published': self.getTime(post, 'post_creadt'), | 157 "id": self.getPostId(post), |
145 'updated': self.getTime(post, 'post_upddt'), | 158 "published": self.getTime(post, "post_creadt"), |
146 'author': post['user_id'], # the user info is not in the archive | 159 "updated": self.getTime(post, "post_upddt"), |
147 # TODO: option to specify user info | 160 "author": post["user_id"], # the user info is not in the archive |
148 'content_xhtml': u"{}{}".format(post['post_content_xhtml'], post['post_excerpt_xhtml']), | 161 # TODO: option to specify user info |
149 'title': post['post_title'], | 162 "content_xhtml": u"{}{}".format( |
150 'allow_comments': C.boolConst(bool(int(post['post_open_comment']))), | 163 post["post_content_xhtml"], post["post_excerpt_xhtml"] |
151 } | 164 ), |
152 self.posts_data[post['post_id']] = {'blog': mb_data, 'comments':[[]], 'url': u'/post/{}'.format(post['post_url'])} | 165 "title": post["post_title"], |
166 "allow_comments": C.boolConst(bool(int(post["post_open_comment"]))), | |
167 } | |
168 self.posts_data[post["post_id"]] = { | |
169 "blog": mb_data, | |
170 "comments": [[]], | |
171 "url": u"/post/{}".format(post["post_url"]), | |
172 } | |
153 | 173 |
154 def metaHandler(self, headers, data, index): | 174 def metaHandler(self, headers, data, index): |
155 meta = self.parseFields(headers, data) | 175 meta = self.parseFields(headers, data) |
156 if meta['meta_type'] == 'tag': | 176 if meta["meta_type"] == "tag": |
157 tags = self.tags.setdefault(meta['post_id'], set()) | 177 tags = self.tags.setdefault(meta["post_id"], set()) |
158 tags.add(meta['meta_id']) | 178 tags.add(meta["meta_id"]) |
159 | 179 |
160 def metaFinishedHandler(self): | 180 def metaFinishedHandler(self): |
161 for post_id, tags in self.tags.iteritems(): | 181 for post_id, tags in self.tags.iteritems(): |
162 data_format.iter2dict('tag', tags, self.posts_data[post_id]['blog']) | 182 data_format.iter2dict("tag", tags, self.posts_data[post_id]["blog"]) |
163 del self.tags | 183 del self.tags |
164 | 184 |
165 def commentHandler(self, headers, data, index): | 185 def commentHandler(self, headers, data, index): |
166 comment = self.parseFields(headers, data) | 186 comment = self.parseFields(headers, data) |
167 if comment['comment_site']: | 187 if comment["comment_site"]: |
168 # we don't use atom:uri because it's used for jid in XMPP | 188 # we don't use atom:uri because it's used for jid in XMPP |
169 content = u'{}\n<hr>\n<a href="{}">author website</a>'.format( | 189 content = u'{}\n<hr>\n<a href="{}">author website</a>'.format( |
170 comment['comment_content'], | 190 comment["comment_content"], |
171 cgi.escape(comment['comment_site']).replace('"', u'%22')) | 191 cgi.escape(comment["comment_site"]).replace('"', u"%22"), |
192 ) | |
172 else: | 193 else: |
173 content = comment['comment_content'] | 194 content = comment["comment_content"] |
174 mb_data = {'id': self.getCommentId(comment), | 195 mb_data = { |
175 'published': self.getTime(comment, 'comment_dt'), | 196 "id": self.getCommentId(comment), |
176 'updated': self.getTime(comment, 'comment_upddt'), | 197 "published": self.getTime(comment, "comment_dt"), |
177 'author': comment['comment_author'], | 198 "updated": self.getTime(comment, "comment_upddt"), |
178 # we don't keep email addresses so that the author doesn't get spammed | 199 "author": comment["comment_author"], |
179 # (they would otherwise be publicly available) | 200 # we don't keep email addresses so that the author doesn't get spammed |
180 # 'author_email': comment['comment_email'], | 201 # (they would otherwise be publicly available) |
181 'content_xhtml': content, | 202 # 'author_email': comment['comment_email'], |
182 } | 203 "content_xhtml": content, |
183 self.posts_data[comment['post_id']]['comments'][0].append( | 204 } |
184 {'blog': mb_data, 'comments': [[]]}) | 205 self.posts_data[comment["post_id"]]["comments"][0].append( |
206 {"blog": mb_data, "comments": [[]]} | |
207 ) | |
185 | 208 |
186 def parse(self, db_path): | 209 def parse(self, db_path): |
187 with open(db_path) as f: | 210 with open(db_path) as f: |
188 signature = f.readline().decode('utf-8') | 211 signature = f.readline().decode("utf-8") |
189 try: | 212 try: |
190 version = signature.split('|')[1] | 213 version = signature.split("|")[1] |
191 except IndexError: | 214 except IndexError: |
192 version = None | 215 version = None |
193 log.debug(u"Dotclear version: {}".format(version)) | 216 log.debug(u"Dotclear version: {}".format(version)) |
194 data_type = None | 217 data_type = None |
195 data_headers = None | 218 data_headers = None |
196 index = None | 219 index = None |
197 while True: | 220 while True: |
198 buf = f.readline().decode('utf-8') | 221 buf = f.readline().decode("utf-8") |
199 if not buf: | 222 if not buf: |
200 break | 223 break |
201 if buf.startswith('['): | 224 if buf.startswith("["): |
202 header = buf.split(' ', 1) | 225 header = buf.split(" ", 1) |
203 data_type = header[0][1:] | 226 data_type = header[0][1:] |
204 if data_type not in KNOWN_DATA_TYPES: | 227 if data_type not in KNOWN_DATA_TYPES: |
205 log.warning(u"unkown data type: {}".format(data_type)) | 228 log.warning(u"unkown data type: {}".format(data_type)) |
206 index = 0 | 229 index = 0 |
207 try: | 230 try: |
208 data_headers = header[1].split(',') | 231 data_headers = header[1].split(",") |
209 # we need to remove the ']' from the last header | 232 # we need to remove the ']' from the last header |
210 last_header = data_headers[-1] | 233 last_header = data_headers[-1] |
211 data_headers[-1] = last_header[:last_header.rfind(']')] | 234 data_headers[-1] = last_header[: last_header.rfind("]")] |
212 except IndexError: | 235 except IndexError: |
213 log.warning(u"Can't read data)") | 236 log.warning(u"Can't read data)") |
214 else: | 237 else: |
215 if data_type is None: | 238 if data_type is None: |
216 continue | 239 continue |
217 buf = buf.strip() | 240 buf = buf.strip() |
218 if not buf and data_type in KNOWN_DATA_TYPES: | 241 if not buf and data_type in KNOWN_DATA_TYPES: |
219 try: | 242 try: |
220 finished_handler = getattr(self, '{}FinishedHandler'.format(data_type)) | 243 finished_handler = getattr( |
244 self, "{}FinishedHandler".format(data_type) | |
245 ) | |
221 except AttributeError: | 246 except AttributeError: |
222 pass | 247 pass |
223 else: | 248 else: |
224 finished_handler() | 249 finished_handler() |
225 log.debug(u"{} data finished".format(data_type)) | 250 log.debug(u"{} data finished".format(data_type)) |
226 data_type = None | 251 data_type = None |
227 continue | 252 continue |
228 assert data_type | 253 assert data_type |
229 try: | 254 try: |
230 fields_handler = getattr(self, '{}Handler'.format(data_type)) | 255 fields_handler = getattr(self, "{}Handler".format(data_type)) |
231 except AttributeError: | 256 except AttributeError: |
232 pass | 257 pass |
233 else: | 258 else: |
234 fields_handler(data_headers, buf, index) | 259 fields_handler(data_headers, buf, index) |
235 index += 1 | 260 index += 1 |
236 return (self.posts_data.itervalues(), len(self.posts_data)) | 261 return (self.posts_data.itervalues(), len(self.posts_data)) |
237 | 262 |
238 | 263 |
239 class DotclearImport(object): | 264 class DotclearImport(object): |
240 | |
241 def __init__(self, host): | 265 def __init__(self, host): |
242 log.info(_("plugin Dotclear Import initialization")) | 266 log.info(_("plugin Dotclear Import initialization")) |
243 self.host = host | 267 self.host = host |
244 host.plugins['BLOG_IMPORT'].register('dotclear', self.DcImport, SHORT_DESC, LONG_DESC) | 268 host.plugins["BLOG_IMPORT"].register( |
269 "dotclear", self.DcImport, SHORT_DESC, LONG_DESC | |
270 ) | |
245 | 271 |
246 def DcImport(self, client, location, options=None): | 272 def DcImport(self, client, location, options=None): |
247 if not os.path.isabs(location): | 273 if not os.path.isabs(location): |
248 raise exceptions.DataError(u"An absolute path to backup data need to be given as location") | 274 raise exceptions.DataError( |
275 u"An absolute path to backup data need to be given as location" | |
276 ) | |
249 dc_parser = DotclearParser() | 277 dc_parser = DotclearParser() |
250 d = threads.deferToThread(dc_parser.parse, location) | 278 d = threads.deferToThread(dc_parser.parse, location) |
251 return d | 279 return d |