Mercurial > libervia-backend
comparison sat/plugins/plugin_blog_import_dotclear.py @ 3028:ab2696e34d29
Python 3 port:
/!\ this is a huge commit
/!\ starting from this commit, SàT needs Python 3.6+
/!\ SàT may be unstable or some features may not work anymore; this will improve with time
This patch port backend, bridge and frontends to Python 3.
Roughly this has been done this way:
- the 2to3 tool has been applied (with Python 3.7)
- all references to python2 have been replaced with python3 (notably shebangs)
- fixed files not handled by 2to3 (notably the shell script)
- several manual fixes
- fixed issues reported by Python 3 that were not handled in Python 2
- replaced "async" with "async_" when needed (it's a reserved word from Python 3.7)
- replaced zope's "implements" with @implementer decorator
- temporary hack to handle data pickled in database, as str or bytes may be returned,
to be checked later
- fixed hash comparison for password
- removed some code which is not needed anymore with Python 3
- deactivated some code which needs to be checked (notably certificate validation)
- tested with jp, fixed reported issues until some basic commands worked
- ported Primitivus (after porting dependencies like urwid satext)
- more manual fixes
author | Goffi <goffi@goffi.org> |
---|---|
date | Tue, 13 Aug 2019 19:08:41 +0200 |
parents | 003b8b4b56a7 |
children | 9d0df638c8b4 |
comparison
equal
deleted
inserted
replaced
3027:ff5bcb12ae60 | 3028:ab2696e34d29 |
---|---|
1 #!/usr/bin/env python2 | 1 #!/usr/bin/env python3 |
2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
3 | 3 |
4 # SàT plugin for import external blogs | 4 # SàT plugin for import external blogs |
5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) | 5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) |
6 | 6 |
40 C.PI_MAIN: "DotclearImport", | 40 C.PI_MAIN: "DotclearImport", |
41 C.PI_HANDLER: "no", | 41 C.PI_HANDLER: "no", |
42 C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine."""), | 42 C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine."""), |
43 } | 43 } |
44 | 44 |
45 SHORT_DESC = D_(u"import posts from Dotclear blog engine") | 45 SHORT_DESC = D_("import posts from Dotclear blog engine") |
46 | 46 |
47 LONG_DESC = D_( | 47 LONG_DESC = D_( |
48 u"""This importer handle Dotclear blog engine. | 48 """This importer handle Dotclear blog engine. |
49 | 49 |
50 To use it, you'll need to export your blog to a flat file. | 50 To use it, you'll need to export your blog to a flat file. |
51 You must go in your admin interface and select Plugins/Maintenance then Backup. | 51 You must go in your admin interface and select Plugins/Maintenance then Backup. |
52 Export only one blog if you have many, i.e. select "Download database of current blog" | 52 Export only one blog if you have many, i.e. select "Download database of current blog" |
53 Depending on your configuration, your may need to use Import/Export plugin and export as a flat file. | 53 Depending on your configuration, your may need to use Import/Export plugin and export as a flat file. |
54 | 54 |
55 location: you must use the absolute path to your backup for the location parameter | 55 location: you must use the absolute path to your backup for the location parameter |
56 """ | 56 """ |
57 ) | 57 ) |
58 POST_ID_PREFIX = u"sat_dc_" | 58 POST_ID_PREFIX = "sat_dc_" |
59 KNOWN_DATA_TYPES = ( | 59 KNOWN_DATA_TYPES = ( |
60 "link", | 60 "link", |
61 "setting", | 61 "setting", |
62 "post", | 62 "post", |
63 "meta", | 63 "meta", |
64 "media", | 64 "media", |
65 "post_media", | 65 "post_media", |
66 "comment", | 66 "comment", |
67 "captcha", | 67 "captcha", |
68 ) | 68 ) |
69 ESCAPE_MAP = {"r": u"\r", "n": u"\n", '"': u'"', "\\": u"\\"} | 69 ESCAPE_MAP = {"r": "\r", "n": "\n", '"': '"', "\\": "\\"} |
70 | 70 |
71 | 71 |
72 class DotclearParser(object): | 72 class DotclearParser(object): |
73 # XXX: we have to parse all file to build data | 73 # XXX: we have to parse all file to build data |
74 # this can be ressource intensive on huge blogs | 74 # this can be ressource intensive on huge blogs |
81 """Return a unique and constant post id | 81 """Return a unique and constant post id |
82 | 82 |
83 @param post(dict): parsed post data | 83 @param post(dict): parsed post data |
84 @return (unicode): post unique item id | 84 @return (unicode): post unique item id |
85 """ | 85 """ |
86 return u"{}_{}_{}_{}:{}".format( | 86 return "{}_{}_{}_{}:{}".format( |
87 POST_ID_PREFIX, | 87 POST_ID_PREFIX, |
88 post["blog_id"], | 88 post["blog_id"], |
89 post["user_id"], | 89 post["user_id"], |
90 post["post_id"], | 90 post["post_id"], |
91 post["post_url"], | 91 post["post_url"], |
97 @param comment(dict): parsed comment | 97 @param comment(dict): parsed comment |
98 @return (unicode): comment unique comment id | 98 @return (unicode): comment unique comment id |
99 """ | 99 """ |
100 post_id = comment["post_id"] | 100 post_id = comment["post_id"] |
101 parent_item_id = self.posts_data[post_id]["blog"]["id"] | 101 parent_item_id = self.posts_data[post_id]["blog"]["id"] |
102 return u"{}_comment_{}".format(parent_item_id, comment["comment_id"]) | 102 return "{}_comment_{}".format(parent_item_id, comment["comment_id"]) |
103 | 103 |
104 def getTime(self, data, key): | 104 def getTime(self, data, key): |
105 """Parse time as given by dotclear, with timezone handling | 105 """Parse time as given by dotclear, with timezone handling |
106 | 106 |
107 @param data(dict): dotclear data (post or comment) | 107 @param data(dict): dotclear data (post or comment) |
123 except IndexError: | 123 except IndexError: |
124 raise exceptions.ParsingError("Data was expected") | 124 raise exceptions.ParsingError("Data was expected") |
125 if char == '"': | 125 if char == '"': |
126 # we have reached the end of this field, | 126 # we have reached the end of this field, |
127 # we try to parse a new one | 127 # we try to parse a new one |
128 yield u"".join(buf) | 128 yield "".join(buf) |
129 buf = [] | 129 buf = [] |
130 idx += 1 | 130 idx += 1 |
131 try: | 131 try: |
132 separator = fields_data[idx] | 132 separator = fields_data[idx] |
133 except IndexError: | 133 except IndexError: |
134 return | 134 return |
135 if separator != u",": | 135 if separator != ",": |
136 raise exceptions.ParsingError("Field separator was expeceted") | 136 raise exceptions.ParsingError("Field separator was expeceted") |
137 idx += 1 | 137 idx += 1 |
138 break # we have a new field | 138 break # we have a new field |
139 elif char == u"\\": | 139 elif char == "\\": |
140 idx += 1 | 140 idx += 1 |
141 try: | 141 try: |
142 char = ESCAPE_MAP[fields_data[idx]] | 142 char = ESCAPE_MAP[fields_data[idx]] |
143 except IndexError: | 143 except IndexError: |
144 raise exceptions.ParsingError("Escaped char was expected") | 144 raise exceptions.ParsingError("Escaped char was expected") |
145 except KeyError: | 145 except KeyError: |
146 char = fields_data[idx] | 146 char = fields_data[idx] |
147 log.warning(u"Unknown key to escape: {}".format(char)) | 147 log.warning("Unknown key to escape: {}".format(char)) |
148 buf.append(char) | 148 buf.append(char) |
149 | 149 |
150 def parseFields(self, headers, data): | 150 def parseFields(self, headers, data): |
151 return dict(itertools.izip(headers, self.readFields(data))) | 151 return dict(zip(headers, self.readFields(data))) |
152 | 152 |
153 def postHandler(self, headers, data, index): | 153 def postHandler(self, headers, data, index): |
154 post = self.parseFields(headers, data) | 154 post = self.parseFields(headers, data) |
155 log.debug(u"({}) post found: {}".format(index, post["post_title"])) | 155 log.debug("({}) post found: {}".format(index, post["post_title"])) |
156 mb_data = { | 156 mb_data = { |
157 "id": self.getPostId(post), | 157 "id": self.getPostId(post), |
158 "published": self.getTime(post, "post_creadt"), | 158 "published": self.getTime(post, "post_creadt"), |
159 "updated": self.getTime(post, "post_upddt"), | 159 "updated": self.getTime(post, "post_upddt"), |
160 "author": post["user_id"], # there use info are not in the archive | 160 "author": post["user_id"], # there use info are not in the archive |
161 # TODO: option to specify user info | 161 # TODO: option to specify user info |
162 "content_xhtml": u"{}{}".format( | 162 "content_xhtml": "{}{}".format( |
163 post["post_content_xhtml"], post["post_excerpt_xhtml"] | 163 post["post_content_xhtml"], post["post_excerpt_xhtml"] |
164 ), | 164 ), |
165 "title": post["post_title"], | 165 "title": post["post_title"], |
166 "allow_comments": C.boolConst(bool(int(post["post_open_comment"]))), | 166 "allow_comments": C.boolConst(bool(int(post["post_open_comment"]))), |
167 } | 167 } |
168 self.posts_data[post["post_id"]] = { | 168 self.posts_data[post["post_id"]] = { |
169 "blog": mb_data, | 169 "blog": mb_data, |
170 "comments": [[]], | 170 "comments": [[]], |
171 "url": u"/post/{}".format(post["post_url"]), | 171 "url": "/post/{}".format(post["post_url"]), |
172 } | 172 } |
173 | 173 |
174 def metaHandler(self, headers, data, index): | 174 def metaHandler(self, headers, data, index): |
175 meta = self.parseFields(headers, data) | 175 meta = self.parseFields(headers, data) |
176 if meta["meta_type"] == "tag": | 176 if meta["meta_type"] == "tag": |
177 tags = self.tags.setdefault(meta["post_id"], set()) | 177 tags = self.tags.setdefault(meta["post_id"], set()) |
178 tags.add(meta["meta_id"]) | 178 tags.add(meta["meta_id"]) |
179 | 179 |
180 def metaFinishedHandler(self): | 180 def metaFinishedHandler(self): |
181 for post_id, tags in self.tags.iteritems(): | 181 for post_id, tags in self.tags.items(): |
182 data_format.iter2dict("tag", tags, self.posts_data[post_id]["blog"]) | 182 data_format.iter2dict("tag", tags, self.posts_data[post_id]["blog"]) |
183 del self.tags | 183 del self.tags |
184 | 184 |
185 def commentHandler(self, headers, data, index): | 185 def commentHandler(self, headers, data, index): |
186 comment = self.parseFields(headers, data) | 186 comment = self.parseFields(headers, data) |
187 if comment["comment_site"]: | 187 if comment["comment_site"]: |
188 # we don't use atom:uri because it's used for jid in XMPP | 188 # we don't use atom:uri because it's used for jid in XMPP |
189 content = u'{}\n<hr>\n<a href="{}">author website</a>'.format( | 189 content = '{}\n<hr>\n<a href="{}">author website</a>'.format( |
190 comment["comment_content"], | 190 comment["comment_content"], |
191 cgi.escape(comment["comment_site"]).replace('"', u"%22"), | 191 cgi.escape(comment["comment_site"]).replace('"', "%22"), |
192 ) | 192 ) |
193 else: | 193 else: |
194 content = comment["comment_content"] | 194 content = comment["comment_content"] |
195 mb_data = { | 195 mb_data = { |
196 "id": self.getCommentId(comment), | 196 "id": self.getCommentId(comment), |
206 {"blog": mb_data, "comments": [[]]} | 206 {"blog": mb_data, "comments": [[]]} |
207 ) | 207 ) |
208 | 208 |
209 def parse(self, db_path): | 209 def parse(self, db_path): |
210 with open(db_path) as f: | 210 with open(db_path) as f: |
211 signature = f.readline().decode("utf-8") | 211 signature = f.readline() |
212 try: | 212 try: |
213 version = signature.split("|")[1] | 213 version = signature.split("|")[1] |
214 except IndexError: | 214 except IndexError: |
215 version = None | 215 version = None |
216 log.debug(u"Dotclear version: {}".format(version)) | 216 log.debug("Dotclear version: {}".format(version)) |
217 data_type = None | 217 data_type = None |
218 data_headers = None | 218 data_headers = None |
219 index = None | 219 index = None |
220 while True: | 220 while True: |
221 buf = f.readline().decode("utf-8") | 221 buf = f.readline() |
222 if not buf: | 222 if not buf: |
223 break | 223 break |
224 if buf.startswith("["): | 224 if buf.startswith("["): |
225 header = buf.split(" ", 1) | 225 header = buf.split(" ", 1) |
226 data_type = header[0][1:] | 226 data_type = header[0][1:] |
227 if data_type not in KNOWN_DATA_TYPES: | 227 if data_type not in KNOWN_DATA_TYPES: |
228 log.warning(u"unkown data type: {}".format(data_type)) | 228 log.warning("unkown data type: {}".format(data_type)) |
229 index = 0 | 229 index = 0 |
230 try: | 230 try: |
231 data_headers = header[1].split(",") | 231 data_headers = header[1].split(",") |
232 # we need to remove the ']' from the last header | 232 # we need to remove the ']' from the last header |
233 last_header = data_headers[-1] | 233 last_header = data_headers[-1] |
234 data_headers[-1] = last_header[: last_header.rfind("]")] | 234 data_headers[-1] = last_header[: last_header.rfind("]")] |
235 except IndexError: | 235 except IndexError: |
236 log.warning(u"Can't read data)") | 236 log.warning("Can't read data)") |
237 else: | 237 else: |
238 if data_type is None: | 238 if data_type is None: |
239 continue | 239 continue |
240 buf = buf.strip() | 240 buf = buf.strip() |
241 if not buf and data_type in KNOWN_DATA_TYPES: | 241 if not buf and data_type in KNOWN_DATA_TYPES: |
245 ) | 245 ) |
246 except AttributeError: | 246 except AttributeError: |
247 pass | 247 pass |
248 else: | 248 else: |
249 finished_handler() | 249 finished_handler() |
250 log.debug(u"{} data finished".format(data_type)) | 250 log.debug("{} data finished".format(data_type)) |
251 data_type = None | 251 data_type = None |
252 continue | 252 continue |
253 assert data_type | 253 assert data_type |
254 try: | 254 try: |
255 fields_handler = getattr(self, "{}Handler".format(data_type)) | 255 fields_handler = getattr(self, "{}Handler".format(data_type)) |
256 except AttributeError: | 256 except AttributeError: |
257 pass | 257 pass |
258 else: | 258 else: |
259 fields_handler(data_headers, buf, index) | 259 fields_handler(data_headers, buf, index) |
260 index += 1 | 260 index += 1 |
261 return (self.posts_data.itervalues(), len(self.posts_data)) | 261 return (iter(self.posts_data.values()), len(self.posts_data)) |
262 | 262 |
263 | 263 |
264 class DotclearImport(object): | 264 class DotclearImport(object): |
265 def __init__(self, host): | 265 def __init__(self, host): |
266 log.info(_("plugin Dotclear Import initialization")) | 266 log.info(_("plugin Dotclear Import initialization")) |
270 ) | 270 ) |
271 | 271 |
272 def DcImport(self, client, location, options=None): | 272 def DcImport(self, client, location, options=None): |
273 if not os.path.isabs(location): | 273 if not os.path.isabs(location): |
274 raise exceptions.DataError( | 274 raise exceptions.DataError( |
275 u"An absolute path to backup data need to be given as location" | 275 "An absolute path to backup data need to be given as location" |
276 ) | 276 ) |
277 dc_parser = DotclearParser() | 277 dc_parser = DotclearParser() |
278 d = threads.deferToThread(dc_parser.parse, location) | 278 d = threads.deferToThread(dc_parser.parse, location) |
279 return d | 279 return d |