annotate src/plugins/plugin_blog_import_dotclear.py @ 2182:087eec4c6c07

memory (persistent, sqlite): better private values handling + new LazyPersistentBinaryDict: - merged private values method handling in sqlite, and added a keys arguments to get only some keys - LazyPersistentBinaryDict allow to get values in database only when they are needed, saving memory for big data
author Goffi <goffi@goffi.org>
date Sun, 12 Mar 2017 19:33:17 +0100
parents 33c8c4973743
children 8b37a62336c3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1934
2daf7b4c6756 use of /usr/bin/env instead of /usr/bin/python in shebang
Goffi <goffi@goffi.org>
parents: 1919
diff changeset
1 #!/usr/bin/env python2
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
3
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # SàT plugin for import external blogs
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
5 # Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
6
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
11
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
16
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
19
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
20 from sat.core.i18n import _, D_
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
21 from sat.core.constants import Const as C
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
22 from sat.core.log import getLogger
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
23 log = getLogger(__name__)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
24 from sat.core import exceptions
1919
d3354c80bd1f core (tools): moved common to a separate package, and put data method in a data_format module
Goffi <goffi@goffi.org>
parents: 1826
diff changeset
25 from sat.tools.common import data_format
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
26 from twisted.internet import threads
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
27 from collections import OrderedDict
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
28 import itertools
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
29 import time
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
30 import cgi
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
31 import os.path
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
32
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
33
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
34 PLUGIN_INFO = {
2145
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
35 C.PI_NAME: "Dotclear import",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
36 C.PI_IMPORT_NAME: "IMPORT_DOTCLEAR",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
37 C.PI_TYPE: C.PLUG_TYPE_BLOG,
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
38 C.PI_DEPENDENCIES: ["BLOG_IMPORT"],
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
39 C.PI_MAIN: "DotclearImport",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
40 C.PI_HANDLER: "no",
33c8c4973743 core (plugins): added missing contants + use of new constants in PLUGIN_INFO
Goffi <goffi@goffi.org>
parents: 1934
diff changeset
41 C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine.""")
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
42 }
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
43
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
44 SHORT_DESC = D_(u"import posts from Dotclear blog engine")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
45
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
46 LONG_DESC = D_(u"""This importer handle Dotclear blog engine.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
47
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
48 To use it, you'll need to export your blog to a flat file.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
49 You must go in your admin interface and select Plugins/Maintenance then Backup.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
50 Export only one blog if you have many, i.e. select "Download database of current blog"
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
51 Depending on your configuration, your may need to use Import/Export plugin and export as a flat file.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
52
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
53 location: you must use the absolute path to your backup for the location parameter
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
54 """)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
55 POST_ID_PREFIX = u"SàT_DOTCLEAR_IMPORT_BLOG"
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
56 KNOWN_DATA_TYPES = ('link', 'setting', 'post', 'meta', 'media', 'post_media', 'comment', 'captcha')
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
57 ESCAPE_MAP = {
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
58 'r': u'\r',
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
59 'n': u'\n',
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
60 '"': u'"',
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
61 '\\': u'\\',
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
62 }
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
63
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
64
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
65 class DotclearParser(object):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
66 # XXX: we have to parse all file to build data
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
67 # this can be ressource intensive on huge blogs
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
68
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
69 def __init__(self):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
70 self.posts_data = OrderedDict()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
71 self.tags = {}
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
72
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
73 def getPostId(self, post):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
74 """Return a unique and constant post id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
75
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
76 @param post(dict): parsed post data
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
77 @return (unicode): post unique item id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
78 """
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
79 return u"{}_{}_{}_{}:{}".format(POST_ID_PREFIX, post['blog_id'], post['user_id'], post['post_id'], post['post_url'])
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
80
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
81 def getCommentId(self, comment):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
82 """Return a unique and constant comment id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
83
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
84 @param comment(dict): parsed comment
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
85 @return (unicode): comment unique comment id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
86 """
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
87 post_id = comment['post_id']
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
88 parent_item_id = self.posts_data[post_id]['blog']['id']
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
89 return u"{}_comment_{}".format(parent_item_id, comment['comment_id'])
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
90
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
91 def getTime(self, data, key):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
92 """Parse time as given by dotclear, with timezone handling
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
93
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
94 @param data(dict): dotclear data (post or comment)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
95 @param key(unicode): key to get (e.g. "post_creadt")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
96 @return (float): Unix time
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
97 """
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
98 return time.mktime(time.strptime(data[key], "%Y-%m-%d %H:%M:%S"))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
99
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
100 def readFields(self, fields_data):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
101 buf = []
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
102 idx = 0
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
103 while True:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
104 if fields_data[idx] != '"':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
105 raise exceptions.ParsingError
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
106 while True:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
107 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
108 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
109 char = fields_data[idx]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
110 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
111 raise exceptions.ParsingError("Data was expected")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
112 if char == '"':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
113 # we have reached the end of this field,
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
114 # we try to parse a new one
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
115 yield u''.join(buf)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
116 buf = []
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
117 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
118 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
119 separator = fields_data[idx]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
120 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
121 return
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
122 if separator != u',':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
123 raise exceptions.ParsingError("Field separator was expeceted")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
124 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
125 break # we have a new field
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
126 elif char == u'\\':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
127 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
128 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
129 char = ESCAPE_MAP[fields_data[idx]]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
130 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
131 raise exceptions.ParsingError("Escaped char was expected")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
132 except KeyError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
133 char = fields_data[idx]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
134 log.warning(u"Unknown key to escape: {}".format(char))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
135 buf.append(char)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
136
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
137 def parseFields(self, headers, data):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
138 return dict(itertools.izip(headers, self.readFields(data)))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
139
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
140 def postHandler(self, headers, data, index):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
141 post = self.parseFields(headers, data)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
142 log.debug(u'({}) post found: {}'.format(index, post['post_title']))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
143 mb_data = {'id': self.getPostId(post),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
144 'published': self.getTime(post, 'post_creadt'),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
145 'updated': self.getTime(post, 'post_upddt'),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
146 'author': post['user_id'], # there use info are not in the archive
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
147 # TODO: option to specify user info
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
148 'content_xhtml': u"{}{}".format(post['post_content_xhtml'], post['post_excerpt_xhtml']),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
149 'title': post['post_title'],
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
150 'allow_comments': C.boolConst(bool(int(post['post_open_comment']))),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
151 }
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
152 self.posts_data[post['post_id']] = {'blog': mb_data, 'comments':[[]], 'url': u'/post/{}'.format(post['post_url'])}
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
153
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
154 def metaHandler(self, headers, data, index):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
155 meta = self.parseFields(headers, data)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
156 if meta['meta_type'] == 'tag':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
157 tags = self.tags.setdefault(meta['post_id'], set())
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
158 tags.add(meta['meta_id'])
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
159
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
160 def metaFinishedHandler(self):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
161 for post_id, tags in self.tags.iteritems():
1919
d3354c80bd1f core (tools): moved common to a separate package, and put data method in a data_format module
Goffi <goffi@goffi.org>
parents: 1826
diff changeset
162 data_format.iter2dict('tag', tags, self.posts_data[post_id]['blog'])
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
163 del self.tags
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
164
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
165 def commentHandler(self, headers, data, index):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
166 comment = self.parseFields(headers, data)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
167 if comment['comment_site']:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
168 # we don't use atom:uri because it's used for jid in XMPP
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
169 content = u'{}\n<hr>\n<a href="{}">author website</a>'.format(
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
170 comment['comment_content'],
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
171 cgi.escape(comment['comment_site']).replace('"', u'%22'))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
172 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
173 content = comment['comment_content']
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
174 mb_data = {'id': self.getCommentId(comment),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
175 'published': self.getTime(comment, 'comment_dt'),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
176 'updated': self.getTime(comment, 'comment_upddt'),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
177 'author': comment['comment_author'],
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
178 # we don't keep email addresses to avoid the author to be spammed
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
179 # (they would be available publicly else)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
180 # 'author_email': comment['comment_email'],
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
181 'content_xhtml': content,
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
182 }
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
183 self.posts_data[comment['post_id']]['comments'][0].append(
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
184 {'blog': mb_data, 'comments': [[]]})
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
185
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
186 def parse(self, db_path):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
187 with open(db_path) as f:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
188 signature = f.readline().decode('utf-8')
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
189 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
190 version = signature.split('|')[1]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
191 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
192 version = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
193 log.debug(u"Dotclear version: {}".format(version))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
194 data_type = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
195 data_headers = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
196 index = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
197 while True:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
198 buf = f.readline().decode('utf-8')
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
199 if not buf:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
200 break
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
201 if buf.startswith('['):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
202 header = buf.split(' ', 1)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
203 data_type = header[0][1:]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
204 if data_type not in KNOWN_DATA_TYPES:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
205 log.warning(u"unkown data type: {}".format(data_type))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
206 index = 0
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
207 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
208 data_headers = header[1].split(',')
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
209 # we need to remove the ']' from the last header
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
210 last_header = data_headers[-1]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
211 data_headers[-1] = last_header[:last_header.rfind(']')]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
212 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
213 log.warning(u"Can't read data)")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
214 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
215 if data_type is None:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
216 continue
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
217 buf = buf.strip()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
218 if not buf and data_type in KNOWN_DATA_TYPES:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
219 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
220 finished_handler = getattr(self, '{}FinishedHandler'.format(data_type))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
221 except AttributeError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
222 pass
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
223 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
224 finished_handler()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
225 log.debug(u"{} data finished".format(data_type))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
226 data_type = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
227 continue
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
228 assert data_type
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
229 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
230 fields_handler = getattr(self, '{}Handler'.format(data_type))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
231 except AttributeError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
232 pass
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
233 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
234 fields_handler(data_headers, buf, index)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
235 index += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
236 return (self.posts_data.itervalues(), len(self.posts_data))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
237
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
238
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
239 class DotclearImport(object):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
240
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
241 def __init__(self, host):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
242 log.info(_("plugin Dotclear Import initialization"))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
243 self.host = host
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
244 host.plugins['BLOG_IMPORT'].register('dotclear', self.DcImport, SHORT_DESC, LONG_DESC)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
245
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
246 def DcImport(self, client, location, options=None):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
247 if not os.path.isabs(location):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
248 raise exceptions.DataError(u"An absolute path to backup data need to be given as location")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
249 dc_parser = DotclearParser()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
250 d = threads.deferToThread(dc_parser.parse, location)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
251 return d