annotate src/plugins/plugin_blog_import_dotclear.py @ 2138:6e509ee853a8

plugin OTR, core; use of new sendMessage + OTR mini refactoring: - new client.sendMessage method is used instead of sendMessageToStream - client.feedback is used in OTR - OTR now add message processing hints and carbon private element as recommanded by XEP-0364. Explicit Message Encryption is still TODO - OTR use the new sendMessageFinish trigger, this has a number of advantages: * there is little risk that OTR is skipped by other plugins (they have to use client.sendMessage as recommanded) * being at the end of the chain, OTR can check and remove any HTML or other leaking elements * OTR doesn't have to skip other plugins anymore, this means that things like delivery receipts are now working with OTR (but because there is not full stanza encryption, they can leak metadata) * OTR can decide to follow storage hint by letting or deleting "history" key
author Goffi <goffi@goffi.org>
date Sun, 05 Feb 2017 15:00:01 +0100
parents 2daf7b4c6756
children 33c8c4973743
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1934
2daf7b4c6756 use of /usr/bin/env instead of /usr/bin/python in shebang
Goffi <goffi@goffi.org>
parents: 1919
diff changeset
1 #!/usr/bin/env python2
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
3
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
4 # SàT plugin for import external blogs
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
5 # Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
6
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
8 # it under the terms of the GNU Affero General Public License as published by
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
10 # (at your option) any later version.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
11
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
15 # GNU Affero General Public License for more details.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
16
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
17 # You should have received a copy of the GNU Affero General Public License
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
19
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
20 from sat.core.i18n import _, D_
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
21 from sat.core.constants import Const as C
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
22 from sat.core.log import getLogger
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
23 log = getLogger(__name__)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
24 from sat.core import exceptions
1919
d3354c80bd1f core (tools): moved common to a separate package, and put data method in a data_format module
Goffi <goffi@goffi.org>
parents: 1826
diff changeset
25 from sat.tools.common import data_format
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
26 from twisted.internet import threads
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
27 from collections import OrderedDict
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
28 import itertools
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
29 import time
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
30 import cgi
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
31 import os.path
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
32
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
33
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
34 PLUGIN_INFO = {
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
35 "name": "Dotclear import",
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
36 "import_name": "IMPORT_DOTCLEAR",
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
37 "type": C.PLUG_TYPE_BLOG,
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
38 "dependencies": ["BLOG_IMPORT"],
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
39 "main": "DotclearImport",
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
40 "handler": "no",
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
41 "description": _("""Blog importer for Dotclear blog engine.""")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
42 }
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
43
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
44 SHORT_DESC = D_(u"import posts from Dotclear blog engine")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
45
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
46 LONG_DESC = D_(u"""This importer handle Dotclear blog engine.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
47
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
48 To use it, you'll need to export your blog to a flat file.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
49 You must go in your admin interface and select Plugins/Maintenance then Backup.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
50 Export only one blog if you have many, i.e. select "Download database of current blog"
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
51 Depending on your configuration, your may need to use Import/Export plugin and export as a flat file.
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
52
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
53 location: you must use the absolute path to your backup for the location parameter
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
54 """)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
55 POST_ID_PREFIX = u"SàT_DOTCLEAR_IMPORT_BLOG"
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
56 KNOWN_DATA_TYPES = ('link', 'setting', 'post', 'meta', 'media', 'post_media', 'comment', 'captcha')
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
57 ESCAPE_MAP = {
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
58 'r': u'\r',
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
59 'n': u'\n',
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
60 '"': u'"',
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
61 '\\': u'\\',
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
62 }
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
63
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
64
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
65 class DotclearParser(object):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
66 # XXX: we have to parse all file to build data
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
67 # this can be ressource intensive on huge blogs
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
68
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
69 def __init__(self):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
70 self.posts_data = OrderedDict()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
71 self.tags = {}
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
72
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
73 def getPostId(self, post):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
74 """Return a unique and constant post id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
75
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
76 @param post(dict): parsed post data
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
77 @return (unicode): post unique item id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
78 """
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
79 return u"{}_{}_{}_{}:{}".format(POST_ID_PREFIX, post['blog_id'], post['user_id'], post['post_id'], post['post_url'])
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
80
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
81 def getCommentId(self, comment):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
82 """Return a unique and constant comment id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
83
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
84 @param comment(dict): parsed comment
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
85 @return (unicode): comment unique comment id
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
86 """
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
87 post_id = comment['post_id']
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
88 parent_item_id = self.posts_data[post_id]['blog']['id']
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
89 return u"{}_comment_{}".format(parent_item_id, comment['comment_id'])
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
90
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
91 def getTime(self, data, key):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
92 """Parse time as given by dotclear, with timezone handling
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
93
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
94 @param data(dict): dotclear data (post or comment)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
95 @param key(unicode): key to get (e.g. "post_creadt")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
96 @return (float): Unix time
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
97 """
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
98 return time.mktime(time.strptime(data[key], "%Y-%m-%d %H:%M:%S"))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
99
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
100 def readFields(self, fields_data):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
101 buf = []
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
102 idx = 0
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
103 while True:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
104 if fields_data[idx] != '"':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
105 raise exceptions.ParsingError
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
106 while True:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
107 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
108 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
109 char = fields_data[idx]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
110 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
111 raise exceptions.ParsingError("Data was expected")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
112 if char == '"':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
113 # we have reached the end of this field,
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
114 # we try to parse a new one
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
115 yield u''.join(buf)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
116 buf = []
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
117 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
118 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
119 separator = fields_data[idx]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
120 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
121 return
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
122 if separator != u',':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
123 raise exceptions.ParsingError("Field separator was expeceted")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
124 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
125 break # we have a new field
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
126 elif char == u'\\':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
127 idx += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
128 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
129 char = ESCAPE_MAP[fields_data[idx]]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
130 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
131 raise exceptions.ParsingError("Escaped char was expected")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
132 except KeyError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
133 char = fields_data[idx]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
134 log.warning(u"Unknown key to escape: {}".format(char))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
135 buf.append(char)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
136
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
137 def parseFields(self, headers, data):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
138 return dict(itertools.izip(headers, self.readFields(data)))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
139
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
140 def postHandler(self, headers, data, index):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
141 post = self.parseFields(headers, data)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
142 log.debug(u'({}) post found: {}'.format(index, post['post_title']))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
143 mb_data = {'id': self.getPostId(post),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
144 'published': self.getTime(post, 'post_creadt'),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
145 'updated': self.getTime(post, 'post_upddt'),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
146 'author': post['user_id'], # there use info are not in the archive
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
147 # TODO: option to specify user info
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
148 'content_xhtml': u"{}{}".format(post['post_content_xhtml'], post['post_excerpt_xhtml']),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
149 'title': post['post_title'],
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
150 'allow_comments': C.boolConst(bool(int(post['post_open_comment']))),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
151 }
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
152 self.posts_data[post['post_id']] = {'blog': mb_data, 'comments':[[]], 'url': u'/post/{}'.format(post['post_url'])}
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
153
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
154 def metaHandler(self, headers, data, index):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
155 meta = self.parseFields(headers, data)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
156 if meta['meta_type'] == 'tag':
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
157 tags = self.tags.setdefault(meta['post_id'], set())
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
158 tags.add(meta['meta_id'])
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
159
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
160 def metaFinishedHandler(self):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
161 for post_id, tags in self.tags.iteritems():
1919
d3354c80bd1f core (tools): moved common to a separate package, and put data method in a data_format module
Goffi <goffi@goffi.org>
parents: 1826
diff changeset
162 data_format.iter2dict('tag', tags, self.posts_data[post_id]['blog'])
1826
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
163 del self.tags
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
164
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
165 def commentHandler(self, headers, data, index):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
166 comment = self.parseFields(headers, data)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
167 if comment['comment_site']:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
168 # we don't use atom:uri because it's used for jid in XMPP
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
169 content = u'{}\n<hr>\n<a href="{}">author website</a>'.format(
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
170 comment['comment_content'],
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
171 cgi.escape(comment['comment_site']).replace('"', u'%22'))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
172 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
173 content = comment['comment_content']
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
174 mb_data = {'id': self.getCommentId(comment),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
175 'published': self.getTime(comment, 'comment_dt'),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
176 'updated': self.getTime(comment, 'comment_upddt'),
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
177 'author': comment['comment_author'],
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
178 # we don't keep email addresses to avoid the author to be spammed
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
179 # (they would be available publicly else)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
180 # 'author_email': comment['comment_email'],
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
181 'content_xhtml': content,
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
182 }
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
183 self.posts_data[comment['post_id']]['comments'][0].append(
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
184 {'blog': mb_data, 'comments': [[]]})
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
185
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
186 def parse(self, db_path):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
187 with open(db_path) as f:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
188 signature = f.readline().decode('utf-8')
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
189 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
190 version = signature.split('|')[1]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
191 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
192 version = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
193 log.debug(u"Dotclear version: {}".format(version))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
194 data_type = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
195 data_headers = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
196 index = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
197 while True:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
198 buf = f.readline().decode('utf-8')
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
199 if not buf:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
200 break
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
201 if buf.startswith('['):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
202 header = buf.split(' ', 1)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
203 data_type = header[0][1:]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
204 if data_type not in KNOWN_DATA_TYPES:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
205 log.warning(u"unkown data type: {}".format(data_type))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
206 index = 0
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
207 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
208 data_headers = header[1].split(',')
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
209 # we need to remove the ']' from the last header
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
210 last_header = data_headers[-1]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
211 data_headers[-1] = last_header[:last_header.rfind(']')]
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
212 except IndexError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
213 log.warning(u"Can't read data)")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
214 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
215 if data_type is None:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
216 continue
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
217 buf = buf.strip()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
218 if not buf and data_type in KNOWN_DATA_TYPES:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
219 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
220 finished_handler = getattr(self, '{}FinishedHandler'.format(data_type))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
221 except AttributeError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
222 pass
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
223 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
224 finished_handler()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
225 log.debug(u"{} data finished".format(data_type))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
226 data_type = None
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
227 continue
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
228 assert data_type
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
229 try:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
230 fields_handler = getattr(self, '{}Handler'.format(data_type))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
231 except AttributeError:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
232 pass
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
233 else:
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
234 fields_handler(data_headers, buf, index)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
235 index += 1
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
236 return (self.posts_data.itervalues(), len(self.posts_data))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
237
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
238
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
239 class DotclearImport(object):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
240
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
241 def __init__(self, host):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
242 log.info(_("plugin Dotclear Import initialization"))
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
243 self.host = host
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
244 host.plugins['BLOG_IMPORT'].register('dotclear', self.DcImport, SHORT_DESC, LONG_DESC)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
245
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
246 def DcImport(self, client, location, options=None):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
247 if not os.path.isabs(location):
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
248 raise exceptions.DataError(u"An absolute path to backup data need to be given as location")
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
249 dc_parser = DotclearParser()
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
250 d = threads.deferToThread(dc_parser.parse, location)
d80ccf4bf201 plugin blog import dotclear: this plugin import Dotclear 2 backups
Goffi <goffi@goffi.org>
parents:
diff changeset
251 return d