libervia-backend: sat/plugins/plugin_blog_import

comparison sat/plugins/plugin_blog_import_dotclear.py @ 2624:56f94936df1e

code style reformatting using black

author	Goffi <goffi@goffi.org>
date	Wed, 27 Jun 2018 20:14:46 +0200
parents	26edcf3a30eb
children	003b8b4b56a7

comparison

equal deleted inserted replaced

-:49533de4540b
+:56f94936df1e
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 from sat.core.i18n import _, D_
 from sat.core.constants import Const as C
 from sat.core.log import getLogger
 log = getLogger(__name__)
 from sat.core import exceptions
 from sat.tools.common import data_format
 from twisted.internet import threads
 from collections import OrderedDict
 C.PI_IMPORT_NAME: "IMPORT_DOTCLEAR",
 C.PI_TYPE: C.PLUG_TYPE_BLOG,
 C.PI_DEPENDENCIES: ["BLOG_IMPORT"],
 C.PI_MAIN: "DotclearImport",
 C.PI_HANDLER: "no",
-C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine.""")
+C.PI_DESCRIPTION: _("""Blog importer for Dotclear blog engine."""),
 }
 # One-line description of this importer; handed to the BLOG_IMPORT plugin's
 # register() call together with LONG_DESC.
 # NOTE(review): D_ comes from sat.core.i18n; presumably it marks the string
 # for deferred translation — confirm.
 SHORT_DESC = D_(u"import posts from Dotclear blog engine")
 # Multi-line help text: explains how to export a Dotclear blog to a flat file
 # and that the "location" parameter must be an absolute path to that backup.
-LONG_DESC = D_(u"""This importer handle Dotclear blog engine.
+LONG_DESC = D_(
+u"""This importer handle Dotclear blog engine.
 To use it, you'll need to export your blog to a flat file.
 You must go in your admin interface and select Plugins/Maintenance then Backup.
 Export only one blog if you have many, i.e. select "Download database of current blog"
 Depending on your configuration, your may need to use Import/Export plugin and export as a flat file.
 location: you must use the absolute path to your backup for the location parameter
-""")
+"""
+)
 # Prefix used when building the ids of imported posts
 POST_ID_PREFIX = u"sat_dc_"
 # Section names recognised in a Dotclear flat-file export; any other section
 # name is reported as unknown by the parser
 KNOWN_DATA_TYPES = (
     "link", "setting", "post", "meta",
     "media", "post_media", "comment", "captcha",
 )
 # Character found right after a backslash in a field => character it encodes
 ESCAPE_MAP = {
     "r": u"\r",
     "n": u"\n",
     '"': u'"',
     "\\": u"\\",
 }
 class DotclearParser(object):
 # XXX: we have to parse the whole file to build data
 #      this can be resource intensive on huge blogs
 """Return a unique and constant post id
 @param post(dict): parsed post data
 @return (unicode): post unique item id
 """
-return u"{}_{}_{}_{}:{}".format(POST_ID_PREFIX, post['blog_id'], post['user_id'], post['post_id'], post['post_url'])
+return u"{}_{}_{}_{}:{}".format(
+POST_ID_PREFIX,
+post["blog_id"],
+post["user_id"],
+post["post_id"],
+post["post_url"],
+)
 def getCommentId(self, comment):
     """Build the unique and constant id of a comment

     The id is the id of the parent post (as stored in self.posts_data)
     followed by "_comment_" and the Dotclear comment id.
     @param comment(dict): parsed comment, must hold "post_id" and "comment_id"
     @return (unicode): unique comment item id
     """
     parent_blog = self.posts_data[comment["post_id"]]["blog"]
     return u"{}_comment_{}".format(parent_blog["id"], comment["comment_id"])
 def getTime(self, data, key):
 """Parse time as given by dotclear, with timezone handling
 @param data(dict): dotclear data (post or comment)
 except IndexError:
 raise exceptions.ParsingError("Data was expected")
 if char == '"':
 # we have reached the end of this field,
 # we try to parse a new one
-yield u''.join(buf)
+yield u"".join(buf)
 buf = []
 idx += 1
 try:
 separator = fields_data[idx]
 except IndexError:
 return
-if separator != u',':
+if separator != u",":
 raise exceptions.ParsingError("Field separator was expeceted")
 idx += 1
-break # we have a new field
+break  # we have a new field
-elif char == u'\\':
+elif char == u"\\":
 idx += 1
 try:
 char = ESCAPE_MAP[fields_data[idx]]
 except IndexError:
 raise exceptions.ParsingError("Escaped char was expected")
 def parseFields(self, headers, data):
     """Associate each header name with the matching field of a raw line

     @param headers(list): column names of the section being parsed
     @param data(unicode): raw line, split into fields by self.readFields
     @return (dict): header name => field value; pairing stops as soon as
         either the headers or the fields are exhausted
     """
     # zip() yields the same pairs as itertools.izip() on Python 2 and, unlike
     # izip, is also available on Python 3
     return dict(zip(headers, self.readFields(data)))
 def postHandler(self, headers, data, index):
     """Parse one line of the "post" section and store it in self.posts_data

     @param headers(list): column names of the section
     @param data(unicode): raw line holding the post fields
     @param index(int): position of the line in its section (only logged)
     """
     entry = self.parseFields(headers, data)
     log.debug(u"({}) post found: {}".format(index, entry["post_title"]))
     item_id = self.getPostId(entry)
     published = self.getTime(entry, "post_creadt")
     updated = self.getTime(entry, "post_upddt")
     # user info is not in the archive, only the user id is available
     # TODO: option to specify user info
     author = entry["user_id"]
     body = u"{}{}".format(entry["post_content_xhtml"], entry["post_excerpt_xhtml"])
     mb_data = {
         "id": item_id,
         "published": published,
         "updated": updated,
         "author": author,
         "content_xhtml": body,
         "title": entry["post_title"],
         "allow_comments": C.boolConst(bool(int(entry["post_open_comment"]))),
     }
     url = u"/post/{}".format(entry["post_url"])
     # comments are appended later to the inner list by commentHandler
     self.posts_data[entry["post_id"]] = {
         "blog": mb_data,
         "comments": [[]],
         "url": url,
     }
 def metaHandler(self, headers, data, index):
     """Parse one line of the "meta" section and remember tags per post

     Only metadata whose "meta_type" is "tag" is kept: its "meta_id" is added
     to the set stored in self.tags under the "post_id" of the line.
     @param headers(list): column names of the section
     @param data(unicode): raw line holding the metadata fields
     @param index(int): position of the line in its section (unused)
     """
     entry = self.parseFields(headers, data)
     if entry["meta_type"] != "tag":
         return
     self.tags.setdefault(entry["post_id"], set()).add(entry["meta_id"])
 def metaFinishedHandler(self):
     """Attach the collected tags to their posts once "meta" is fully read

     Every set of tags gathered in self.tags is written to the "blog" data of
     the matching post with data_format.iter2dict, then self.tags is deleted.
     """
     for post_id, post_tags in self.tags.iteritems():
         blog_item = self.posts_data[post_id]["blog"]
         data_format.iter2dict("tag", post_tags, blog_item)
     del self.tags
 def commentHandler(self, headers, data, index):
     """Parse one line of the "comment" section and attach it to its post

     @param headers(list): column names of the section
     @param data(unicode): raw line holding the comment fields
     @param index(int): position of the line in its section (unused)
     """
     entry = self.parseFields(headers, data)
     site = entry["comment_site"]
     content = entry["comment_content"]
     if site:
         # we don't use atom:uri because it's used for jid in XMPP, so the
         # author website is appended to the comment body instead
         safe_url = cgi.escape(site).replace('"', u"%22")
         content = u'{}\n<hr>\n<a href="{}">author website</a>'.format(
             content, safe_url
         )
     # we don't keep email addresses to avoid the author being spammed
     # (they would be publicly available otherwise), hence no 'author_email'
     mb_data = {
         "id": self.getCommentId(entry),
         "published": self.getTime(entry, "comment_dt"),
         "updated": self.getTime(entry, "comment_upddt"),
         "author": entry["comment_author"],
         "content_xhtml": content,
     }
     thread = self.posts_data[entry["post_id"]]["comments"][0]
     thread.append({"blog": mb_data, "comments": [[]]})
 def parse(self, db_path):
 # Read the Dotclear flat-file export found at db_path line by line and hand
 # each record to the "<data_type>Handler" method of this parser, if any.
 # Returns a tuple: (iterator over self.posts_data values, number of posts).
 # NOTE(review): indentation is not visible in this view, so the nesting
 # described in the comments below is inferred from the statements —
 # confirm against the real file.
 with open(db_path) as f:
 # the first line is a signature; the version is taken from its second
 # "|"-separated field, or None when there is no such field
-signature = f.readline().decode('utf-8')
+signature = f.readline().decode("utf-8")
 try:
-version = signature.split('|')[1]
+version = signature.split("|")[1]
 except IndexError:
 version = None
 log.debug(u"Dotclear version: {}".format(version))
 # state of the section being read: its name, its column names and the
 # position of the next record inside it
 data_type = None
 data_headers = None
 index = None
 while True:
-buf = f.readline().decode('utf-8')
+buf = f.readline().decode("utf-8")
 # readline() returns an empty string only at end of file
 if not buf:
 break
 # a line starting with "[" opens a new section: "[<type> <col>,<col>,...]"
-if buf.startswith('['):
+if buf.startswith("["):
-header = buf.split(' ', 1)
+header = buf.split(" ", 1)
 # drop the leading "[" to get the section name
 data_type = header[0][1:]
 if data_type not in KNOWN_DATA_TYPES:
 log.warning(u"unkown data type: {}".format(data_type))
 index = 0
 try:
-data_headers = header[1].split(',')
+data_headers = header[1].split(",")
 # we need to remove the ']' from the last header
 last_header = data_headers[-1]
-data_headers[-1] = last_header[:last_header.rfind(']')]
+data_headers[-1] = last_header[: last_header.rfind("]")]
 except IndexError:
 # header[1] is missing when the section line holds no space
 log.warning(u"Can't read data)")
 # presumably the else of "if buf.startswith(...)": any other line is a
 # record (or a blank separator) of the current section
 else:
 # lines found outside of any section are ignored
 if data_type is None:
 continue
 buf = buf.strip()
 # a blank line closes a known section: call the optional
 # "<data_type>FinishedHandler" method, then forget the section
 if not buf and data_type in KNOWN_DATA_TYPES:
 try:
-finished_handler = getattr(self, '{}FinishedHandler'.format(data_type))
+finished_handler = getattr(
+self, "{}FinishedHandler".format(data_type)
+)
 except AttributeError:
 # no "finished" hook is defined for this data type
 pass
 else:
 finished_handler()
 log.debug(u"{} data finished".format(data_type))
 data_type = None
 continue
 assert data_type
 try:
-fields_handler = getattr(self, '{}Handler'.format(data_type))
+fields_handler = getattr(self, "{}Handler".format(data_type))
 except AttributeError:
 # no handler for this data type: the record is skipped
 pass
 else:
 fields_handler(data_headers, buf, index)
 index += 1
 return (self.posts_data.itervalues(), len(self.posts_data))
 class DotclearImport(object):
     """Plugin entry point: registers the Dotclear importer with BLOG_IMPORT"""

     def __init__(self, host):
         """Register the "dotclear" importer

         @param host: object giving access to the loaded plugins through its
             "plugins" mapping
         """
         log.info(_("plugin Dotclear Import initialization"))
         self.host = host
         blog_import = host.plugins["BLOG_IMPORT"]
         blog_import.register("dotclear", self.DcImport, SHORT_DESC, LONG_DESC)

     def DcImport(self, client, location, options=None):
         """Parse a Dotclear backup in a separate thread

         @param client: unused here
         @param location(unicode): absolute path to the Dotclear flat file
         @param options: unused here
         @return (Deferred): result of DotclearParser.parse run with
             threads.deferToThread
         @raise exceptions.DataError: location is not an absolute path
         """
         if os.path.isabs(location):
             return threads.deferToThread(DotclearParser().parse, location)
         raise exceptions.DataError(
             u"An absolute path to backup data need to be given as location"
         )

Mercurial > libervia-backend

comparison sat/plugins/plugin_blog_import_dotclear.py @ 2624:56f94936df1e