view frontends/src/jp/common.py @ 2269:606ff34d30f2

jp (blog, common): moved and improved edit code from blog: - a new "common" module is there for code commonly used in commands - moved code for editing item with $EDITOR there - moved code to identify item to edit there - aforementioned functions have been made generic - a class BaseEdit is now available to implement edition - HTTPS links are handled (only HTTP links were working before) - item id can be used if all previous methods fail (url, keyword, file path).
author Goffi <goffi@goffi.org>
date Tue, 27 Jun 2017 16:23:28 +0200
parents
children 07caa12be945
line wrap: on
line source

#!/usr/bin/env python2
# -*- coding: utf-8 -*-

# jp: a SàT command line tool
# Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from sat_frontends.jp.constants import Const as C
from sat.core.i18n import _
from sat.tools.common import regex
from sat.tools import config
from ConfigParser import NoSectionError, NoOptionError
import json
import os
import os.path
import time
import tempfile
import subprocess
import glob

# Default arguments used for some known editors (editing with metadata).
# Keys are editor executable names; values are argument templates in which
# {content_file} and {metadata_file} are placeholders for the two file paths.
VIM_SPLIT_ARGS = "-c 'vsplit|wincmd w|next|wincmd w'"
EMACS_SPLIT_ARGS = '--eval "(split-window-horizontally)"'
# every known editor receives the content file first, then the metadata file
_BOTH_FILES = ' {content_file} {metadata_file}'
EDITOR_ARGS_MAGIC = dict(
    vim=VIM_SPLIT_ARGS + _BOTH_FILES,
    gvim=VIM_SPLIT_ARGS + ' --nofork' + _BOTH_FILES,
    emacs=EMACS_SPLIT_ARGS + _BOTH_FILES,
    xemacs=EMACS_SPLIT_ARGS + _BOTH_FILES,
    nano=' -F' + _BOTH_FILES,
)

# maximum number of drafts kept in the backup directory (see BaseEdit.secureUnlink)
SECURE_UNLINK_MAX = 10
# name of the backup sub-directory, inside the category directory
SECURE_UNLINK_DIR = ".backup"
# file name suffix which identifies metadata files
METADATA_SUFF = '_metadata.json'


def getTmpDir(sat_conf, cat_dir, sub_dir=None):
    """Build the path of the directory where temporary files are stored

    The result is <local_dir>/<cat_dir>[/<escaped sub_dir>], where local_dir
    is read from the "local_dir" option of SàT configuration.
    Nothing is created on disk, only the path is computed.

    @param sat_conf(ConfigParser.ConfigParser): instance opened on sat configuration
    @param cat_dir(str): directory of the category (e.g. "blog")
    @param sub_dir(str, None): sub directory where data need to be put
        profile can be used here, or special directory name
        sub_dir is escaped with regex.pathEscape to be usable in a path
        (use regex.pathUnescape to find the initial str)
        if None, the category directory itself is returned
    @return (str): path to the dir
    """
    # NOTE(review): Exception as default presumably makes getConfig raise when
    # the option is missing -- confirm against sat.tools.config
    local_dir = config.getConfig(sat_conf, '', 'local_dir', Exception)
    if sub_dir is None:
        return os.path.join(local_dir, cat_dir)
    return os.path.join(local_dir, cat_dir, regex.pathEscape(sub_dir))


class BaseEdit(object):
    u"""base class for editing commands

    This class allows to edit file for PubSub or something else.
    It works with temporary files in SàT local_dir, in a "cat_dir" subdir

    Several attributes used here are not defined by this class: self.disp,
    self.args, self.profile, self.parser and self.parse_args.
    They are presumably provided by the command class this one is combined with.
    """

    def __init__(self, host, cat_dir, use_metadata=True):
        """
        @param host: host object of the running command
            its disp() and quit() methods are used by this class
        @param cat_dir(unicode): directory to use for drafts
            this will be a sub-directory of SàT's local_dir
        @param use_metadata(bool): True if edition needs a second file for metadata
            most of signatures change with use_metadata, with an additional metadata argument.
            This is done to raise error if a command needs metadata but forget the flag, and vice versa
        """
        self.host = host
        self.sat_conf = config.parseMainConf()
        # encoded once and for all: paths are handled as byte strings in this class
        self.cat_dir = cat_dir.encode('utf-8')
        self.use_metadata = use_metadata

    def secureUnlink(self, path):
        """Unlink given path after keeping it for a while

        This method is used to prevent accidental deletion of a draft:
        the file is moved to the SECURE_UNLINK_DIR backup directory instead of
        being deleted. If there are more files in the backup directory than
        SECURE_UNLINK_MAX (doubled when metadata are used), oldest files
        (according to their modification time) are deleted.
        Files which are not in SàT temporary hierarchy are left untouched.
        @param path(str): file to unlink
        @raise OSError: path is not a regular file
        """
        if not os.path.isfile(path):
            raise OSError(u"path must link to a regular file")
        if not path.startswith(getTmpDir(self.sat_conf, self.cat_dir)):
            self.disp(u"File {} is not in SàT temporary hierarchy, we do not remove it".format(path.decode('utf-8')), 2)
            return
        # we have 2 files per draft with use_metadata, so we double max
        unlink_max = SECURE_UNLINK_MAX * 2 if self.use_metadata else SECURE_UNLINK_MAX
        backup_dir = getTmpDir(self.sat_conf, self.cat_dir, SECURE_UNLINK_DIR)
        if not os.path.exists(backup_dir):
            os.makedirs(backup_dir)
        filename = os.path.basename(path)
        backup_path = os.path.join(backup_dir, filename)
        # we move file to backup dir
        self.host.disp(u"Backuping file {src} to {dst}".format(
            src=path.decode('utf-8'), dst=backup_path.decode('utf-8')), 1)
        os.rename(path, backup_path)
        # and if we exceeded the limit, we remove older file
        backup_files = [os.path.join(backup_dir, f) for f in os.listdir(backup_dir)]
        if len(backup_files) > unlink_max:
            # oldest files first
            backup_files.sort(key=lambda backup_file: os.stat(backup_file).st_mtime)
            for old_path in backup_files[:len(backup_files) - unlink_max]:
                self.host.disp(u"Purging backup file {}".format(old_path.decode('utf-8')), 2)
                os.unlink(old_path)

    def runEditor(self, editor_args_opt, content_file_path,
                 content_file_obj, meta_file_path=None, meta_ori=None):
        """run editor to edit content and metadata

        Once the editor has exited successfully, content (and metadata if
        used) are read again, and self.publish is called if something has been
        modified. Temporary files are then moved to backup with secureUnlink.
        Many error paths end with self.host.quit, which is presumably expected
        to terminate the program.

        @param editor_args_opt(unicode): option in [jp] section in configuration for
            specific args
        @param content_file_path(str): path to the content file
        @param content_file_obj(file): opened file instance
            it is closed by this method before the editor is launched
        @param meta_file_path(str, None): metadata file path
            if None metadata will not be used
        @param meta_ori(dict, None): original content of metadata
            can't be used if use_metadata is False
        """
        if not self.use_metadata:
            assert meta_file_path is None
            assert meta_ori is None

        # we calculate hashes to check for modifications
        import hashlib
        content_file_obj.seek(0)
        tmp_ori_hash = hashlib.sha1(content_file_obj.read()).digest()
        content_file_obj.close()

        # we prepare arguments
        editor = config.getConfig(self.sat_conf, 'jp', 'editor') or os.getenv('EDITOR', 'vi')
        try:
            # is there custom arguments in sat.conf ?
            editor_args = config.getConfig(self.sat_conf, 'jp', editor_args_opt, Exception)
        except (NoOptionError, NoSectionError):
            # no, we check if we know the editor and have special arguments
            editor_args = EDITOR_ARGS_MAGIC.get(os.path.basename(editor), '')
        parse_kwargs = {'content_file': content_file_path}
        if self.use_metadata:
            parse_kwargs['metadata_file'] = meta_file_path
        args = self.parse_args(editor_args, **parse_kwargs)
        if not args:
            args = [content_file_path]

        # actual editing
        editor_exit = subprocess.call([editor] + args)

        # edition will now be checked, and data will be sent if it was a success
        if editor_exit != 0:
            self.disp(u"Editor exited with an error code, so temporary file has not be deleted, and item is not published.\nYou can find temporary file at {path}".format(
                path=content_file_path), error=True)
            return

        # main content
        try:
            with open(content_file_path, 'rb') as f:
                content = f.read()
        except (OSError, IOError):
            self.disp(u"Can't read file at {content_path}, have it been deleted?\nCancelling edition".format(
                content_path=content_file_path), error=True)
            self.host.quit(C.EXIT_NOT_FOUND)

        # metadata
        if self.use_metadata:
            try:
                with open(meta_file_path, 'rb') as f:
                    metadata = json.load(f)
            except (OSError, IOError):
                # the placeholder must match the keyword given to format,
                # else a KeyError would hide the real error
                self.disp(u"Can't read file at {meta_path}, have it been deleted?\nCancelling edition".format(
                    meta_path=meta_file_path), error=True)
                self.host.quit(C.EXIT_NOT_FOUND)
            except ValueError:
                self.disp(u"Can't parse metadata, please check it is correct JSON format. Cancelling edition.\n" +
                    "You can find tmp file at {content_path} and temporary meta file at {meta_path}.".format(
                    content_path=content_file_path,
                    meta_path=meta_file_path), error=True)
                self.host.quit(C.EXIT_DATA_ERROR)

        if self.use_metadata and not C.bool(metadata.get('publish', "true")):
            self.disp(u'Publication blocked by "publish" key in metadata, cancelling edition.\n\n' +
                "temporary file path:\t{content_path}\nmetadata file path:\t{meta_path}".format(
                content_path=content_file_path, meta_path=meta_file_path), error=True)
            self.host.quit()

        if len(content) == 0:
            self.disp(u"Content is empty, cancelling the blog edition")
            if not content_file_path.startswith(getTmpDir(self.sat_conf, self.cat_dir)):
                self.disp(u"File are not in SàT temporary hierarchy, we do not remove them", 2)
                self.host.quit()
            self.disp(u"Deletion of {}".format(content_file_path.decode('utf-8')), 2)
            os.unlink(content_file_path)
            # there is a metadata file only if metadata are used
            if self.use_metadata:
                self.disp(u"Deletion of {}".format(meta_file_path.decode('utf-8')), 2)
                os.unlink(meta_file_path)
            self.host.quit()

        # time to re-check the hash
        elif (tmp_ori_hash == hashlib.sha1(content).digest() and
              (not self.use_metadata or meta_ori == metadata)):
            self.disp(u"The content has not been modified, cancelling the blog edition")
            self.host.quit()

        else:
            # we can now send the item
            content = content.decode('utf-8-sig') # we use utf-8-sig to avoid BOM
            try:
                if self.use_metadata:
                    self.publish(content, metadata)
                else:
                    self.publish(content)
            except Exception as e:
                if self.use_metadata:
                    self.disp(u"Error while sending your item, the temporary files have been kept at {content_path} and {meta_path}: {reason}".format(
                        content_path=content_file_path, meta_path=meta_file_path, reason=e), error=True)
                else:
                    self.disp(u"Error while sending your item, the temporary file has been kept at {content_path}: {reason}".format(
                        content_path=content_file_path, reason=e), error=True)
                self.host.quit(1)

        self.secureUnlink(content_file_path)
        # meta_file_path is None when metadata are not used,
        # and secureUnlink only accepts a path to an existing file
        if self.use_metadata:
            self.secureUnlink(meta_file_path)

    def publish(self, content):
        """Send edited content, must be implemented by subclasses

        @param content(unicode): edited content
        if metadata is needed, publish will be called with it as last argument
        @raise NotImplementedError: the method has not been overridden
        """
        raise NotImplementedError

    def getTmpFile(self, suff):
        """Create a temporary file

        The file is created in the temporary directory of the category and of
        current profile (self.profile), the directory is created if needed.
        self.host.quit(1) is called if directory or file can't be created.
        @param suff (str): suffix to use for the filename
        @return (tuple(file, str)): opened (w+b) file object and file path
        """
        tmp_dir = getTmpDir(self.sat_conf, self.cat_dir, self.profile.encode('utf-8'))
        if not os.path.exists(tmp_dir):
            try:
                os.makedirs(tmp_dir)
            except OSError as e:
                self.disp(u"Can't create {path} directory: {reason}".format(
                    path=tmp_dir, reason=e), error=True)
                self.host.quit(1)
        try:
            # self.cat_dir is already encoded (see __init__), encoding it again
            # would fail with non ASCII names
            fd, path = tempfile.mkstemp(suffix=suff,
                prefix=time.strftime(self.cat_dir + '_%Y-%m-%d_%H:%M:%S_'),
                dir=tmp_dir, text=True)
            return os.fdopen(fd, 'w+b'), path
        except OSError as e:
            self.disp(u"Can't create temporary file: {reason}".format(reason=e), error=True)
            self.host.quit(1)

    def getCurrentFile(self, profile):
        """Get most recently edited file

        self.host.quit(1) is called if no draft is found.
        @param profile(unicode): profile linked to the draft
        @return(str): full path of current file
        """
        # we guess the blog item currently edited by choosing
        # the most recent file corresponding to temp file pattern
        # in tmp_dir, excluding metadata files
        # self.cat_dir is already encoded (see __init__), so it is used as is
        tmp_dir = getTmpDir(self.sat_conf, self.cat_dir, profile.encode('utf-8'))
        pattern = os.path.join(tmp_dir, self.cat_dir + '_*')
        available = [path for path in glob.glob(pattern) if not path.endswith(METADATA_SUFF)]
        if not available:
            self.disp(u"Could not find any content draft in {path}".format(path=tmp_dir), error=True)
            self.host.quit(1)
        return max(available, key=lambda path: os.stat(path).st_mtime)

    def getItemData(self, service, node, item):
        """return formatted content and metadata (or not if use_metadata is false)

        must be implemented by subclasses
        @raise NotImplementedError: the method has not been overridden
        """
        raise NotImplementedError

    def getTmpSuff(self):
        """return suffix used for content file (without the leading dot)"""
        return 'xml'

    def getItemPath(self, item):
        """retrieve item path (i.e. service and node) from item argument

        This method is obviously only useful for edition of PubSub based features
        service, node and item must be named like this in args
        @param item(unicode): item to get or url or magic keyword
            item argument can be used to specify :
                - HTTP(S) URL
                - XMPP URL
                - keyword, which can be:
                    - new: create new item
                    - last: retrieve last published item
                    - current: continue current local draft
                - file path
                - item id
        @return (tuple): (service, node, content file path, content file object),
            with metadata as additional last element if use_metadata is set
            (metadata is None when nothing has been retrieved from an item)
        """
        command = item.lower()
        pubsub_service = self.args.service
        pubsub_node = self.args.node
        pubsub_item = None

        if command not in ('new', 'last', 'current'):
            # we have probably an URL, we try to parse it
            import urlparse
            url = self.args.item
            parsed_url = urlparse.urlsplit(url)
            if parsed_url.scheme.startswith('http'):
                self.disp(u"{} URL found, trying to find associated xmpp: URI".format(parsed_url.scheme.upper()),1)
                # HTTP URL, we try to find xmpp: links
                try:
                    from lxml import etree
                except ImportError:
                    self.disp(u"lxml module must be installed to use http(s) scheme, please install it with \"pip install lxml\"", error=True)
                    self.host.quit(1)
                import urllib2
                parser = etree.HTMLParser()
                try:
                    root = etree.parse(urllib2.urlopen(url), parser)
                except etree.XMLSyntaxError as e:
                    self.disp(_(u"Can't parse HTML page : {msg}").format(msg=e))
                    links = []
                else:
                    links = root.xpath("//link[@rel='alternate' and starts-with(@href, 'xmpp:')]")
                if not links:
                    self.disp(u'Could not find alternate "xmpp:" URI, can\'t find associated XMPP PubSub node/item', error=True)
                    self.host.quit(1)
                # the first alternate link is used
                url = links[0].get('href')
                parsed_url = urlparse.urlsplit(url)

            if parsed_url.scheme == 'xmpp':
                if self.args.service or self.args.node:
                    self.parser.error(_(u"You can't use URI and --service or --node at the same time"))

                self.disp(u"XMPP URI used: {}".format(url),2)
                # XXX: if we have not xmpp: URI here, we'll take the data as a file path
                pubsub_service = parsed_url.path
                pubsub_data = urlparse.parse_qs(parsed_url.query)
                try:
                    pubsub_node = pubsub_data['node'][0]
                except KeyError:
                    self.disp(u'No node found in xmpp: URI, can\'t retrieve item', error=True)
                    self.host.quit(1)
                pubsub_item = pubsub_data.get('item',[None])[0]
                if pubsub_item is not None:
                    command = 'edit' # XXX: edit command is only used internaly, it similar to last, but with the item given in the URL
                else:
                    command = 'new'

        if command in ('new', 'last', 'edit'):
            # we need a temporary file
            tmp_suff = '.' + self.getTmpSuff()
            content_file_obj, content_file_path = self.getTmpFile(tmp_suff)
            if command == 'new':
                self.disp(u'Editing a new item', 2)
                if self.use_metadata:
                    metadata = None
            elif command in ('last', 'edit'):
                self.disp(u'Editing requested published item', 2)
                try:
                    if self.use_metadata:
                        content, metadata = self.getItemData(pubsub_service, pubsub_node, pubsub_item)
                    else:
                        content = self.getItemData(pubsub_service, pubsub_node, pubsub_item)
                except Exception as e:
                    self.disp(u"Error while retrieving last item: {}".format(e))
                    self.host.quit(1)
                content_file_obj.write(content.encode('utf-8'))
                content_file_obj.seek(0)
        else:
            if self.use_metadata:
                metadata = None
            if command == 'current':
                # user wants to continue current draft
                content_file_path = self.getCurrentFile(self.profile)
                self.disp(u'Continuing edition of current draft', 2)
                content_file_obj = open(content_file_path, 'r+b')
            else:
                # "~" must be expanded before checking the file,
                # else a path like "~/draft" would never be found
                draft_path = os.path.expanduser(self.args.item)
                if os.path.isfile(draft_path):
                    # there is an existing draft that we use
                    content_file_path = draft_path
                    content_file_obj = open(content_file_path, 'r+b')
                else:
                    # last chance, it should be an item
                    tmp_suff = '.' + self.getTmpSuff()
                    content_file_obj, content_file_path = self.getTmpFile(tmp_suff)

                    if self.use_metadata:
                        content, metadata = self.getItemData(pubsub_service, pubsub_node, self.args.item)
                    else:
                        content = self.getItemData(pubsub_service, pubsub_node, self.args.item)
                    content_file_obj.write(content.encode('utf-8'))
                    content_file_obj.seek(0)

        if self.use_metadata:
            return pubsub_service, pubsub_node, content_file_path, content_file_obj, metadata
        else:
            return pubsub_service, pubsub_node, content_file_path, content_file_obj