view frontends/src/jp/common.py @ 2292:bd4d8c73b1d3

jp (pubsub/node/affiliations/set): call checkURI
author Goffi <goffi@goffi.org>
date Fri, 30 Jun 2017 00:54:37 +0200
parents 4bc9a2c2d6c9
children 276e546b7619
line wrap: on
line source

#!/usr/bin/env python2
# -*- coding: utf-8 -*-

# jp: a SàT command line tool
# Copyright (C) 2009-2016 Jérôme Poisson (goffi@goffi.org)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from sat_frontends.jp.constants import Const as C
from sat.core.i18n import _
from sat.tools.common import regex
from sat.tools.common import uri
from sat.tools import config
from ConfigParser import NoSectionError, NoOptionError
import json
import os
import os.path
import time
import tempfile
import subprocess
import glob
import shlex

# default arguments used for some known editors (editing with metadata)
# the *_SPLIT_ARGS values are editor options asking for a split view,
# they are combined below with the two files to open
VIM_SPLIT_ARGS = "-c 'vsplit|wincmd w|next|wincmd w'"
EMACS_SPLIT_ARGS = '--eval "(split-window-horizontally)"'
# keys are editor executable names (BaseEdit.runEditor looks them up with the
# basename of the configured editor), values are command line templates where
# {content_file} and {metadata_file} are replaced by the paths of the edited files
EDITOR_ARGS_MAGIC = {
    'vim': VIM_SPLIT_ARGS + ' {content_file} {metadata_file}',
    'gvim': VIM_SPLIT_ARGS + ' --nofork {content_file} {metadata_file}',
    'emacs': EMACS_SPLIT_ARGS + ' {content_file} {metadata_file}',
    'xemacs': EMACS_SPLIT_ARGS + ' {content_file} {metadata_file}',
    'nano': ' -F {content_file} {metadata_file}',
    }

# maximum number of files kept in the backup directory by BaseEdit.secureUnlink
# (the limit is doubled when metadata files are used)
SECURE_UNLINK_MAX = 10
# name of the sub directory (of the category directory) where files are moved
# instead of being deleted right away
SECURE_UNLINK_DIR = ".backup"
# end of the name of metadata files, used to tell them apart from content files
METADATA_SUFF = '_metadata.json'


def getTmpDir(sat_conf, cat_dir, sub_dir=None):
    """Build the path of the directory where temporary files are stored

    The path is made of the "local_dir" option of the configuration, followed by
    the category directory and, if requested, an escaped sub directory.
    @param sat_conf(ConfigParser.ConfigParser): instance opened on sat configuration
    @param cat_dir(unicode): directory of the category (e.g. "blog")
    @param sub_dir(str, None): sub directory where data need to be put
        profile can be used here, or special directory name
        sub_dir is escaped with regex.pathEscape to be usable in a path
        (use regex.pathUnescape to find initial str)
        None to stay in the category directory
    @return (str): path to the dir
    """
    root_dir = config.getConfig(sat_conf, '', 'local_dir', Exception).encode('utf-8')
    tmp_dir = os.path.join(root_dir, cat_dir.encode('utf-8'))
    if sub_dir is None:
        return tmp_dir
    return os.path.join(tmp_dir, regex.pathEscape(sub_dir))


def parse_args(host, cmd_line, **format_kw):
    """Turn a command line template into a list of arguments

    @param host: object with a "disp" method, used to report a parsing error
    @param cmd_line(unicode): command line as found in sat.conf
    @param format_kw: keywords used for formating each argument
    @return (list(unicode)): list of arguments to pass to subprocess function
        empty list if the command line can't be parsed
    """
    try:
        # the command line is split before the fields are filled, this way
        # a file name with spaces or quotes stays a single argument
        formatted = []
        for token in shlex.split(cmd_line):
            formatted.append(token.format(**format_kw))
    except ValueError as e:
        host.disp(u"Couldn't parse editor cmd [{cmd}]: {reason}".format(cmd=cmd_line, reason=e))
        return []
    return formatted


def checkURI(args):
    """Fill args from an xmpp: URI given in place of the node

    Nothing is done if args.service is already set, if args.node doesn't start
    with "xmpp:", if parsing the URI raises ValueError, or if the URI is not
    a pubsub one.
    Else args.service and args.node are replaced by the values found in the URI,
    and args.item is set to the item of the URI when there is one.
    @param args: parsed command arguments, modified in place
        "service" and "node" attributes are needed, "item" is optional
    """
    # FIXME: Q&D way to handle xmpp: uris, a generic way is needed
    #        and it should be merged with code in BaseEdit
    if args.service or not args.node.startswith('xmpp:'):
        return
    try:
        uri_data = uri.parseXMPPUri(args.node)
    except ValueError:
        # not a parsable URI, the node is used as it is
        return
    if uri_data[u'type'] != 'pubsub':
        return
    args.service = uri_data[u'path']
    args.node = uri_data[u'node']
    # some commands have no "item" argument, and an item explicitly given
    # by the user must not be replaced by the one of the URI
    if u'item' in uri_data and hasattr(args, 'item') and args.item is None:
        args.item = uri_data[u'item']


class BaseEdit(object):
    u"""base class for editing commands

    This class allows to edit file for PubSub or something else.
    It works with temporary files in SàT local_dir, in a "cat_dir" subdir
    """

    def __init__(self, host, cat_dir, use_metadata=False):
        """
        The main SàT configuration is parsed here and kept in self.sat_conf.
        @param host: object used to display messages and to quit (stored in self.host)
        @param cat_dir(unicode): directory to use for drafts
            this will be a sub-directory of SàT's local_dir
        @param use_metadata(bool): True if edition needs a second file for metadata
            most signatures change with use_metadata, with an additional metadata argument.
            This is done to raise an error if a command needs metadata but forgets the flag,
            and vice versa
        """
        self.host, self.use_metadata = host, use_metadata
        self.sat_conf = config.parseMainConf()
        # the encoded version is the one used to build paths
        encoded_cat_dir = cat_dir.encode('utf-8')
        self.cat_dir_str = encoded_cat_dir

    def add_parser_options(self):
        """Add --force-item and --last-item to self.parser, as mutually exclusive flags"""
        exclusive_opts = self.parser.add_mutually_exclusive_group()
        force_item_help = _(u"don't use magic and take item argument as an actual item")
        exclusive_opts.add_argument("--force-item", action='store_true', help=force_item_help)
        last_item_help = _(u"take last item instead of creating a new one if no item id is found")
        exclusive_opts.add_argument("--last-item", action='store_true', help=last_item_help)

    def secureUnlink(self, path):
        """Move given file to the backup directory instead of deleting it right away

        This protects against accidental deletion of a draft.
        Once the file is moved, if the backup directory holds more files than allowed
        (SECURE_UNLINK_MAX, doubled when metadata are used), the least recently
        modified ones are deleted.
        A file outside of the temporary directory of the category is left untouched.
        @param path(str): file to unlink
        @raise OSError: path is not a regular file
        """
        if not os.path.isfile(path):
            raise OSError(u"path must link to a regular file")
        cat_tmp_dir = getTmpDir(self.sat_conf, self.cat_dir_str)
        if not path.startswith(cat_tmp_dir):
            self.disp(u"File {} is not in SàT temporary hierarchy, we do not remove it".format(path.decode('utf-8')), 2)
            return
        # with use_metadata there are 2 files per draft, hence the doubled limit
        if self.use_metadata:
            unlink_max = SECURE_UNLINK_MAX * 2
        else:
            unlink_max = SECURE_UNLINK_MAX
        backup_dir = getTmpDir(self.sat_conf, self.cat_dir_str, SECURE_UNLINK_DIR)
        if not os.path.exists(backup_dir):
            os.makedirs(backup_dir)
        backup_path = os.path.join(backup_dir, os.path.basename(path))
        # the file is moved to the backup directory
        self.host.disp(u"Backuping file {src} to {dst}".format(
            src=path.decode('utf-8'), dst=backup_path.decode('utf-8')), 1)
        os.rename(path, backup_path)
        # then the oldest backups are removed if there are too many of them
        backups = [os.path.join(backup_dir, name) for name in os.listdir(backup_dir)]
        excess = len(backups) - unlink_max
        if excess > 0:
            oldest_first = sorted(backups, key=lambda backup: os.stat(backup).st_mtime)
            for old_backup in oldest_first[:excess]:
                self.host.disp(u"Purging backup file {}".format(old_backup.decode('utf-8')), 2)
                os.unlink(old_backup)

    def runEditor(self, editor_args_opt, content_file_path,
                 content_file_obj, meta_file_path=None, meta_ori=None):
        """Run the editor on content (and metadata), then publish the result

        The editor is the "editor" option of [jp] section in configuration, else
        the EDITOR environment variable, else "vi".
        Once the editor has exited without error, the edition is cancelled if
        the files can't be read, if metadata block the publication, if the content
        is empty or if nothing has been modified. Otherwise self.publish is called,
        then the temporary files are moved away with secureUnlink.
        self.host.quit is expected to stop the program (it is called each time
        the edition is cancelled).
        @param editor_args_opt(unicode): option in [jp] section in configuration for
            specific args
        @param content_file_path(str): path to the content file
        @param content_file_obj(file): opened file instance, it is closed by this method
        @param meta_file_path(str, None): metadata file path
            if None metadata will not be used
        @param meta_ori(dict, None): original content of metadata
            can't be used if use_metadata is False
        """
        if not self.use_metadata:
            assert meta_file_path is None
            assert meta_ori is None

        # we calculate hashes to check for modifications
        import hashlib
        content_file_obj.seek(0)
        tmp_ori_hash = hashlib.sha1(content_file_obj.read()).digest()
        content_file_obj.close()

        # we prepare arguments
        editor = config.getConfig(self.sat_conf, 'jp', 'editor') or os.getenv('EDITOR', 'vi')
        try:
            # is there custom arguments in sat.conf ?
            editor_args = config.getConfig(self.sat_conf, 'jp', editor_args_opt, Exception)
        except (NoOptionError, NoSectionError):
            # no, we check if we know the editor and have special arguments
            if self.use_metadata:
                editor_args = EDITOR_ARGS_MAGIC.get(os.path.basename(editor), '')
            else:
                editor_args = ''
        parse_kwargs = {'content_file': content_file_path}
        if self.use_metadata:
            parse_kwargs['metadata_file'] = meta_file_path
        args = parse_args(self.host, editor_args, **parse_kwargs)
        if not args:
            # no (valid) arguments template, the editor is run on the content file only
            args = [content_file_path]

        # actual editing
        editor_exit = subprocess.call([editor] + args)

        # edition will now be checked, and data will be sent if it was a success
        if editor_exit != 0:
            self.disp(u"Editor exited with an error code, so temporary file has not be deleted, and item is not published.\nYou can find temporary file at {path}".format(
                path=content_file_path), error=True)
            return

        # main content
        try:
            with open(content_file_path, 'rb') as f:
                content = f.read()
        except (OSError, IOError):
            self.disp(u"Can't read file at {content_path}, have it been deleted?\nCancelling edition".format(
                content_path=content_file_path), error=True)
            self.host.quit(C.EXIT_NOT_FOUND)

        # metadata
        if self.use_metadata:
            try:
                with open(meta_file_path, 'rb') as f:
                    metadata = json.load(f)
            except (OSError, IOError):
                # the field name must match the keyword given to format,
                # else a KeyError would hide the real issue
                self.disp(u"Can't read file at {meta_path}, have it been deleted?\nCancelling edition".format(
                    meta_path=meta_file_path), error=True)
                self.host.quit(C.EXIT_NOT_FOUND)
            except ValueError:
                self.disp(u"Can't parse metadata, please check it is correct JSON format. Cancelling edition.\n" +
                    "You can find tmp file at {content_path} and temporary meta file at {meta_path}.".format(
                    content_path=content_file_path,
                    meta_path=meta_file_path), error=True)
                self.host.quit(C.EXIT_DATA_ERROR)

        if self.use_metadata and not C.bool(metadata.get('publish', "true")):
            self.disp(u'Publication blocked by "publish" key in metadata, cancelling edition.\n\n' +
                "temporary file path:\t{content_path}\nmetadata file path:\t{meta_path}".format(
                content_path=content_file_path, meta_path=meta_file_path), error=True)
            self.host.quit()

        if not content:
            self.disp(u"Content is empty, cancelling the edition")
            if not content_file_path.startswith(getTmpDir(self.sat_conf, self.cat_dir_str)):
                self.disp(u"File are not in SàT temporary hierarchy, we do not remove them", 2)
                self.host.quit()
            self.disp(u"Deletion of {}".format(content_file_path.decode('utf-8')), 2)
            os.unlink(content_file_path)
            if self.use_metadata:
                self.disp(u"Deletion of {}".format(meta_file_path.decode('utf-8')), 2)
                os.unlink(meta_file_path)
            self.host.quit()

        # time to re-check the hash
        elif (tmp_ori_hash == hashlib.sha1(content).digest() and
              (not self.use_metadata or meta_ori == metadata)):
            self.disp(u"The content has not been modified, cancelling the edition")
            self.host.quit()

        else:
            # we can now send the item
            content = content.decode('utf-8-sig') # we use utf-8-sig to avoid BOM
            try:
                if self.use_metadata:
                    self.publish(content, metadata)
                else:
                    self.publish(content)
            except Exception as e:
                if self.use_metadata:
                    self.disp(u"Error while sending your item, the temporary files have been kept at {content_path} and {meta_path}: {reason}".format(
                        content_path=content_file_path, meta_path=meta_file_path, reason=e), error=True)
                else:
                    self.disp(u"Error while sending your item, the temporary file has been kept at {content_path}: {reason}".format(
                        content_path=content_file_path, reason=e), error=True)
                self.host.quit(1)

        # the item has been sent, drafts are not needed anymore
        self.secureUnlink(content_file_path)
        if self.use_metadata:
            self.secureUnlink(meta_file_path)

    def publish(self, content):
        # if metadata is needed, publish will be called with it last argument
        raise NotImplementedError

    def getTmpFile(self, suff):
        """Create and open a new temporary file in the drafts directory of the profile

        The directory is created when it doesn't exist yet.
        If the directory or the file can't be created, an error is displayed
        and self.host.quit(1) is called.
        @param suff (str): suffix to use for the filename
        @return (tuple(file, str)): opened (w+b) file object and file path
        """
        draft_name = self.cat_dir_str
        drafts_dir = getTmpDir(self.sat_conf, self.cat_dir_str, self.profile.encode('utf-8'))
        if not os.path.exists(drafts_dir):
            try:
                os.makedirs(drafts_dir)
            except OSError as e:
                self.disp(u"Can't create {path} directory: {reason}".format(
                    path=drafts_dir, reason=e), error=True)
                self.host.quit(1)
        try:
            # the name starts with the category and the current date and time
            encoded_suff = suff.encode('utf-8')
            name_prefix = time.strftime(draft_name + '_%Y-%m-%d_%H:%M:%S_')
            fd, tmp_path = tempfile.mkstemp(
                suffix=encoded_suff, prefix=name_prefix, dir=drafts_dir, text=True)
            tmp_file = os.fdopen(fd, 'w+b')
        except OSError as e:
            self.disp(u"Can't create temporary file: {reason}".format(reason=e), error=True)
            self.host.quit(1)
        else:
            return tmp_file, tmp_path

    def getCurrentFile(self, profile):
        """Find the draft which has been modified most recently

        If no draft is found, an error is displayed and self.host.quit(1) is called.
        @param profile(unicode): profile linked to the draft
        @return(str): full path of current file
        """
        # the item currently edited is guessed: it is the most recent file
        # of the drafts directory which matches the temporary files pattern
        # and which is not a metadata file
        drafts_dir = getTmpDir(self.sat_conf, self.cat_dir_str, profile.encode('utf-8'))
        pattern = os.path.join(drafts_dir, self.cat_dir_str + '_*')
        drafts = []
        for candidate in glob.glob(pattern):
            if candidate.endswith(METADATA_SUFF):
                continue
            drafts.append(candidate)
        if not drafts:
            self.disp(u"Could not find any content draft in {path}".format(path=drafts_dir), error=True)
            self.host.quit(1)

        def modification_time(draft_path):
            return os.stat(draft_path).st_mtime

        return max(drafts, key=modification_time)

    def getItemData(self, service, node, item):
        """return formatted content, metadata (or not if use_metadata is false), and item id"""
        raise NotImplementedError

    def getTmpSuff(self):
        """return suffix used for content file"""
        return u'xml'

    def getItemPath(self, item):
        """retrieve item path (i.e. service and node) from item argument

        This method is obviously only useful for edition of PubSub based features
        service, node and item must be named like this in args
        The file used for the edition is opened here: it is either a new temporary
        file (filled with the content of the item when an existing one is edited),
        or an existing draft.
        @param item(unicode): item to get or url or magic keyword
            item argument can be used to specify :
                - HTTP(S) URL
                - XMPP URL
                - keyword, which can be:
                    - new: create new item
                    - last: retrieve last published item
                    - current: continue current local draft
                - file path
                - item id
        @return (tuple): service, node, item id (None if not known),
            path of the content file and opened content file object,
            followed by metadata (None if there is none) when use_metadata is set
        """
        force_item = self.args.force_item
        if force_item and not item:
            self.parser.error(_(u"an item id must be specified if you use --force-item"))
        command = item.lower()
        pubsub_service = self.args.service
        pubsub_node = self.args.node
        pubsub_item = None

        if not force_item and command not in ('new', 'last', 'current'):
            # we have probably an URL, we try to parse it
            import urlparse
            url = self.args.item
            parsed_url = urlparse.urlsplit(url)
            if parsed_url.scheme.startswith('http'):
                self.disp(u"{} URL found, trying to find associated xmpp: URI".format(parsed_url.scheme.upper()),1)
                # HTTP URL, we try to find xmpp: links
                try:
                    from lxml import etree
                except ImportError:
                    self.disp(u"lxml module must be installed to use http(s) scheme, please install it with \"pip install lxml\"", error=True)
                    self.host.quit(1)
                import urllib2
                parser = etree.HTMLParser()
                try:
                    root = etree.parse(urllib2.urlopen(url), parser)
                except etree.XMLSyntaxError as e:
                    self.disp(_(u"Can't parse HTML page : {msg}").format(msg=e))
                    links = []
                else:
                    links = root.xpath("//link[@rel='alternate' and starts-with(@href, 'xmpp:')]")
                if not links:
                    self.disp(u'Could not find alternate "xmpp:" URI, can\'t find associated XMPP PubSub node/item', error=True)
                    self.host.quit(1)
                url = links[0].get('href')
                parsed_url = urlparse.urlsplit(url)

            # XXX: if we have not xmpp: URI here, we'll take the data as a file path
            #      (or as an item id), this is handled below
            if parsed_url.scheme == 'xmpp':
                if self.args.service or self.args.node:
                    self.parser.error(_(u"You can't use URI and --service or --node at the same time"))

                self.disp(u"XMPP URI used: {}".format(url),2)
                pubsub_service = parsed_url.path
                pubsub_data = urlparse.parse_qs(parsed_url.query)
                try:
                    pubsub_node = pubsub_data['node'][0]
                except KeyError:
                    self.disp(u'No node found in xmpp: URI, can\'t retrieve item', error=True)
                    self.host.quit(1)
                pubsub_item = pubsub_data.get('item',[None])[0]
                if pubsub_item is None and self.args.last_item:
                    command = 'last'
                elif pubsub_item is not None:
                    command = 'edit' # XXX: edit command is only used internaly, it similar to last, but with the item given in the URL
                else:
                    command = 'new'

        if self.args.last_item and command != 'last':
            self.parser.error(_(u"--last-item can't be used with a specified item"))

        if not force_item and command in ('new', 'last', 'edit'):
            # we need a temporary file
            tmp_suff = '.' + self.getTmpSuff()
            content_file_obj, content_file_path = self.getTmpFile(tmp_suff)
            if command == 'new':
                self.disp(u'Editing a new item', 2)
                if self.use_metadata:
                    metadata = None
            elif command in ('last', 'edit'):
                self.disp(u'Editing requested published item', 2)
                try:
                    if self.use_metadata:
                        content, metadata, pubsub_item = self.getItemData(pubsub_service, pubsub_node, pubsub_item)
                    else:
                        content, pubsub_item = self.getItemData(pubsub_service, pubsub_node, pubsub_item)
                except Exception as e:
                    self.disp(u"Error while retrieving last item: {}".format(e), error=True)
                    self.host.quit(1)
                content_file_obj.write(content.encode('utf-8'))
                content_file_obj.seek(0)
        else:
            if self.use_metadata:
                metadata = None
            # "~" must be expanded before checking if the file exists,
            # else a draft given with a path like ~/draft.xml is never found
            draft_path = os.path.expanduser(self.args.item)
            if not force_item and command == 'current':
                # user wants to continue current draft
                content_file_path = self.getCurrentFile(self.profile)
                self.disp(u'Continuing edition of current draft', 2)
                content_file_obj = open(content_file_path, 'r+b')
            elif not force_item and os.path.isfile(draft_path):
                # there is an existing draft that we use
                content_file_path = draft_path
                content_file_obj = open(content_file_path, 'r+b')
            else:
                # last chance, it should be an item
                tmp_suff = '.' + self.getTmpSuff()
                content_file_obj, content_file_path = self.getTmpFile(tmp_suff)
                pubsub_item = self.args.item

                try:
                    # we try to get existing item
                    if self.use_metadata:
                        content, metadata, pubsub_item = self.getItemData(pubsub_service, pubsub_node, self.args.item)
                    else:
                        content, pubsub_item = self.getItemData(pubsub_service, pubsub_node, self.args.item)
                except Exception as e:
                    # FIXME: ugly but we have not good may to check errors in bridge
                    if u'item-not-found' in unicode(e):
                        # item doesn't exist, we create a new one with requested id
                        metadata = None
                        self.disp(_(u'item "{item_id}" not found, we create a new item with this id').format(item_id=pubsub_item), 2)
                    else:
                        # any other error must not be hidden: going on would open
                        # an empty file for an item which may actually exist
                        self.disp(u"Error while retrieving item: {}".format(e), error=True)
                        self.host.quit(1)
                else:
                    # item exists, we write content if content file
                    content_file_obj.write(content.encode('utf-8'))
                    content_file_obj.seek(0)
                    self.disp(_(u'item "{item_id}" found, we edit it').format(item_id=pubsub_item), 2)

        if self.use_metadata:
            return pubsub_service, pubsub_node, pubsub_item, content_file_path, content_file_obj, metadata
        else:
            return pubsub_service, pubsub_node, pubsub_item, content_file_path, content_file_obj