# HG changeset patch
# User Goffi <goffi@goffi.org>
# Date 1499543664 -7200
# Node ID 7b448ac50a6995f1c59bc5b5441258b3f37b7619
# Parent  8d9bd5d77336bbb13aa13d6b1e93d78e82a0458c
jp (pubsub): new search command:

search is a kind of "grep for Pubsub". It's a powerful command which allows looking for specific data in a pubsub node, recursing into sub nodes if requested, and executing an action on the result.

search allows looking for items with the following filters:
    - simple text search
    - regex
    - xpath
    - python code

filters are read and applied in the order in which they appear on the command line.

Then flags can be used to modify behaviour, currently there are:
    - ignore-case to specify if search must be case sensitive or not
    - invert to invert result of the search (i.e. don't match instead of match)
    - dot-all which is specific for regex, cf. re module
    - only-matching which returns the matching part instead of the full item

Once an item matches the filters, an action is applied to it; currently there are:
    - print, which does a simple output of the full item (default)
    - exec, which runs a jp command, specifying the service, node and item corresponding to the match
    - external, which runs an external command, sending the full item on stdin

By default search is only done on requested node, but if max-depth is more than 0, sub nodes will be searched too.

diff -r 8d9bd5d77336 -r 7b448ac50a69 frontends/src/jp/cmd_pubsub.py
--- a/frontends/src/jp/cmd_pubsub.py	Sat Jul 08 21:45:14 2017 +0200
+++ b/frontends/src/jp/cmd_pubsub.py	Sat Jul 08 21:54:24 2017 +0200
@@ -20,12 +20,19 @@
 
 import base
 from sat.core.i18n import _
+from sat.core import exceptions
 from sat_frontends.jp.constants import Const as C
 from sat_frontends.jp import common
+from sat_frontends.jp import arg_tools
 from functools import partial
 from sat.tools.common import uri
-from sat_frontends.tools import jid
+from sat.tools.common.ansi import ANSI as A
+from sat_frontends.tools import jid, strings
+import argparse
 import os.path
+import re
+import subprocess
+import sys
 
 __commands__ = ["Pubsub"]
 
@@ -413,6 +420,337 @@
             errback=self.psAffiliationsGetEb)
 
 
+class Search(base.CommandBase):
+    """this command does a search without using MAM, i.e. by checking every item found by itself, so it may be heavy in resources both for server and client"""
+    RE_FLAGS = re.MULTILINE | re.UNICODE
+    EXEC_ACTIONS = (u'exec', u'external')
+
+    def __init__(self, host):
+        base.CommandBase.__init__(self, host, 'search', use_output=C.OUTPUT_XML, use_pubsub=True, use_verbose=True, help=_(u'search items corresponding to filters'))
+        self.need_loop=True
+
+    @property
+    def etree(self):
+        """load lxml.etree only if needed"""
+        if self._etree is None:
+            from lxml import etree
+            self._etree = etree
+        return self._etree
+
+    def filter_opt(self, value, type_):
+        """argparse "type" callable for filters: tag the decoded value with its filter type"""
+        return (type_, base.unicode_decoder(value))
+
+    def filter_flag(self, value, type_):
+        """argparse "type" callable for flags: tag the value converted by C.bool with its flag type"""
+        return (type_, C.bool(value))
+
+    def add_parser_options(self):
+        self.parser.add_argument("-i", "--item", action="append", default=[], dest='items', type=base.unicode_decoder, help=_(u"item id(s)"))
+        self.parser.add_argument("-D", "--max-depth", type=int, default=0, help=_(u"maximum depth of recursion (will search linked nodes if > 0, default: 0)"))
+        self.parser.add_argument("-m", "--max", type=int, default=30, help=_(u"maximum number of items to get per node ({} to get all items, default: 30)".format(C.NO_LIMIT)))
+        self.parser.add_argument("-N", "--namespace", action='append', nargs=2, default=[],
+                                 metavar="NAME NAMESPACE", help=_(u"namespace to use for xpath"))
+
+        # filters
+        filter_text = partial(self.filter_opt, type_=u'text')
+        filter_re = partial(self.filter_opt, type_=u'regex')
+        filter_xpath = partial(self.filter_opt, type_=u'xpath')
+        filter_python = partial(self.filter_opt, type_=u'python')
+        filters = self.parser.add_argument_group(_(u'filters'), _(u'only items corresponding to following filters will be kept'))
+        filters.add_argument("-t", "--text",
+                             action='append', dest='filters', type=filter_text,
+                             metavar='TEXT',
+                             help=_(u"full text filter, item must contain this string (XML included)"))
+        filters.add_argument("-r", "--regex",
+                             action='append', dest='filters', type=filter_re,
+                             metavar='EXPRESSION',
+                             help=_(u"like --text but using a regular expression"))
+        filters.add_argument("-x", "--xpath",
+                             action='append', dest='filters', type=filter_xpath,
+                             metavar='XPATH',
+                             help=_(u"filter items which has elements matching this xpath"))
+        filters.add_argument("-P", "--python",
+                             action='append', dest='filters', type=filter_python,
+                             metavar='PYTHON_CODE',
+                             help=_(u'Python expression which much return a bool (True to keep item, False to reject it). "item" is raw text item, "item_xml" is lxml\'s etree.Element'))
+
+        # filters flags
+        flag_case = partial(self.filter_flag, type_=u'ignore-case')
+        flag_invert = partial(self.filter_flag, type_=u'invert')
+        flag_dotall = partial(self.filter_flag, type_=u'dotall')
+        flag_matching = partial(self.filter_flag, type_=u'only-matching')
+        flags = self.parser.add_argument_group(_(u'filters flags'), _(u'filters modifiers (change behaviour of following filters)'))
+        flags.add_argument("-C", "--ignore-case",
+                           action='append', dest='filters', type=flag_case,
+                           const=('ignore-case', True), nargs='?',
+                           metavar='BOOLEAN',
+                           help=_(u"(don't) ignore case in following filters (default: case sensitive)"))
+        flags.add_argument("-I", "--invert",
+                           action='append', dest='filters', type=flag_invert,
+                           const=('invert', True), nargs='?',
+                           metavar='BOOLEAN',
+                           help=_(u"(don't) invert effect of following filters (default: don't invert)"))
+        flags.add_argument("-A", "--dot-all",
+                           action='append', dest='filters', type=flag_dotall,
+                           const=('dotall', True), nargs='?',
+                           metavar='BOOLEAN',
+                           help=_(u"(don't) use DOTALL option for regex (default: don't use)"))
+        flags.add_argument("-o", "--only-matching",
+                           action='append', dest='filters', type=flag_matching,
+                           const=('only-matching', True), nargs='?',
+                           metavar='BOOLEAN',
+                           help=_(u"keep only the matching part of the item"))
+
+        # action
+        self.parser.add_argument("action",
+                                 default="print",
+                                 nargs='?',
+                                 choices=('print', 'exec', 'external'),
+                                 help=_(u"action to do on found items (default: print)"))
+        self.parser.add_argument("command", nargs=argparse.REMAINDER)
+
+    def psItemsGetEb(self, failure_, service, node):
+        """errback of psItemsGet: report the failure, then quit if no other answer is pending (else the command would never end)"""
+        self.disp(u"can't get pubsub items at {service} (node: {node}): {reason}".format(service=service, node=node, reason=failure_), error=True)
+        self.to_get -= 1
+        if self.to_get == 0:
+            self.host.quit()
+
+    def getItems(self, depth, service, node, items):
+        """request items of a node, the answer is handled by search (psItemsGetEb on failure)"""
+        search = partial(self.search, depth=depth)
+        errback = partial(self.psItemsGetEb, service=service, node=node)
+        self.host.bridge.psItemsGet(
+            service,
+            node,
+            self.args.max,
+            items,  # ids given by the caller (was hard-coded to [], ignoring --item and URI item ids)
+            "",
+            {},
+            self.profile,
+            callback=search,
+            errback=errback)
+        self.to_get += 1
+
+    def _checkPubsubURL(self, match, found_nodes):
+        """check that the matched URL is an xmpp: one
+
+        @param found_nodes(list[unicode]): found_nodes
+            this list will be filled while xmpp: URIs are discovered
+        """
+        url = match.group(0)
+        if url.startswith(u'xmpp'):
+            try:
+                url_data = uri.parseXMPPUri(url)
+            except ValueError:
+                return
+            if url_data[u'type'] == u'pubsub':
+                found_node = {u'service': url_data[u'path'],
+                              u'node': url_data[u'node']}
+                if u'item' in url_data:
+                    found_node[u'item'] = url_data[u'item']
+                found_nodes.append(found_node)
+
+    def getSubNodes(self, item, depth):
+        """look for pubsub URIs in item, and getItems on the linked nodes"""
+        found_nodes = []
+        checkURI = partial(self._checkPubsubURL, found_nodes=found_nodes)
+        strings.RE_URL.sub(checkURI, item)
+        for data in found_nodes:
+            self.getItems(depth+1,
+                          data[u'service'],
+                          data[u'node'],
+                          [data[u'item']] if u'item' in data else []
+                          )
+
+    def parseXml(self, item):
+        try:
+            return self.etree.fromstring(item)
+        except self.etree.XMLSyntaxError:
+            self.disp(_(u"item doesn't looks like XML, you have probably used --only-matching somewhere before and we have no more XML"), error=True)
+            self.host.quit(C.EXIT_BAD_ARG)
+
+    def filter(self, item):
+        """apply filters given on command line
+
+        if only-matching is used, item may be modified
+        @param item(unicode): raw item to check
+        @return (tuple[bool, unicode]): a tuple with:
+            - keep: True if item passed the filters
+            - item: it is returned in case of modifications
+        """
+        ignore_case = False
+        invert = False
+        dotall = False
+        only_matching = False
+        item_xml = None
+        for type_, value in self.args.filters:
+            keep = True
+
+            ## filters
+
+            if type_ == u'text':
+                if ignore_case:
+                    if value.lower() not in item.lower():
+                        keep = False
+                else:
+                    if value not in item:
+                        keep = False
+                if keep and only_matching:
+                    # doesn't really make sense to keep a fixed string
+                    # so we raise an error
+                    self.host.disp(_(u"--only-matching used with fixed --text string, are you sure?"), error=True)
+                    self.host.quit(C.EXIT_BAD_ARG)
+            elif type_ == u'regex':
+                flags = self.RE_FLAGS
+                if ignore_case:
+                    flags |= re.IGNORECASE
+                if dotall:
+                    flags |= re.DOTALL
+                match = re.search(value, item, flags)
+                keep = match is not None
+                if keep and only_matching:
+                    item = match.group()
+                    item_xml = None
+            elif type_ == u'xpath':
+                if item_xml is None:
+                    item_xml = self.parseXml(item)
+                try:
+                    elts = item_xml.xpath(value, namespaces=self.args.namespace)
+                except self.etree.XPathEvalError as e:
+                    self.disp(_(u"can't use xpath: {reason}").format(reason=e), error=True)
+                    self.host.quit(C.EXIT_BAD_ARG)
+                keep = bool(elts)
+                if keep and only_matching:
+                    item_xml = elts[0]
+                    try:
+                        item = self.etree.tostring(item_xml, encoding='unicode')
+                    except TypeError:
+                        # we have a string only, not an element
+                        item = unicode(item_xml)
+                        item_xml = None
+            elif type_ == u'python':
+                if item_xml is None:
+                    item_xml = self.parseXml(item)
+                # SECURITY: eval executes arbitrary Python code, the expression
+                # must only come from the (trusted) user command line
+                cmd_ns = {u'item': item, u'item_xml': item_xml}
+                try:
+                    keep = eval(value, cmd_ns)
+                except SyntaxError as e:
+                    self.disp(unicode(e), error=True)
+                    self.host.quit(C.EXIT_BAD_ARG)
+
+            ## flags
+
+            elif type_ == u'ignore-case':
+                ignore_case = value
+            elif type_ == u'invert':
+                invert = value
+            elif type_ == u'dotall':
+                dotall = value
+            elif type_ == u'only-matching':
+                only_matching = value
+            else:
+                raise exceptions.InternalError(_(u"unknown filter type {type}").format(type=type_))
+
+            # flags only change state: inverting their "keep" would wrongly reject the item
+            if invert and type_ in (u'text', u'regex', u'xpath', u'python'):
+                keep = not keep
+            if not keep:
+                return False, item
+
+        return True, item
+
+    def doItemAction(self, item, metadata):
+        """called when item has been kept and the action needs to be done
+
+        @param item(unicode): accepted item
+        @param metadata(dict): metadata of the items request, "service" and "node" keys are used by exec action
+        """
+        action = self.args.action
+        if action == u'print' or self.host.verbosity > 0:
+            try:
+                self.output(item)
+            except self.etree.XMLSyntaxError:
+                # item is not valid XML, but a string
+                # can happen when --only-matching is used
+                self.disp(item)
+        if action in self.EXEC_ACTIONS:
+            item_elt = self.parseXml(item)
+            if action == u'exec':
+                use = {'service': metadata[u'service'],
+                       'node': metadata[u'node'],
+                       'item': item_elt.get('id'),
+                       }
+                args = arg_tools.get_use_args(self.host,
+                                              self.args.command,
+                                              use,
+                                              verbose=self.host.verbosity > 1)
+                cmd_args = sys.argv[0:1] + args + self.args.command
+            else:
+                cmd_args = self.args.command
+
+            self.disp(u'COMMAND: {command}'.format(
+                command = u' '.join([arg_tools.escape(a) for a in cmd_args])), 2)
+            if action == u'exec':
+                ret = subprocess.call(cmd_args)
+            else:
+                p = subprocess.Popen(cmd_args, stdin=subprocess.PIPE)
+                # a pipe carries bytes: a unicode item would be implicitly encoded as ASCII and fail on other characters
+                p.communicate(item.encode('utf-8'))
+                ret = p.wait()
+            if ret != 0:
+                self.disp(A.color(C.A_FAILURE, _(u"executed command failed with exit code {code}").format(code=ret)))
+
+    def search(self, items_data, depth):
+        """callback of getItems
+
+        this method filters items, get sub nodes if needed,
+        do the requested action, and exit the command when everything is done
+        @param items_data(tuple): result of getItems
+        @param depth(int): current depth level
+            0 for first node, 1 for first children, and so on
+        """
+        items, metadata = items_data
+        for item in items:
+            if depth < self.args.max_depth:
+                self.getSubNodes(item, depth)
+            keep, item = self.filter(item)
+            if not keep:
+                continue
+            self.doItemAction(item, metadata)
+
+        # we check if we got all getItems results
+        self.to_get -= 1
+        if self.to_get == 0:
+            # yes, we can quit
+            self.host.quit()
+        assert self.to_get > 0
+
+    def start(self):
+        if self.args.command:
+            if self.args.action not in self.EXEC_ACTIONS:
+                self.parser.error(_(u"Command can only be used with {actions} actions").format(
+                    actions=u', '.join(self.EXEC_ACTIONS)))
+        else:
+            if self.args.action in self.EXEC_ACTIONS:
+                self.parser.error(_(u"you need to specify a command to execute"))
+        if not self.args.node:
+            # TODO: handle get service affiliations when node is not set
+            self.parser.error(_(u"empty node is not handled yet"))
+        # to_get is increased on each get and decreased on each answer
+        # when it reach 0 again, the command is finished
+        self.to_get = 0
+        self._etree = None
+        if self.args.filters is None:
+            self.args.filters = []
+        self.args.namespace = dict(self.args.namespace + [('pubsub', "http://jabber.org/protocol/pubsub")])
+        common.checkURI(self.args)
+        self.getItems(0, self.args.service, self.args.node, self.args.items)
+
+
 class Uri(base.CommandBase):
 
     def __init__(self, host):
@@ -492,8 +830,8 @@
         self.need_loop=True
 
     def add_parser_options(self):
-        self.parser.add_argument('-t', '--type', default=u'', choices=('', 'python', 'python_file', 'python_code'), help=_(u"hook type to remove, empty to remove all (DEFAULT: remove all)"))
-        self.parser.add_argument('-a', '--arg', dest='hook_arg', type=base.unicode_decoder, default=u'', help=_(u"argument of the hook to remove, empty to remove all (DEFAULT: remove all)"))
+        self.parser.add_argument('-t', '--type', default=u'', choices=('', 'python', 'python_file', 'python_code'), help=_(u"hook type to remove, empty to remove all (default: remove all)"))
+        self.parser.add_argument('-a', '--arg', dest='hook_arg', type=base.unicode_decoder, default=u'', help=_(u"argument of the hook to remove, empty to remove all (default: remove all)"))
 
     def psHookRemoveCb(self, nb_deleted):
         self.disp(_(u'{nb_deleted} hook(s) have been deleted').format(
@@ -547,7 +885,7 @@
 
 
 class Pubsub(base.CommandBase):
-    subcommands = (Get, Delete, Edit, Node, Affiliations, Hook, Uri)
+    subcommands = (Get, Delete, Edit, Node, Affiliations, Search, Hook, Uri)
 
     def __init__(self, host):
         super(Pubsub, self).__init__(host, 'pubsub', use_profile=False, help=_('PubSub nodes/items management'))