# HG changeset patch
# User Goffi
# Date 1499543664 -7200
# Node ID 7b448ac50a6995f1c59bc5b5441258b3f37b7619
# Parent  8d9bd5d77336bbb13aa13d6b1e93d78e82a0458c
jp (pubsub): new search command:

search is a kind of "grep for Pubsub".
It's a powerful command which allows looking for specific data in a pubsub node, recursing into sub nodes if requested, and executing an action on the result.

search allows looking for items with the following filters:
    - simple text search
    - regex
    - xpath
    - python code

Filters are read and applied in the order in which they appear on the command line.

Then flags can be used to modify behaviour, currently there are:
    - ignore-case to specify if search must be case sensitive or not
    - invert to invert result of the search (i.e. don't match instead of match)
    - dot-all which is specific to regex, cf. re module
    - only-matching which returns the matching part instead of the full item

Once an item matches the filters, an action is applied to it, currently there are:
    - print, which does a simple output of the full item (default)
    - exec, which runs a jp command, specifying the service, node and item corresponding to the match
    - external, which runs an external command, sending the full item on stdin

By default search is only done on the requested node, but if max-depth is more than 0, sub nodes will be searched too.

diff -r 8d9bd5d77336 -r 7b448ac50a69 frontends/src/jp/cmd_pubsub.py
--- a/frontends/src/jp/cmd_pubsub.py	Sat Jul 08 21:45:14 2017 +0200
+++ b/frontends/src/jp/cmd_pubsub.py	Sat Jul 08 21:54:24 2017 +0200
@@ -20,12 +20,19 @@
 
 import base
 from sat.core.i18n import _
+from sat.core import exceptions
 from sat_frontends.jp.constants import Const as C
 from sat_frontends.jp import common
+from sat_frontends.jp import arg_tools
 from functools import partial
 from sat.tools.common import uri
-from sat_frontends.tools import jid
+from sat.tools.common.ansi import ANSI as A
+from sat_frontends.tools import jid, strings
+import argparse
 import os.path
+import re
+import subprocess
+import sys
 
 __commands__ = ["Pubsub"]
 
@@ -413,6 +420,419 @@
             errback=self.psAffiliationsGetEb)
 
 
+class Search(base.CommandBase):
+    """Search items of a pubsub node without using MAM
+
+    Items are retrieved from the node then checked one by one against the
+    filters given on the command line, so this command may be heavy in
+    resources both for server and client.
+    """
+    RE_FLAGS = re.MULTILINE | re.UNICODE
+    EXEC_ACTIONS = (u'exec', u'external')
+
+    def __init__(self, host):
+        base.CommandBase.__init__(self, host, 'search', use_output=C.OUTPUT_XML, use_pubsub=True, use_verbose=True,
+                                  help=_(u'search items corresponding to filters'))
+        self.need_loop=True
+        # number of psItemsGet requests which have not been answered yet
+        self.to_get = 0
+        # lxml.etree module, imported on first use of the etree property
+        self._etree = None
+
+    @property
+    def etree(self):
+        """load lxml.etree only if needed"""
+        if self._etree is None:
+            from lxml import etree
+            self._etree = etree
+        return self._etree
+
+    def filter_opt(self, value, type_):
+        """argparse "type" callable used for filters
+
+        @param value(str): argument as given on the command line
+        @param type_(unicode): name of the filter
+        @return (tuple): (type_, value decoded with base.unicode_decoder)
+        """
+        value = base.unicode_decoder(value)
+        return (type_, value)
+
+    def filter_flag(self, value, type_):
+        """argparse "type" callable used for filters flags
+
+        @param value(str): argument as given on the command line
+        @param type_(unicode): name of the flag
+        @return (tuple): (type_, value converted with C.bool)
+        """
+        value = C.bool(value)
+        return (type_, value)
+
+    def add_parser_options(self):
+        """declare options, filters, filters flags and action arguments"""
+        self.parser.add_argument("-i", "--item", action="append", default=[], dest='items', type=base.unicode_decoder, help=_(u"item id(s)"))
+        self.parser.add_argument("-D", "--max-depth", type=int, default=0, help=_(u"maximum depth of recursion (will search linked nodes if > 0, default: 0)"))
+        # the message is translated first, then formatted: formatting it
+        # before calling _() would change the string looked up for translation
+        self.parser.add_argument("-m", "--max", type=int, default=30,
+                                 help=_(u"maximum number of items to get per node ({} to get all items, default: 30)").format(C.NO_LIMIT))
+        self.parser.add_argument("-N", "--namespace", action='append', nargs=2, default=[],
+                                 metavar="NAME NAMESPACE", help=_(u"namespace to use for xpath"))
+
+        # filters
+        # filters and filters flags are all appended to the same "filters"
+        # destination, so their order on the command line is kept
+        filter_text = partial(self.filter_opt, type_=u'text')
+        filter_re = partial(self.filter_opt, type_=u'regex')
+        filter_xpath = partial(self.filter_opt, type_=u'xpath')
+        filter_python = partial(self.filter_opt, type_=u'python')
+        filters = self.parser.add_argument_group(_(u'filters'), _(u'only items corresponding to following filters will be kept'))
+        filters.add_argument("-t", "--text",
+                             action='append', dest='filters', type=filter_text,
+                             metavar='TEXT',
+                             help=_(u"full text filter, item must contain this string (XML included)"))
+        filters.add_argument("-r", "--regex",
+                             action='append', dest='filters', type=filter_re,
+                             metavar='EXPRESSION',
+                             help=_(u"like --text but using a regular expression"))
+        filters.add_argument("-x", "--xpath",
+                             action='append', dest='filters', type=filter_xpath,
+                             metavar='XPATH',
+                             help=_(u"filter items which has elements matching this xpath"))
+        filters.add_argument("-P", "--python",
+                             action='append', dest='filters', type=filter_python,
+                             metavar='PYTHON_CODE',
+                             help=_(u'Python expression which much return a bool (True to keep item, False to reject it). "item" is raw text item, "item_xml" is lxml\'s etree.Element'))
+
+        # filters flags
+        flag_case = partial(self.filter_flag, type_=u'ignore-case')
+        flag_invert = partial(self.filter_flag, type_=u'invert')
+        flag_dotall = partial(self.filter_flag, type_=u'dotall')
+        flag_matching = partial(self.filter_flag, type_=u'only-matching')
+        flags = self.parser.add_argument_group(_(u'filters flags'), _(u'filters modifiers (change behaviour of following filters)'))
+        flags.add_argument("-C", "--ignore-case",
+                           action='append', dest='filters', type=flag_case,
+                           const=('ignore-case', True), nargs='?',
+                           metavar='BOOLEAN',
+                           help=_(u"(don't) ignore case in following filters (default: case sensitive)"))
+        flags.add_argument("-I", "--invert",
+                           action='append', dest='filters', type=flag_invert,
+                           const=('invert', True), nargs='?',
+                           metavar='BOOLEAN',
+                           help=_(u"(don't) invert effect of following filters (default: don't invert)"))
+        flags.add_argument("-A", "--dot-all",
+                           action='append', dest='filters', type=flag_dotall,
+                           const=('dotall', True), nargs='?',
+                           metavar='BOOLEAN',
+                           help=_(u"(don't) use DOTALL option for regex (default: don't use)"))
+        flags.add_argument("-o", "--only-matching",
+                           action='append', dest='filters', type=flag_matching,
+                           const=('only-matching', True), nargs='?',
+                           metavar='BOOLEAN',
+                           help=_(u"keep only the matching part of the item"))
+
+        # action
+        self.parser.add_argument("action",
+                                 default="print",
+                                 nargs='?',
+                                 choices=('print', 'exec', 'external'),
+                                 help=_(u"action to do on found items (default: print)"))
+        self.parser.add_argument("command", nargs=argparse.REMAINDER)
+
+    def psItemsGetEb(self, failure_, service, node):
+        """errback of getItems: report the failure, and quit if no answer is expected any more
+
+        @param failure_: failure sent by the bridge
+        @param service(unicode): service of the failed request
+        @param node(unicode): node of the failed request
+        """
+        self.disp(u"can't get pubsub items at {service} (node: {node}): {reason}".format(
+            service=service,
+            node=node,
+            reason=failure_), error=True)
+        self.to_get -= 1
+        if self.to_get == 0:
+            # this was the last expected answer, and nothing else would end
+            # the command: we quit with a generic failure exit code
+            self.host.quit(1)
+
+    def getItems(self, depth, service, node, items):
+        """request items of a node, self.search is called with the result
+
+        @param depth(int): depth level of the node (0 for the node given on command line)
+        @param service(unicode): pubsub service
+        @param node(unicode): pubsub node
+        @param items(list[unicode]): ids of the items to request
+        """
+        search = partial(self.search, depth=depth)
+        errback = partial(self.psItemsGetEb, service=service, node=node)
+        # the request is counted before being sent, so the counter is right
+        # whenever one of the callbacks is run
+        self.to_get += 1
+        self.host.bridge.psItemsGet(
+            service,
+            node,
+            self.args.max,
+            items,
+            "",
+            {},
+            self.profile,
+            callback=search,
+            errback=errback
+            )
+
+    def _checkPubsubURL(self, match, found_nodes):
+        """check that the matched URL is an xmpp: one
+
+        @param match: match object of the URL
+        @param found_nodes(list[dict]): found_nodes
+            this list will be filled while xmpp: URIs are discovered
+        """
+        url = match.group(0)
+        if url.startswith(u'xmpp'):
+            try:
+                url_data = uri.parseXMPPUri(url)
+            except ValueError:
+                return
+            if url_data[u'type'] == u'pubsub':
+                found_node = {u'service': url_data[u'path'],
+                              u'node': url_data[u'node']}
+                if u'item' in url_data:
+                    found_node[u'item'] = url_data[u'item']
+                found_nodes.append(found_node)
+
+    def getSubNodes(self, item, depth):
+        """look for pubsub URIs in item, and getItems on the linked nodes
+
+        @param item(unicode): raw item where URIs are looked for
+        @param depth(int): depth level of the node of the item
+        """
+        found_nodes = []
+        # finditer only goes through the matches, while sub would also build
+        # a new string which is not used
+        for match in strings.RE_URL.finditer(item):
+            self._checkPubsubURL(match, found_nodes)
+        for data in found_nodes:
+            self.getItems(depth+1,
+                          data[u'service'],
+                          data[u'node'],
+                          [data[u'item']] if u'item' in data else []
+                          )
+
+    def parseXml(self, item):
+        """parse item with lxml, the command is ended if item is not valid XML
+
+        @param item(unicode): raw item
+        @return: parsed item
+        """
+        try:
+            return self.etree.fromstring(item)
+        except self.etree.XMLSyntaxError:
+            self.disp(_(u"item doesn't looks like XML, you have probably used --only-matching somewhere before and we have no more XML"), error=True)
+            self.host.quit(C.EXIT_BAD_ARG)
+
+    def filter(self, item):
+        """apply filters given on command line
+
+        filters and flags are handled in the order of the command line,
+        a flag only modifies the filters which are following it
+        if only-matching is used, item may be modified
+        @param item(unicode): raw item to check
+        @return (tuple[bool, unicode]): a tuple with:
+            - keep: True if item passed the filters
+            - item: it is returned in case of modifications
+        """
+        ignore_case = False
+        invert = False
+        dotall = False
+        only_matching = False
+        item_xml = None
+        for type_, value in self.args.filters:
+
+            ## flags
+            # a flag never accepts nor rejects an item by itself, so we go
+            # directly to next argument: else an active "invert" would
+            # reject the item as soon as any other flag is met
+
+            if type_ == u'ignore-case':
+                ignore_case = value
+                continue
+            elif type_ == u'invert':
+                invert = value
+                continue
+            elif type_ == u'dotall':
+                dotall = value
+                continue
+            elif type_ == u'only-matching':
+                only_matching = value
+                continue
+
+            ## filters
+
+            if type_ == u'text':
+                if ignore_case:
+                    keep = value.lower() in item.lower()
+                else:
+                    keep = value in item
+                if keep and only_matching:
+                    # doesn't really make sense to keep a fixed string
+                    # so we raise an error
+                    self.disp(_(u"--only-matching used with fixed --text string, are you sure?"), error=True)
+                    self.host.quit(C.EXIT_BAD_ARG)
+            elif type_ == u'regex':
+                flags = self.RE_FLAGS
+                if ignore_case:
+                    flags |= re.IGNORECASE
+                if dotall:
+                    flags |= re.DOTALL
+                try:
+                    match = re.search(value, item, flags)
+                except re.error as e:
+                    self.disp(_(u"can't use regex: {reason}").format(reason=e), error=True)
+                    self.host.quit(C.EXIT_BAD_ARG)
+                keep = match is not None
+                if keep and only_matching:
+                    item = match.group()
+                    # item has changed, the parsed version is outdated
+                    item_xml = None
+            elif type_ == u'xpath':
+                if item_xml is None:
+                    item_xml = self.parseXml(item)
+                try:
+                    elts = item_xml.xpath(value, namespaces=self.args.namespace)
+                except self.etree.XPathEvalError as e:
+                    self.disp(_(u"can't use xpath: {reason}").format(reason=e), error=True)
+                    self.host.quit(C.EXIT_BAD_ARG)
+                keep = bool(elts)
+                if keep and only_matching:
+                    item_xml = elts[0]
+                    try:
+                        item = self.etree.tostring(item_xml, encoding='unicode')
+                    except TypeError:
+                        # we have a string only, not an element
+                        item = unicode(item_xml)
+                        item_xml = None
+            elif type_ == u'python':
+                if item_xml is None:
+                    item_xml = self.parseXml(item)
+                cmd_ns = {u'item': item,
+                          u'item_xml': item_xml
+                         }
+                try:
+                    # NOTE: eval runs the expression given with --python as is,
+                    # it must never be fed with untrusted input
+                    keep = eval(value, cmd_ns)
+                except SyntaxError as e:
+                    self.disp(unicode(e), error=True)
+                    self.host.quit(C.EXIT_BAD_ARG)
+            else:
+                raise exceptions.InternalError(_(u"unknown filter type {type}").format(type=type_))
+
+            if invert:
+                keep = not keep
+            if not keep:
+                return False, item
+
+        return True, item
+
+    def doItemAction(self, item, metadata):
+        """called when item has been kept and the action needs to be done
+
+        @param item(unicode): accepted item
+        @param metadata(dict): metadata received with the items
+            "service" and "node" keys are read for the exec action
+        """
+        action = self.args.action
+        if action == u'print' or self.host.verbosity > 0:
+            try:
+                self.output(item)
+            except self.etree.XMLSyntaxError:
+                # item is not valid XML, but a string
+                # can happen when --only-matching is used
+                self.disp(item)
+        if action not in self.EXEC_ACTIONS:
+            return
+
+        if action == u'exec':
+            # XML is only needed to get the id of the item: external
+            # commands get the raw item, which may be a simple string
+            item_elt = self.parseXml(item)
+            use = {'service': metadata[u'service'],
+                   'node': metadata[u'node'],
+                   'item': item_elt.get('id'),
+                   }
+            args = arg_tools.get_use_args(self.host,
+                                          self.args.command,
+                                          use,
+                                          verbose=self.host.verbosity > 1
+                                          )
+            cmd_args = sys.argv[0:1] + args + self.args.command
+        else:
+            cmd_args = self.args.command
+
+        self.disp(u'COMMAND: {command}'.format(
+            command = u' '.join([arg_tools.escape(a) for a in cmd_args])), 2)
+        if action == u'exec':
+            ret = subprocess.call(cmd_args)
+        else:
+            p = subprocess.Popen(cmd_args, stdin=subprocess.PIPE)
+            # a pipe carries bytes: unicode is encoded explicitly, else the
+            # implicit ASCII encoding would fail on non ASCII items
+            if isinstance(item, unicode):
+                item = item.encode('utf-8')
+            p.communicate(item)
+            ret = p.returncode
+        if ret != 0:
+            self.disp(A.color(C.A_FAILURE, _(u"executed command failed with exit code {code}").format(code=ret)))
+
+    def search(self, items_data, depth):
+        """callback of getItems
+
+        this method filters items, get sub nodes if needed,
+        do the requested action, and exit the command when everything is done
+        @param items_data(tuple): result of getItems
+        @param depth(int): current depth level
+            0 for first node, 1 for first children, and so on
+        """
+        items, metadata = items_data
+        for item in items:
+            if depth < self.args.max_depth:
+                self.getSubNodes(item, depth)
+            keep, item = self.filter(item)
+            if not keep:
+                continue
+            self.doItemAction(item, metadata)
+
+        # we check if we got all getItems results
+        self.to_get -= 1
+        if self.to_get == 0:
+            # yes, we can quit
+            self.host.quit()
+        else:
+            assert self.to_get > 0
+
+    def start(self):
+        """check arguments consistency, then request the items of the main node"""
+        if self.args.command:
+            if self.args.action not in self.EXEC_ACTIONS:
+                self.parser.error(_(u"Command can only be used with {actions} actions").format(
+                    actions=u', '.join(self.EXEC_ACTIONS)))
+        else:
+            if self.args.action in self.EXEC_ACTIONS:
+                self.parser.error(_(u"you need to specify a command to execute"))
+        if not self.args.node:
+            # TODO: handle get service affiliations when node is not set
+            self.parser.error(_(u"empty node is not handled yet"))
+        # to_get is increased on each get and decreased on each answer
+        # when it reach 0 again, the command is finished
+        self.to_get = 0
+        self._etree = None
+        if self.args.filters is None:
+            self.args.filters = []
+        self.args.namespace = dict(self.args.namespace + [('pubsub', "http://jabber.org/protocol/pubsub")])
+        common.checkURI(self.args)
+        self.getItems(0, self.args.service, self.args.node, self.args.items)
+
+
 class Uri(base.CommandBase):
 
     def __init__(self, host):
@@ -492,8 +912,8 @@
         self.need_loop=True
 
     def add_parser_options(self):
-        self.parser.add_argument('-t', '--type', default=u'', choices=('', 'python', 'python_file', 'python_code'), help=_(u"hook type to remove, empty to remove all (DEFAULT: remove all)"))
-        self.parser.add_argument('-a', '--arg', dest='hook_arg', type=base.unicode_decoder, default=u'', help=_(u"argument of the hook to remove, empty to remove all (DEFAULT: remove all)"))
+        self.parser.add_argument('-t', '--type', default=u'', choices=('', 'python', 'python_file', 'python_code'), help=_(u"hook type to remove, empty to remove all (default: remove all)"))
+        self.parser.add_argument('-a', '--arg', dest='hook_arg', type=base.unicode_decoder, default=u'', help=_(u"argument of the hook to remove, empty to remove all (default: remove all)"))
 
     def psHookRemoveCb(self, nb_deleted):
         self.disp(_(u'{nb_deleted} hook(s) have been deleted').format(
@@ -547,7 +967,7 @@
 
 
 class Pubsub(base.CommandBase):
-    subcommands = (Get, Delete, Edit, Node, Affiliations, Hook, Uri)
+    subcommands = (Get, Delete, Edit, Node, Affiliations, Search, Hook, Uri)
 
     def __init__(self, host):
         super(Pubsub, self).__init__(host, 'pubsub', use_profile=False, help=_('PubSub nodes/items management'))