# HG changeset patch # User Goffi # Date 1547538716 -3600 # Node ID ff1b40823b074f232e99973c41fcc28fd4fb12b0 # Parent 838f53730ce4bbfa0030bbdabdd27f660b758ab0 jp (pubsub): new "transform" command: This command allows to pass all requested items through an external command to filter them (i.e. modify their content). - created new jp.xml_tools module with some common functions (like lxml parsing) - new EXIT code EXIT_CMD_ERROR (used when a third party utility returns an error) diff -r 838f53730ce4 -r ff1b40823b07 sat_frontends/jp/cmd_pubsub.py --- a/sat_frontends/jp/cmd_pubsub.py Tue Jan 15 08:51:54 2019 +0100 +++ b/sat_frontends/jp/cmd_pubsub.py Tue Jan 15 08:51:56 2019 +0100 @@ -24,6 +24,7 @@ from sat_frontends.jp.constants import Const as C from sat_frontends.jp import common from sat_frontends.jp import arg_tools +from sat_frontends.jp import xml_tools from functools import partial from sat.tools.common import uri from sat.tools.common.ansi import ANSI as A @@ -683,26 +684,8 @@ self.host.quit(C.EXIT_OK) def start(self): - try: - from lxml import etree - except ImportError: - self.disp( - u'lxml module must be installed to use edit, please install it with "pip install lxml"', - error=True, - ) - self.host.quit(1) - try: - element = etree.parse(sys.stdin).getroot() - except Exception as e: - self.parser.error( - _(u"Can't parse the payload XML in input: {msg}").format(msg=e) - ) - if element.tag in ("item", "{http://jabber.org/protocol/pubsub}item"): - if len(element) > 1: - self.parser.error( - _(u" can only have one child element (the payload)") - ) - element = element[0] + element, etree = xml_tools.etreeParse(self, sys.stdin) + element = xml_tools.getPayload(self, element) payload = etree.tostring(element, encoding="unicode") self.host.bridge.psItemSend( @@ -1070,7 +1053,7 @@ type=int, default=0, help=_( - u"maximum depth of recursion (will search linked nodes if > 0, default: 0)" + u"maximum depth of recursion (will search linked nodes if > 0, DEFAULT: 0)" ), 
class Transform(base.CommandBase):
    """Pass the items of a pubsub node through an external command

    Each retrieved item is written on the standard input of the command, and
    what the command writes on its standard output decides what is done:
        - "DELETE": the item is retracted
        - "SKIP": the item is left untouched
        - anything else must be a whole <item/> element, used as replacement
    Without "--apply" nothing is sent, transformed items are only displayed.
    """

    def __init__(self, host):
        base.CommandBase.__init__(
            self,
            host,
            "transform",
            use_pubsub=True,
            pubsub_flags={C.NODE, C.MULTI_ITEMS},
            help=_(u"modify items of a node using an external command/script"),
        )
        self.need_loop = True

    def add_parser_options(self):
        """Declare the options specific to the "transform" command"""
        self.parser.add_argument(
            "--apply",
            action="store_true",
            help=_(u"apply transformation (DEFAULT: do a dry run)"),
        )
        self.parser.add_argument(
            "--admin",
            action="store_true",
            help=_(u"do a pubsub admin request, needed to change publisher"),
        )
        self.parser.add_argument(
            "-I",
            "--ignore_errors",
            action="store_true",
            help=_(
                u"if command return a non zero exit code, ignore the item and continue"),
        )
        self.parser.add_argument(
            "-A",
            "--all",
            action="store_true",
            help=_(u"get all items by looping over all pages using RSM")
        )
        self.parser.add_argument(
            "command_path",
            help=_(u"path to the command to use. Will be called repetitivly with an "
                   u"item as input. Output (full item XML) will be used as new one. "
                   u'Return "DELETE" string to delete the item, and "SKIP" to ignore it'),
        )

    def psAdminItemsSendCb(self, item_ids, metadata):
        """Called when the items of a page have been sent with an admin request

        @param item_ids(list[unicode]): ids of the published items
        @param metadata(dict): metadata of the current page (from psItemsGet)
        """
        self.disp(_(u'items published with ids {item_ids}').format(
            item_ids=u', '.join(item_ids)))
        if self.args.all:
            return self.handleNextPage(metadata)
        else:
            self.host.quit()

    def psItemsSendCb(self, item_id, metadata, show_mess=True):
        """Called when an item has been handled in non admin mode

        Move to the next page (with --all) or quit once every item of the
        current page has been accounted for.
        NOTE(review): self.items_sent counts requests *sent*, not requests
            acknowledged, so with asynchronous callbacks the first answer
            already sees both counters equal - confirm this is intended.
        @param item_id(unicode, None): id of the handled item
        @param metadata(dict): metadata of the current page (from psItemsGet)
        @param show_mess(bool): display a message for this item if True
        """
        if show_mess:
            self.disp(u'item published with id {item_id}'.format(item_id=item_id))
        if self.items_sent == self.items_to_send:
            if self.args.all:
                return self.handleNextPage(metadata)
            self.disp(u'all items published')
            self.host.quit()

    def psRetractItemCb(self, item_id, metadata):
        """Called when an item has been retracted

        @param item_id(unicode): id of the retracted item
        @param metadata(dict): metadata of the current page (from psItemsGet)
        """
        if self.args.admin:
            # counters used by psItemsSendCb are only set in non admin mode,
            # and the end of the page is handled by psAdminItemsSendCb
            return
        self.psItemsSendCb(item_id, metadata, show_mess=False)

    def handleNextPage(self, metadata):
        """Retrieve new page through RSM or quit if we're in the last page

        use to handle --all option
        @param metadata(dict): metadata as returned by psItemsGet
        """
        # host.quit is expected to end the process; the explicit "return"s
        # only keep the flow well defined if it ever comes back
        try:
            last = metadata[u'rsm_last']
            index = int(metadata[u'rsm_index'])
            count = int(metadata[u'rsm_count'])
        except KeyError:
            self.disp(_(u"Can't retrieve all items, RSM metadata not available"),
                      error=True)
            return self.host.quit(C.EXIT_MISSING_FEATURE)
        except ValueError as e:
            self.disp(_(u"Can't retrieve all items, bad RSM metadata: {msg}")
                      .format(msg=e), error=True)
            return self.host.quit(C.EXIT_ERROR)

        page_size = self.args.rsm_max
        # "index" is the position of the first item of the page we have just
        # handled: if this page reaches the total count, it was the last one
        if index + page_size >= count:
            self.disp(_(u'All items transformed'))
            return self.host.quit(0)

        self.disp(_(u'Retrieving next page ({page_idx}/{page_total})').format(
            # the page just handled is number "index // page_size + 1"
            # (1-based), so the one we request now is the following one
            page_idx=index // page_size + 2,
            # ceiling division: a partially filled last page is still a page
            page_total=(count + page_size - 1) // page_size,
            )
        )

        extra = self.getPubsubExtra()
        extra[u'rsm_after'] = last
        self.host.bridge.psItemsGet(
            self.args.service,
            self.args.node,
            self.args.rsm_max,
            self.args.items,
            "",
            extra,
            self.profile,
            callback=self.psItemsGetCb,
            errback=partial(
                self.errback,
                msg=_(u"can't retrieve items: {}"),
                exit_code=C.EXIT_BRIDGE_ERRBACK,
            ),
        )

    def _getItemId(self, item):
        """Parse a serialised item and return the value of its "id" attribute

        @param item(unicode): serialised <item/> element
        @return (unicode, None): id of the item, None if there is no id
        """
        item_elt, __ = xml_tools.etreeParse(self, item)
        return item_elt.get('id')

    def _filterItem(self, item):
        """Run the external command with the item on its standard input

        @param item(unicode): serialised <item/> element
        @return (str, None): stripped standard output of the command,
            or None if the command failed and the failure must be ignored
        """
        try:
            # stderr is not piped: the command writes directly on ours
            p = subprocess.Popen(self.args.command_path, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
        except OSError as e:
            # errno 2 is ENOENT (no such file or directory)
            exit_code = C.EXIT_CMD_NOT_FOUND if e.errno == 2 else C.EXIT_ERROR
            e = str(e).decode('utf-8', errors="ignore")
            self.disp(u"Can't execute the command: {msg}".format(msg=e), error=True)
            self.host.quit(exit_code)
            return None
        # communicate() waits for the end of the process, returncode is then set
        cmd_std_out, __ = p.communicate(item.encode("utf-8"))
        if p.returncode != 0:
            self.disp(u"The command returned a non zero status while parsing the "
                      u"following item:\n\n{item}".format(item=item), error=True)
            if self.args.ignore_errors:
                return None
            self.host.quit(C.EXIT_CMD_ERROR)
        return cmd_std_out.strip()

    def psItemsGetCb(self, ps_result):
        """Transform every item of a page, and send the result if requested

        @param ps_result(tuple): (items, metadata) as returned by psItemsGet,
            items being serialised <item/> elements
        """
        items, metadata = ps_result
        if self.args.admin:
            new_items = []
        else:
            self.items_to_send = len(items)
            self.items_sent = 0
        # number of bridge requests of this page which will call us back
        pending_requests = 0

        for item in items:
            if self.check_duplicates:
                # this is used when we are not ordering by creation
                # to avoid infinite loop
                item_id = self._getItemId(item)
                if item_id in self.items_ids:
                    self.disp(_(
                        u"Duplicate found on item {item_id}, we have probably handled "
                        u"all items.").format(item_id=item_id))
                    return self.host.quit()
                self.items_ids.add(item_id)

            # we launch the command to filter the item
            cmd_std_out = self._filterItem(item)
            if cmd_std_out is None:
                # the command failed and --ignore_errors is set
                if not self.args.admin:
                    self.items_to_send -= 1
                continue

            if cmd_std_out == "DELETE":
                item_id = self._getItemId(item)
                self.disp(_(u"Deleting item {item_id}").format(item_id=item_id))
                if self.args.apply:
                    if not self.args.admin:
                        # we need to increase the counter as if the item were
                        # re-published
                        self.items_sent += 1
                    self.host.bridge.psRetractItem(
                        self.args.service,
                        self.args.node,
                        item_id,
                        False,
                        self.profile,
                        callback=partial(self.psRetractItemCb, metadata=metadata),
                        errback=partial(
                            self.errback,
                            msg=_(u"can't delete item: {}"),
                            exit_code=C.EXIT_BRIDGE_ERRBACK,
                        ),
                    )
                    pending_requests += 1
                continue
            elif cmd_std_out == "SKIP":
                item_id = self._getItemId(item)
                self.disp(_(u"Skipping item {item_id}").format(item_id=item_id))
                if self.args.apply and not self.args.admin:
                    # the item is accounted for as if it were re-published,
                    # the end of the page is checked after the loop
                    self.items_sent += 1
                continue
            element, etree = xml_tools.etreeParse(self, cmd_std_out)

            # at this point command has been run and we have a etree.Element object
            if element.tag not in ("item", "{http://jabber.org/protocol/pubsub}item"):
                self.disp(u"your script must return a whole item, this is not:\n{xml}"
                          .format(xml=etree.tostring(element, encoding="unicode")),
                          error=True)
                self.host.quit(C.EXIT_DATA_ERROR)

            if not self.args.apply:
                # we have a dry run, we just display filtered items
                serialised = etree.tostring(element, encoding=u'unicode',
                                            pretty_print=True)
                self.disp(serialised)
            else:
                # we will apply the change, either in admin request or as a simple
                # pubsub one
                if self.args.admin:
                    new_items.append(etree.tostring(element, encoding="unicode"))
                else:
                    # there is currently no method to send several items at once
                    # so we publish them one by one
                    payload = etree.tostring(xml_tools.getPayload(self, element),
                                             encoding="unicode")
                    item_id = element.get(u'id', '')
                    self.host.bridge.psItemSend(
                        self.args.service,
                        self.args.node,
                        payload,
                        item_id,
                        {},
                        self.profile,
                        callback=partial(self.psItemsSendCb, metadata=metadata),
                        errback=partial(
                            self.errback,
                            msg=_(u"can't send item: {}"),
                            exit_code=C.EXIT_BRIDGE_ERRBACK,
                        ),
                    )
                    self.items_sent += 1
                    pending_requests += 1

        if not self.args.apply:
            # on dry run we have nothing to wait for, we can quit
            if self.args.all:
                return self.handleNextPage(metadata)
            self.host.quit()
        elif self.args.admin:
            self.host.bridge.psAdminItemsSend(
                self.args.service,
                self.args.node,
                new_items,
                u"",
                self.profile,
                callback=partial(self.psAdminItemsSendCb, metadata=metadata),
                errback=partial(
                    self.errback,
                    msg=_(u"can't send item: {}"),
                    exit_code=C.EXIT_BRIDGE_ERRBACK,
                ),
            )
        elif not pending_requests:
            # nothing has been sent for this page (no item, or all of them
            # skipped or ignored), so no callback will ever be called: we
            # have to end the page ourselves or we would wait forever
            return self.psItemsSendCb(None, metadata, show_mess=False)

    def start(self):
        """Warn about unsafe ordering if needed, then request the first page"""
        if self.args.all and self.args.order_by != C.ORDER_BY_CREATION:
            self.check_duplicates = True
            # ids of the items already handled, only used to detect duplicates
            self.items_ids = set()
            self.disp(A.color(
                A.FG_RED, A.BOLD,
                u'/!\\ "--all" should be used with "--order-by creation" /!\\\n',
                A.RESET,
                u"We'll update items, so order may change during transformation,\n"
                u"we'll try to mitigate that by stopping on first duplicate,\n"
                u"but this method is not safe, and some items may be missed.\n---\n"))
        else:
            self.check_duplicates = False
        self.host.bridge.psItemsGet(
            self.args.service,
            self.args.node,
            self.args.max,
            self.args.items,
            "",
            self.getPubsubExtra(),
            self.profile,
            callback=self.psItemsGetCb,
            errback=partial(
                self.errback,
                msg=_(u"can't retrieve items: {}"),
                exit_code=C.EXIT_BRIDGE_ERRBACK,
            ),
        )
the hook to remove, empty to remove all (default: remove all)" + u"argument of the hook to remove, empty to remove all (DEFAULT: remove all)" ), ) @@ -1697,6 +1957,7 @@ Node, Affiliations, Search, + Transform, Hook, Uri, ) diff -r 838f53730ce4 -r ff1b40823b07 sat_frontends/jp/constants.py --- a/sat_frontends/jp/constants.py Tue Jan 15 08:51:54 2019 +0100 +++ b/sat_frontends/jp/constants.py Tue Jan 15 08:51:56 2019 +0100 @@ -80,10 +80,12 @@ EXIT_BRIDGE_ERROR = 3 # can't connect to bridge EXIT_BRIDGE_ERRBACK = 4 # something went wrong when calling a bridge method EXIT_NOT_FOUND = 16 # an item required by a command was not found - EXIT_DATA_ERROR = 17 #  data needed for a command is invalid + EXIT_DATA_ERROR = 17 # data needed for a command is invalid + EXIT_MISSING_FEATURE = 18 # a needed plugin or feature is not available EXIT_USER_CANCELLED = 20 # user cancelled action EXIT_FILE_NOT_EXE = ( 126 ) # a file to be executed was found, but it was not an executable utility (cf. man 1 exit) EXIT_CMD_NOT_FOUND = 127 # a utility to be executed was not found (cf. man 1 exit) + EXIT_CMD_ERROR = 127 # a utility to be executed returned an error exit code EXIT_SIGNAL_INT = 128 # a command was interrupted by a signal (cf. man 1 exit) diff -r 838f53730ce4 -r ff1b40823b07 sat_frontends/jp/xml_tools.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sat_frontends/jp/xml_tools.py Tue Jan 15 08:51:56 2019 +0100 @@ -0,0 +1,62 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- + +# jp: a SàT command line tool +# Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
def etreeParse(cmd, raw_xml):
    """Import lxml and parse raw XML

    lxml is imported here and not at module level so that a missing module is
    reported with an explicit message instead of a traceback.
    @param cmd(CommandBase): current command instance
    @param raw_xml(file, str): an XML bytestring, string or file-like object
    @return (tuple(etree.Element, module)): parsed element, etree module
    """
    try:
        from lxml import etree
    except ImportError:
        cmd.disp(
            u'lxml module must be installed, please install it with "pip install lxml"',
            error=True,
        )
        # NOTE(review): quit is expected to end the process, "etree" is not
        #   defined below otherwise
        cmd.host.quit(C.EXIT_ERROR)
    try:
        if isinstance(raw_xml, basestring):
            # blank text is only removed when parsing from a string
            parser = etree.XMLParser(remove_blank_text=True)
            element = etree.fromstring(raw_xml, parser)
        else:
            element = etree.parse(raw_xml).getroot()
    except Exception as e:
        # any failure (I/O, syntax) is reported as an invalid input
        cmd.parser.error(
            _(u"Can't parse the payload XML in input: {msg}").format(msg=e)
        )
    return element, etree


def getPayload(cmd, element):
    """Retrieve payload element and exit with an error if not found

    If the root element is a pubsub <item/>, its only child is the payload;
    any other element is considered to be the payload itself.
    @param cmd(CommandBase): current command instance
    @param element(etree.Element): root element
    @return element(etree.Element): payload element
    """
    if element.tag not in ("item", "{http://jabber.org/protocol/pubsub}item"):
        return element
    children_nb = len(element)
    if children_nb == 1:
        return element[0]
    if children_nb > 1:
        msg = _(u"<item> can only have one child element (the payload)")
    else:
        # without this check, element[0] would raise an IndexError
        msg = _(u"<item> has no child element (no payload found)")
    cmd.disp(msg, error=True)
    cmd.host.quit(C.EXIT_DATA_ERROR)
    # only reached if quit returns: give the element back unchanged instead of
    # indexing a child which may not exist
    return element