X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/479636a38b85de7a458c8d05f5f0d4080e1b6024..5de4474dee09d243a9c940481334d22da6f1d8d7:/scripts/gnt-node

diff --git a/scripts/gnt-node b/scripts/gnt-node
index a9fca28..a49eb3d 100755
--- a/scripts/gnt-node
+++ b/scripts/gnt-node
@@ -27,6 +27,7 @@ import sys
 from optparse import make_option
 
 from ganeti.cli import *
+from ganeti import cli
 from ganeti import opcodes
 from ganeti import utils
 from ganeti import constants
@@ -49,12 +50,12 @@ _LIST_HEADERS = {
   "dtotal": "DTotal", "dfree": "DFree",
   "mtotal": "MTotal", "mnode": "MNode", "mfree": "MFree",
   "bootid": "BootID",
-  "ctotal": "CTotal",
+  "ctotal": "CTotal", "cnodes": "CNodes", "csockets": "CSockets",
   "tags": "Tags",
   "serial_no": "SerialNo",
   "master_candidate": "MasterC",
   "master": "IsMaster",
-  "offline": "Offline",
+  "offline": "Offline", "drained": "Drained",
   }
 
 
@@ -72,31 +73,54 @@ def AddNode(opts, args):
   cl = GetClient()
   dns_data = utils.HostInfo(args[0])
   node = dns_data.name
-
-  if not opts.readd:
-    try:
-      output = cl.QueryNodes(names=[node], fields=['name'])
-    except (errors.OpPrereqError, errors.OpExecError):
-      pass
-    else:
+  readd = opts.readd
+
+  try:
+    output = cl.QueryNodes(names=[node], fields=['name', 'sip'],
+                           use_locking=True)
+    node_exists, sip = output[0]
+  except (errors.OpPrereqError, errors.OpExecError):
+    node_exists = ""
+    sip = None
+
+  if readd:
+    if not node_exists:
+      ToStderr("Node %s not in the cluster"
+               " - please retry without '--readd'", node)
+      return 1
+  else:
+    if node_exists:
       ToStderr("Node %s already in the cluster (as %s)"
-               " - please use --readd", args[0], output[0][0])
+               " - please retry with '--readd'", node, node_exists)
       return 1
+    sip = opts.secondary_ip
 
   # read the cluster name from the master
   output = cl.QueryConfigValues(['cluster_name'])
   cluster_name = output[0]
 
-  ToStderr("-- WARNING -- \n"
-           "Performing this operation is going to replace the ssh daemon"
-           " keypair\n"
-           "on the target machine (%s) with the ones of the"
-           " current one\n"
-           "and grant full intra-cluster ssh root access to/from it\n", node)
+  if readd:
+    # clear the offline and drain flags on the node
+    ToStdout("Resetting the 'offline' and 'drained' flags due to re-add")
+    op = opcodes.OpSetNodeParams(node_name=node, force=True,
+                                 offline=False, drained=False)
+
+    result = SubmitOpCode(op, cl=cl)
+    if result:
+      ToStdout("Modified:")
+      for param, data in result:
+        ToStdout(" - %-5s -> %s", param, data)
+  else:
+    ToStderr("-- WARNING -- \n"
+             "Performing this operation is going to replace the ssh daemon"
+             " keypair\n"
+             "on the target machine (%s) with the ones of the"
+             " current one\n"
+             "and grant full intra-cluster ssh root access to/from it\n", node)
 
   bootstrap.SetupNodeDaemon(cluster_name, node, opts.ssh_key_check)
 
-  op = opcodes.OpAddNode(node_name=args[0], secondary_ip=opts.secondary_ip,
+  op = opcodes.OpAddNode(node_name=args[0], secondary_ip=sip,
                          readd=opts.readd)
   SubmitOpCode(op)
 
@@ -118,7 +142,7 @@ def ListNodes(opts, args):
   else:
     selected_fields = opts.output.split(",")
 
-  output = GetClient().QueryNodes([], selected_fields)
+  output = GetClient().QueryNodes([], selected_fields, opts.do_locking)
 
   if not opts.no_headers:
     headers = _LIST_HEADERS
@@ -139,7 +163,7 @@ def ListNodes(opts, args):
       val = row[idx]
       if field in list_type_fields:
         val = ",".join(val)
-      elif field in ('master', 'master_candidate', 'offline'):
+      elif field in ('master', 'master_candidate', 'offline', 'drained'):
         if val:
           val = 'Y'
         else:
@@ -169,52 +193,57 @@ def EvacuateNode(opts, args):
   """
   cl = GetClient()
   force = opts.force
+
+  dst_node = opts.dst_node
+  iallocator = opts.iallocator
+
+  cnt = [dst_node, iallocator].count(None)
+  if cnt != 1:
+    raise errors.OpPrereqError("One and only one of the -n and -i"
+                               " options must be passed")
+
   selected_fields = ["name", "sinst_list"]
-  src_node, dst_node = args
+  src_node = args[0]
 
-  op = opcodes.OpQueryNodes(output_fields=selected_fields, names=[src_node])
-  result = SubmitOpCode(op, cl=cl)
+  result = cl.QueryNodes(names=[src_node], fields=selected_fields,
+                         use_locking=True)
   src_node, sinst = result[0]
-  op = opcodes.OpQueryNodes(output_fields=["name"], names=[dst_node])
-  result = SubmitOpCode(op, cl=cl)
-  dst_node = result[0][0]
-
-  if src_node == dst_node:
-    raise errors.OpPrereqError("Evacuate node needs different source and"
-                               " target nodes (node %s given twice)" %
-                               src_node)
 
   if not sinst:
     ToStderr("No secondary instances on node %s, exiting.", src_node)
     return constants.EXIT_SUCCESS
 
+  if dst_node is not None:
+    result = cl.QueryNodes(names=[dst_node], fields=["name"], use_locking=True)
+    dst_node = result[0][0]
+
+    if src_node == dst_node:
+      raise errors.OpPrereqError("Evacuate node needs different source and"
+                                 " target nodes (node %s given twice)" %
+                                 src_node)
+    txt_msg = "to node %s" % dst_node
+  else:
+    txt_msg = "using iallocator %s" % iallocator
+
   sinst = utils.NiceSort(sinst)
 
   if not force and not AskUser("Relocate instance(s) %s from node\n"
-                               " %s to node\n %s?" %
+                               " %s %s?" %
                                (",".join("'%s'" % name for name in sinst),
-                                src_node, dst_node)):
+                                src_node, txt_msg)):
     return constants.EXIT_CONFIRMATION
 
-  jex = JobExecutor()
+  ops = []
   for iname in sinst:
     op = opcodes.OpReplaceDisks(instance_name=iname,
                                 remote_node=dst_node,
                                 mode=constants.REPLACE_DISK_CHG,
+                                iallocator=iallocator,
                                 disks=[])
-    jex.QueueJob(iname, op)
-
-  results = jex.GetResults()
+    ops.append(op)
 
-  bad_cnt = len([row for row in results if not row[0]])
-  if bad_cnt == 0:
-    ToStdout("All %d instance(s) relocated successfully.", len(results))
-    retcode = constants.EXIT_SUCCESS
-  else:
-    ToStdout("There were errors during the relocation:\n"
-             "%d error(s) out of %d instance(s).", bad_cnt, len(results))
-    retcode = constants.EXIT_FAILURE
-  return retcode
+  job_id = cli.SendJob(ops, cl=cl)
+  cli.PollJob(job_id, cl=cl)
 
 
 def FailoverNode(opts, args):
@@ -231,8 +260,10 @@ def FailoverNode(opts, args):
   force = opts.force
   selected_fields = ["name", "pinst_list"]
 
-  op = opcodes.OpQueryNodes(output_fields=selected_fields, names=args)
-  result = SubmitOpCode(op, cl=cl)
+  # these fields are static data anyway, so it doesn't matter, but
+  # locking=True should be safer
+  result = cl.QueryNodes(names=args, fields=selected_fields,
+                         use_locking=True)
   node, pinst = result[0]
 
   if not pinst:
@@ -262,6 +293,45 @@ def FailoverNode(opts, args):
   return retcode
 
 
+def MigrateNode(opts, args):
+  """Migrate all primary instance on a node.
+
+  """
+  cl = GetClient()
+  force = opts.force
+  selected_fields = ["name", "pinst_list"]
+
+  result = cl.QueryNodes(names=args, fields=selected_fields, use_locking=True)
+  node, pinst = result[0]
+
+  if not pinst:
+    ToStdout("No primary instances on node %s, exiting." % node)
+    return 0
+
+  pinst = utils.NiceSort(pinst)
+
+  retcode = 0
+
+  if not force and not AskUser("Migrate instance(s) %s?" %
+                               (",".join("'%s'" % name for name in pinst))):
+    return 2
+
+  jex = JobExecutor(cl=cl)
+  for iname in pinst:
+    op = opcodes.OpMigrateInstance(instance_name=iname, live=opts.live,
+                                   cleanup=False)
+    jex.QueueJob(iname, op)
+
+  results = jex.GetResults()
+  bad_cnt = len([row for row in results if not row[0]])
+  if bad_cnt == 0:
+    ToStdout("All %d instance(s) migrated successfully.", len(results))
+  else:
+    ToStdout("There were errors during the migration:\n"
+             "%d error(s) out of %d instance(s).", bad_cnt, len(results))
+  return retcode
+
+
 def ShowNodeConfig(opts, args):
   """Show node information.
 
@@ -274,24 +344,29 @@ def ShowNodeConfig(opts, args):
   @return: the desired exit code
 
   """
-  op = opcodes.OpQueryNodes(output_fields=["name", "pip", "sip",
-                                           "pinst_list", "sinst_list"],
-                            names=args)
-  result = SubmitOpCode(op)
+  cl = GetClient()
+  result = cl.QueryNodes(fields=["name", "pip", "sip",
+                                 "pinst_list", "sinst_list",
+                                 "master_candidate", "drained", "offline"],
+                         names=args, use_locking=True)
 
-  for name, primary_ip, secondary_ip, pinst, sinst in result:
+  for (name, primary_ip, secondary_ip, pinst, sinst,
+       is_mc, drained, offline) in result:
     ToStdout("Node name: %s", name)
     ToStdout("  primary ip: %s", primary_ip)
     ToStdout("  secondary ip: %s", secondary_ip)
+    ToStdout("  master candidate: %s", is_mc)
+    ToStdout("  drained: %s", drained)
+    ToStdout("  offline: %s", offline)
     if pinst:
       ToStdout("  primary for instances:")
-      for iname in pinst:
+      for iname in utils.NiceSort(pinst):
         ToStdout("    - %s", iname)
     else:
       ToStdout("  primary for no instances")
     if sinst:
       ToStdout("  secondary for instances:")
-      for iname in sinst:
+      for iname in utils.NiceSort(sinst):
         ToStdout("    - %s", iname)
     else:
       ToStdout("  secondary for no instances")
@@ -367,7 +442,7 @@ def SetNodeParams(opts, args):
   @return: the desired exit code
 
   """
-  if opts.master_candidate is None and opts.offline is None:
+  if [opts.master_candidate, opts.drained, opts.offline].count(None) == 3:
     ToStderr("Please give at least one of the parameters.")
     return 1
 
@@ -379,9 +454,15 @@ def SetNodeParams(opts, args):
     offline = opts.offline == 'yes'
   else:
     offline = None
+
+  if opts.drained is not None:
+    drained = opts.drained == 'yes'
+  else:
+    drained = None
   op = opcodes.OpSetNodeParams(node_name=args[0],
                                master_candidate=candidate,
                                offline=offline,
+                               drained=drained,
                                force=opts.force)
 
   # even if here we process the result, we allow submit only
@@ -409,12 +490,20 @@ commands = {
            ],
          "[-s ip] [--readd] [--no-ssh-key-check] ",
          "Add a node to the cluster"),
-  'evacuate': (EvacuateNode, ARGS_FIXED(2),
-               [DEBUG_OPT, FORCE_OPT],
-               "[-f] ",
-               "Relocate the secondary instances from the first node"
-               " to the second one (only for instances with drbd disk template"
-               ),
+  'evacuate': (EvacuateNode, ARGS_ONE,
+               [DEBUG_OPT, FORCE_OPT,
+                make_option("-n", "--new-secondary", dest="dst_node",
+                            help="New secondary node", metavar="NODE",
+                            default=None),
+                make_option("-i", "--iallocator", metavar="",
+                            help="Select new secondary for the instance"
+                            " automatically using the"
+                            " iallocator plugin",
+                            default=None, type="string"),
+                ],
+               "[-f] {-i | -n } ",
+               "Relocate the secondary instances from a node"
+               " to other nodes (only for instances with drbd disk template)"),
   'failover': (FailoverNode, ARGS_ONE,
                [DEBUG_OPT, FORCE_OPT,
                 make_option("--ignore-consistency", dest="ignore_consistency",
@@ -425,11 +514,23 @@ commands = {
                "[-f] ",
                "Stops the primary instances on a node and start them on their"
                " secondary node (only for instances with drbd disk template)"),
+  'migrate': (MigrateNode, ARGS_ONE,
+              [DEBUG_OPT, FORCE_OPT,
+               make_option("--non-live", dest="live",
+                           default=True, action="store_false",
+                           help="Do a non-live migration (this usually means"
+                           " freeze the instance, save the state,"
+                           " transfer and only then resume running on the"
+                           " secondary node)"),
+               ],
+              "[-f] ",
+              "Migrate all the primary instance on a node away from it"
+              " (only for instances of type drbd)"),
   'info': (ShowNodeConfig, ARGS_ANY, [DEBUG_OPT],
            "[...]", "Show information about the node(s)"),
   'list': (ListNodes, ARGS_NONE,
            [DEBUG_OPT, NOHDR_OPT, SEP_OPT, USEUNITS_OPT, FIELDS_OPT,
-            SUBMIT_OPT],
+            SUBMIT_OPT, SYNC_OPT],
            "", "Lists the nodes in the cluster. The available fields"
            " are (see the man page for details): %s"
            " The default field list is (in order): %s." %
@@ -443,6 +544,9 @@ commands = {
              make_option("-O", "--offline", dest="offline",
                          choices=('yes', 'no'), default=None,
                          help="Set the offline flag on the node"),
+             make_option("-D", "--drained", dest="drained",
+                         choices=('yes', 'no'), default=None,
+                         help="Set the drained flag on the node"),
              ],
             "", "Alters the parameters of an instance"),
   'remove': (RemoveNode, ARGS_ONE, [DEBUG_OPT],