master daemon: allow skipping the voting process
diff --git a/scripts/gnt-node b/scripts/gnt-node
index a9fca28..a49eb3d 100755
--- a/scripts/gnt-node
+++ b/scripts/gnt-node
@@ -27,6 +27,7 @@ import sys
 from optparse import make_option
 
 from ganeti.cli import *
+from ganeti import cli
 from ganeti import opcodes
 from ganeti import utils
 from ganeti import constants
@@ -49,12 +50,12 @@ _LIST_HEADERS = {
   "dtotal": "DTotal", "dfree": "DFree",
   "mtotal": "MTotal", "mnode": "MNode", "mfree": "MFree",
   "bootid": "BootID",
-  "ctotal": "CTotal",
+  "ctotal": "CTotal", "cnodes": "CNodes", "csockets": "CSockets",
   "tags": "Tags",
   "serial_no": "SerialNo",
   "master_candidate": "MasterC",
   "master": "IsMaster",
-  "offline": "Offline",
+  "offline": "Offline", "drained": "Drained",
   }
 
 
@@ -72,31 +73,54 @@ def AddNode(opts, args):
   cl = GetClient()
   dns_data = utils.HostInfo(args[0])
   node = dns_data.name
-
-  if not opts.readd:
-    try:
-      output = cl.QueryNodes(names=[node], fields=['name'])
-    except (errors.OpPrereqError, errors.OpExecError):
-      pass
-    else:
+  readd = opts.readd
+
+  try:
+    output = cl.QueryNodes(names=[node], fields=['name', 'sip'],
+                           use_locking=True)
+    node_exists, sip = output[0]
+  except (errors.OpPrereqError, errors.OpExecError):
+    node_exists = ""
+    sip = None
+
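+  # sanity check: a re-add requires the node to be already known to the
+  # cluster, while a fresh add requires that it is not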
+  if readd:
+    if not node_exists:
+      ToStderr("Node %s not in the cluster"
+               " - please retry without '--readd'", node)
+      return 1
+  else:
+    if node_exists:
       ToStderr("Node %s already in the cluster (as %s)"
-               " - please use --readd", args[0], output[0][0])
+               " - please retry with '--readd'", node, node_exists)
       return 1
+    sip = opts.secondary_ip
 
   # read the cluster name from the master
   output = cl.QueryConfigValues(['cluster_name'])
   cluster_name = output[0]
 
-  ToStderr("-- WARNING -- \n"
-           "Performing this operation is going to replace the ssh daemon"
-           " keypair\n"
-           "on the target machine (%s) with the ones of the"
-           " current one\n"
-           "and grant full intra-cluster ssh root access to/from it\n", node)
+  if readd:
+    # clear the offline and drain flags on the node
+    ToStdout("Resetting the 'offline' and 'drained' flags due to re-add")
+    op = opcodes.OpSetNodeParams(node_name=node, force=True,
+                                 offline=False, drained=False)
+
+    result = SubmitOpCode(op, cl=cl)
+    if result:
+      ToStdout("Modified:")
+      for param, data in result:
+        ToStdout(" - %-5s -> %s", param, data)
+  else:
+    ToStderr("-- WARNING -- \n"
+             "Performing this operation is going to replace the ssh daemon"
+             " keypair\n"
+             "on the target machine (%s) with the ones of the"
+             " current one\n"
+             "and grant full intra-cluster ssh root access to/from it\n", node)
 
   bootstrap.SetupNodeDaemon(cluster_name, node, opts.ssh_key_check)
 
-  op = opcodes.OpAddNode(node_name=args[0], secondary_ip=opts.secondary_ip,
+  op = opcodes.OpAddNode(node_name=args[0], secondary_ip=sip,
                          readd=opts.readd)
   SubmitOpCode(op)
 
@@ -118,7 +142,7 @@ def ListNodes(opts, args):
   else:
     selected_fields = opts.output.split(",")
 
-  output = GetClient().QueryNodes([], selected_fields)
+  output = GetClient().QueryNodes([], selected_fields, opts.do_locking)
 
   if not opts.no_headers:
     headers = _LIST_HEADERS
@@ -139,7 +163,7 @@ def ListNodes(opts, args):
       val = row[idx]
       if field in list_type_fields:
         val = ",".join(val)
-      elif field in ('master', 'master_candidate', 'offline'):
+      elif field in ('master', 'master_candidate', 'offline', 'drained'):
         if val:
           val = 'Y'
         else:
@@ -169,52 +193,57 @@ def EvacuateNode(opts, args):
   """
   cl = GetClient()
   force = opts.force
+
+  dst_node = opts.dst_node
+  iallocator = opts.iallocator
+
+  cnt = [dst_node, iallocator].count(None)
+  if cnt != 1:
+    raise errors.OpPrereqError("One and only one of the -n and -i"
+                               " options must be passed")
+
   selected_fields = ["name", "sinst_list"]
-  src_node, dst_node = args
+  src_node = args[0]
 
-  op = opcodes.OpQueryNodes(output_fields=selected_fields, names=[src_node])
-  result = SubmitOpCode(op, cl=cl)
+  result = cl.QueryNodes(names=[src_node], fields=selected_fields,
+                         use_locking=True)
   src_node, sinst = result[0]
-  op = opcodes.OpQueryNodes(output_fields=["name"], names=[dst_node])
-  result = SubmitOpCode(op, cl=cl)
-  dst_node = result[0][0]
-
-  if src_node == dst_node:
-    raise errors.OpPrereqError("Evacuate node needs different source and"
-                               " target nodes (node %s given twice)" %
-                               src_node)
 
   if not sinst:
     ToStderr("No secondary instances on node %s, exiting.", src_node)
     return constants.EXIT_SUCCESS
 
+  if dst_node is not None:
+    result = cl.QueryNodes(names=[dst_node], fields=["name"], use_locking=True)
+    dst_node = result[0][0]
+
+    if src_node == dst_node:
+      raise errors.OpPrereqError("Evacuate node needs different source and"
+                                 " target nodes (node %s given twice)" %
+                                 src_node)
+    txt_msg = "to node %s" % dst_node
+  else:
+    txt_msg = "using iallocator %s" % iallocator
+
   sinst = utils.NiceSort(sinst)
 
   if not force and not AskUser("Relocate instance(s) %s from node\n"
-                               " %s to node\n %s?" %
+                               " %s %s?" %
                                (",".join("'%s'" % name for name in sinst),
-                               src_node, dst_node)):
+                               src_node, txt_msg)):
     return constants.EXIT_CONFIRMATION
 
-  jex = JobExecutor()
+  ops = []
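+  # build one replace-disks (change secondary node) opcode per secondary
+  # instance; they are submitted below as a single job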
   for iname in sinst:
     op = opcodes.OpReplaceDisks(instance_name=iname,
                                 remote_node=dst_node,
                                 mode=constants.REPLACE_DISK_CHG,
+                                iallocator=iallocator,
                                 disks=[])
-    jex.QueueJob(iname, op)
-
-  results = jex.GetResults()
+    ops.append(op)
 
-  bad_cnt = len([row for row in results if not row[0]])
-  if bad_cnt == 0:
-    ToStdout("All %d instance(s) relocated successfully.", len(results))
-    retcode = constants.EXIT_SUCCESS
-  else:
-    ToStdout("There were errors during the relocation:\n"
-             "%d error(s) out of %d instance(s).", bad_cnt, len(results))
-    retcode = constants.EXIT_FAILURE
-  return retcode
+  job_id = cli.SendJob(ops, cl=cl)
+  cli.PollJob(job_id, cl=cl)
 
 
 def FailoverNode(opts, args):
@@ -231,8 +260,10 @@ def FailoverNode(opts, args):
   force = opts.force
   selected_fields = ["name", "pinst_list"]
 
-  op = opcodes.OpQueryNodes(output_fields=selected_fields, names=args)
-  result = SubmitOpCode(op, cl=cl)
+  # these fields are static data anyway, so it doesn't matter, but
+  # locking=True should be safer
+  result = cl.QueryNodes(names=args, fields=selected_fields,
+                         use_locking=True)
   node, pinst = result[0]
 
   if not pinst:
@@ -262,6 +293,45 @@ def FailoverNode(opts, args):
   return retcode
 
 
+def MigrateNode(opts, args):
+  """Migrate all primary instance on a node.
+
+  """
+  cl = GetClient()
+  force = opts.force
+  selected_fields = ["name", "pinst_list"]
+
+  result = cl.QueryNodes(names=args, fields=selected_fields, use_locking=True)
+  node, pinst = result[0]
+
+  if not pinst:
+    ToStdout("No primary instances on node %s, exiting." % node)
+    return 0
+
+  pinst = utils.NiceSort(pinst)
+
+  retcode = 0
+
+  if not force and not AskUser("Migrate instance(s) %s?" %
+                               (",".join("'%s'" % name for name in pinst))):
+    return 2
+
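+  # submit a separate migration job for each primary instance and wait
+  # for all of them to finish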
+  jex = JobExecutor(cl=cl)
+  for iname in pinst:
+    op = opcodes.OpMigrateInstance(instance_name=iname, live=opts.live,
+                                   cleanup=False)
+    jex.QueueJob(iname, op)
+
+  results = jex.GetResults()
+  bad_cnt = len([row for row in results if not row[0]])
+  if bad_cnt == 0:
+    ToStdout("All %d instance(s) migrated successfully.", len(results))
+  else:
+    ToStdout("There were errors during the migration:\n"
+             "%d error(s) out of %d instance(s).", bad_cnt, len(results))
+    retcode = constants.EXIT_FAILURE
+  return retcode
+
+
 def ShowNodeConfig(opts, args):
   """Show node information.
 
@@ -274,24 +344,29 @@ def ShowNodeConfig(opts, args):
   @return: the desired exit code
 
   """
-  op = opcodes.OpQueryNodes(output_fields=["name", "pip", "sip",
-                                           "pinst_list", "sinst_list"],
-                            names=args)
-  result = SubmitOpCode(op)
+  cl = GetClient()
+  result = cl.QueryNodes(fields=["name", "pip", "sip",
+                                 "pinst_list", "sinst_list",
+                                 "master_candidate", "drained", "offline"],
+                         names=args, use_locking=True)
 
-  for name, primary_ip, secondary_ip, pinst, sinst in result:
+  for (name, primary_ip, secondary_ip, pinst, sinst,
+       is_mc, drained, offline) in result:
     ToStdout("Node name: %s", name)
     ToStdout("  primary ip: %s", primary_ip)
     ToStdout("  secondary ip: %s", secondary_ip)
+    ToStdout("  master candidate: %s", is_mc)
+    ToStdout("  drained: %s", drained)
+    ToStdout("  offline: %s", offline)
     if pinst:
       ToStdout("  primary for instances:")
-      for iname in pinst:
+      for iname in utils.NiceSort(pinst):
         ToStdout("    - %s", iname)
     else:
       ToStdout("  primary for no instances")
     if sinst:
       ToStdout("  secondary for instances:")
-      for iname in sinst:
+      for iname in utils.NiceSort(sinst):
         ToStdout("    - %s", iname)
     else:
       ToStdout("  secondary for no instances")
@@ -367,7 +442,7 @@ def SetNodeParams(opts, args):
   @return: the desired exit code
 
   """
-  if opts.master_candidate is None and opts.offline is None:
+  if [opts.master_candidate, opts.drained, opts.offline].count(None) == 3:
     ToStderr("Please give at least one of the parameters.")
     return 1
 
@@ -379,9 +454,15 @@ def SetNodeParams(opts, args):
     offline = opts.offline == 'yes'
   else:
     offline = None
+
+  if opts.drained is not None:
+    drained = opts.drained == 'yes'
+  else:
+    drained = None
   op = opcodes.OpSetNodeParams(node_name=args[0],
                                master_candidate=candidate,
                                offline=offline,
+                               drained=drained,
                                force=opts.force)
 
   # even if here we process the result, we allow submit only
@@ -409,12 +490,20 @@ commands = {
            ],
           "[-s ip] [--readd] [--no-ssh-key-check] <node_name>",
           "Add a node to the cluster"),
-  'evacuate': (EvacuateNode, ARGS_FIXED(2),
-               [DEBUG_OPT, FORCE_OPT],
-               "[-f] <src> <dst>",
-               "Relocate the secondary instances from the first node"
-               " to the second one (only for instances with drbd disk template"
-               ),
+  'evacuate': (EvacuateNode, ARGS_ONE,
+               [DEBUG_OPT, FORCE_OPT,
+                make_option("-n", "--new-secondary", dest="dst_node",
+                            help="New secondary node", metavar="NODE",
+                            default=None),
+                make_option("-i", "--iallocator", metavar="<NAME>",
+                            help="Select new secondary for the instance"
+                            " automatically using the"
+                            " <NAME> iallocator plugin",
+                            default=None, type="string"),
+                ],
+               "[-f] {-i <iallocator> | -n <dst>} <node>",
+               "Relocate the secondary instances from a node"
+               " to other nodes (only for instances with drbd disk template)"),
   'failover': (FailoverNode, ARGS_ONE,
                [DEBUG_OPT, FORCE_OPT,
                 make_option("--ignore-consistency", dest="ignore_consistency",
@@ -425,11 +514,23 @@ commands = {
                "[-f] <node>",
                "Stops the primary instances on a node and start them on their"
                " secondary node (only for instances with drbd disk template)"),
+  'migrate': (MigrateNode, ARGS_ONE,
+               [DEBUG_OPT, FORCE_OPT,
+                make_option("--non-live", dest="live",
+                            default=True, action="store_false",
+                            help="Do a non-live migration (this usually means"
+                            " freeze the instance, save the state,"
+                            " transfer and only then resume running on the"
+                            " secondary node)"),
+                ],
+               "[-f] <node>",
+               "Migrate all the primary instances on a node away from it"
+               " (only for instances of type drbd)"),
   'info': (ShowNodeConfig, ARGS_ANY, [DEBUG_OPT],
            "[<node_name>...]", "Show information about the node(s)"),
   'list': (ListNodes, ARGS_NONE,
            [DEBUG_OPT, NOHDR_OPT, SEP_OPT, USEUNITS_OPT, FIELDS_OPT,
-            SUBMIT_OPT],
+            SUBMIT_OPT, SYNC_OPT],
            "", "Lists the nodes in the cluster. The available fields"
            " are (see the man page for details): %s"
            " The default field list is (in order): %s." %
@@ -443,6 +544,9 @@ commands = {
               make_option("-O", "--offline", dest="offline",
                           choices=('yes', 'no'), default=None,
                           help="Set the offline flag on the node"),
+              make_option("-D", "--drained", dest="drained",
+                          choices=('yes', 'no'), default=None,
+                          help="Set the drained flag on the node"),
               ],
              "<instance>", "Alters the parameters of an instance"),
   'remove': (RemoveNode, ARGS_ONE, [DEBUG_OPT],