Switch the instance_shutdown rpc to (status, data)
[ganeti-local] / lib / cmdlib.py
index a23594f..a06decb 100644 (file)
@@ -434,13 +434,25 @@ def _CheckNodeOnline(lu, node):
 
   @param lu: the LU on behalf of which we make the check
   @param node: the node to check
-  @raise errors.OpPrereqError: if the nodes is offline
+  @raise errors.OpPrereqError: if the node is offline
 
   """
   if lu.cfg.GetNodeInfo(node).offline:
     raise errors.OpPrereqError("Can't use offline node %s" % node)
 
 
+def _CheckNodeNotDrained(lu, node):
+  """Ensure that a given node is not drained.
+
+  @param lu: the LU on behalf of which we make the check
+  @param node: the node to check
+  @raise errors.OpPrereqError: if the node is drained
+
+  """
+  if lu.cfg.GetNodeInfo(node).drained:
+    raise errors.OpPrereqError("Can't use drained node %s" % node)
+
+
 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                           memory, vcpus, nics):
   """Builds instance related env variables for hooks
@@ -903,6 +915,7 @@ class LUVerifyCluster(LogicalUnit):
     i_non_redundant = [] # Non redundant instances
     i_non_a_balanced = [] # Non auto-balanced instances
     n_offline = [] # List of offline nodes
+    n_drained = [] # List of nodes being drained
     node_volume = {}
     node_instance = {}
     node_info = {}
@@ -955,6 +968,9 @@ class LUVerifyCluster(LogicalUnit):
         ntype = "master"
       elif node_i.master_candidate:
         ntype = "master candidate"
+      elif node_i.drained:
+        ntype = "drained"
+        n_drained.append(node)
       else:
         ntype = "regular"
       feedback_fn("* Verifying node %s (%s)" % (node, ntype))
@@ -1107,6 +1123,9 @@ class LUVerifyCluster(LogicalUnit):
     if n_offline:
       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
 
+    if n_drained:
+      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
+
     return not bad
 
   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
@@ -1390,8 +1409,6 @@ class LUSetClusterParams(LogicalUnit):
     if the given volume group is valid.
 
     """
-    # FIXME: This only works because there is only one parameter that can be
-    # changed or removed.
     if self.op.vg_name is not None and not self.op.vg_name:
       instances = self.cfg.GetAllInstancesInfo().values()
       for inst in instances:
@@ -1420,7 +1437,7 @@ class LUSetClusterParams(LogicalUnit):
     self.cluster = cluster = self.cfg.GetClusterInfo()
     # validate beparams changes
     if self.op.beparams:
-      utils.CheckBEParams(self.op.beparams)
+      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
       self.new_beparams = cluster.FillDict(
         cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
 
@@ -1448,6 +1465,7 @@ class LUSetClusterParams(LogicalUnit):
              hv_name in self.op.enabled_hypervisors)):
           # either this is a new hypervisor, or its parameters have changed
           hv_class = hypervisor.GetHypervisor(hv_name)
+          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
           hv_class.CheckParameterSyntax(hv_params)
           _CheckHVParams(self, node_list, hv_name, hv_params)
 
@@ -2191,7 +2209,7 @@ class LUAddNode(LogicalUnit):
 
     to_copy = []
     enabled_hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
-    if constants.HTS_USE_VNC.intersection(enabled_hypervisors):
+    if constants.HTS_COPY_VNC_PASSWORD.intersection(enabled_hypervisors):
       to_copy.append(constants.VNC_PASSWORD_FILE)
 
     for fname in to_copy:
@@ -2221,11 +2239,13 @@ class LUSetNodeParams(LogicalUnit):
     self.op.node_name = node_name
     _CheckBooleanOpField(self.op, 'master_candidate')
     _CheckBooleanOpField(self.op, 'offline')
-    if self.op.master_candidate is None and self.op.offline is None:
+    _CheckBooleanOpField(self.op, 'drained')
+    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
+    if all_mods.count(None) == 3:
       raise errors.OpPrereqError("Please pass at least one modification")
-    if self.op.offline == True and self.op.master_candidate == True:
-      raise errors.OpPrereqError("Can't set the node into offline and"
-                                 " master_candidate at the same time")
+    if all_mods.count(True) > 1:
+      raise errors.OpPrereqError("Can't set the node into more than one"
+                                 " state at the same time")
 
   def ExpandNames(self):
     self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
@@ -2240,6 +2260,7 @@ class LUSetNodeParams(LogicalUnit):
       "OP_TARGET": self.op.node_name,
       "MASTER_CANDIDATE": str(self.op.master_candidate),
       "OFFLINE": str(self.op.offline),
+      "DRAINED": str(self.op.drained),
       }
     nl = [self.cfg.GetMasterNode(),
           self.op.node_name]
@@ -2253,12 +2274,12 @@ class LUSetNodeParams(LogicalUnit):
     """
     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
 
-    if ((self.op.master_candidate == False or self.op.offline == True)
-        and node.master_candidate):
+    if ((self.op.master_candidate == False or self.op.offline == True or
+         self.op.drained == True) and node.master_candidate):
       # we will demote the node from master_candidate
       if self.op.node_name == self.cfg.GetMasterNode():
         raise errors.OpPrereqError("The master node has to be a"
-                                   " master candidate and online")
+                                   " master candidate, online and not drained")
       cp_size = self.cfg.GetClusterInfo().candidate_pool_size
       num_candidates, _ = self.cfg.GetMasterCandidateStats()
       if num_candidates <= cp_size:
@@ -2269,10 +2290,11 @@ class LUSetNodeParams(LogicalUnit):
         else:
           raise errors.OpPrereqError(msg)
 
-    if (self.op.master_candidate == True and node.offline and
-        not self.op.offline == False):
-      raise errors.OpPrereqError("Can't set an offline node to"
-                                 " master_candidate")
+    if (self.op.master_candidate == True and
+        ((node.offline and not self.op.offline == False) or
+         (node.drained and not self.op.drained == False))):
+      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
+                                 " to master_candidate")
 
     return
 
@@ -2283,16 +2305,23 @@ class LUSetNodeParams(LogicalUnit):
     node = self.node
 
     result = []
+    changed_mc = False
 
     if self.op.offline is not None:
       node.offline = self.op.offline
       result.append(("offline", str(self.op.offline)))
-      if self.op.offline == True and node.master_candidate:
-        node.master_candidate = False
-        result.append(("master_candidate", "auto-demotion due to offline"))
+      if self.op.offline == True:
+        if node.master_candidate:
+          node.master_candidate = False
+          changed_mc = True
+          result.append(("master_candidate", "auto-demotion due to offline"))
+        if node.drained:
+          node.drained = False
+          result.append(("drained", "clear drained status due to offline"))
 
     if self.op.master_candidate is not None:
       node.master_candidate = self.op.master_candidate
+      changed_mc = True
       result.append(("master_candidate", str(self.op.master_candidate)))
       if self.op.master_candidate == False:
         rrc = self.rpc.call_node_demote_from_mc(node.name)
@@ -2300,10 +2329,22 @@ class LUSetNodeParams(LogicalUnit):
         if msg:
           self.LogWarning("Node failed to demote itself: %s" % msg)
 
+    if self.op.drained is not None:
+      node.drained = self.op.drained
+      result.append(("drained", str(self.op.drained)))
+      if self.op.drained == True:
+        if node.master_candidate:
+          node.master_candidate = False
+          changed_mc = True
+          result.append(("master_candidate", "auto-demotion due to drain"))
+        if node.offline:
+          node.offline = False
+          result.append(("offline", "clear offline status due to drain"))
+
     # this will trigger configuration file update, if needed
     self.cfg.Update(node)
     # this will trigger job queue propagation or cleanup
-    if self.op.node_name != self.cfg.GetMasterNode():
+    if changed_mc:
       self.context.ReaddNode(node)
 
     return result
@@ -2751,11 +2792,15 @@ class LURebootInstance(LogicalUnit):
                        constants.INSTANCE_REBOOT_HARD]:
       result = self.rpc.call_instance_reboot(node_current, instance,
                                              reboot_type, extra_args)
-      if result.failed or not result.data:
-        raise errors.OpExecError("Could not reboot instance")
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not reboot instance: %s" % msg)
     else:
-      if not self.rpc.call_instance_shutdown(node_current, instance):
-        raise errors.OpExecError("could not shutdown instance for full reboot")
+      result = self.rpc.call_instance_shutdown(node_current, instance)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not shutdown instance for"
+                                 " full reboot: %s" % msg)
       _ShutdownInstanceDisks(self, instance)
       _StartInstanceDisks(self, instance, ignore_secondaries)
       result = self.rpc.call_instance_start(node_current, instance, extra_args)
@@ -2809,8 +2854,9 @@ class LUShutdownInstance(LogicalUnit):
     node_current = instance.primary_node
     self.cfg.MarkInstanceDown(instance.name)
     result = self.rpc.call_instance_shutdown(node_current, instance)
-    if result.failed or not result.data:
-      self.proc.LogWarning("Could not shutdown instance")
+    msg = result.RemoteFailMsg()
+    if msg:
+      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
 
     _ShutdownInstanceDisks(self, instance)
 
@@ -3060,12 +3106,14 @@ class LURemoveInstance(LogicalUnit):
                  instance.name, instance.primary_node)
 
     result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
-    if result.failed or not result.data:
+    msg = result.RemoteFailMsg()
+    if msg:
       if self.op.ignore_failures:
-        feedback_fn("Warning: can't shutdown instance")
+        feedback_fn("Warning: can't shutdown instance: %s" % msg)
       else:
-        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
-                                 (instance.name, instance.primary_node))
+        raise errors.OpExecError("Could not shutdown instance %s on"
+                                 " node %s: %s" %
+                                 (instance.name, instance.primary_node, msg))
 
     logging.info("Removing block devices for instance %s", instance.name)
 
@@ -3372,6 +3420,7 @@ class LUFailoverInstance(LogicalUnit):
 
     target_node = secondary_nodes[0]
     _CheckNodeOnline(self, target_node)
+    _CheckNodeNotDrained(self, target_node)
     # check memory requirements on the secondary node
     _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                          instance.name, bep[constants.BE_MEMORY],
@@ -3411,15 +3460,17 @@ class LUFailoverInstance(LogicalUnit):
                  instance.name, source_node)
 
     result = self.rpc.call_instance_shutdown(source_node, instance)
-    if result.failed or not result.data:
+    msg = result.RemoteFailMsg()
+    if msg:
       if self.op.ignore_consistency:
         self.proc.LogWarning("Could not shutdown instance %s on node %s."
-                             " Proceeding"
-                             " anyway. Please make sure node %s is down",
-                             instance.name, source_node, source_node)
+                             " Proceeding anyway. Please make sure node"
+                             " %s is down. Error details: %s",
+                             instance.name, source_node, source_node, msg)
       else:
-        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
-                                 (instance.name, source_node))
+        raise errors.OpExecError("Could not shutdown instance %s on"
+                                 " node %s: %s" %
+                                 (instance.name, source_node, msg))
 
     feedback_fn("* deactivating the instance's disks on source node")
     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
@@ -3500,8 +3551,8 @@ class LUMigrateInstance(LogicalUnit):
 
     secondary_nodes = instance.secondary_nodes
     if not secondary_nodes:
-      raise errors.ProgrammerError("no secondary node but using "
-                                   "drbd8 disk template")
+      raise errors.ConfigurationError("No secondary node but using"
+                                      " drbd8 disk template")
 
     i_be = self.cfg.GetClusterInfo().FillBE(instance)
 
@@ -3520,6 +3571,7 @@ class LUMigrateInstance(LogicalUnit):
                                  (brlist, target_node))
 
     if not self.op.cleanup:
+      _CheckNodeNotDrained(self, target_node)
       result = self.rpc.call_instance_migratable(instance.primary_node,
                                                  instance)
       msg = result.RemoteFailMsg()
@@ -3985,7 +4037,7 @@ def _GenerateDiskTemplate(lu, template_name,
                               iv_name="disk/%d" % disk_index,
                               logical_id=(file_driver,
                                           "%s/disk%d" % (file_storage_dir,
-                                                         idx)),
+                                                         disk_index)),
                               mode=disk["mode"])
       disks.append(disk_dev)
   else:
@@ -4184,14 +4236,14 @@ class LUCreateInstance(LogicalUnit):
                                   ",".join(enabled_hvs)))
 
     # check hypervisor parameter syntax (locally)
-
+    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
     filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
                                   self.op.hvparams)
     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
     hv_type.CheckParameterSyntax(filled_hvp)
 
     # fill and remember the beparams dict
-    utils.CheckBEParams(self.op.beparams)
+    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
     self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                     self.op.beparams)
 
@@ -4471,6 +4523,9 @@ class LUCreateInstance(LogicalUnit):
     if pnode.offline:
       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                  pnode.name)
+    if pnode.drained:
+      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
+                                 pnode.name)
 
     self.secondaries = []
 
@@ -4482,8 +4537,9 @@ class LUCreateInstance(LogicalUnit):
       if self.op.snode == pnode.name:
         raise errors.OpPrereqError("The secondary node cannot be"
                                    " the primary node.")
-      self.secondaries.append(self.op.snode)
       _CheckNodeOnline(self, self.op.snode)
+      _CheckNodeNotDrained(self, self.op.snode)
+      self.secondaries.append(self.op.snode)
 
     nodenames = [pnode.name] + self.secondaries
 
@@ -4880,6 +4936,7 @@ class LUReplaceDisks(LogicalUnit):
       n1 = self.new_node = remote_node
       n2 = self.oth_node = instance.primary_node
       self.tgt_node = self.sec_node
+      _CheckNodeNotDrained(self, remote_node)
     else:
       raise errors.ProgrammerError("Unhandled disk replace mode")
 
@@ -5560,8 +5617,6 @@ class LUSetInstanceParams(LogicalUnit):
             self.op.hvparams or self.op.beparams):
       raise errors.OpPrereqError("No changes submitted")
 
-    utils.CheckBEParams(self.op.beparams)
-
     # Disk validation
     disk_addremove = 0
     for disk_op, disk_dict in self.op.disks:
@@ -5683,11 +5738,10 @@ class LUSetInstanceParams(LogicalUnit):
             del i_hvdict[key]
           except KeyError:
             pass
-        elif val == constants.VALUE_NONE:
-          i_hvdict[key] = None
         else:
           i_hvdict[key] = val
       cluster = self.cfg.GetClusterInfo()
+      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
       hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
                                 i_hvdict)
       # local check
@@ -5711,6 +5765,7 @@ class LUSetInstanceParams(LogicalUnit):
         else:
           i_bedict[key] = val
       cluster = self.cfg.GetClusterInfo()
+      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
       be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                 i_bedict)
       self.be_new = be_new # the new actual values
@@ -6018,6 +6073,7 @@ class LUExportInstance(LogicalUnit):
       # This is wrong node name, not a non-locked node
       raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
     _CheckNodeOnline(self, self.dst_node.name)
+    _CheckNodeNotDrained(self, self.dst_node.name)
 
     # instance disk type verification
     for disk in self.instance.disks:
@@ -6035,10 +6091,11 @@ class LUExportInstance(LogicalUnit):
     if self.op.shutdown:
       # shutdown the instance, but not the disks
       result = self.rpc.call_instance_shutdown(src_node, instance)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
-                                 (instance.name, src_node))
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not shutdown instance %s on"
+                                 " node %s: %s" %
+                                 (instance.name, src_node, msg))
 
     vgname = self.cfg.GetVGName()