confd/client: make it possible to update peer list

[ganeti-local] / lib / cmdlib.py
diff --git a/lib/cmdlib.py b/lib/cmdlib.py

index 7d44580..41e2408 100644 (file)
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -797,12 +797,21 @@ class LUPostInitCluster(LogicalUnit):
      return True
  
  
      return True
  
  
-class LUDestroyCluster(NoHooksLU):
+class LUDestroyCluster(LogicalUnit):
    """Logical unit for destroying the cluster.
  
    """
    """Logical unit for destroying the cluster.
  
    """
+  HPATH = "cluster-destroy"
+  HTYPE = constants.HTYPE_CLUSTER
    _OP_REQP = []
  
    _OP_REQP = []
  
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    """
+    env = {"OP_TARGET": self.cfg.GetClusterName()}
+    return env, [], []
+
    def CheckPrereq(self):
      """Check prerequisites.
  
    def CheckPrereq(self):
      """Check prerequisites.
  
@@ -827,6 +836,14 @@ class LUDestroyCluster(NoHooksLU):
  
      """
      master = self.cfg.GetMasterNode()
  
      """
      master = self.cfg.GetMasterNode()
+
+    # Run post hooks on master node before it's removed
+    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
+    try:
+      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
+    except:
+      self.LogWarning("Errors occurred running hooks on %s" % master)
+
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master role")
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master role")
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
@@ -841,9 +858,37 @@ class LUVerifyCluster(LogicalUnit):
    """
    HPATH = "cluster-verify"
    HTYPE = constants.HTYPE_CLUSTER
    """
    HPATH = "cluster-verify"
    HTYPE = constants.HTYPE_CLUSTER
-  _OP_REQP = ["skip_checks"]
+  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
    REQ_BGL = False
  
    REQ_BGL = False
  
+  TCLUSTER = "cluster"
+  TNODE = "node"
+  TINSTANCE = "instance"
+
+  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
+  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
+  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
+  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
+  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
+  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
+  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
+  ENODEDRBD = (TNODE, "ENODEDRBD")
+  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
+  ENODEHOOKS = (TNODE, "ENODEHOOKS")
+  ENODEHV = (TNODE, "ENODEHV")
+  ENODELVM = (TNODE, "ENODELVM")
+  ENODEN1 = (TNODE, "ENODEN1")
+  ENODENET = (TNODE, "ENODENET")
+  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
+  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
+  ENODERPC = (TNODE, "ENODERPC")
+  ENODESSH = (TNODE, "ENODESSH")
+  ENODEVERSION = (TNODE, "ENODEVERSION")
+
+  ETYPE_FIELD = "code"
+  ETYPE_ERROR = "ERROR"
+  ETYPE_WARNING = "WARNING"
+
    def ExpandNames(self):
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
    def ExpandNames(self):
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
@@ -851,9 +896,45 @@ class LUVerifyCluster(LogicalUnit):
      }
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
  
      }
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
  
+  def _Error(self, ecode, item, msg, *args, **kwargs):
+    """Format an error message.
+
+    Based on the opcode's error_codes parameter, either format a
+    parseable error code, or a simpler error string.
+
+    This must be called only from Exec and functions called from Exec.
+
+    """
+    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
+    itype, etxt = ecode
+    # first complete the msg
+    if args:
+      msg = msg % args
+    # then format the whole message
+    if self.op.error_codes:
+      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
+    else:
+      if item:
+        item = " " + item
+      else:
+        item = ""
+      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
+    # and finally report it via the feedback_fn
+    self._feedback_fn("  - %s" % msg)
+
+  def _ErrorIf(self, cond, *args, **kwargs):
+    """Log an error message if the passed condition is True.
+
+    """
+    cond = bool(cond) or self.op.debug_simulate_errors
+    if cond:
+      self._Error(*args, **kwargs)
+    # do not mark the operation as failed for WARN cases only
+    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
+      self.bad = self.bad or cond
+
    def _VerifyNode(self, nodeinfo, file_list, local_cksum,
    def _VerifyNode(self, nodeinfo, file_list, local_cksum,
-                  node_result, feedback_fn, master_files,
-                  drbd_map, vg_name):
+                  node_result, master_files, drbd_map, vg_name):
      """Run multiple tests against a node.
  
      Test list:
      """Run multiple tests against a node.
  
      Test list:
@@ -868,7 +949,6 @@ class LUVerifyCluster(LogicalUnit):
      @param file_list: required list of files
      @param local_cksum: dictionary of local files and their checksums
      @param node_result: the results from the node
      @param file_list: required list of files
      @param local_cksum: dictionary of local files and their checksums
      @param node_result: the results from the node
-    @param feedback_fn: function used to accumulate results
      @param master_files: list of files that only masters should have
      @param drbd_map: the used drbd minors for this node, in
          form of minor: (instance, must_exist) which correspond to instances
      @param master_files: list of files that only masters should have
      @param drbd_map: the used drbd minors for this node, in
          form of minor: (instance, must_exist) which correspond to instances
@@ -877,138 +957,136 @@ class LUVerifyCluster(LogicalUnit):
  
      """
      node = nodeinfo.name
  
      """
      node = nodeinfo.name
+    _ErrorIf = self._ErrorIf
  
      # main result, node_result should be a non-empty dict
  
      # main result, node_result should be a non-empty dict
-    if not node_result or not isinstance(node_result, dict):
-      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
-      return True
+    test = not node_result or not isinstance(node_result, dict)
+    _ErrorIf(test, self.ENODERPC, node,
+                  "unable to verify node: no data returned")
+    if test:
+      return
  
      # compares ganeti version
      local_version = constants.PROTOCOL_VERSION
      remote_version = node_result.get('version', None)
  
      # compares ganeti version
      local_version = constants.PROTOCOL_VERSION
      remote_version = node_result.get('version', None)
-    if not (remote_version and isinstance(remote_version, (list, tuple)) and
-            len(remote_version) == 2):
-      feedback_fn("  - ERROR: connection to %s failed" % (node))
-      return True
+    test = not (remote_version and
+                isinstance(remote_version, (list, tuple)) and
+                len(remote_version) == 2)
+    _ErrorIf(test, self.ENODERPC, node,
+             "connection to node returned invalid data")
+    if test:
+      return
  
  
-    if local_version != remote_version[0]:
-      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
-                  " node %s %s" % (local_version, node, remote_version[0]))
-      return True
+    test = local_version != remote_version[0]
+    _ErrorIf(test, self.ENODEVERSION, node,
+             "incompatible protocol versions: master %s,"
+             " node %s", local_version, remote_version[0])
+    if test:
+      return
  
      # node seems compatible, we can actually try to look into its results
  
  
      # node seems compatible, we can actually try to look into its results
  
-    bad = False
-
      # full package version
      # full package version
-    if constants.RELEASE_VERSION != remote_version[1]:
-      feedback_fn("  - WARNING: software version mismatch: master %s,"
-                  " node %s %s" %
-                  (constants.RELEASE_VERSION, node, remote_version[1]))
+    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
+                  self.ENODEVERSION, node,
+                  "software version mismatch: master %s, node %s",
+                  constants.RELEASE_VERSION, remote_version[1],
+                  code=self.ETYPE_WARNING)
  
      # checks vg existence and size > 20G
      if vg_name is not None:
        vglist = node_result.get(constants.NV_VGLIST, None)
  
      # checks vg existence and size > 20G
      if vg_name is not None:
        vglist = node_result.get(constants.NV_VGLIST, None)
-      if not vglist:
-        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
-                        (node,))
-        bad = True
-      else:
+      test = not vglist
+      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
+      if not test:
          vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                                constants.MIN_VG_SIZE)
          vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                                constants.MIN_VG_SIZE)
-        if vgstatus:
-          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
-          bad = True
+        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
  
      # checks config file checksum
  
      remote_cksum = node_result.get(constants.NV_FILELIST, None)
  
      # checks config file checksum
  
      remote_cksum = node_result.get(constants.NV_FILELIST, None)
-    if not isinstance(remote_cksum, dict):
-      bad = True
-      feedback_fn("  - ERROR: node hasn't returned file checksum data")
-    else:
+    test = not isinstance(remote_cksum, dict)
+    _ErrorIf(test, self.ENODEFILECHECK, node,
+             "node hasn't returned file checksum data")
+    if not test:
        for file_name in file_list:
          node_is_mc = nodeinfo.master_candidate
        for file_name in file_list:
          node_is_mc = nodeinfo.master_candidate
-        must_have_file = file_name not in master_files
-        if file_name not in remote_cksum:
-          if node_is_mc or must_have_file:
-            bad = True
-            feedback_fn("  - ERROR: file '%s' missing" % file_name)
-        elif remote_cksum[file_name] != local_cksum[file_name]:
-          if node_is_mc or must_have_file:
-            bad = True
-            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
-          else:
-            # not candidate and this is not a must-have file
-            bad = True
-            feedback_fn("  - ERROR: file '%s' should not exist on non master"
-                        " candidates (and the file is outdated)" % file_name)
-        else:
-          # all good, except non-master/non-must have combination
-          if not node_is_mc and not must_have_file:
-            feedback_fn("  - ERROR: file '%s' should not exist on non master"
-                        " candidates" % file_name)
+        must_have = (file_name not in master_files) or node_is_mc
+        # missing
+        test1 = file_name not in remote_cksum
+        # invalid checksum
+        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
+        # existing and good
+        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
+        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
+                 "file '%s' missing", file_name)
+        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
+                 "file '%s' has wrong checksum", file_name)
+        # not candidate and this is not a must-have file
+        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
+                 "file '%s' should not exist on non master"
+                 " candidates (and the file is outdated)", file_name)
+        # all good, except non-master/non-must have combination
+        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
+                 "file '%s' should not exist"
+                 " on non master candidates", file_name)
  
      # checks ssh to any
  
  
      # checks ssh to any
  
-    if constants.NV_NODELIST not in node_result:
-      bad = True
-      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
-    else:
+    test = constants.NV_NODELIST not in node_result
+    _ErrorIf(test, self.ENODESSH, node,
+             "node hasn't returned node ssh connectivity data")
+    if not test:
        if node_result[constants.NV_NODELIST]:
        if node_result[constants.NV_NODELIST]:
-        bad = True
-        for node in node_result[constants.NV_NODELIST]:
-          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
-                          (node, node_result[constants.NV_NODELIST][node]))
-
-    if constants.NV_NODENETTEST not in node_result:
-      bad = True
-      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
-    else:
+        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
+          _ErrorIf(True, self.ENODESSH, node,
+                   "ssh communication with node '%s': %s", a_node, a_msg)
+
+    test = constants.NV_NODENETTEST not in node_result
+    _ErrorIf(test, self.ENODENET, node,
+             "node hasn't returned node tcp connectivity data")
+    if not test:
        if node_result[constants.NV_NODENETTEST]:
        if node_result[constants.NV_NODENETTEST]:
-        bad = True
          nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
          nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
-        for node in nlist:
-          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
-                          (node, node_result[constants.NV_NODENETTEST][node]))
+        for anode in nlist:
+          _ErrorIf(True, self.ENODENET, node,
+                   "tcp communication with node '%s': %s",
+                   anode, node_result[constants.NV_NODENETTEST][anode])
  
      hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
      if isinstance(hyp_result, dict):
        for hv_name, hv_result in hyp_result.iteritems():
  
      hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
      if isinstance(hyp_result, dict):
        for hv_name, hv_result in hyp_result.iteritems():
-        if hv_result is not None:
-          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
-                      (hv_name, hv_result))
+        test = hv_result is not None
+        _ErrorIf(test, self.ENODEHV, node,
+                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
  
      # check used drbd list
      if vg_name is not None:
        used_minors = node_result.get(constants.NV_DRBDLIST, [])
  
      # check used drbd list
      if vg_name is not None:
        used_minors = node_result.get(constants.NV_DRBDLIST, [])
-      if not isinstance(used_minors, (tuple, list)):
-        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
-                    str(used_minors))
-      else:
+      test = not isinstance(used_minors, (tuple, list))
+      _ErrorIf(test, self.ENODEDRBD, node,
+               "cannot parse drbd status file: %s", str(used_minors))
+      if not test:
          for minor, (iname, must_exist) in drbd_map.items():
          for minor, (iname, must_exist) in drbd_map.items():
-          if minor not in used_minors and must_exist:
-            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
-                        " not active" % (minor, iname))
-            bad = True
+          test = minor not in used_minors and must_exist
+          _ErrorIf(test, self.ENODEDRBD, node,
+                   "drbd minor %d of instance %s is not active",
+                   minor, iname)
          for minor in used_minors:
          for minor in used_minors:
-          if minor not in drbd_map:
-            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
-                        minor)
-            bad = True
-
-    return bad
+          test = minor not in drbd_map
+          _ErrorIf(test, self.ENODEDRBD, node,
+                   "unallocated drbd minor %d is in use", minor)
  
    def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
  
    def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
-                      node_instance, feedback_fn, n_offline):
+                      node_instance, n_offline):
      """Verify an instance.
  
      This function checks to see if the required block devices are
      available on the instance's node.
  
      """
      """Verify an instance.
  
      This function checks to see if the required block devices are
      available on the instance's node.
  
      """
-    bad = False
-
+    _ErrorIf = self._ErrorIf
      node_current = instanceconfig.primary_node
  
      node_vol_should = {}
      node_current = instanceconfig.primary_node
  
      node_vol_should = {}
@@ -1019,69 +1097,57 @@ class LUVerifyCluster(LogicalUnit):
          # ignore missing volumes on offline nodes
          continue
        for volume in node_vol_should[node]:
          # ignore missing volumes on offline nodes
          continue
        for volume in node_vol_should[node]:
-        if node not in node_vol_is or volume not in node_vol_is[node]:
-          feedback_fn("  - ERROR: volume %s missing on node %s" %
-                          (volume, node))
-          bad = True
+        test = node not in node_vol_is or volume not in node_vol_is[node]
+        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
+                 "volume %s missing on node %s", volume, node)
  
      if instanceconfig.admin_up:
  
      if instanceconfig.admin_up:
-      if ((node_current not in node_instance or
-          not instance in node_instance[node_current]) and
-          node_current not in n_offline):
-        feedback_fn("  - ERROR: instance %s not running on node %s" %
-                        (instance, node_current))
-        bad = True
+      test = ((node_current not in node_instance or
+               not instance in node_instance[node_current]) and
+              node_current not in n_offline)
+      _ErrorIf(test, self.EINSTANCEDOWN, instance,
+               "instance not running on its primary node %s",
+               node_current)
  
      for node in node_instance:
        if (not node == node_current):
  
      for node in node_instance:
        if (not node == node_current):
-        if instance in node_instance[node]:
-          feedback_fn("  - ERROR: instance %s should not run on node %s" %
-                          (instance, node))
-          bad = True
-
-    return bad
+        test = instance in node_instance[node]
+        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
+                 "instance should not run on node %s", node)
  
  
-  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
+  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
      """Verify if there are any unknown volumes in the cluster.
  
      The .os, .swap and backup volumes are ignored. All other volumes are
      reported as unknown.
  
      """
      """Verify if there are any unknown volumes in the cluster.
  
      The .os, .swap and backup volumes are ignored. All other volumes are
      reported as unknown.
  
      """
-    bad = False
-
      for node in node_vol_is:
        for volume in node_vol_is[node]:
      for node in node_vol_is:
        for volume in node_vol_is[node]:
-        if node not in node_vol_should or volume not in node_vol_should[node]:
-          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
-                      (volume, node))
-          bad = True
-    return bad
+        test = (node not in node_vol_should or
+                volume not in node_vol_should[node])
+        self._ErrorIf(test, self.ENODEORPHANLV, node,
+                      "volume %s is unknown", volume)
  
  
-  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
+  def _VerifyOrphanInstances(self, instancelist, node_instance):
      """Verify the list of running instances.
  
      This checks what instances are running but unknown to the cluster.
  
      """
      """Verify the list of running instances.
  
      This checks what instances are running but unknown to the cluster.
  
      """
-    bad = False
      for node in node_instance:
      for node in node_instance:
-      for runninginstance in node_instance[node]:
-        if runninginstance not in instancelist:
-          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
-                          (runninginstance, node))
-          bad = True
-    return bad
-
-  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
+      for o_inst in node_instance[node]:
+        test = o_inst not in instancelist
+        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
+                      "instance %s on node %s should not exist", o_inst, node)
+
+  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
      """Verify N+1 Memory Resilience.
  
      Check that if one single node dies we can still start all the instances it
      was primary for.
  
      """
      """Verify N+1 Memory Resilience.
  
      Check that if one single node dies we can still start all the instances it
      was primary for.
  
      """
-    bad = False
-
      for node, nodeinfo in node_info.iteritems():
        # This code checks that every node which is now listed as secondary has
        # enough memory to host all instances it is supposed to should a single
      for node, nodeinfo in node_info.iteritems():
        # This code checks that every node which is now listed as secondary has
        # enough memory to host all instances it is supposed to should a single
@@ -1097,11 +1163,10 @@ class LUVerifyCluster(LogicalUnit):
            bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
            if bep[constants.BE_AUTO_BALANCE]:
              needed_mem += bep[constants.BE_MEMORY]
            bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
            if bep[constants.BE_AUTO_BALANCE]:
              needed_mem += bep[constants.BE_MEMORY]
-        if nodeinfo['mfree'] < needed_mem:
-          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
-                      " failovers should node %s fail" % (node, prinode))
-          bad = True
-    return bad
+        test = nodeinfo['mfree'] < needed_mem
+        self._ErrorIf(test, self.ENODEN1, node,
+                      "not enough memory on to accommodate"
+                      " failovers should peer node %s fail", prinode)
  
    def CheckPrereq(self):
      """Check prerequisites.
  
    def CheckPrereq(self):
      """Check prerequisites.
@@ -1134,10 +1199,13 @@ class LUVerifyCluster(LogicalUnit):
      """Verify integrity of cluster, performing various tests on nodes.
  
      """
      """Verify integrity of cluster, performing various tests on nodes.
  
      """
-    bad = False
+    self.bad = False
+    _ErrorIf = self._ErrorIf
+    verbose = self.op.verbose
+    self._feedback_fn = feedback_fn
      feedback_fn("* Verifying global settings")
      for msg in self.cfg.VerifyConfig():
      feedback_fn("* Verifying global settings")
      for msg in self.cfg.VerifyConfig():
-      feedback_fn("  - ERROR: %s" % msg)
+      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
  
      vg_name = self.cfg.GetVGName()
      hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
  
      vg_name = self.cfg.GetVGName()
      hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
@@ -1190,11 +1258,13 @@ class LUVerifyCluster(LogicalUnit):
      master_node = self.cfg.GetMasterNode()
      all_drbd_map = self.cfg.ComputeDRBDMap()
  
      master_node = self.cfg.GetMasterNode()
      all_drbd_map = self.cfg.ComputeDRBDMap()
  
+    feedback_fn("* Verifying node status")
      for node_i in nodeinfo:
        node = node_i.name
  
        if node_i.offline:
      for node_i in nodeinfo:
        node = node_i.name
  
        if node_i.offline:
-        feedback_fn("* Skipping offline node %s" % (node,))
+        if verbose:
+          feedback_fn("* Skipping offline node %s" % (node,))
          n_offline.append(node)
          continue
  
          n_offline.append(node)
          continue
  
@@ -1207,62 +1277,59 @@ class LUVerifyCluster(LogicalUnit):
          n_drained.append(node)
        else:
          ntype = "regular"
          n_drained.append(node)
        else:
          ntype = "regular"
-      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
+      if verbose:
+        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
  
        msg = all_nvinfo[node].fail_msg
  
        msg = all_nvinfo[node].fail_msg
+      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
        if msg:
        if msg:
-        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
-        bad = True
          continue
  
        nresult = all_nvinfo[node].payload
        node_drbd = {}
        for minor, instance in all_drbd_map[node].items():
          continue
  
        nresult = all_nvinfo[node].payload
        node_drbd = {}
        for minor, instance in all_drbd_map[node].items():
-        if instance not in instanceinfo:
-          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
-                      instance)
+        test = instance not in instanceinfo
+        _ErrorIf(test, self.ECLUSTERCFG, None,
+                 "ghost instance '%s' in temporary DRBD map", instance)
            # ghost instance should not be running, but otherwise we
            # don't give double warnings (both ghost instance and
            # unallocated minor in use)
            # ghost instance should not be running, but otherwise we
            # don't give double warnings (both ghost instance and
            # unallocated minor in use)
+        if test:
            node_drbd[minor] = (instance, False)
          else:
            instance = instanceinfo[instance]
            node_drbd[minor] = (instance.name, instance.admin_up)
            node_drbd[minor] = (instance, False)
          else:
            instance = instanceinfo[instance]
            node_drbd[minor] = (instance.name, instance.admin_up)
-      result = self._VerifyNode(node_i, file_names, local_checksums,
-                                nresult, feedback_fn, master_files,
-                                node_drbd, vg_name)
-      bad = bad or result
+      self._VerifyNode(node_i, file_names, local_checksums,
+                       nresult, master_files, node_drbd, vg_name)
  
        lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
        if vg_name is None:
          node_volume[node] = {}
        elif isinstance(lvdata, basestring):
  
        lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
        if vg_name is None:
          node_volume[node] = {}
        elif isinstance(lvdata, basestring):
-        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
-                    (node, utils.SafeEncode(lvdata)))
-        bad = True
+        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
+                 utils.SafeEncode(lvdata))
          node_volume[node] = {}
        elif not isinstance(lvdata, dict):
          node_volume[node] = {}
        elif not isinstance(lvdata, dict):
-        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
-        bad = True
+        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
          continue
        else:
          node_volume[node] = lvdata
  
        # node_instance
        idata = nresult.get(constants.NV_INSTANCELIST, None)
          continue
        else:
          node_volume[node] = lvdata
  
        # node_instance
        idata = nresult.get(constants.NV_INSTANCELIST, None)
-      if not isinstance(idata, list):
-        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
-                    (node,))
-        bad = True
+      test = not isinstance(idata, list)
+      _ErrorIf(test, self.ENODEHV, node,
+               "rpc call to node failed (instancelist)")
+      if test:
          continue
  
        node_instance[node] = idata
  
        # node_info
        nodeinfo = nresult.get(constants.NV_HVINFO, None)
          continue
  
        node_instance[node] = idata
  
        # node_info
        nodeinfo = nresult.get(constants.NV_HVINFO, None)
-      if not isinstance(nodeinfo, dict):
-        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
-        bad = True
+      test = not isinstance(nodeinfo, dict)
+      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
+      if test:
          continue
  
        try:
          continue
  
        try:
@@ -1280,28 +1347,28 @@ class LUVerifyCluster(LogicalUnit):
          }
          # FIXME: devise a free space model for file based instances as well
          if vg_name is not None:
          }
          # FIXME: devise a free space model for file based instances as well
          if vg_name is not None:
-          if (constants.NV_VGLIST not in nresult or
-              vg_name not in nresult[constants.NV_VGLIST]):
-            feedback_fn("  - ERROR: node %s didn't return data for the"
-                        " volume group '%s' - it is either missing or broken" %
-                        (node, vg_name))
-            bad = True
+          test = (constants.NV_VGLIST not in nresult or
+                  vg_name not in nresult[constants.NV_VGLIST])
+          _ErrorIf(test, self.ENODELVM, node,
+                   "node didn't return data for the volume group '%s'"
+                   " - it is either missing or broken", vg_name)
+          if test:
              continue
            node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, KeyError):
              continue
            node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, KeyError):
-        feedback_fn("  - ERROR: invalid nodeinfo value returned"
-                    " from node %s" % (node,))
-        bad = True
+        _ErrorIf(True, self.ENODERPC, node,
+                 "node returned invalid nodeinfo, check lvm/hypervisor")
          continue
  
      node_vol_should = {}
  
          continue
  
      node_vol_should = {}
  
+    feedback_fn("* Verifying instance status")
      for instance in instancelist:
      for instance in instancelist:
-      feedback_fn("* Verifying instance %s" % instance)
+      if verbose:
+        feedback_fn("* Verifying instance %s" % instance)
        inst_config = instanceinfo[instance]
        inst_config = instanceinfo[instance]
-      result =  self._VerifyInstance(instance, inst_config, node_volume,
-                                     node_instance, feedback_fn, n_offline)
-      bad = bad or result
+      self._VerifyInstance(instance, inst_config, node_volume,
+                           node_instance, n_offline)
        inst_nodes_offline = []
  
        inst_config.MapLVsByNode(node_vol_should)
        inst_nodes_offline = []
  
        inst_config.MapLVsByNode(node_vol_should)
@@ -1309,12 +1376,11 @@ class LUVerifyCluster(LogicalUnit):
        instance_cfg[instance] = inst_config
  
        pnode = inst_config.primary_node
        instance_cfg[instance] = inst_config
  
        pnode = inst_config.primary_node
+      _ErrorIf(pnode not in node_info and pnode not in n_offline,
+               self.ENODERPC, pnode, "instance %s, connection to"
+               " primary node failed", instance)
        if pnode in node_info:
          node_info[pnode]['pinst'].append(instance)
        if pnode in node_info:
          node_info[pnode]['pinst'].append(instance)
-      elif pnode not in n_offline:
-        feedback_fn("  - ERROR: instance %s, connection to primary node"
-                    " %s failed" % (instance, pnode))
-        bad = True
  
        if pnode in n_offline:
          inst_nodes_offline.append(pnode)
  
        if pnode in n_offline:
          inst_nodes_offline.append(pnode)
@@ -1326,46 +1392,42 @@ class LUVerifyCluster(LogicalUnit):
        # FIXME: does not support file-backed instances
        if len(inst_config.secondary_nodes) == 0:
          i_non_redundant.append(instance)
        # FIXME: does not support file-backed instances
        if len(inst_config.secondary_nodes) == 0:
          i_non_redundant.append(instance)
-      elif len(inst_config.secondary_nodes) > 1:
-        feedback_fn("  - WARNING: multiple secondaries for instance %s"
-                    % instance)
+      _ErrorIf(len(inst_config.secondary_nodes) > 1,
+               self.EINSTANCELAYOUT, instance,
+               "instance has multiple secondary nodes", code="WARNING")
  
        if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
          i_non_a_balanced.append(instance)
  
        for snode in inst_config.secondary_nodes:
  
        if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
          i_non_a_balanced.append(instance)
  
        for snode in inst_config.secondary_nodes:
+        # same check as for the primary node above; note the leading space
+        # in the continued literal so the message reads "... node failed"
+        _ErrorIf(snode not in node_info and snode not in n_offline,
+                 self.ENODERPC, snode,
+                 "instance %s, connection to secondary node"
+                 " failed", instance)
+
          if snode in node_info:
            node_info[snode]['sinst'].append(instance)
            if pnode not in node_info[snode]['sinst-by-pnode']:
              node_info[snode]['sinst-by-pnode'][pnode] = []
            node_info[snode]['sinst-by-pnode'][pnode].append(instance)
          if snode in node_info:
            node_info[snode]['sinst'].append(instance)
            if pnode not in node_info[snode]['sinst-by-pnode']:
              node_info[snode]['sinst-by-pnode'][pnode] = []
            node_info[snode]['sinst-by-pnode'][pnode].append(instance)
-        elif snode not in n_offline:
-          feedback_fn("  - ERROR: instance %s, connection to secondary node"
-                      " %s failed" % (instance, snode))
-          bad = True
+
          if snode in n_offline:
            inst_nodes_offline.append(snode)
  
          if snode in n_offline:
            inst_nodes_offline.append(snode)
  
-      if inst_nodes_offline:
-        # warn that the instance lives on offline nodes, and set bad=True
-        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
-                    ", ".join(inst_nodes_offline))
-        bad = True
+      # warn that the instance lives on offline nodes
+      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
+               "instance lives on offline node(s) %s",
+               ", ".join(inst_nodes_offline))
  
      feedback_fn("* Verifying orphan volumes")
  
      feedback_fn("* Verifying orphan volumes")
-    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
-                                       feedback_fn)
-    bad = bad or result
+    self._VerifyOrphanVolumes(node_vol_should, node_volume)
  
      feedback_fn("* Verifying remaining instances")
  
      feedback_fn("* Verifying remaining instances")
-    result = self._VerifyOrphanInstances(instancelist, node_instance,
-                                         feedback_fn)
-    bad = bad or result
+    self._VerifyOrphanInstances(instancelist, node_instance)
  
      if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
        feedback_fn("* Verifying N+1 Memory redundancy")
  
      if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
        feedback_fn("* Verifying N+1 Memory redundancy")
-      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
-      bad = bad or result
+      self._VerifyNPlusOneMemory(node_info, instance_cfg)
  
      feedback_fn("* Other Notes")
      if i_non_redundant:
  
      feedback_fn("* Other Notes")
      if i_non_redundant:
@@ -1382,7 +1444,7 @@ class LUVerifyCluster(LogicalUnit):
      if n_drained:
        feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
  
      if n_drained:
        feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
  
-    return not bad
+    return not self.bad
  
    def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
      """Analyze the post-hooks' result
  
    def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
      """Analyze the post-hooks' result
@@ -1405,33 +1467,28 @@ class LUVerifyCluster(LogicalUnit):
        # Used to change hooks' output to proper indentation
        indent_re = re.compile('^', re.M)
        feedback_fn("* Hooks Results")
        # Used to change hooks' output to proper indentation
        indent_re = re.compile('^', re.M)
        feedback_fn("* Hooks Results")
-      if not hooks_results:
-        feedback_fn("  - ERROR: general communication failure")
-        lu_result = 1
-      else:
-        for node_name in hooks_results:
-          show_node_header = True
-          res = hooks_results[node_name]
-          msg = res.fail_msg
-          if msg:
-            if res.offline:
-              # no need to warn or set fail return value
-              continue
-            feedback_fn("    Communication failure in hooks execution: %s" %
-                        msg)
+      assert hooks_results, "invalid result from hooks"
+
+      for node_name in hooks_results:
+        show_node_header = True
+        res = hooks_results[node_name]
+        msg = res.fail_msg
+        test = msg and not res.offline
+        self._ErrorIf(test, self.ENODEHOOKS, node_name,
+                      "Communication failure in hooks execution: %s", msg)
+        if test:
+          # override manually lu_result here as _ErrorIf only
+          # overrides self.bad
+          lu_result = 1
+          continue
+        for script, hkr, output in res.payload:
+          test = hkr == constants.HKR_FAIL
+          self._ErrorIf(test, self.ENODEHOOKS, node_name,
+                        "Script %s failed, output:", script)
+          if test:
+            output = indent_re.sub('      ', output)
+            feedback_fn("%s" % output)
              lu_result = 1
              lu_result = 1
-            continue
-          for script, hkr, output in res.payload:
-            if hkr == constants.HKR_FAIL:
-              # The node header is only shown once, if there are
-              # failing hooks on that node
-              if show_node_header:
-                feedback_fn("  Node %s:" % node_name)
-                show_node_header = False
-              feedback_fn("    ERROR: Script %s failed, output:" % script)
-              output = indent_re.sub('      ', output)
-              feedback_fn("%s" % output)
-              lu_result = 1
  
        return lu_result
  
  
        return lu_result
  
@@ -1527,7 +1584,6 @@ class LURepairDiskSizes(NoHooksLU):
    REQ_BGL = False
  
    def ExpandNames(self):
    REQ_BGL = False
  
    def ExpandNames(self):
-
      if not isinstance(self.op.instances, list):
        raise errors.OpPrereqError("Invalid argument type 'instances'")
  
      if not isinstance(self.op.instances, list):
        raise errors.OpPrereqError("Invalid argument type 'instances'")
  
@@ -1585,7 +1641,7 @@ class LURepairDiskSizes(NoHooksLU):
      changed = []
      for node, dskl in per_node_disks.items():
        result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
      changed = []
      for node, dskl in per_node_disks.items():
        result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
-      if result.failed:
+      if result.fail_msg:
          self.LogWarning("Failure in blockdev_getsizes call to node"
                          " %s, ignoring", node)
          continue
          self.LogWarning("Failure in blockdev_getsizes call to node"
                          " %s, ignoring", node)
          continue
@@ -1987,7 +2043,8 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False):
          else:
            rem_time = "no time estimate"
          lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
          else:
            rem_time = "no time estimate"
          lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
-                        (instance.disks[i].iv_name, mstat.sync_percent, rem_time))
+                        (instance.disks[i].iv_name, mstat.sync_percent,
+                         rem_time))
  
      # if we're done but degraded, let's do a few small retries, to
      # make sure we see a stable and not transient situation; therefore
  
      # if we're done but degraded, let's do a few small retries, to
      # make sure we see a stable and not transient situation; therefore
@@ -2222,6 +2279,10 @@ class LUQueryNodes(NoHooksLU):
    """
    _OP_REQP = ["output_fields", "names", "use_locking"]
    REQ_BGL = False
    """
    _OP_REQP = ["output_fields", "names", "use_locking"]
    REQ_BGL = False
+
+  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
+                    "master_candidate", "offline", "drained"]
+
    _FIELDS_DYNAMIC = utils.FieldSet(
      "dtotal", "dfree",
      "mtotal", "mnode", "mfree",
    _FIELDS_DYNAMIC = utils.FieldSet(
      "dtotal", "dfree",
      "mtotal", "mnode", "mfree",
@@ -2229,16 +2290,12 @@ class LUQueryNodes(NoHooksLU):
      "ctotal", "cnodes", "csockets",
      )
  
      "ctotal", "cnodes", "csockets",
      )
  
-  _FIELDS_STATIC = utils.FieldSet(
-    "name", "pinst_cnt", "sinst_cnt",
+  _FIELDS_STATIC = utils.FieldSet(*[
+    "pinst_cnt", "sinst_cnt",
      "pinst_list", "sinst_list",
      "pip", "sip", "tags",
      "pinst_list", "sinst_list",
      "pip", "sip", "tags",
-    "serial_no", "ctime", "mtime",
-    "master_candidate",
      "master",
      "master",
-    "offline",
-    "drained",
-    "role",
+    "role"] + _SIMPLE_FIELDS
      )
  
    def ExpandNames(self):
      )
  
    def ExpandNames(self):
@@ -2339,8 +2396,8 @@ class LUQueryNodes(NoHooksLU):
      for node in nodelist:
        node_output = []
        for field in self.op.output_fields:
      for node in nodelist:
        node_output = []
        for field in self.op.output_fields:
-        if field == "name":
-          val = node.name
+        if field in self._SIMPLE_FIELDS:
+          val = getattr(node, field)
          elif field == "pinst_list":
            val = list(node_to_primary[node.name])
          elif field == "sinst_list":
          elif field == "pinst_list":
            val = list(node_to_primary[node.name])
          elif field == "sinst_list":
@@ -2355,20 +2412,8 @@ class LUQueryNodes(NoHooksLU):
            val = node.secondary_ip
          elif field == "tags":
            val = list(node.GetTags())
            val = node.secondary_ip
          elif field == "tags":
            val = list(node.GetTags())
-        elif field == "serial_no":
-          val = node.serial_no
-        elif field == "ctime":
-          val = node.ctime
-        elif field == "mtime":
-          val = node.mtime
-        elif field == "master_candidate":
-          val = node.master_candidate
          elif field == "master":
            val = node.name == master_node
          elif field == "master":
            val = node.name == master_node
-        elif field == "offline":
-          val = node.offline
-        elif field == "drained":
-          val = node.drained
          elif self._FIELDS_DYNAMIC.Matches(field):
            val = live_data[node.name].get(field, None)
          elif field == "role":
          elif self._FIELDS_DYNAMIC.Matches(field):
            val = live_data[node.name].get(field, None)
          elif field == "role":
@@ -2782,11 +2827,7 @@ class LUAddNode(LogicalUnit):
                  priv_key, pub_key]
  
      for i in keyfiles:
                  priv_key, pub_key]
  
      for i in keyfiles:
-      f = open(i, 'r')
-      try:
-        keyarray.append(f.read())
-      finally:
-        f.close()
+      keyarray.append(utils.ReadFile(i))
  
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2],
  
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2],
@@ -2809,7 +2850,7 @@ class LUAddNode(LogicalUnit):
  
      node_verify_list = [self.cfg.GetMasterNode()]
      node_verify_param = {
  
      node_verify_list = [self.cfg.GetMasterNode()]
      node_verify_param = {
-      'nodelist': [node],
+      constants.NV_NODELIST: [node],
        # TODO: do a node-net-test as well?
      }
  
        # TODO: do a node-net-test as well?
      }
  
@@ -2817,7 +2858,7 @@ class LUAddNode(LogicalUnit):
                                         self.cfg.GetClusterName())
      for verifier in node_verify_list:
        result[verifier].Raise("Cannot communicate with node %s" % verifier)
                                         self.cfg.GetClusterName())
      for verifier in node_verify_list:
        result[verifier].Raise("Cannot communicate with node %s" % verifier)
-      nl_payload = result[verifier].payload['nodelist']
+      nl_payload = result[verifier].payload[constants.NV_NODELIST]
        if nl_payload:
          for failed in nl_payload:
            feedback_fn("ssh/hostname verification failed %s -> %s" %
        if nl_payload:
          for failed in nl_payload:
            feedback_fn("ssh/hostname verification failed %s -> %s" %
@@ -2832,7 +2873,7 @@ class LUAddNode(LogicalUnit):
        # and make sure the new node will not have old files around
        if not new_node.master_candidate:
          result = self.rpc.call_node_demote_from_mc(new_node.name)
        # and make sure the new node will not have old files around
        if not new_node.master_candidate:
          result = self.rpc.call_node_demote_from_mc(new_node.name)
-        msg = result.RemoteFailMsg()
+        msg = result.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself from master"
                            " candidate status: %s" % msg)
          if msg:
            self.LogWarning("Node failed to demote itself from master"
                            " candidate status: %s" % msg)
@@ -2892,12 +2933,16 @@ class LUSetNodeParams(LogicalUnit):
      """
      node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
  
      """
      node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
  
+    if (self.op.master_candidate is not None or
+        self.op.drained is not None or
+        self.op.offline is not None):
+      # we can't change the master's node flags
+      if self.op.node_name == self.cfg.GetMasterNode():
+        raise errors.OpPrereqError("The master role can be changed"
+                                   " only via masterfailover")
+
      if ((self.op.master_candidate == False or self.op.offline == True or
           self.op.drained == True) and node.master_candidate):
      if ((self.op.master_candidate == False or self.op.offline == True or
           self.op.drained == True) and node.master_candidate):
-      # we will demote the node from master_candidate
-      if self.op.node_name == self.cfg.GetMasterNode():
-        raise errors.OpPrereqError("The master node has to be a"
-                                   " master candidate, online and not drained")
        cp_size = self.cfg.GetClusterInfo().candidate_pool_size
        num_candidates, _ = self.cfg.GetMasterCandidateStats()
        if num_candidates <= cp_size:
        cp_size = self.cfg.GetClusterInfo().candidate_pool_size
        num_candidates, _ = self.cfg.GetMasterCandidateStats()
        if num_candidates <= cp_size:
@@ -2956,7 +3001,7 @@ class LUSetNodeParams(LogicalUnit):
            changed_mc = True
            result.append(("master_candidate", "auto-demotion due to drain"))
            rrc = self.rpc.call_node_demote_from_mc(node.name)
            changed_mc = True
            result.append(("master_candidate", "auto-demotion due to drain"))
            rrc = self.rpc.call_node_demote_from_mc(node.name)
-          msg = rrc.RemoteFailMsg()
+          msg = rrc.fail_msg
            if msg:
              self.LogWarning("Node failed to demote itself: %s" % msg)
          if node.offline:
            if msg:
              self.LogWarning("Node failed to demote itself: %s" % msg)
          if node.offline:
@@ -3057,6 +3102,8 @@ class LUQueryClusterInfo(NoHooksLU):
        "file_storage_dir": cluster.file_storage_dir,
        "ctime": cluster.ctime,
        "mtime": cluster.mtime,
        "file_storage_dir": cluster.file_storage_dir,
        "ctime": cluster.ctime,
        "mtime": cluster.mtime,
+      "uuid": cluster.uuid,
+      "tags": list(cluster.GetTags()),
        }
  
      return result
        }
  
      return result
@@ -3069,7 +3116,8 @@ class LUQueryConfigValues(NoHooksLU):
    _OP_REQP = []
    REQ_BGL = False
    _FIELDS_DYNAMIC = utils.FieldSet()
    _OP_REQP = []
    REQ_BGL = False
    _FIELDS_DYNAMIC = utils.FieldSet()
-  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
+  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
+                                  "watcher_pause")
  
    def ExpandNames(self):
      self.needed_locks = {}
  
    def ExpandNames(self):
      self.needed_locks = {}
@@ -3096,6 +3144,8 @@ class LUQueryConfigValues(NoHooksLU):
          entry = self.cfg.GetMasterNode()
        elif field == "drain_flag":
          entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
          entry = self.cfg.GetMasterNode()
        elif field == "drain_flag":
          entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
+      elif field == "watcher_pause":
+        # assign like the sibling branches so the value is appended to
+        # 'values' below instead of aborting the loop over output fields
+        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
        else:
          raise errors.ParameterError(field)
        values.append(entry)
        else:
          raise errors.ParameterError(field)
        values.append(entry)
@@ -3922,6 +3972,8 @@ class LUQueryInstances(NoHooksLU):
    """
    _OP_REQP = ["output_fields", "names", "use_locking"]
    REQ_BGL = False
    """
    _OP_REQP = ["output_fields", "names", "use_locking"]
    REQ_BGL = False
+  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
+                    "serial_no", "ctime", "mtime", "uuid"]
    _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                      "admin_state",
                                      "disk_template", "ip", "mac", "bridge",
    _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                      "admin_state",
                                      "disk_template", "ip", "mac", "bridge",
@@ -3934,9 +3986,8 @@ class LUQueryInstances(NoHooksLU):
                                      r"(nic)\.(bridge)/([0-9]+)",
                                      r"(nic)\.(macs|ips|modes|links|bridges)",
                                      r"(disk|nic)\.(count)",
                                      r"(nic)\.(bridge)/([0-9]+)",
                                      r"(nic)\.(macs|ips|modes|links|bridges)",
                                      r"(disk|nic)\.(count)",
-                                    "serial_no", "hypervisor", "hvparams",
-                                    "ctime", "mtime",
-                                    ] +
+                                    "hvparams",
+                                    ] + _SIMPLE_FIELDS +
                                    ["hv/%s" % name
                                     for name in constants.HVS_PARAMETERS] +
                                    ["be/%s" % name
                                    ["hv/%s" % name
                                     for name in constants.HVS_PARAMETERS] +
                                    ["be/%s" % name
@@ -4016,7 +4067,7 @@ class LUQueryInstances(NoHooksLU):
          if result.offline:
            # offline nodes will be in both lists
            off_nodes.append(name)
          if result.offline:
            # offline nodes will be in both lists
            off_nodes.append(name)
-        if result.failed or result.fail_msg:
+        if result.fail_msg:
            bad_nodes.append(name)
          else:
            if result.payload:
            bad_nodes.append(name)
          else:
            if result.payload:
@@ -4039,10 +4090,8 @@ class LUQueryInstances(NoHooksLU):
                                   nic.nicparams) for nic in instance.nics]
        for field in self.op.output_fields:
          st_match = self._FIELDS_STATIC.Matches(field)
                                   nic.nicparams) for nic in instance.nics]
        for field in self.op.output_fields:
          st_match = self._FIELDS_STATIC.Matches(field)
-        if field == "name":
-          val = instance.name
-        elif field == "os":
-          val = instance.os
+        if field in self._SIMPLE_FIELDS:
+          val = getattr(instance, field)
          elif field == "pnode":
            val = instance.primary_node
          elif field == "snodes":
          elif field == "pnode":
            val = instance.primary_node
          elif field == "snodes":
@@ -4119,16 +4168,6 @@ class LUQueryInstances(NoHooksLU):
            val = _ComputeDiskSize(instance.disk_template, disk_sizes)
          elif field == "tags":
            val = list(instance.GetTags())
            val = _ComputeDiskSize(instance.disk_template, disk_sizes)
          elif field == "tags":
            val = list(instance.GetTags())
-        elif field == "serial_no":
-          val = instance.serial_no
-        elif field == "ctime":
-          val = instance.ctime
-        elif field == "mtime":
-          val = instance.mtime
-        elif field == "network_port":
-          val = instance.network_port
-        elif field == "hypervisor":
-          val = instance.hypervisor
          elif field == "hvparams":
            val = i_hv
          elif (field.startswith(HVPREFIX) and
          elif field == "hvparams":
            val = i_hv
          elif (field.startswith(HVPREFIX) and
@@ -5146,7 +5185,7 @@ def _CreateDisks(lu, instance, to_skip=None, target_node=None):
      result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
  
      result.Raise("Failed to create directory '%s' on"
      result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
  
      result.Raise("Failed to create directory '%s' on"
-                 " node %s: %s" % (file_storage_dir, pnode))
+                 " node %s" % (file_storage_dir, pnode))
  
    # Note: this needs to be kept in sync with adding of disks in
    # LUSetInstanceParams
  
    # Note: this needs to be kept in sync with adding of disks in
    # LUSetInstanceParams
@@ -5197,10 +5236,10 @@ def _RemoveDisks(lu, instance, target_node=None):
  
    if instance.disk_template == constants.DT_FILE:
      file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
  
    if instance.disk_template == constants.DT_FILE:
      file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
-    if target_node is node:
-      tgt = instance.primary_node
+    if target_node:
+      tgt = target_node
      else:
      else:
-      tgt = instance.target_node
+      tgt = instance.primary_node
      result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
      if result.fail_msg:
        lu.LogWarning("Could not remove directory '%s' on node %s: %s",
      result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
      if result.fail_msg:
        lu.LogWarning("Could not remove directory '%s' on node %s: %s",
@@ -5376,6 +5415,12 @@ class LUCreateInstance(LogicalUnit):
          if not utils.IsValidMac(mac.lower()):
            raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                       mac)
          if not utils.IsValidMac(mac.lower()):
            raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                       mac)
+        else:
+          # or validate/reserve the current one
+          if self.cfg.IsMacInUse(mac):
+            raise errors.OpPrereqError("MAC address %s already in use"
+                                       " in cluster" % mac)
+
        # bridge verification
        bridge = nic.get("bridge", None)
        link = nic.get("link", None)
        # bridge verification
        bridge = nic.get("bridge", None)
        link = nic.get("link", None)
@@ -6442,7 +6487,8 @@ class TLReplaceDisks(Tasklet):
      for dev, old_lvs, new_lvs in iv_names.itervalues():
        self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
  
      for dev, old_lvs, new_lvs in iv_names.itervalues():
        self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
  
-      result = self.rpc.call_blockdev_removechildren(self.target_node, dev, old_lvs)
+      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
+                                                     old_lvs)
        result.Raise("Can't detach drbd from local storage on node"
                     " %s for device %s" % (self.target_node, dev.iv_name))
        #dev.children = []
        result.Raise("Can't detach drbd from local storage on node"
                     " %s for device %s" % (self.target_node, dev.iv_name))
        #dev.children = []
@@ -6468,14 +6514,16 @@ class TLReplaceDisks(Tasklet):
            rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
  
        self.lu.LogInfo("Renaming the old LVs on the target node")
            rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
  
        self.lu.LogInfo("Renaming the old LVs on the target node")
-      result = self.rpc.call_blockdev_rename(self.target_node, rename_old_to_new)
+      result = self.rpc.call_blockdev_rename(self.target_node,
+                                             rename_old_to_new)
        result.Raise("Can't rename old LVs on node %s" % self.target_node)
  
        # Now we rename the new LVs to the old LVs
        self.lu.LogInfo("Renaming the new LVs on the target node")
        rename_new_to_old = [(new, old.physical_id)
                             for old, new in zip(old_lvs, new_lvs)]
        result.Raise("Can't rename old LVs on node %s" % self.target_node)
  
        # Now we rename the new LVs to the old LVs
        self.lu.LogInfo("Renaming the new LVs on the target node")
        rename_new_to_old = [(new, old.physical_id)
                             for old, new in zip(old_lvs, new_lvs)]
-      result = self.rpc.call_blockdev_rename(self.target_node, rename_new_to_old)
+      result = self.rpc.call_blockdev_rename(self.target_node,
+                                             rename_new_to_old)
        result.Raise("Can't rename new LVs on node %s" % self.target_node)
  
        for old, new in zip(old_lvs, new_lvs):
        result.Raise("Can't rename new LVs on node %s" % self.target_node)
  
        for old, new in zip(old_lvs, new_lvs):
@@ -6488,11 +6536,13 @@ class TLReplaceDisks(Tasklet):
  
        # Now that the new lvs have the old name, we can add them to the device
        self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
  
        # Now that the new lvs have the old name, we can add them to the device
        self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
-      result = self.rpc.call_blockdev_addchildren(self.target_node, dev, new_lvs)
+      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
+                                                  new_lvs)
        msg = result.fail_msg
        if msg:
          for new_lv in new_lvs:
        msg = result.fail_msg
        if msg:
          for new_lv in new_lvs:
-          msg2 = self.rpc.call_blockdev_remove(self.target_node, new_lv).fail_msg
+          msg2 = self.rpc.call_blockdev_remove(self.target_node,
+                                               new_lv).fail_msg
            if msg2:
              self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                                 hint=("cleanup manually the unused logical"
            if msg2:
              self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                                 hint=("cleanup manually the unused logical"
@@ -6560,13 +6610,15 @@ class TLReplaceDisks(Tasklet):
      # after this, we must manually remove the drbd minors on both the
      # error and the success paths
      self.lu.LogStep(4, steps_total, "Changing drbd configuration")
      # after this, we must manually remove the drbd minors on both the
      # error and the success paths
      self.lu.LogStep(4, steps_total, "Changing drbd configuration")
-    minors = self.cfg.AllocateDRBDMinor([self.new_node for dev in self.instance.disks],
+    minors = self.cfg.AllocateDRBDMinor([self.new_node
+                                         for dev in self.instance.disks],
                                          self.instance.name)
      logging.debug("Allocated minors %r" % (minors,))
  
      iv_names = {}
      for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
                                          self.instance.name)
      logging.debug("Allocated minors %r" % (minors,))
  
      iv_names = {}
      for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
-      self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.new_node, idx))
+      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
+                      (self.new_node, idx))
        # create new devices on new_node; note that we create two IDs:
        # one without port, so the drbd will be activated without
        # networking information on the new node at this stage, and one
        # create new devices on new_node; note that we create two IDs:
        # one without port, so the drbd will be activated without
        # networking information on the new node at this stage, and one
@@ -6577,8 +6629,10 @@ class TLReplaceDisks(Tasklet):
        else:
          p_minor = o_minor2
  
        else:
          p_minor = o_minor2
  
-      new_alone_id = (self.instance.primary_node, self.new_node, None, p_minor, new_minor, o_secret)
-      new_net_id = (self.instance.primary_node, self.new_node, o_port, p_minor, new_minor, o_secret)
+      new_alone_id = (self.instance.primary_node, self.new_node, None,
+                      p_minor, new_minor, o_secret)
+      new_net_id = (self.instance.primary_node, self.new_node, o_port,
+                    p_minor, new_minor, o_secret)
  
        iv_names[idx] = (dev, dev.children, new_net_id)
        logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
  
        iv_names[idx] = (dev, dev.children, new_net_id)
        logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
@@ -6606,8 +6660,10 @@ class TLReplaceDisks(Tasklet):
                                   " soon as possible"))
  
      self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
                                   " soon as possible"))
  
      self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
-    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node], self.node_secondary_ip,
-                                               self.instance.disks)[self.instance.primary_node]
+    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
+                                               self.node_secondary_ip,
+                                               self.instance.disks)\
+                                              [self.instance.primary_node]
  
      msg = result.fail_msg
      if msg:
  
      msg = result.fail_msg
      if msg:
@@ -6628,13 +6684,17 @@ class TLReplaceDisks(Tasklet):
      # and now perform the drbd attach
      self.lu.LogInfo("Attaching primary drbds to new secondary"
                      " (standalone => connected)")
      # and now perform the drbd attach
      self.lu.LogInfo("Attaching primary drbds to new secondary"
                      " (standalone => connected)")
-    result = self.rpc.call_drbd_attach_net([self.instance.primary_node, self.new_node], self.node_secondary_ip,
-                                           self.instance.disks, self.instance.name,
+    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
+                                            self.new_node],
+                                           self.node_secondary_ip,
+                                           self.instance.disks,
+                                           self.instance.name,
                                             False)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
                                             False)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
-        self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node, msg,
+        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
+                           to_node, msg,
                             hint=("please do a gnt-instance info to see the"
                                   " status of disks"))
  
                             hint=("please do a gnt-instance info to see the"
                                   " status of disks"))
  
@@ -6675,7 +6735,7 @@ class LURepairNodeStorage(NoHooksLU):
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
-                                 " node '%s'" % (inst.name, node_name))
+                                 " node '%s'" % (instance.name, node_name))
  
    def CheckPrereq(self):
      """Check prerequisites.
  
    def CheckPrereq(self):
      """Check prerequisites.
@@ -6948,6 +7008,7 @@ class LUQueryInstanceData(NoHooksLU):
          "serial_no": instance.serial_no,
          "mtime": instance.mtime,
          "ctime": instance.ctime,
          "serial_no": instance.serial_no,
          "mtime": instance.mtime,
          "ctime": instance.ctime,
+        "uuid": instance.uuid,
          }
  
        result[instance.name] = idict
          }
  
        result[instance.name] = idict
@@ -7580,8 +7641,10 @@ class LUExportInstance(LogicalUnit):
      instance = self.instance
      dst_node = self.dst_node
      src_node = instance.primary_node
      instance = self.instance
      dst_node = self.dst_node
      src_node = instance.primary_node
+
      if self.op.shutdown:
        # shutdown the instance, but not the disks
      if self.op.shutdown:
        # shutdown the instance, but not the disks
+      feedback_fn("Shutting down instance %s" % instance.name)
        result = self.rpc.call_instance_shutdown(src_node, instance)
        result.Raise("Could not shutdown instance %s on"
                     " node %s" % (instance.name, src_node))
        result = self.rpc.call_instance_shutdown(src_node, instance)
        result.Raise("Could not shutdown instance %s on"
                     " node %s" % (instance.name, src_node))
@@ -7599,6 +7662,9 @@ class LUExportInstance(LogicalUnit):
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
+        feedback_fn("Creating a snapshot of disk/%s on node %s" %
+                    (idx, src_node))
+
          # result.payload will be a snapshot of an lvm leaf of the one we passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
          # result.payload will be a snapshot of an lvm leaf of the one we passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
@@ -7615,6 +7681,7 @@ class LUExportInstance(LogicalUnit):
  
      finally:
        if self.op.shutdown and instance.admin_up:
  
      finally:
        if self.op.shutdown and instance.admin_up:
+        feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
@@ -7625,6 +7692,8 @@ class LUExportInstance(LogicalUnit):
  
      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
  
      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
+      feedback_fn("Exporting snapshot %s from %s to %s" %
+                  (idx, src_node, dst_node.name))
        if dev:
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name, idx)
        if dev:
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name, idx)
@@ -7642,6 +7711,7 @@ class LUExportInstance(LogicalUnit):
        else:
          dresults.append(False)
  
        else:
          dresults.append(False)
  
+    feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
      fin_resu = True
      msg = result.fail_msg
      result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
      fin_resu = True
      msg = result.fail_msg
@@ -7658,6 +7728,7 @@ class LUExportInstance(LogicalUnit):
      # substitutes an empty list with the full cluster node list.
      iname = instance.name
      if nodelist:
      # substitutes an empty list with the full cluster node list.
      iname = instance.name
      if nodelist:
+      feedback_fn("Removing old exports for instance %s" % iname)
        exportlist = self.rpc.call_export_list(nodelist)
        for node in exportlist:
          if exportlist[node].fail_msg:
        exportlist = self.rpc.call_export_list(nodelist)
        for node in exportlist:
          if exportlist[node].fail_msg: