Improve LUQueryNodes for lockless case

[ganeti-local] / lib / cmdlib.py
diff --git a/lib/cmdlib.py b/lib/cmdlib.py

index 92567a7..87ce76d 100644 (file)
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -25,14 +25,11 @@
  
  import os
  import os.path
-import sha
  import time
-import tempfile
  import re
  import platform
  import logging
  import copy
-import random
  
  from ganeti import ssh
  from ganeti import utils
@@ -41,7 +38,6 @@ from ganeti import hypervisor
  from ganeti import locking
  from ganeti import constants
  from ganeti import objects
-from ganeti import opcodes
  from ganeti import serializer
  from ganeti import ssconf
  
@@ -69,7 +65,7 @@ class LogicalUnit(object):
    def __init__(self, processor, op, context, rpc):
      """Constructor for LogicalUnit.
  
-    This needs to be overriden in derived classes in order to check op
+    This needs to be overridden in derived classes in order to check op
      validity.
  
      """
@@ -117,7 +113,7 @@ class LogicalUnit(object):
      CheckPrereq, doing these separate is better because:
  
        - ExpandNames is left as as purely a lock-related function
-      - CheckPrereq is run after we have aquired locks (and possible
+      - CheckPrereq is run after we have acquired locks (and possibly
          waited for them)
  
      The function is allowed to change the self.op attribute so that
@@ -392,8 +388,8 @@ def _GetWantedInstances(lu, instances):
        wanted.append(instance)
  
    else:
-    wanted = lu.cfg.GetInstanceList()
-  return utils.NiceSort(wanted)
+    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
+  return wanted
  
  
  def _CheckOutputFields(static, dynamic, selected):
@@ -434,15 +430,28 @@ def _CheckNodeOnline(lu, node):
  
    @param lu: the LU on behalf of which we make the check
    @param node: the node to check
-  @raise errors.OpPrereqError: if the nodes is offline
+  @raise errors.OpPrereqError: if the node is offline
  
    """
    if lu.cfg.GetNodeInfo(node).offline:
      raise errors.OpPrereqError("Can't use offline node %s" % node)
  
  
+def _CheckNodeNotDrained(lu, node):
+  """Ensure that a given node is not drained.
+
+  @param lu: the LU on behalf of which we make the check
+  @param node: the node to check
+  @raise errors.OpPrereqError: if the node is drained
+
+  """
+  if lu.cfg.GetNodeInfo(node).drained:
+    raise errors.OpPrereqError("Can't use drained node %s" % node)
+
+
  def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
-                          memory, vcpus, nics):
+                          memory, vcpus, nics, disk_template, disks,
+                          bep, hvp, hypervisor_name):
    """Builds instance related env variables for hooks
  
    This builds the hook environment from individual variables.
@@ -455,8 +464,8 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
    @param secondary_nodes: list of secondary nodes as strings
    @type os_type: string
    @param os_type: the name of the instance's OS
-  @type status: string
-  @param status: the desired status of the instances
+  @type status: boolean
+  @param status: the should_run status of the instance
    @type memory: string
    @param memory: the memory size of the instance
    @type vcpus: string
@@ -464,19 +473,35 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
    @type nics: list
    @param nics: list of tuples (ip, bridge, mac) representing
        the NICs the instance  has
+  @type disk_template: string
+  @param disk_template: the disk template of the instance
+  @type disks: list
+  @param disks: the list of (size, mode) pairs
+  @type bep: dict
+  @param bep: the backend parameters for the instance
+  @type hvp: dict
+  @param hvp: the hypervisor parameters for the instance
+  @type hypervisor_name: string
+  @param hypervisor_name: the hypervisor for the instance
    @rtype: dict
    @return: the hook environment for this instance
  
    """
+  if status:
+    str_status = "up"
+  else:
+    str_status = "down"
    env = {
      "OP_TARGET": name,
      "INSTANCE_NAME": name,
      "INSTANCE_PRIMARY": primary_node,
      "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
      "INSTANCE_OS_TYPE": os_type,
-    "INSTANCE_STATUS": status,
+    "INSTANCE_STATUS": str_status,
      "INSTANCE_MEMORY": memory,
      "INSTANCE_VCPUS": vcpus,
+    "INSTANCE_DISK_TEMPLATE": disk_template,
+    "INSTANCE_HYPERVISOR": hypervisor_name,
    }
  
    if nics:
@@ -486,12 +511,26 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
          ip = ""
        env["INSTANCE_NIC%d_IP" % idx] = ip
        env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
-      env["INSTANCE_NIC%d_HWADDR" % idx] = mac
+      env["INSTANCE_NIC%d_MAC" % idx] = mac
    else:
      nic_count = 0
  
    env["INSTANCE_NIC_COUNT"] = nic_count
  
+  if disks:
+    disk_count = len(disks)
+    for idx, (size, mode) in enumerate(disks):
+      env["INSTANCE_DISK%d_SIZE" % idx] = size
+      env["INSTANCE_DISK%d_MODE" % idx] = mode
+  else:
+    disk_count = 0
+
+  env["INSTANCE_DISK_COUNT"] = disk_count
+
+  for source, kind in [(bep, "BE"), (hvp, "HV")]:
+    for key, value in source.items():
+      env["INSTANCE_%s_%s" % (kind, key)] = value
+
    return env
  
  
@@ -510,16 +549,23 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
    @return: the hook environment dictionary
  
    """
-  bep = lu.cfg.GetClusterInfo().FillBE(instance)
+  cluster = lu.cfg.GetClusterInfo()
+  bep = cluster.FillBE(instance)
+  hvp = cluster.FillHV(instance)
    args = {
      'name': instance.name,
      'primary_node': instance.primary_node,
      'secondary_nodes': instance.secondary_nodes,
      'os_type': instance.os,
-    'status': instance.os,
+    'status': instance.admin_up,
      'memory': bep[constants.BE_MEMORY],
      'vcpus': bep[constants.BE_VCPUS],
      'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
+    'disk_template': instance.disk_template,
+    'disks': [(disk.size, disk.mode) for disk in instance.disks],
+    'bep': bep,
+    'hvp': hvp,
+    'hypervisor_name': instance.hypervisor,
    }
    if override:
      args.update(override)
@@ -543,10 +589,10 @@ def _AdjustCandidatePool(lu):
  
  
  def _CheckInstanceBridgesExist(lu, instance):
-  """Check that the brigdes needed by an instance exist.
+  """Check that the bridges needed by an instance exist.
  
    """
-  # check bridges existance
+  # check bridges existence
    brlist = [nic.bridge for nic in instance.nics]
    result = lu.rpc.call_bridges_exist(instance.primary_node, brlist)
    result.Raise()
@@ -567,7 +613,7 @@ class LUDestroyCluster(NoHooksLU):
  
      This checks whether the cluster is empty.
  
-    Any errors are signalled by raising errors.OpPrereqError.
+    Any errors are signaled by raising errors.OpPrereqError.
  
      """
      master = self.cfg.GetMasterNode()
@@ -613,13 +659,14 @@ class LUVerifyCluster(LogicalUnit):
      self.share_locks = dict(((i, 1) for i in locking.LEVELS))
  
    def _VerifyNode(self, nodeinfo, file_list, local_cksum,
-                  node_result, feedback_fn, master_files):
+                  node_result, feedback_fn, master_files,
+                  drbd_map, vg_name):
      """Run multiple tests against a node.
  
      Test list:
  
        - compares ganeti version
-      - checks vg existance and size > 20G
+      - checks vg existence and size > 20G
        - checks config file checksum
        - checks ssh to other nodes
  
@@ -630,6 +677,10 @@ class LUVerifyCluster(LogicalUnit):
      @param node_result: the results from the node
      @param feedback_fn: function used to accumulate results
      @param master_files: list of files that only masters should have
+    @param drbd_map: the used drbd minors for this node, in
+        form of minor: (instance, must_exist) which correspond to instances
+        and their running status
+    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
  
      """
      node = nodeinfo.name
@@ -642,29 +693,39 @@ class LUVerifyCluster(LogicalUnit):
      # compares ganeti version
      local_version = constants.PROTOCOL_VERSION
      remote_version = node_result.get('version', None)
-    if not remote_version:
+    if not (remote_version and isinstance(remote_version, (list, tuple)) and
+            len(remote_version) == 2):
        feedback_fn("  - ERROR: connection to %s failed" % (node))
        return True
  
-    if local_version != remote_version:
-      feedback_fn("  - ERROR: sw version mismatch: master %s, node(%s) %s" %
-                      (local_version, node, remote_version))
+    if local_version != remote_version[0]:
+      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
+                  " node %s %s" % (local_version, node, remote_version[0]))
        return True
  
-    # checks vg existance and size > 20G
+    # node seems compatible, we can actually try to look into its results
  
      bad = False
-    vglist = node_result.get(constants.NV_VGLIST, None)
-    if not vglist:
-      feedback_fn("  - ERROR: unable to check volume groups on node %s." %
-                      (node,))
-      bad = True
-    else:
-      vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
-                                            constants.MIN_VG_SIZE)
-      if vgstatus:
-        feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
+
+    # full package version
+    if constants.RELEASE_VERSION != remote_version[1]:
+      feedback_fn("  - WARNING: software version mismatch: master %s,"
+                  " node %s %s" %
+                  (constants.RELEASE_VERSION, node, remote_version[1]))
+
+    # checks vg existence and size > 20G
+    if vg_name is not None:
+      vglist = node_result.get(constants.NV_VGLIST, None)
+      if not vglist:
+        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
+                        (node,))
          bad = True
+      else:
+        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
+                                              constants.MIN_VG_SIZE)
+        if vgstatus:
+          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
+          bad = True
  
      # checks config file checksum
  
@@ -687,8 +748,8 @@ class LUVerifyCluster(LogicalUnit):
            else:
              # not candidate and this is not a must-have file
              bad = True
-            feedback_fn("  - ERROR: non master-candidate has old/wrong file"
-                        " '%s'" % file_name)
+            feedback_fn("  - ERROR: file '%s' should not exist on non master"
+                        " candidates (and the file is outdated)" % file_name)
          else:
            # all good, except non-master/non-must have combination
            if not node_is_mc and not must_have_file:
@@ -724,6 +785,25 @@ class LUVerifyCluster(LogicalUnit):
          if hv_result is not None:
            feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                        (hv_name, hv_result))
+
+    # check used drbd list
+    if vg_name is not None:
+      used_minors = node_result.get(constants.NV_DRBDLIST, [])
+      if not isinstance(used_minors, (tuple, list)):
+        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
+                    str(used_minors))
+      else:
+        for minor, (iname, must_exist) in drbd_map.items():
+          if minor not in used_minors and must_exist:
+            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
+                        " not active" % (minor, iname))
+            bad = True
+        for minor in used_minors:
+          if minor not in drbd_map:
+            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
+                        minor)
+            bad = True
+
      return bad
  
    def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
@@ -751,7 +831,7 @@ class LUVerifyCluster(LogicalUnit):
                            (volume, node))
            bad = True
  
-    if not instanceconfig.status == 'down':
+    if instanceconfig.admin_up:
        if ((node_current not in node_instance or
            not instance in node_instance[node_current]) and
            node_current not in n_offline):
@@ -825,7 +905,7 @@ class LUVerifyCluster(LogicalUnit):
            if bep[constants.BE_AUTO_BALANCE]:
              needed_mem += bep[constants.BE_MEMORY]
          if nodeinfo['mfree'] < needed_mem:
-          feedback_fn("  - ERROR: not enough memory on node %s to accomodate"
+          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                        " failovers should node %s fail" % (node, prinode))
            bad = True
      return bad
@@ -844,13 +924,17 @@ class LUVerifyCluster(LogicalUnit):
    def BuildHooksEnv(self):
      """Build hooks env.
  
-    Cluster-Verify hooks just rone in the post phase and their failure makes
+    Cluster-Verify hooks just run in the post phase and their failure makes
      the output be logged in the verify output and the verification to fail.
  
      """
      all_nodes = self.cfg.GetNodeList()
-    # TODO: populate the environment with useful information for verify hooks
-    env = {}
+    env = {
+      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
+      }
+    for node in self.cfg.GetAllNodesInfo().values():
+      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
+
      return env, [], all_nodes
  
    def Exec(self, feedback_fn):
@@ -867,9 +951,12 @@ class LUVerifyCluster(LogicalUnit):
      nodelist = utils.NiceSort(self.cfg.GetNodeList())
      nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
      instancelist = utils.NiceSort(self.cfg.GetInstanceList())
+    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
+                        for iname in instancelist)
      i_non_redundant = [] # Non redundant instances
      i_non_a_balanced = [] # Non auto-balanced instances
      n_offline = [] # List of offline nodes
+    n_drained = [] # List of nodes being drained
      node_volume = {}
      node_instance = {}
      node_info = {}
@@ -889,21 +976,27 @@ class LUVerifyCluster(LogicalUnit):
      feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
      node_verify_param = {
        constants.NV_FILELIST: file_names,
-      constants.NV_NODELIST: nodelist,
+      constants.NV_NODELIST: [node.name for node in nodeinfo
+                              if not node.offline],
        constants.NV_HYPERVISOR: hypervisors,
        constants.NV_NODENETTEST: [(node.name, node.primary_ip,
-                                  node.secondary_ip) for node in nodeinfo],
-      constants.NV_LVLIST: vg_name,
+                                  node.secondary_ip) for node in nodeinfo
+                                 if not node.offline],
        constants.NV_INSTANCELIST: hypervisors,
-      constants.NV_VGLIST: None,
        constants.NV_VERSION: None,
        constants.NV_HVINFO: self.cfg.GetHypervisorType(),
        }
+    if vg_name is not None:
+      node_verify_param[constants.NV_VGLIST] = None
+      node_verify_param[constants.NV_LVLIST] = vg_name
+      node_verify_param[constants.NV_DRBDLIST] = None
      all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                             self.cfg.GetClusterName())
  
      cluster = self.cfg.GetClusterInfo()
      master_node = self.cfg.GetMasterNode()
+    all_drbd_map = self.cfg.ComputeDRBDMap()
+
      for node_i in nodeinfo:
        node = node_i.name
        nresult = all_nvinfo[node].data
@@ -917,6 +1010,9 @@ class LUVerifyCluster(LogicalUnit):
          ntype = "master"
        elif node_i.master_candidate:
          ntype = "master candidate"
+      elif node_i.drained:
+        ntype = "drained"
+        n_drained.append(node)
        else:
          ntype = "regular"
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
@@ -926,14 +1022,29 @@ class LUVerifyCluster(LogicalUnit):
          bad = True
          continue
  
+      node_drbd = {}
+      for minor, instance in all_drbd_map[node].items():
+        if instance not in instanceinfo:
+          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
+                      instance)
+          # ghost instance should not be running, but otherwise we
+          # don't give double warnings (both ghost instance and
+          # unallocated minor in use)
+          node_drbd[minor] = (instance, False)
+        else:
+          instance = instanceinfo[instance]
+          node_drbd[minor] = (instance.name, instance.admin_up)
        result = self._VerifyNode(node_i, file_names, local_checksums,
-                                nresult, feedback_fn, master_files)
+                                nresult, feedback_fn, master_files,
+                                node_drbd, vg_name)
        bad = bad or result
  
        lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
-      if isinstance(lvdata, basestring):
+      if vg_name is None:
+        node_volume[node] = {}
+      elif isinstance(lvdata, basestring):
          feedback_fn("  - ERROR: LVM problem on node %s: %s" %
-                    (node, lvdata.encode('string_escape')))
+                    (node, utils.SafeEncode(lvdata)))
          bad = True
          node_volume[node] = {}
        elif not isinstance(lvdata, dict):
@@ -963,7 +1074,6 @@ class LUVerifyCluster(LogicalUnit):
        try:
          node_info[node] = {
            "mfree": int(nodeinfo['memory_free']),
-          "dfree": int(nresult[constants.NV_VGLIST][vg_name]),
            "pinst": [],
            "sinst": [],
            # dictionary holding all instances this node is secondary for,
@@ -974,8 +1084,19 @@ class LUVerifyCluster(LogicalUnit):
            # secondary.
            "sinst-by-pnode": {},
          }
-      except ValueError:
-        feedback_fn("  - ERROR: invalid value returned from node %s" % (node,))
+        # FIXME: devise a free space model for file based instances as well
+        if vg_name is not None:
+          if (constants.NV_VGLIST not in nresult or
+              vg_name not in nresult[constants.NV_VGLIST]):
+            feedback_fn("  - ERROR: node %s didn't return data for the"
+                        " volume group '%s' - it is either missing or broken" %
+                        (node, vg_name))
+            bad = True
+            continue
+          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
+      except (ValueError, KeyError):
+        feedback_fn("  - ERROR: invalid nodeinfo value returned"
+                    " from node %s" % (node,))
          bad = True
          continue
  
@@ -983,7 +1104,7 @@ class LUVerifyCluster(LogicalUnit):
  
      for instance in instancelist:
        feedback_fn("* Verifying instance %s" % instance)
-      inst_config = self.cfg.GetInstanceInfo(instance)
+      inst_config = instanceinfo[instance]
        result =  self._VerifyInstance(instance, inst_config, node_volume,
                                       node_instance, feedback_fn, n_offline)
        bad = bad or result
@@ -1064,10 +1185,13 @@ class LUVerifyCluster(LogicalUnit):
      if n_offline:
        feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
  
+    if n_drained:
+      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
+
      return not bad
  
    def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
-    """Analize the post-hooks' result
+    """Analyze the post-hooks' result
  
      This method analyses the hook result, handles it, and sends some
      nicely-formatted feedback back to the user.
@@ -1152,7 +1276,7 @@ class LUVerifyDisks(NoHooksLU):
      nv_dict = {}
      for inst in instances:
        inst_lvs = {}
-      if (inst.status != "up" or
+      if (not inst.admin_up or
            inst.disk_template not in constants.DTS_NET_MIRROR):
          continue
        inst.MapLVsByNode(inst_lvs)
@@ -1166,7 +1290,6 @@ class LUVerifyDisks(NoHooksLU):
  
      node_lvs = self.rpc.call_volume_list(nodes, vg_name)
  
-    to_act = set()
      for node in nodes:
        # node_volume
        lvs = node_lvs[node]
@@ -1179,6 +1302,7 @@ class LUVerifyDisks(NoHooksLU):
        if isinstance(lvs, basestring):
          logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
          res_nlvm[node] = lvs
+        continue
        elif not isinstance(lvs, dict):
          logging.warning("Connection to node %s failed or invalid data"
                          " returned", node)
@@ -1201,6 +1325,128 @@ class LUVerifyDisks(NoHooksLU):
      return result
  
  
+class LURepairDiskSizes(NoHooksLU):
+  """Verifies the cluster disk sizes.
+
+  """
+  _OP_REQP = ["instances"]
+  REQ_BGL = False
+
+  def ExpandNames(self):
+
+    if not isinstance(self.op.instances, list):
+      raise errors.OpPrereqError("Invalid argument type 'instances'")
+
+    if self.op.instances:
+      self.wanted_names = []
+      for name in self.op.instances:
+        full_name = self.cfg.ExpandInstanceName(name)
+        if full_name is None:
+          raise errors.OpPrereqError("Instance '%s' not known" % name)
+        self.wanted_names.append(full_name)
+      self.needed_locks = {
+        locking.LEVEL_NODE: [],
+        locking.LEVEL_INSTANCE: self.wanted_names,
+        }
+      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+    else:
+      self.wanted_names = None
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+        locking.LEVEL_INSTANCE: locking.ALL_SET,
+        }
+    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODE and self.wanted_names is not None:
+      self._LockInstancesNodes(primary_only=True)
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This only checks the optional instance list against the existing names.
+
+    """
+    if self.wanted_names is None:
+      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
+
+    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
+                             in self.wanted_names]
+
+  def _EnsureChildSizes(self, disk):
+    """Ensure children of the disk have the needed disk size.
+
+    This is valid mainly for DRBD8 and fixes an issue where the
+    children have smaller disk size.
+
+    @param disk: an L{ganeti.objects.Disk} object
+
+    """
+    if disk.dev_type == constants.LD_DRBD8:
+      assert disk.children, "Empty children for DRBD8?"
+      fchild = disk.children[0]
+      mismatch = fchild.size < disk.size
+      if mismatch:
+        self.LogInfo("Child disk has size %d, parent %d, fixing",
+                     fchild.size, disk.size)
+        fchild.size = disk.size
+
+      # and we recurse on this child only, not on the metadev
+      return self._EnsureChildSizes(fchild) or mismatch
+    else:
+      return False
+
+  def Exec(self, feedback_fn):
+    """Verify the size of cluster disks.
+
+    """
+    # TODO: check child disks too
+    # TODO: check differences in size between primary/secondary nodes
+    per_node_disks = {}
+    for instance in self.wanted_instances:
+      pnode = instance.primary_node
+      if pnode not in per_node_disks:
+        per_node_disks[pnode] = []
+      for idx, disk in enumerate(instance.disks):
+        per_node_disks[pnode].append((instance, idx, disk))
+
+    changed = []
+    for node, dskl in per_node_disks.items():
+      newl = [v[2].Copy() for v in dskl]
+      for dsk in newl:
+        self.cfg.SetDiskID(dsk, node)
+      result = self.rpc.call_blockdev_getsizes(node, newl)
+      if result.failed:
+        self.LogWarning("Failure in blockdev_getsizes call to node"
+                        " %s, ignoring", node)
+        continue
+      if len(result.data) != len(dskl):
+        self.LogWarning("Invalid result from node %s, ignoring node results",
+                        node)
+        continue
+      for ((instance, idx, disk), size) in zip(dskl, result.data):
+        if size is None:
+          self.LogWarning("Disk %d of instance %s did not return size"
+                          " information, ignoring", idx, instance.name)
+          continue
+        if not isinstance(size, (int, long)):
+          self.LogWarning("Disk %d of instance %s did not return valid"
+                          " size information, ignoring", idx, instance.name)
+          continue
+        size = size >> 20
+        if size != disk.size:
+          self.LogInfo("Disk %d of instance %s has mismatched size,"
+                       " correcting: recorded %d, actual %d", idx,
+                       instance.name, disk.size, size)
+          disk.size = size
+          self.cfg.Update(instance)
+          changed.append((instance.name, idx, size))
+        if self._EnsureChildSizes(disk):
+          self.cfg.Update(instance)
+          changed.append((instance.name, idx, disk.size))
+    return changed
+
+
  class LURenameCluster(LogicalUnit):
    """Rename the cluster.
  
@@ -1275,7 +1521,7 @@ class LURenameCluster(LogicalUnit):
                          constants.SSH_KNOWN_HOSTS_FILE, to_node)
  
      finally:
-      result = self.rpc.call_node_start_master(master, False)
+      result = self.rpc.call_node_start_master(master, False, False)
        if result.failed or not result.data:
          self.LogWarning("Could not re-enable the master role on"
                          " the master, please restart manually.")
@@ -1286,7 +1532,7 @@ def _RecursiveCheckIfLVMBased(disk):
  
    @type disk: L{objects.Disk}
    @param disk: the disk to check
-  @rtype: booleean
+  @rtype: boolean
    @return: boolean indicating whether a LD_LV dev_type was found or not
  
    """
@@ -1306,7 +1552,7 @@ class LUSetClusterParams(LogicalUnit):
    _OP_REQP = []
    REQ_BGL = False
  
-  def CheckParameters(self):
+  def CheckArguments(self):
      """Check parameters
  
      """
@@ -1315,7 +1561,7 @@ class LUSetClusterParams(LogicalUnit):
      if self.op.candidate_pool_size is not None:
        try:
          self.op.candidate_pool_size = int(self.op.candidate_pool_size)
-      except ValueError, err:
+      except (ValueError, TypeError), err:
          raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                     str(err))
        if self.op.candidate_pool_size < 1:
@@ -1347,8 +1593,6 @@ class LUSetClusterParams(LogicalUnit):
      if the given volume group is valid.
  
      """
-    # FIXME: This only works because there is only one parameter that can be
-    # changed or removed.
      if self.op.vg_name is not None and not self.op.vg_name:
        instances = self.cfg.GetAllInstancesInfo().values()
        for inst in instances:
@@ -1377,7 +1621,7 @@ class LUSetClusterParams(LogicalUnit):
      self.cluster = cluster = self.cfg.GetClusterInfo()
      # validate beparams changes
      if self.op.beparams:
-      utils.CheckBEParams(self.op.beparams)
+      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
        self.new_beparams = cluster.FillDict(
          cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
  
@@ -1394,6 +1638,13 @@ class LUSetClusterParams(LogicalUnit):
  
      if self.op.enabled_hypervisors is not None:
        self.hv_list = self.op.enabled_hypervisors
+      if not self.hv_list:
+        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
+                                   " least one member")
+      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
+      if invalid_hvs:
+        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
+                                   " entries: %s" % invalid_hvs)
      else:
        self.hv_list = cluster.enabled_hypervisors
  
@@ -1405,6 +1656,7 @@ class LUSetClusterParams(LogicalUnit):
               hv_name in self.op.enabled_hypervisors)):
            # either this is a new hypervisor, or its parameters have changed
            hv_class = hypervisor.GetHypervisor(hv_name)
+          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
            hv_class.CheckParameterSyntax(hv_params)
            _CheckHVParams(self, node_list, hv_name, hv_params)
  
@@ -1413,8 +1665,11 @@ class LUSetClusterParams(LogicalUnit):
  
      """
      if self.op.vg_name is not None:
-      if self.op.vg_name != self.cfg.GetVGName():
-        self.cfg.SetVGName(self.op.vg_name)
+      new_volume = self.op.vg_name
+      if not new_volume:
+        new_volume = None
+      if new_volume != self.cfg.GetVGName():
+        self.cfg.SetVGName(new_volume)
        else:
          feedback_fn("Cluster LVM configuration already in desired"
                      " state, not changing")
@@ -1426,13 +1681,37 @@ class LUSetClusterParams(LogicalUnit):
        self.cluster.beparams[constants.BEGR_DEFAULT] = self.new_beparams
      if self.op.candidate_pool_size is not None:
        self.cluster.candidate_pool_size = self.op.candidate_pool_size
+      # we need to update the pool size here, otherwise the save will fail
+      _AdjustCandidatePool(self)
  
      self.cfg.Update(self.cluster)
  
-    # we want to update nodes after the cluster so that if any errors
-    # happen, we have recorded and saved the cluster info
-    if self.op.candidate_pool_size is not None:
-      _AdjustCandidatePool(self)
+
+class LURedistributeConfig(NoHooksLU):
+  """Force the redistribution of cluster configuration.
+
+  This is a very simple LU.
+
+  """
+  _OP_REQP = []
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    self.needed_locks = {
+      locking.LEVEL_NODE: locking.ALL_SET,
+    }
+    self.share_locks[locking.LEVEL_NODE] = 1
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    """
+
+  def Exec(self, feedback_fn):
+    """Redistribute the configuration.
+
+    """
+    self.cfg.Update(self.cfg.GetClusterInfo())
  
  
  def _WaitForSync(lu, instance, oneshot=False, unlock=False):
@@ -1451,6 +1730,7 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False):
      lu.cfg.SetDiskID(dev, node)
  
    retries = 0
+  degr_retries = 10 # in seconds, as we sleep 1 second each time
    while True:
      max_time = 0
      done = True
@@ -1466,8 +1746,7 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False):
        continue
      rstats = rstats.data
      retries = 0
-    for i in range(len(rstats)):
-      mstat = rstats[i]
+    for i, mstat in enumerate(rstats):
        if mstat is None:
          lu.LogWarning("Can't compute data for node %s/%s",
                             node, instance.disks[i].iv_name)
@@ -1484,6 +1763,16 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False):
            rem_time = "no time estimate"
          lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                          (instance.disks[i].iv_name, perc_done, rem_time))
+
+    # if we're done but degraded, let's do a few small retries, to
+    # make sure we see a stable and not transient situation; therefore
+    # we force restart of the loop
+    if (done or oneshot) and cumul_degraded and degr_retries > 0:
+      logging.info("Degraded disks found, %d retries left", degr_retries)
+      degr_retries -= 1
+      time.sleep(1)
+      continue
+
      if done or oneshot:
        break
  
@@ -1511,11 +1800,15 @@ def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
    result = True
    if on_primary or dev.AssembleOnSecondary():
      rstats = lu.rpc.call_blockdev_find(node, dev)
-    if rstats.failed or not rstats.data:
-      logging.warning("Node %s: disk degraded, not found or node down", node)
+    msg = rstats.RemoteFailMsg()
+    if msg:
+      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
+      result = False
+    elif not rstats.payload:
+      lu.LogWarning("Can't find disk on node %s", node)
        result = False
      else:
-      result = result and (not rstats.data[idx])
+      result = result and (not rstats.payload[idx])
    if dev.children:
      for child in dev.children:
        result = result and _CheckDiskConsistency(lu, child, node, on_primary)
@@ -1541,9 +1834,11 @@ class LUDiagnoseOS(NoHooksLU):
                         selected=self.op.output_fields)
  
      # Lock all nodes, in shared mode
+    # Temporary removal of locks, should be reverted later
+    # TODO: reintroduce locks when they are lighter-weight
      self.needed_locks = {}
-    self.share_locks[locking.LEVEL_NODE] = 1
-    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+    #self.share_locks[locking.LEVEL_NODE] = 1
+    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  
    def CheckPrereq(self):
      """Check prerequisites.
@@ -1558,7 +1853,7 @@ class LUDiagnoseOS(NoHooksLU):
      @param rlist: a map with node names as keys and OS objects as values
  
      @rtype: dict
-    @returns: a dictionary with osnames as keys and as value another map, with
+    @return: a dictionary with osnames as keys and as value another map, with
          nodes as keys and list of OS objects as values, eg::
  
            {"debian-etch": {"node1": [<object>,...],
@@ -1567,6 +1862,11 @@ class LUDiagnoseOS(NoHooksLU):
  
      """
      all_os = {}
+    # we build here the list of nodes that didn't fail the RPC (at RPC
+    # level), so that nodes with a non-responding node daemon don't
+    # make all OSes invalid
+    good_nodes = [node_name for node_name in rlist
+                  if not rlist[node_name].failed]
      for node_name, nr in rlist.iteritems():
        if nr.failed or not nr.data:
          continue
@@ -1575,7 +1875,7 @@ class LUDiagnoseOS(NoHooksLU):
            # build a list of nodes for this os containing empty lists
            # for each node in node_list
            all_os[os_obj.name] = {}
-          for nname in node_list:
+          for nname in good_nodes:
              all_os[os_obj.name][nname] = []
          all_os[os_obj.name][node_name].append(os_obj)
      return all_os
@@ -1584,11 +1884,11 @@ class LUDiagnoseOS(NoHooksLU):
      """Compute the list of OSes.
  
      """
-    node_list = self.acquired_locks[locking.LEVEL_NODE]
-    node_data = self.rpc.call_os_diagnose(node_list)
+    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
+    node_data = self.rpc.call_os_diagnose(valid_nodes)
      if node_data == False:
        raise errors.OpExecError("Can't gather the list of OSes")
-    pol = self._DiagnoseByOS(node_list, node_data)
+    pol = self._DiagnoseByOS(valid_nodes, node_data)
      output = []
      for os_name, os_data in pol.iteritems():
        row = []
@@ -1640,7 +1940,7 @@ class LURemoveNode(LogicalUnit):
       - it does not have primary or secondary instances
       - it's not the master
  
-    Any errors are signalled by raising errors.OpPrereqError.
+    Any errors are signaled by raising errors.OpPrereqError.
  
      """
      node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
@@ -1656,11 +1956,8 @@ class LURemoveNode(LogicalUnit):
  
      for instance_name in instance_list:
        instance = self.cfg.GetInstanceInfo(instance_name)
-      if node.name == instance.primary_node:
-        raise errors.OpPrereqError("Instance %s still running on the node,"
-                                   " please remove first." % instance_name)
-      if node.name in instance.secondary_nodes:
-        raise errors.OpPrereqError("Instance %s has node as a secondary,"
+      if node.name in instance.all_nodes:
+        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                     " please remove first." % instance_name)
      self.op.node_name = node.name
      self.node = node
@@ -1685,13 +1982,13 @@ class LUQueryNodes(NoHooksLU):
    """Logical unit for querying nodes.
  
    """
-  _OP_REQP = ["output_fields", "names"]
+  _OP_REQP = ["output_fields", "names", "use_locking"]
    REQ_BGL = False
    _FIELDS_DYNAMIC = utils.FieldSet(
      "dtotal", "dfree",
      "mtotal", "mnode", "mfree",
      "bootid",
-    "ctotal",
+    "ctotal", "cnodes", "csockets",
      )
  
    _FIELDS_STATIC = utils.FieldSet(
@@ -1702,6 +1999,8 @@ class LUQueryNodes(NoHooksLU):
      "master_candidate",
      "master",
      "offline",
+    "drained",
+    "role",
      )
  
    def ExpandNames(self):
@@ -1717,7 +2016,8 @@ class LUQueryNodes(NoHooksLU):
      else:
        self.wanted = locking.ALL_SET
  
-    self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_locking = self.do_node_query and self.op.use_locking
      if self.do_locking:
        # if we don't request only static fields, we need to lock the nodes
        self.needed_locks[locking.LEVEL_NODE] = self.wanted
@@ -1752,7 +2052,7 @@ class LUQueryNodes(NoHooksLU):
  
      # begin data gathering
  
-    if self.do_locking:
+    if self.do_node_query:
        live_data = {}
        node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                            self.cfg.GetHypervisorType())
@@ -1769,6 +2069,8 @@ class LUQueryNodes(NoHooksLU):
              "dfree": fn(int, nodeinfo.get('vg_free', None)),
              "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
              "bootid": nodeinfo.get('bootid', None),
+            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
+            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
              }
          else:
            live_data[name] = {}
@@ -1781,10 +2083,9 @@ class LUQueryNodes(NoHooksLU):
      inst_fields = frozenset(("pinst_cnt", "pinst_list",
                               "sinst_cnt", "sinst_list"))
      if inst_fields & frozenset(self.op.output_fields):
-      instancelist = self.cfg.GetInstanceList()
+      inst_data = self.cfg.GetAllInstancesInfo()
  
-      for instance_name in instancelist:
-        inst = self.cfg.GetInstanceInfo(instance_name)
+      for instance_name, inst in inst_data.items():
          if inst.primary_node in node_to_primary:
            node_to_primary[inst.primary_node].add(inst.name)
          for secnode in inst.secondary_nodes:
@@ -1823,8 +2124,21 @@ class LUQueryNodes(NoHooksLU):
            val = node.name == master_node
          elif field == "offline":
            val = node.offline
+        elif field == "drained":
+          val = node.drained
          elif self._FIELDS_DYNAMIC.Matches(field):
            val = live_data[node.name].get(field, None)
+        elif field == "role":
+          if node.name == master_node:
+            val = "M"
+          elif node.master_candidate:
+            val = "C"
+          elif node.drained:
+            val = "D"
+          elif node.offline:
+            val = "O"
+          else:
+            val = "R"
          else:
            raise errors.ParameterError(field)
          node_output.append(val)
@@ -1946,7 +2260,7 @@ class LUAddNode(LogicalUnit):
       - it is resolvable
       - its parameters (single/dual homed) matches the cluster
  
-    Any errors are signalled by raising errors.OpPrereqError.
+    Any errors are signaled by raising errors.OpPrereqError.
  
      """
      node_name = self.op.node_name
@@ -2000,7 +2314,7 @@ class LUAddNode(LogicalUnit):
          raise errors.OpPrereqError("The master has a private ip but the"
                                     " new node doesn't have one")
  
-    # checks reachablity
+    # checks reachability
      if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("Node not reachable by ping")
  
@@ -2012,14 +2326,24 @@ class LUAddNode(LogicalUnit):
                                     " based ping to noded port")
  
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
-    mc_now, _ = self.cfg.GetMasterCandidateStats()
-    master_candidate = mc_now < cp_size
+    if self.op.readd:
+      exceptions = [node]
+    else:
+      exceptions = []
+    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
+    # the new node will increase mc_max by one, so:
+    mc_max = min(mc_max + 1, cp_size)
+    self.master_candidate = mc_now < mc_max
  
-    self.new_node = objects.Node(name=node,
-                                 primary_ip=primary_ip,
-                                 secondary_ip=secondary_ip,
-                                 master_candidate=master_candidate,
-                                 offline=False)
+    if self.op.readd:
+      self.new_node = self.cfg.GetNodeInfo(node)
+      assert self.new_node is not None, "Can't retrieve locked node %s" % node
+    else:
+      self.new_node = objects.Node(name=node,
+                                   primary_ip=primary_ip,
+                                   secondary_ip=secondary_ip,
+                                   master_candidate=self.master_candidate,
+                                   offline=False, drained=False)
  
    def Exec(self, feedback_fn):
      """Adds the new node to the cluster.
@@ -2028,6 +2352,20 @@ class LUAddNode(LogicalUnit):
      new_node = self.new_node
      node = new_node.name
  
+    # for re-adds, reset the offline/drained/master-candidate flags;
+    # we need to reset here, otherwise offline would prevent RPC calls
+    # later in the procedure; this also means that if the re-add
+    # fails, we are left with a non-offlined, broken node
+    if self.op.readd:
+      new_node.drained = new_node.offline = False
+      self.LogInfo("Readding a node, the offline/drained flags were reset")
+      # if we demote the node, we do cleanup later in the procedure
+      new_node.master_candidate = self.master_candidate
+
+    # notify the user about any possible mc promotion
+    if new_node.master_candidate:
+      self.LogInfo("Node will be a master candidate")
+
      # check connectivity
      result = self.rpc.call_version([node])[node]
      result.Raise()
@@ -2061,11 +2399,14 @@ class LUAddNode(LogicalUnit):
                                      keyarray[2],
                                      keyarray[3], keyarray[4], keyarray[5])
  
-    if result.failed or not result.data:
-      raise errors.OpExecError("Cannot transfer ssh keys to the new node")
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Cannot transfer ssh keys to the"
+                               " new node: %s" % msg)
  
      # Add node to our /etc/hosts, and add key to known_hosts
-    utils.AddHostToEtcHosts(new_node.name)
+    if self.cfg.GetClusterInfo().modify_etc_hosts:
+      utils.AddHostToEtcHosts(new_node.name)
  
      if new_node.secondary_ip != new_node.primary_ip:
        result = self.rpc.call_node_has_ip_address(new_node.name,
@@ -2089,8 +2430,9 @@ class LUAddNode(LogicalUnit):
                                   " for remote verification" % verifier)
        if result[verifier].data['nodelist']:
          for failed in result[verifier].data['nodelist']:
-          feedback_fn("ssh/hostname verification failed %s -> %s" %
-                      (verifier, result[verifier]['nodelist'][failed]))
+          feedback_fn("ssh/hostname verification failed"
+                      " (checking from %s): %s" %
+                      (verifier, result[verifier].data['nodelist'][failed]))
          raise errors.OpExecError("ssh/hostname verification failed.")
  
      # Distribute updated /etc/hosts and known_hosts to all nodes,
@@ -2110,8 +2452,10 @@ class LUAddNode(LogicalUnit):
            logging.error("Copy of file %s to node %s failed", fname, to_node)
  
      to_copy = []
-    if constants.HT_XEN_HVM in self.cfg.GetClusterInfo().enabled_hypervisors:
+    enabled_hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
+    if constants.HTS_COPY_VNC_PASSWORD.intersection(enabled_hypervisors):
        to_copy.append(constants.VNC_PASSWORD_FILE)
+
      for fname in to_copy:
        result = self.rpc.call_upload_file([node], fname)
        if result[node].failed or not result[node]:
@@ -2119,6 +2463,15 @@ class LUAddNode(LogicalUnit):
  
      if self.op.readd:
        self.context.ReaddNode(new_node)
+      # make sure we redistribute the config
+      self.cfg.Update(new_node)
+      # and make sure the new node will not have old files around
+      if not new_node.master_candidate:
+        result = self.rpc.call_node_demote_from_mc(new_node.name)
+        msg = result.RemoteFailMsg()
+        if msg:
+          self.LogWarning("Node failed to demote itself from master"
+                          " candidate status: %s" % msg)
      else:
        self.context.AddNode(new_node)
  
@@ -2139,11 +2492,13 @@ class LUSetNodeParams(LogicalUnit):
      self.op.node_name = node_name
      _CheckBooleanOpField(self.op, 'master_candidate')
      _CheckBooleanOpField(self.op, 'offline')
-    if self.op.master_candidate is None and self.op.offline is None:
+    _CheckBooleanOpField(self.op, 'drained')
+    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
+    if all_mods.count(None) == 3:
        raise errors.OpPrereqError("Please pass at least one modification")
-    if self.op.offline == True and self.op.master_candidate == True:
-      raise errors.OpPrereqError("Can't set the node into offline and"
-                                 " master_candidate at the same time")
+    if all_mods.count(True) > 1:
+      raise errors.OpPrereqError("Can't set the node into more than one"
+                                 " state at the same time")
  
    def ExpandNames(self):
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
@@ -2158,6 +2513,7 @@ class LUSetNodeParams(LogicalUnit):
        "OP_TARGET": self.op.node_name,
        "MASTER_CANDIDATE": str(self.op.master_candidate),
        "OFFLINE": str(self.op.offline),
+      "DRAINED": str(self.op.drained),
        }
      nl = [self.cfg.GetMasterNode(),
            self.op.node_name]
@@ -2171,12 +2527,16 @@ class LUSetNodeParams(LogicalUnit):
      """
      node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
  
-    if ((self.op.master_candidate == False or self.op.offline == True)
-        and node.master_candidate):
-      # we will demote the node from master_candidate
+    if (self.op.master_candidate is not None or
+        self.op.drained is not None or
+        self.op.offline is not None):
+      # we can't change the master's node flags
        if self.op.node_name == self.cfg.GetMasterNode():
-        raise errors.OpPrereqError("The master node has to be a"
-                                   " master candidate and online")
+        raise errors.OpPrereqError("The master role can be changed"
+                                   " only via masterfailover")
+
+    if ((self.op.master_candidate == False or self.op.offline == True or
+         self.op.drained == True) and node.master_candidate):
        cp_size = self.cfg.GetClusterInfo().candidate_pool_size
        num_candidates, _ = self.cfg.GetMasterCandidateStats()
        if num_candidates <= cp_size:
@@ -2187,10 +2547,11 @@ class LUSetNodeParams(LogicalUnit):
          else:
            raise errors.OpPrereqError(msg)
  
-    if (self.op.master_candidate == True and node.offline and
-        not self.op.offline == False):
-      raise errors.OpPrereqError("Can't set an offline node to"
-                                 " master_candidate")
+    if (self.op.master_candidate == True and
+        ((node.offline and not self.op.offline == False) or
+         (node.drained and not self.op.drained == False))):
+      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
+                                 " to master_candidate" % node.name)
  
      return
  
@@ -2201,29 +2562,50 @@ class LUSetNodeParams(LogicalUnit):
      node = self.node
  
      result = []
+    changed_mc = False
  
      if self.op.offline is not None:
        node.offline = self.op.offline
        result.append(("offline", str(self.op.offline)))
-      if self.op.offline == True and node.master_candidate:
-        node.master_candidate = False
-        result.append(("master_candidate", "auto-demotion due to offline"))
+      if self.op.offline == True:
+        if node.master_candidate:
+          node.master_candidate = False
+          changed_mc = True
+          result.append(("master_candidate", "auto-demotion due to offline"))
+        if node.drained:
+          node.drained = False
+          result.append(("drained", "clear drained status due to offline"))
  
      if self.op.master_candidate is not None:
        node.master_candidate = self.op.master_candidate
+      changed_mc = True
        result.append(("master_candidate", str(self.op.master_candidate)))
        if self.op.master_candidate == False:
          rrc = self.rpc.call_node_demote_from_mc(node.name)
-        if (rrc.failed or not isinstance(rrc.data, (tuple, list))
-            or len(rrc.data) != 2):
-          self.LogWarning("Node rpc error: %s" % rrc.error)
-        elif not rrc.data[0]:
-          self.LogWarning("Node failed to demote itself: %s" % rrc.data[1])
+        msg = rrc.RemoteFailMsg()
+        if msg:
+          self.LogWarning("Node failed to demote itself: %s" % msg)
+
+    if self.op.drained is not None:
+      node.drained = self.op.drained
+      result.append(("drained", str(self.op.drained)))
+      if self.op.drained == True:
+        if node.master_candidate:
+          node.master_candidate = False
+          changed_mc = True
+          result.append(("master_candidate", "auto-demotion due to drain"))
+          rrc = self.rpc.call_node_demote_from_mc(node.name)
+          msg = rrc.RemoteFailMsg()
+          if msg:
+            self.LogWarning("Node failed to demote itself: %s" % msg)
+        if node.offline:
+          node.offline = False
+          result.append(("offline", "clear offline status due to drain"))
  
      # this will trigger configuration file update, if needed
      self.cfg.Update(node)
      # this will trigger job queue propagation or cleanup
-    if self.op.node_name != self.cfg.GetMasterNode():
+    if changed_mc:
        self.context.ReaddNode(node)
  
      return result
@@ -2261,9 +2643,15 @@ class LUQueryClusterInfo(NoHooksLU):
        "master": cluster.master_node,
        "default_hypervisor": cluster.default_hypervisor,
        "enabled_hypervisors": cluster.enabled_hypervisors,
-      "hvparams": cluster.hvparams,
+      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
+                        for hypervisor_name in cluster.enabled_hypervisors]),
        "beparams": cluster.beparams,
        "candidate_pool_size": cluster.candidate_pool_size,
+      "default_bridge": cluster.default_bridge,
+      "master_netdev": cluster.master_netdev,
+      "volume_group_name": cluster.volume_group_name,
+      "file_storage_dir": cluster.file_storage_dir,
+      "tags": list(cluster.GetTags()),
        }
  
      return result
@@ -2335,19 +2723,24 @@ class LUActivateInstanceDisks(NoHooksLU):
      assert self.instance is not None, \
        "Cannot retrieve locked instance %s" % self.op.instance_name
      _CheckNodeOnline(self, self.instance.primary_node)
+    if not hasattr(self.op, "ignore_size"):
+      self.op.ignore_size = False
  
    def Exec(self, feedback_fn):
      """Activate the disks.
  
      """
-    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
+    disks_ok, disks_info = \
+              _AssembleInstanceDisks(self, self.instance,
+                                     ignore_size=self.op.ignore_size)
      if not disks_ok:
        raise errors.OpExecError("Cannot activate block devices")
  
      return disks_info
  
  
-def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
+def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
+                           ignore_size=False):
    """Prepare the block devices for an instance.
  
    This sets up the block devices on all nodes.
@@ -2359,6 +2752,10 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
    @type ignore_secondaries: boolean
    @param ignore_secondaries: if true, errors on secondary nodes
        won't result in an error return from the function
+  @type ignore_size: boolean
+  @param ignore_size: if true, the current known size of the disk
+      will not be used during the disk activation, useful for cases
+      when the size is wrong
    @return: False if the operation failed, otherwise a list of
        (host, instance_visible_name, node_visible_name)
        with the mapping from node devices to instance devices
@@ -2379,12 +2776,16 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
    # 1st pass, assemble on all nodes in secondary mode
    for inst_disk in instance.disks:
      for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
+      if ignore_size:
+        node_disk = node_disk.Copy()
+        node_disk.UnsetSize()
        lu.cfg.SetDiskID(node_disk, node)
        result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
-      if result.failed or not result:
+      msg = result.RemoteFailMsg()
+      if msg:
          lu.proc.LogWarning("Could not prepare block device %s on node %s"
-                           " (is_primary=False, pass=1)",
-                           inst_disk.iv_name, node)
+                           " (is_primary=False, pass=1): %s",
+                           inst_disk.iv_name, node, msg)
          if not ignore_secondaries:
            disks_ok = False
  
@@ -2395,14 +2796,19 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
      for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
        if node != instance.primary_node:
          continue
+      if ignore_size:
+        node_disk = node_disk.Copy()
+        node_disk.UnsetSize()
        lu.cfg.SetDiskID(node_disk, node)
        result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
-      if result.failed or not result:
+      msg = result.RemoteFailMsg()
+      if msg:
          lu.proc.LogWarning("Could not prepare block device %s on node %s"
-                           " (is_primary=True, pass=2)",
-                           inst_disk.iv_name, node)
+                           " (is_primary=True, pass=2): %s",
+                           inst_disk.iv_name, node, msg)
          disks_ok = False
-    device_info.append((instance.primary_node, inst_disk.iv_name, result.data))
+    device_info.append((instance.primary_node, inst_disk.iv_name,
+                        result.payload))
  
    # leave the disks configured for the primary node
    # this is a workaround that would be fixed better by
@@ -2417,7 +2823,7 @@ def _StartInstanceDisks(lu, instance, force):
    """Start the disks of an instance.
  
    """
-  disks_ok, dummy = _AssembleInstanceDisks(lu, instance,
+  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                             ignore_secondaries=force)
    if not disks_ok:
      _ShutdownInstanceDisks(lu, instance)
@@ -2492,17 +2898,18 @@ def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
    ignored.
  
    """
-  result = True
+  all_result = True
    for disk in instance.disks:
      for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
        lu.cfg.SetDiskID(top_disk, node)
        result = lu.rpc.call_blockdev_shutdown(node, top_disk)
-      if result.failed or not result.data:
-        logging.error("Could not shutdown block device %s on node %s",
-                      disk.iv_name, node)
+      msg = result.RemoteFailMsg()
+      if msg:
+        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
+                      disk.iv_name, node, msg)
          if not ignore_primary or node != instance.primary_node:
-          result = False
-  return result
+          all_result = False
+  return all_result
  
  
  def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
@@ -2561,8 +2968,7 @@ class LUStartupInstance(LogicalUnit):
        "FORCE": self.op.force,
        }
      env.update(_BuildInstanceHookEnvByObject(self, self.instance))
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
      return env, nl, nl
  
    def CheckPrereq(self):
@@ -2575,15 +2981,48 @@ class LUStartupInstance(LogicalUnit):
      assert self.instance is not None, \
        "Cannot retrieve locked instance %s" % self.op.instance_name
  
+    # extra beparams
+    self.beparams = getattr(self.op, "beparams", {})
+    if self.beparams:
+      if not isinstance(self.beparams, dict):
+        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
+                                   " dict" % (type(self.beparams), ))
+      # fill the beparams dict
+      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
+      self.op.beparams = self.beparams
+
+    # extra hvparams
+    self.hvparams = getattr(self.op, "hvparams", {})
+    if self.hvparams:
+      if not isinstance(self.hvparams, dict):
+        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
+                                   " dict" % (type(self.hvparams), ))
+
+      # check hypervisor parameter syntax (locally)
+      cluster = self.cfg.GetClusterInfo()
+      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
+      filled_hvp = cluster.FillDict(cluster.hvparams[instance.hypervisor],
+                                    instance.hvparams)
+      filled_hvp.update(self.hvparams)
+      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
+      hv_type.CheckParameterSyntax(filled_hvp)
+      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
+      self.op.hvparams = self.hvparams
+
      _CheckNodeOnline(self, instance.primary_node)
  
      bep = self.cfg.GetClusterInfo().FillBE(instance)
-    # check bridges existance
+    # check bridges existence
      _CheckInstanceBridgesExist(self, instance)
  
-    _CheckNodeFreeMemory(self, instance.primary_node,
-                         "starting instance %s" % instance.name,
-                         bep[constants.BE_MEMORY], instance.hypervisor)
+    remote_info = self.rpc.call_instance_info(instance.primary_node,
+                                              instance.name,
+                                              instance.hypervisor)
+    remote_info.Raise()
+    if not remote_info.data:
+      _CheckNodeFreeMemory(self, instance.primary_node,
+                           "starting instance %s" % instance.name,
+                           bep[constants.BE_MEMORY], instance.hypervisor)
  
    def Exec(self, feedback_fn):
      """Start the instance.
@@ -2591,7 +3030,6 @@ class LUStartupInstance(LogicalUnit):
      """
      instance = self.instance
      force = self.op.force
-    extra_args = getattr(self.op, "extra_args", "")
  
      self.cfg.MarkInstanceUp(instance.name)
  
@@ -2599,10 +3037,12 @@ class LUStartupInstance(LogicalUnit):
  
      _StartInstanceDisks(self, instance, force)
  
-    result = self.rpc.call_instance_start(node_current, instance, extra_args)
-    if result.failed or not result.data:
+    result = self.rpc.call_instance_start(node_current, instance,
+                                          self.hvparams, self.beparams)
+    msg = result.RemoteFailMsg()
+    if msg:
        _ShutdownInstanceDisks(self, instance)
-      raise errors.OpExecError("Could not start instance")
+      raise errors.OpExecError("Could not start instance: %s" % msg)
  
  
  class LURebootInstance(LogicalUnit):
@@ -2632,10 +3072,10 @@ class LURebootInstance(LogicalUnit):
      """
      env = {
        "IGNORE_SECONDARIES": self.op.ignore_secondaries,
+      "REBOOT_TYPE": self.op.reboot_type,
        }
      env.update(_BuildInstanceHookEnvByObject(self, self.instance))
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
      return env, nl, nl
  
    def CheckPrereq(self):
@@ -2650,7 +3090,7 @@ class LURebootInstance(LogicalUnit):
  
      _CheckNodeOnline(self, instance.primary_node)
  
-    # check bridges existance
+    # check bridges existence
      _CheckInstanceBridgesExist(self, instance)
  
    def Exec(self, feedback_fn):
@@ -2660,25 +3100,32 @@ class LURebootInstance(LogicalUnit):
      instance = self.instance
      ignore_secondaries = self.op.ignore_secondaries
      reboot_type = self.op.reboot_type
-    extra_args = getattr(self.op, "extra_args", "")
  
      node_current = instance.primary_node
  
      if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                         constants.INSTANCE_REBOOT_HARD]:
+      for disk in instance.disks:
+        self.cfg.SetDiskID(disk, node_current)
        result = self.rpc.call_instance_reboot(node_current, instance,
-                                             reboot_type, extra_args)
-      if result.failed or not result.data:
-        raise errors.OpExecError("Could not reboot instance")
+                                             reboot_type)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not reboot instance: %s" % msg)
      else:
-      if not self.rpc.call_instance_shutdown(node_current, instance):
-        raise errors.OpExecError("could not shutdown instance for full reboot")
+      result = self.rpc.call_instance_shutdown(node_current, instance)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not shutdown instance for"
+                                 " full reboot: %s" % msg)
        _ShutdownInstanceDisks(self, instance)
        _StartInstanceDisks(self, instance, ignore_secondaries)
-      result = self.rpc.call_instance_start(node_current, instance, extra_args)
-      if result.failed or not result.data:
+      result = self.rpc.call_instance_start(node_current, instance, None, None)
+      msg = result.RemoteFailMsg()
+      if msg:
          _ShutdownInstanceDisks(self, instance)
-        raise errors.OpExecError("Could not start instance for full reboot")
+        raise errors.OpExecError("Could not start instance for"
+                                 " full reboot: %s" % msg)
  
      self.cfg.MarkInstanceUp(instance.name)
  
@@ -2702,8 +3149,7 @@ class LUShutdownInstance(LogicalUnit):
  
      """
      env = _BuildInstanceHookEnvByObject(self, self.instance)
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
      return env, nl, nl
  
    def CheckPrereq(self):
@@ -2725,8 +3171,9 @@ class LUShutdownInstance(LogicalUnit):
      node_current = instance.primary_node
      self.cfg.MarkInstanceDown(instance.name)
      result = self.rpc.call_instance_shutdown(node_current, instance)
-    if result.failed or not result.data:
-      self.proc.LogWarning("Could not shutdown instance")
+    msg = result.RemoteFailMsg()
+    if msg:
+      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
  
      _ShutdownInstanceDisks(self, instance)
  
@@ -2750,8 +3197,7 @@ class LUReinstallInstance(LogicalUnit):
  
      """
      env = _BuildInstanceHookEnvByObject(self, self.instance)
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
      return env, nl, nl
  
    def CheckPrereq(self):
@@ -2768,13 +3214,14 @@ class LUReinstallInstance(LogicalUnit):
      if instance.disk_template == constants.DT_DISKLESS:
        raise errors.OpPrereqError("Instance '%s' has no disks" %
                                   self.op.instance_name)
-    if instance.status != "down":
+    if instance.admin_up:
        raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                   self.op.instance_name)
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
-    if remote_info.failed or remote_info.data:
+    remote_info.Raise()
+    if remote_info.data:
        raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                   (self.op.instance_name,
                                    instance.primary_node))
@@ -2810,11 +3257,11 @@ class LUReinstallInstance(LogicalUnit):
      try:
        feedback_fn("Running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(inst.primary_node, inst)
-      result.Raise()
-      if not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
          raise errors.OpExecError("Could not install OS for instance %s"
-                                 " on node %s" %
-                                 (inst.name, inst.primary_node))
+                                 " on node %s: %s" %
+                                 (inst.name, inst.primary_node, msg))
      finally:
        _ShutdownInstanceDisks(self, inst)
  
@@ -2835,8 +3282,7 @@ class LURenameInstance(LogicalUnit):
      """
      env = _BuildInstanceHookEnvByObject(self, self.instance)
      env["INSTANCE_NEW_NAME"] = self.op.new_name
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
      return env, nl, nl
  
    def CheckPrereq(self):
@@ -2852,7 +3298,7 @@ class LURenameInstance(LogicalUnit):
                                   self.op.instance_name)
      _CheckNodeOnline(self, instance.primary_node)
  
-    if instance.status != "down":
+    if instance.admin_up:
        raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                   self.op.instance_name)
      remote_info = self.rpc.call_instance_info(instance.primary_node,
@@ -2921,10 +3367,11 @@ class LURenameInstance(LogicalUnit):
      try:
        result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                   old_name)
-      if result.failed or not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
          msg = ("Could not run OS rename script for instance %s on node %s"
-               " (but the instance has been renamed in Ganeti)" %
-               (inst.name, inst.primary_node))
+               " (but the instance has been renamed in Ganeti): %s" %
+               (inst.name, inst.primary_node, msg))
          self.proc.LogWarning(msg)
      finally:
        _ShutdownInstanceDisks(self, inst)
@@ -2977,12 +3424,14 @@ class LURemoveInstance(LogicalUnit):
                   instance.name, instance.primary_node)
  
      result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
-    if result.failed or not result.data:
+    msg = result.RemoteFailMsg()
+    if msg:
        if self.op.ignore_failures:
-        feedback_fn("Warning: can't shutdown instance")
+        feedback_fn("Warning: can't shutdown instance: %s" % msg)
        else:
-        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
-                                 (instance.name, instance.primary_node))
+        raise errors.OpExecError("Could not shutdown instance %s on"
+                                 " node %s: %s" %
+                                 (instance.name, instance.primary_node, msg))
  
      logging.info("Removing block devices for instance %s", instance.name)
  
@@ -3002,18 +3451,18 @@ class LUQueryInstances(NoHooksLU):
    """Logical unit for querying instances.
  
    """
-  _OP_REQP = ["output_fields", "names"]
+  _OP_REQP = ["output_fields", "names", "use_locking"]
    REQ_BGL = False
    _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
-                                    "admin_state", "admin_ram",
+                                    "admin_state",
                                      "disk_template", "ip", "mac", "bridge",
                                      "sda_size", "sdb_size", "vcpus", "tags",
                                      "network_port", "beparams",
-                                    "(disk).(size)/([0-9]+)",
-                                    "(disk).(sizes)",
-                                    "(nic).(mac|ip|bridge)/([0-9]+)",
-                                    "(nic).(macs|ips|bridges)",
-                                    "(disk|nic).(count)",
+                                    r"(disk)\.(size)/([0-9]+)",
+                                    r"(disk)\.(sizes)", "disk_usage",
+                                    r"(nic)\.(mac|ip|bridge)/([0-9]+)",
+                                    r"(nic)\.(macs|ips|bridges)",
+                                    r"(disk|nic)\.(count)",
                                      "serial_no", "hypervisor", "hvparams",] +
                                    ["hv/%s" % name
                                     for name in constants.HVS_PARAMETERS] +
@@ -3036,7 +3485,8 @@ class LUQueryInstances(NoHooksLU):
      else:
        self.wanted = locking.ALL_SET
  
-    self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_locking = self.do_node_query and self.op.use_locking
      if self.do_locking:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
        self.needed_locks[locking.LEVEL_NODE] = []
@@ -3057,19 +3507,25 @@ class LUQueryInstances(NoHooksLU):
  
      """
      all_info = self.cfg.GetAllInstancesInfo()
-    if self.do_locking:
-      instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
-    elif self.wanted != locking.ALL_SET:
-      instance_names = self.wanted
-      missing = set(instance_names).difference(all_info.keys())
-      if missing:
-        raise errors.OpExecError(
-          "Some instances were removed before retrieving their data: %s"
-          % missing)
+    if self.wanted == locking.ALL_SET:
+      # caller didn't specify instance names, so ordering is not important
+      if self.do_locking:
+        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
+      else:
+        instance_names = all_info.keys()
+      instance_names = utils.NiceSort(instance_names)
      else:
-      instance_names = all_info.keys()
+      # caller did specify names, so we must keep the ordering
+      if self.do_locking:
+        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
+      else:
+        tgt_set = all_info.keys()
+      missing = set(self.wanted).difference(tgt_set)
+      if missing:
+        raise errors.OpExecError("Some instances were removed before"
+                                 " retrieving their data: %s" % missing)
+      instance_names = self.wanted
  
-    instance_names = utils.NiceSort(instance_names)
      instance_list = [all_info[iname] for iname in instance_names]
  
      # begin data gathering
@@ -3079,7 +3535,7 @@ class LUQueryInstances(NoHooksLU):
  
      bad_nodes = []
      off_nodes = []
-    if self.do_locking:
+    if self.do_node_query:
        live_data = {}
        node_data = self.rpc.call_all_instances_info(nodes, hv_list)
        for name in nodes:
@@ -3116,7 +3572,7 @@ class LUQueryInstances(NoHooksLU):
          elif field == "snodes":
            val = list(instance.secondary_nodes)
          elif field == "admin_state":
-          val = (instance.status != "down")
+          val = instance.admin_up
          elif field == "oper_state":
            if instance.primary_node in bad_nodes:
              val = None
@@ -3130,12 +3586,12 @@ class LUQueryInstances(NoHooksLU):
            else:
              running = bool(live_data.get(instance.name))
              if running:
-              if instance.status != "down":
+              if instance.admin_up:
                  val = "running"
                else:
                  val = "ERROR_up"
              else:
-              if instance.status != "down":
+              if instance.admin_up:
                  val = "ERROR_down"
                else:
                  val = "ADMIN_down"
@@ -3146,20 +3602,34 @@ class LUQueryInstances(NoHooksLU):
              val = live_data[instance.name].get("memory", "?")
            else:
              val = "-"
+        elif field == "vcpus":
+          val = i_be[constants.BE_VCPUS]
          elif field == "disk_template":
            val = instance.disk_template
          elif field == "ip":
-          val = instance.nics[0].ip
+          if instance.nics:
+            val = instance.nics[0].ip
+          else:
+            val = None
          elif field == "bridge":
-          val = instance.nics[0].bridge
+          if instance.nics:
+            val = instance.nics[0].bridge
+          else:
+            val = None
          elif field == "mac":
-          val = instance.nics[0].mac
+          if instance.nics:
+            val = instance.nics[0].mac
+          else:
+            val = None
          elif field == "sda_size" or field == "sdb_size":
            idx = ord(field[2]) - ord('a')
            try:
              val = instance.FindDisk(idx).size
            except errors.OpPrereqError:
              val = None
+        elif field == "disk_usage": # total disk usage per node
+          disk_sizes = [{'size': disk.size} for disk in instance.disks]
+          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
          elif field == "tags":
            val = list(instance.GetTags())
          elif field == "serial_no":
@@ -3217,9 +3687,10 @@ class LUQueryInstances(NoHooksLU):
                  else:
                    assert False, "Unhandled NIC parameter"
            else:
-            assert False, "Unhandled variable parameter"
+            assert False, ("Declared but unhandled variable parameter '%s'" %
+                           field)
          else:
-          raise errors.ParameterError(field)
+          assert False, "Declared but unhandled parameter '%s'" % field
          iout.append(val)
        output.append(iout)
  
@@ -3279,12 +3750,18 @@ class LUFailoverInstance(LogicalUnit):
  
      target_node = secondary_nodes[0]
      _CheckNodeOnline(self, target_node)
-    # check memory requirements on the secondary node
-    _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
-                         instance.name, bep[constants.BE_MEMORY],
-                         instance.hypervisor)
+    _CheckNodeNotDrained(self, target_node)
+
+    if instance.admin_up:
+      # check memory requirements on the secondary node
+      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
+                           instance.name, bep[constants.BE_MEMORY],
+                           instance.hypervisor)
+    else:
+      self.LogInfo("Not checking memory on the secondary node as"
+                   " instance will not be started")
  
-    # check bridge existance
+    # check bridge existence
      brlist = [nic.bridge for nic in instance.nics]
      result = self.rpc.call_bridges_exist(target_node, brlist)
      result.Raise()
@@ -3309,7 +3786,7 @@ class LUFailoverInstance(LogicalUnit):
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
-        if instance.status == "up" and not self.op.ignore_consistency:
+        if instance.admin_up and not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
  
@@ -3318,15 +3795,17 @@ class LUFailoverInstance(LogicalUnit):
                   instance.name, source_node)
  
      result = self.rpc.call_instance_shutdown(source_node, instance)
-    if result.failed or not result.data:
+    msg = result.RemoteFailMsg()
+    if msg:
        if self.op.ignore_consistency:
          self.proc.LogWarning("Could not shutdown instance %s on node %s."
-                             " Proceeding"
-                             " anyway. Please make sure node %s is down",
-                             instance.name, source_node, source_node)
+                             " Proceeding anyway. Please make sure node"
+                             " %s is down. Error details: %s",
+                             instance.name, source_node, source_node, msg)
        else:
-        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
-                                 (instance.name, source_node))
+        raise errors.OpExecError("Could not shutdown instance %s on"
+                                 " node %s: %s" %
+                                 (instance.name, source_node, msg))
  
      feedback_fn("* deactivating the instance's disks on source node")
      if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
@@ -3337,73 +3816,470 @@ class LUFailoverInstance(LogicalUnit):
      self.cfg.Update(instance)
  
      # Only start the instance if it's marked as up
-    if instance.status == "up":
+    if instance.admin_up:
        feedback_fn("* activating the instance's disks on target node")
        logging.info("Starting instance %s on node %s",
                     instance.name, target_node)
  
-      disks_ok, dummy = _AssembleInstanceDisks(self, instance,
+      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                                 ignore_secondaries=True)
        if not disks_ok:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Can't activate the instance's disks")
  
        feedback_fn("* starting the instance on the target node")
-      result = self.rpc.call_instance_start(target_node, instance, None)
-      if result.failed or not result.data:
+      result = self.rpc.call_instance_start(target_node, instance, None, None)
+      msg = result.RemoteFailMsg()
+      if msg:
          _ShutdownInstanceDisks(self, instance)
-        raise errors.OpExecError("Could not start instance %s on node %s." %
-                                 (instance.name, target_node))
+        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
+                                 (instance.name, target_node, msg))
  
  
-def _CreateBlockDevOnPrimary(lu, node, instance, device, info):
-  """Create a tree of block devices on the primary node.
+class LUMigrateInstance(LogicalUnit):
+  """Migrate an instance.
  
-  This always creates all devices.
+  This is migration without shutting down, compared to the failover,
+  which is done with shutdown.
  
    """
-  if device.children:
-    for child in device.children:
-      if not _CreateBlockDevOnPrimary(lu, node, instance, child, info):
-        return False
+  HPATH = "instance-migrate"
+  HTYPE = constants.HTYPE_INSTANCE
+  _OP_REQP = ["instance_name", "live", "cleanup"]
  
-  lu.cfg.SetDiskID(device, node)
-  new_id = lu.rpc.call_blockdev_create(node, device, device.size,
-                                       instance.name, True, info)
-  if new_id.failed or not new_id.data:
-    return False
-  if device.physical_id is None:
-    device.physical_id = new_id
-  return True
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    self._ExpandAndLockInstance()
+    self.needed_locks[locking.LEVEL_NODE] = []
+    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODE:
+      self._LockInstancesNodes()
  
-def _CreateBlockDevOnSecondary(lu, node, instance, device, force, info):
-  """Create a tree of block devices on a secondary node.
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    This runs on master, primary and secondary nodes of the instance.
+
+    """
+    env = _BuildInstanceHookEnvByObject(self, self.instance)
+    env["MIGRATE_LIVE"] = self.op.live
+    env["MIGRATE_CLEANUP"] = self.op.cleanup
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
+    return env, nl, nl
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This checks that the instance is in the cluster.
+
+    """
+    instance = self.cfg.GetInstanceInfo(
+      self.cfg.ExpandInstanceName(self.op.instance_name))
+    if instance is None:
+      raise errors.OpPrereqError("Instance '%s' not known" %
+                                 self.op.instance_name)
+
+    if instance.disk_template != constants.DT_DRBD8:
+      raise errors.OpPrereqError("Instance's disk layout is not"
+                                 " drbd8, cannot migrate.")
+
+    secondary_nodes = instance.secondary_nodes
+    if not secondary_nodes:
+      raise errors.ConfigurationError("No secondary node but using"
+                                      " drbd8 disk template")
+
+    i_be = self.cfg.GetClusterInfo().FillBE(instance)
+
+    target_node = secondary_nodes[0]
+    # check memory requirements on the secondary node
+    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
+                         instance.name, i_be[constants.BE_MEMORY],
+                         instance.hypervisor)
+
+    # check bridge existence
+    brlist = [nic.bridge for nic in instance.nics]
+    result = self.rpc.call_bridges_exist(target_node, brlist)
+    if result.failed or not result.data:
+      raise errors.OpPrereqError("One or more target bridges %s does not"
+                                 " exist on destination node '%s'" %
+                                 (brlist, target_node))
+
+    if not self.op.cleanup:
+      _CheckNodeNotDrained(self, target_node)
+      result = self.rpc.call_instance_migratable(instance.primary_node,
+                                                 instance)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
+                                   msg)
+
+    self.instance = instance
+
+  def _WaitUntilSync(self):
+    """Poll with custom rpc for disk sync.
+
+    This uses our own step-based rpc call.
+
+    """
+    self.feedback_fn("* wait until resync is done")
+    all_done = False
+    while not all_done:
+      all_done = True
+      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
+                                            self.nodes_ip,
+                                            self.instance.disks)
+      min_percent = 100
+      for node, nres in result.items():
+        msg = nres.RemoteFailMsg()
+        if msg:
+          raise errors.OpExecError("Cannot resync disks on node %s: %s" %
+                                   (node, msg))
+        node_done, node_percent = nres.payload
+        all_done = all_done and node_done
+        if node_percent is not None:
+          min_percent = min(min_percent, node_percent)
+      if not all_done:
+        if min_percent < 100:
+          self.feedback_fn("   - progress: %.1f%%" % min_percent)
+        time.sleep(2)
+
+  def _EnsureSecondary(self, node):
+    """Demote a node to secondary.
+
+    """
+    self.feedback_fn("* switching node %s to secondary mode" % node)
+
+    for dev in self.instance.disks:
+      self.cfg.SetDiskID(dev, node)
+
+    result = self.rpc.call_blockdev_close(node, self.instance.name,
+                                          self.instance.disks)
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
+                               " error %s" % (node, msg))
+
+  def _GoStandalone(self):
+    """Disconnect from the network.
+
+    """
+    self.feedback_fn("* changing into standalone mode")
+    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
+                                               self.instance.disks)
+    for node, nres in result.items():
+      msg = nres.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Cannot disconnect disks node %s,"
+                                 " error %s" % (node, msg))
+
+  def _GoReconnect(self, multimaster):
+    """Reconnect to the network.
+
+    """
+    if multimaster:
+      msg = "dual-master"
+    else:
+      msg = "single-master"
+    self.feedback_fn("* changing disks into %s mode" % msg)
+    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
+                                           self.instance.disks,
+                                           self.instance.name, multimaster)
+    for node, nres in result.items():
+      msg = nres.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Cannot change disks config on node %s,"
+                                 " error: %s" % (node, msg))
+
+  def _ExecCleanup(self):
+    """Try to cleanup after a failed migration.
+
+    The cleanup is done by:
+      - check that the instance is running only on one node
+        (and update the config if needed)
+      - change disks on its secondary node to secondary
+      - wait until disks are fully synchronized
+      - disconnect from the network
+      - change disks into single-master mode
+      - wait again until disks are fully synchronized
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    source_node = self.source_node
+
+    # check running on only one node
+    self.feedback_fn("* checking where the instance actually runs"
+                     " (if this hangs, the hypervisor might be in"
+                     " a bad state)")
+    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
+    for node, result in ins_l.items():
+      result.Raise()
+      if not isinstance(result.data, list):
+        raise errors.OpExecError("Can't contact node '%s'" % node)
+
+    runningon_source = instance.name in ins_l[source_node].data
+    runningon_target = instance.name in ins_l[target_node].data
+
+    if runningon_source and runningon_target:
+      raise errors.OpExecError("Instance seems to be running on two nodes,"
+                               " or the hypervisor is confused. You will have"
+                               " to ensure manually that it runs only on one"
+                               " and restart this operation.")
+
+    if not (runningon_source or runningon_target):
+      raise errors.OpExecError("Instance does not seem to be running at all."
+                               " In this case, it's safer to repair by"
+                               " running 'gnt-instance stop' to ensure disk"
+                               " shutdown, and then restarting it.")
+
+    if runningon_target:
+      # the migration has actually succeeded, we need to update the config
+      self.feedback_fn("* instance running on secondary node (%s),"
+                       " updating config" % target_node)
+      instance.primary_node = target_node
+      self.cfg.Update(instance)
+      demoted_node = source_node
+    else:
+      self.feedback_fn("* instance confirmed to be running on its"
+                       " primary node (%s)" % source_node)
+      demoted_node = target_node
+
+    self._EnsureSecondary(demoted_node)
+    try:
+      self._WaitUntilSync()
+    except errors.OpExecError:
+      # errors are ignored here: if the device is standalone, it
+      # won't be able to sync
+      pass
+    self._GoStandalone()
+    self._GoReconnect(False)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* done")
+
+  def _RevertDiskStatus(self):
+    """Try to revert the disk status after a failed migration.
+
+    """
+    target_node = self.target_node
+    try:
+      self._EnsureSecondary(target_node)
+      self._GoStandalone()
+      self._GoReconnect(False)
+      self._WaitUntilSync()
+    except errors.OpExecError, err:
+      self.LogWarning("Migration failed and I can't reconnect the"
+                      " drives: error '%s'\n"
+                      "Please look and recover the instance status" %
+                      str(err))
+
+  def _AbortMigration(self):
+    """Call the hypervisor code to abort a started migration.
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    migration_info = self.migration_info
+
+    abort_result = self.rpc.call_finalize_migration(target_node,
+                                                    instance,
+                                                    migration_info,
+                                                    False)
+    abort_msg = abort_result.RemoteFailMsg()
+    if abort_msg:
+      logging.error("Aborting migration failed on target node %s: %s" %
+                    (target_node, abort_msg))
+      # Don't raise an exception here, as we still have to try to revert the
+      # disk status, even if this step failed.
+
+  def _ExecMigration(self):
+    """Migrate an instance.
+
+    The migrate is done by:
+      - change the disks into dual-master mode
+      - wait until disks are fully synchronized again
+      - migrate the instance
+      - change disks on the new secondary node (the old primary) to secondary
+      - wait until disks are fully synchronized
+      - change disks into single-master mode
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    source_node = self.source_node
+
+    self.feedback_fn("* checking disk consistency between source and target")
+    for dev in instance.disks:
+      if not _CheckDiskConsistency(self, dev, target_node, False):
+        raise errors.OpExecError("Disk %s is degraded or not fully"
+                                 " synchronized on target node,"
+                                 " aborting migrate." % dev.iv_name)
+
+    # First get the migration information from the remote node
+    result = self.rpc.call_migration_info(source_node, instance)
+    msg = result.RemoteFailMsg()
+    if msg:
+      log_err = ("Failed fetching source migration information from %s: %s" %
+                 (source_node, msg))
+      logging.error(log_err)
+      raise errors.OpExecError(log_err)
+
+    self.migration_info = migration_info = result.payload
+
+    # Then switch the disks to master/master mode
+    self._EnsureSecondary(target_node)
+    self._GoStandalone()
+    self._GoReconnect(True)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* preparing %s to accept the instance" % target_node)
+    result = self.rpc.call_accept_instance(target_node,
+                                           instance,
+                                           migration_info,
+                                           self.nodes_ip[target_node])
+
+    msg = result.RemoteFailMsg()
+    if msg:
+      logging.error("Instance pre-migration failed, trying to revert"
+                    " disk status: %s", msg)
+      self._AbortMigration()
+      self._RevertDiskStatus()
+      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
+                               (instance.name, msg))
+
+    self.feedback_fn("* migrating instance to %s" % target_node)
+    time.sleep(10)
+    result = self.rpc.call_instance_migrate(source_node, instance,
+                                            self.nodes_ip[target_node],
+                                            self.op.live)
+    msg = result.RemoteFailMsg()
+    if msg:
+      logging.error("Instance migration failed, trying to revert"
+                    " disk status: %s", msg)
+      self._AbortMigration()
+      self._RevertDiskStatus()
+      raise errors.OpExecError("Could not migrate instance %s: %s" %
+                               (instance.name, msg))
+    time.sleep(10)
+
+    instance.primary_node = target_node
+    # distribute new instance config to the other nodes
+    self.cfg.Update(instance)
+
+    result = self.rpc.call_finalize_migration(target_node,
+                                              instance,
+                                              migration_info,
+                                              True)
+    msg = result.RemoteFailMsg()
+    if msg:
+      logging.error("Instance migration succeeded, but finalization failed:"
+                    " %s" % msg)
+      raise errors.OpExecError("Could not finalize instance migration: %s" %
+                               msg)
+
+    self._EnsureSecondary(source_node)
+    self._WaitUntilSync()
+    self._GoStandalone()
+    self._GoReconnect(False)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* done")
+
+  def Exec(self, feedback_fn):
+    """Perform the migration.
+
+    """
+    self.feedback_fn = feedback_fn
+
+    self.source_node = self.instance.primary_node
+    self.target_node = self.instance.secondary_nodes[0]
+    self.all_nodes = [self.source_node, self.target_node]
+    self.nodes_ip = {
+      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
+      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
+      }
+    if self.op.cleanup:
+      return self._ExecCleanup()
+    else:
+      return self._ExecMigration()
+
+
+def _CreateBlockDev(lu, node, instance, device, force_create,
+                    info, force_open):
+  """Create a tree of block devices on a given node.
  
    If this device type has to be created on secondaries, create it and
    all its children.
  
    If not, just recurse to children keeping the same 'force' value.
  
+  @param lu: the lu on whose behalf we execute
+  @param node: the node on which to create the device
+  @type instance: L{objects.Instance}
+  @param instance: the instance which owns the device
+  @type device: L{objects.Disk}
+  @param device: the device to create
+  @type force_create: boolean
+  @param force_create: whether to force creation of this device; this
+      will be changed to True whenever we find a device which has
+      CreateOnSecondary() attribute
+  @param info: the extra 'metadata' we should attach to the device
+      (this will be represented as a LVM tag)
+  @type force_open: boolean
+  @param force_open: this parameter will be passed to the
+      L{backend.BlockdevCreate} function where it specifies
+      whether we run on primary or not, and it affects both
+      the child assembly and the device's own Open() execution
+
    """
    if device.CreateOnSecondary():
-    force = True
+    force_create = True
+
    if device.children:
      for child in device.children:
-      if not _CreateBlockDevOnSecondary(lu, node, instance,
-                                        child, force, info):
-        return False
+      _CreateBlockDev(lu, node, instance, child, force_create,
+                      info, force_open)
  
-  if not force:
-    return True
+  if not force_create:
+    return
+
+  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
+
+
+def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
+  """Create a single block device on a given node.
+
+  This will not recurse over children of the device, so they must be
+  created in advance.
+
+  @param lu: the lu on whose behalf we execute
+  @param node: the node on which to create the device
+  @type instance: L{objects.Instance}
+  @param instance: the instance which owns the device
+  @type device: L{objects.Disk}
+  @param device: the device to create
+  @param info: the extra 'metadata' we should attach to the device
+      (this will be represented as a LVM tag)
+  @type force_open: boolean
+  @param force_open: this parameter will be passed to the
+      L{backend.BlockdevCreate} function where it specifies
+      whether we run on primary or not, and it affects both
+      the child assembly and the device's own Open() execution
+
+  """
    lu.cfg.SetDiskID(device, node)
-  new_id = lu.rpc.call_blockdev_create(node, device, device.size,
-                                       instance.name, False, info)
-  if new_id.failed or not new_id.data:
-    return False
+  result = lu.rpc.call_blockdev_create(node, device, device.size,
+                                       instance.name, force_open, info)
+  msg = result.RemoteFailMsg()
+  if msg:
+    raise errors.OpExecError("Can't create block device %s on"
+                             " node %s for instance %s: %s" %
+                             (device, node, instance.name, msg))
    if device.physical_id is None:
-    device.physical_id = new_id
-  return True
+    device.physical_id = result.payload
  
  
  def _GenerateUniqueNames(lu, exts):
@@ -3459,13 +4335,14 @@ def _GenerateDiskTemplate(lu, template_name,
      if len(secondary_nodes) != 0:
        raise errors.ProgrammerError("Wrong template configuration")
  
-    names = _GenerateUniqueNames(lu, [".disk%d" % i
+    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                        for i in range(disk_count)])
      for idx, disk in enumerate(disk_info):
        disk_index = idx + base_index
        disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                                logical_id=(vgname, names[idx]),
-                              iv_name="disk/%d" % disk_index)
+                              iv_name="disk/%d" % disk_index,
+                              mode=disk["mode"])
        disks.append(disk_dev)
    elif template_name == constants.DT_DRBD8:
      if len(secondary_nodes) != 1:
@@ -3474,17 +4351,18 @@ def _GenerateDiskTemplate(lu, template_name,
      minors = lu.cfg.AllocateDRBDMinor(
        [primary_node, remote_node] * len(disk_info), instance_name)
  
-    names = _GenerateUniqueNames(lu,
-                                 [".disk%d_%s" % (i, s)
-                                  for i in range(disk_count)
-                                  for s in ("data", "meta")
-                                  ])
+    names = []
+    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
+                                               for i in range(disk_count)]):
+      names.append(lv_prefix + "_data")
+      names.append(lv_prefix + "_meta")
      for idx, disk in enumerate(disk_info):
        disk_index = idx + base_index
        disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                        disk["size"], names[idx*2:idx*2+2],
                                        "disk/%d" % disk_index,
                                        minors[idx*2], minors[idx*2+1])
+      disk_dev.mode = disk["mode"]
        disks.append(disk_dev)
    elif template_name == constants.DT_FILE:
      if len(secondary_nodes) != 0:
@@ -3496,7 +4374,8 @@ def _GenerateDiskTemplate(lu, template_name,
                                iv_name="disk/%d" % disk_index,
                                logical_id=(file_driver,
                                            "%s/disk%d" % (file_storage_dir,
-                                                         idx)))
+                                                         disk_index)),
+                              mode=disk["mode"])
        disks.append(disk_dev)
    else:
      raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
@@ -3524,19 +4403,18 @@ def _CreateDisks(lu, instance):
  
    """
    info = _GetInstanceInfoText(instance)
+  pnode = instance.primary_node
  
    if instance.disk_template == constants.DT_FILE:
      file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
-    result = lu.rpc.call_file_storage_dir_create(instance.primary_node,
-                                                 file_storage_dir)
+    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
  
      if result.failed or not result.data:
-      logging.error("Could not connect to node '%s'", instance.primary_node)
-      return False
+      raise errors.OpExecError("Could not connect to node '%s'" % pnode)
  
      if not result.data[0]:
-      logging.error("Failed to create directory '%s'", file_storage_dir)
-      return False
+      raise errors.OpExecError("Failed to create directory '%s'" %
+                               file_storage_dir)
  
    # Note: this needs to be kept in sync with adding of disks in
    # LUSetInstanceParams
@@ -3544,19 +4422,9 @@ def _CreateDisks(lu, instance):
      logging.info("Creating volume %s for instance %s",
                   device.iv_name, instance.name)
      #HARDCODE
-    for secondary_node in instance.secondary_nodes:
-      if not _CreateBlockDevOnSecondary(lu, secondary_node, instance,
-                                        device, False, info):
-        logging.error("Failed to create volume %s (%s) on secondary node %s!",
-                      device.iv_name, device, secondary_node)
-        return False
-    #HARDCODE
-    if not _CreateBlockDevOnPrimary(lu, instance.primary_node,
-                                    instance, device, info):
-      logging.error("Failed to create volume %s on primary!", device.iv_name)
-      return False
-
-  return True
+    for node in instance.all_nodes:
+      f_create = node == pnode
+      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
  
  
  def _RemoveDisks(lu, instance):
@@ -3577,15 +4445,15 @@ def _RemoveDisks(lu, instance):
    """
    logging.info("Removing block devices for instance %s", instance.name)
  
-  result = True
+  all_result = True
    for device in instance.disks:
      for node, disk in device.ComputeNodeTree(instance.primary_node):
        lu.cfg.SetDiskID(disk, node)
-      result = lu.rpc.call_blockdev_remove(node, disk)
-      if result.failed or not result.data:
-        lu.proc.LogWarning("Could not remove block device %s on node %s,"
-                           " continuing anyway", device.iv_name, node)
-        result = False
+      msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
+      if msg:
+        lu.LogWarning("Could not remove block device %s on node %s,"
+                      " continuing anyway: %s", device.iv_name, node, msg)
+        all_result = False
  
    if instance.disk_template == constants.DT_FILE:
      file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
@@ -3593,9 +4461,9 @@ def _RemoveDisks(lu, instance):
                                                   file_storage_dir)
      if result.failed or not result.data:
        logging.error("Could not remove directory '%s'", file_storage_dir)
-      result = False
+      all_result = False
  
-  return result
+  return all_result
  
  
  def _ComputeDiskSize(disk_template, disks):
@@ -3640,13 +4508,12 @@ def _CheckHVParams(lu, nodenames, hvname, hvparams):
                                                    hvparams)
    for node in nodenames:
      info = hvinfo[node]
-    info.Raise()
-    if not info.data or not isinstance(info.data, (tuple, list)):
-      raise errors.OpPrereqError("Cannot get current information"
-                                 " from node '%s' (%s)" % (node, info.data))
-    if not info.data[0]:
-      raise errors.OpPrereqError("Hypervisor parameter validation failed:"
-                                 " %s" % info.data[1])
+    if info.offline:
+      continue
+    msg = info.RemoteFailMsg()
+    if msg:
+      raise errors.OpPrereqError("Hypervisor parameter validation"
+                                 " failed on node %s: %s" % (node, msg))
  
  
  class LUCreateInstance(LogicalUnit):
@@ -3706,14 +4573,15 @@ class LUCreateInstance(LogicalUnit):
                                    ",".join(enabled_hvs)))
  
      # check hypervisor parameter syntax (locally)
-
+    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
                                    self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
+    self.hv_full = filled_hvp
  
      # fill and remember the beparams dict
-    utils.CheckBEParams(self.op.beparams)
+    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                      self.op.beparams)
  
@@ -3752,8 +4620,16 @@ class LUCreateInstance(LogicalUnit):
          if not utils.IsValidMac(mac.lower()):
            raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                       mac)
+        else:
+          # or validate/reserve the current one
+          if self.cfg.IsMacInUse(mac):
+            raise errors.OpPrereqError("MAC address %s already in use"
+                                       " in cluster" % mac)
+
        # bridge verification
-      bridge = nic.get("bridge", self.cfg.GetDefBridge())
+      bridge = nic.get("bridge", None)
+      if bridge is None:
+        bridge = self.cfg.GetDefBridge()
        self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
  
      # disk checks/pre-build
@@ -3868,23 +4744,27 @@ class LUCreateInstance(LogicalUnit):
  
      """
      env = {
-      "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
-      "INSTANCE_DISK_SIZE": ",".join(str(d["size"]) for d in self.disks),
-      "INSTANCE_ADD_MODE": self.op.mode,
+      "ADD_MODE": self.op.mode,
        }
      if self.op.mode == constants.INSTANCE_IMPORT:
-      env["INSTANCE_SRC_NODE"] = self.op.src_node
-      env["INSTANCE_SRC_PATH"] = self.op.src_path
-      env["INSTANCE_SRC_IMAGES"] = self.src_images
+      env["SRC_NODE"] = self.op.src_node
+      env["SRC_PATH"] = self.op.src_path
+      env["SRC_IMAGES"] = self.src_images
  
-    env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
+    env.update(_BuildInstanceHookEnv(
+      name=self.op.instance_name,
        primary_node=self.op.pnode,
        secondary_nodes=self.secondaries,
-      status=self.instance_status,
+      status=self.op.start,
        os_type=self.op.os_type,
        memory=self.be_full[constants.BE_MEMORY],
        vcpus=self.be_full[constants.BE_VCPUS],
        nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
+      disk_template=self.op.disk_template,
+      disks=[(d["size"], d["mode"]) for d in self.disks],
+      bep=self.be_full,
+      hvp=self.hv_full,
+      hypervisor_name=self.op.hypervisor,
      ))
  
      nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
@@ -3901,7 +4781,6 @@ class LUCreateInstance(LogicalUnit):
        raise errors.OpPrereqError("Cluster does not support lvm-based"
                                   " instances")
  
-
      if self.op.mode == constants.INSTANCE_IMPORT:
        src_node = self.op.src_node
        src_path = self.op.src_path
@@ -3967,6 +4846,7 @@ class LUCreateInstance(LogicalUnit):
              nic_mac_ini = 'nic%d_mac' % idx
              nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
  
+    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
      # ip ping checks (we use the same ip that was resolved in ExpandNames)
      if self.op.start and not self.op.ip_check:
        raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
@@ -3977,6 +4857,18 @@ class LUCreateInstance(LogicalUnit):
          raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                     (self.check_ip, self.op.instance_name))
  
+    #### mac address generation
+    # By generating the mac address here, both the allocator and the hooks
+    # get the real final mac address rather than the 'auto' or 'generate'
+    # value. There is a race condition between the generation and the instance
+    # object creation, which means that we know the mac is valid now, but we're
+    # not sure it will be when we actually add the instance. If things go bad,
+    # adding the instance will abort because of a duplicate mac, and the
+    # creation job will fail.
+    for nic in self.nics:
+      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+        nic.mac = self.cfg.GenerateMAC()
+
      #### allocator run
  
      if self.op.iallocator is not None:
@@ -3991,6 +4883,9 @@ class LUCreateInstance(LogicalUnit):
      if pnode.offline:
        raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                   pnode.name)
+    if pnode.drained:
+      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
+                                 pnode.name)
  
      self.secondaries = []
  
@@ -4002,8 +4897,9 @@ class LUCreateInstance(LogicalUnit):
        if self.op.snode == pnode.name:
          raise errors.OpPrereqError("The secondary node cannot be"
                                     " the primary node.")
-      self.secondaries.append(self.op.snode)
        _CheckNodeOnline(self, self.op.snode)
+      _CheckNodeNotDrained(self, self.op.snode)
+      self.secondaries.append(self.op.snode)
  
      nodenames = [pnode.name] + self.secondaries
  
@@ -4035,7 +4931,7 @@ class LUCreateInstance(LogicalUnit):
      # os verification
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise()
-    if not isinstance(result.data, objects.OS):
+    if not isinstance(result.data, objects.OS) or not result.data:
        raise errors.OpPrereqError("OS '%s' not in supported os list for"
                                   " primary node"  % self.op.os_type)
  
@@ -4055,11 +4951,6 @@ class LUCreateInstance(LogicalUnit):
                             self.be_full[constants.BE_MEMORY],
                             self.op.hypervisor)
  
-    if self.op.start:
-      self.instance_status = 'up'
-    else:
-      self.instance_status = 'down'
-
    def Exec(self, feedback_fn):
      """Create and add the instance to the cluster.
  
@@ -4067,10 +4958,6 @@ class LUCreateInstance(LogicalUnit):
      instance = self.op.instance_name
      pnode_name = self.pnode.name
  
-    for nic in self.nics:
-      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
-        nic.mac = self.cfg.GenerateMAC()
-
      ht_kind = self.op.hypervisor
      if ht_kind in constants.HTS_REQ_PORT:
        network_port = self.cfg.AllocatePort()
@@ -4105,7 +4992,7 @@ class LUCreateInstance(LogicalUnit):
                              primary_node=pnode_name,
                              nics=self.nics, disks=disks,
                              disk_template=self.op.disk_template,
-                            status=self.instance_status,
+                            admin_up=False,
                              network_port=network_port,
                              beparams=self.op.beparams,
                              hvparams=self.op.hvparams,
@@ -4113,10 +5000,15 @@ class LUCreateInstance(LogicalUnit):
                              )
  
      feedback_fn("* creating instance disks...")
-    if not _CreateDisks(self, iobj):
-      _RemoveDisks(self, iobj)
-      self.cfg.ReleaseDRBDMinors(instance)
-      raise errors.OpExecError("Device creation failed, reverting...")
+    try:
+      _CreateDisks(self, iobj)
+    except errors.OpExecError:
+      self.LogWarning("Device creation failed, reverting...")
+      try:
+        _RemoveDisks(self, iobj)
+      finally:
+        self.cfg.ReleaseDRBDMinors(instance)
+        raise
  
      feedback_fn("adding instance %s to cluster config" % instance)
  
@@ -4124,8 +5016,6 @@ class LUCreateInstance(LogicalUnit):
      # Declare that we don't want to remove the instance lock anymore, as we've
      # added the instance to the config
      del self.remove_locks[locking.LEVEL_INSTANCE]
-    # Remove the temp. assignements for the instance's drbds
-    self.cfg.ReleaseDRBDMinors(instance)
      # Unlock all the nodes
      if self.op.mode == constants.INSTANCE_IMPORT:
        nodes_keep = [self.op.src_node]
@@ -4162,11 +5052,11 @@ class LUCreateInstance(LogicalUnit):
        if self.op.mode == constants.INSTANCE_CREATE:
          feedback_fn("* running the instance OS create scripts...")
          result = self.rpc.call_instance_os_add(pnode_name, iobj)
-        result.Raise()
-        if not result.data:
+        msg = result.RemoteFailMsg()
+        if msg:
            raise errors.OpExecError("Could not add os for instance %s"
-                                   " on node %s" %
-                                   (instance, pnode_name))
+                                   " on node %s: %s" %
+                                   (instance, pnode_name, msg))
  
        elif self.op.mode == constants.INSTANCE_IMPORT:
          feedback_fn("* running the instance OS import scripts...")
@@ -4188,12 +5078,14 @@ class LUCreateInstance(LogicalUnit):
                                       % self.op.mode)
  
      if self.op.start:
+      iobj.admin_up = True
+      self.cfg.Update(iobj)
        logging.info("Starting instance %s on node %s", instance, pnode_name)
        feedback_fn("* starting instance...")
-      result = self.rpc.call_instance_start(pnode_name, iobj, None)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Could not start instance")
+      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not start instance: %s" % msg)
  
  
  class LUConnectConsole(NoHooksLU):
@@ -4238,7 +5130,12 @@ class LUConnectConsole(NoHooksLU):
      logging.debug("Connecting to console of %s on %s", instance.name, node)
  
      hyper = hypervisor.GetHypervisor(instance.hypervisor)
-    console_cmd = hyper.GetShellCommandForConsole(instance)
+    cluster = self.cfg.GetClusterInfo()
+    # beparams and hvparams are passed separately, to avoid editing the
+    # instance and then saving the defaults in the instance itself.
+    hvparams = cluster.FillHV(instance)
+    beparams = cluster.FillBE(instance)
+    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
  
      # build ssh cmdline
      return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
@@ -4253,17 +5150,32 @@ class LUReplaceDisks(LogicalUnit):
    _OP_REQP = ["instance_name", "mode", "disks"]
    REQ_BGL = False
  
-  def ExpandNames(self):
-    self._ExpandAndLockInstance()
-
+  def CheckArguments(self):
      if not hasattr(self.op, "remote_node"):
        self.op.remote_node = None
-
-    ia_name = getattr(self.op, "iallocator", None)
-    if ia_name is not None:
-      if self.op.remote_node is not None:
+    if not hasattr(self.op, "iallocator"):
+      self.op.iallocator = None
+
+    # check for valid parameter combination
+    cnt = [self.op.remote_node, self.op.iallocator].count(None)
+    if self.op.mode == constants.REPLACE_DISK_CHG:
+      if cnt == 2:
+        raise errors.OpPrereqError("When changing the secondary either an"
+                                   " iallocator script must be used or the"
+                                   " new node given")
+      elif cnt == 0:
          raise errors.OpPrereqError("Give either the iallocator or the new"
                                     " secondary, not both")
+    else: # not replacing the secondary
+      if cnt != 2:
+        raise errors.OpPrereqError("The iallocator and new node options can"
+                                   " be used only when changing the"
+                                   " secondary node")
+
+  def ExpandNames(self):
+    self._ExpandAndLockInstance()
+
+    if self.op.iallocator is not None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      elif self.op.remote_node is not None:
        remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
@@ -4271,6 +5183,10 @@ class LUReplaceDisks(LogicalUnit):
          raise errors.OpPrereqError("Node '%s' not known" %
                                     self.op.remote_node)
        self.op.remote_node = remote_node
+      # Warning: do not remove the locking of the new secondary here
+      # unless DRBD8.AddChildren is changed to work in parallel;
+      # currently it doesn't since parallel invocations of
+      # FindUnusedMinor will conflict
        self.needed_locks[locking.LEVEL_NODE] = [remote_node]
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      else:
@@ -4338,9 +5254,9 @@ class LUReplaceDisks(LogicalUnit):
        "Cannot retrieve locked instance %s" % self.op.instance_name
      self.instance = instance
  
-    if instance.disk_template not in constants.DTS_NET_MIRROR:
-      raise errors.OpPrereqError("Instance's disk layout is not"
-                                 " network mirrored.")
+    if instance.disk_template != constants.DT_DRBD8:
+      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
+                                 " instances")
  
      if len(instance.secondary_nodes) != 1:
        raise errors.OpPrereqError("The instance has a strange layout,"
@@ -4349,8 +5265,7 @@ class LUReplaceDisks(LogicalUnit):
  
      self.sec_node = instance.secondary_nodes[0]
  
-    ia_name = getattr(self.op, "iallocator", None)
-    if ia_name is not None:
+    if self.op.iallocator is not None:
        self._RunAllocator()
  
      remote_node = self.op.remote_node
@@ -4364,42 +5279,25 @@ class LUReplaceDisks(LogicalUnit):
        raise errors.OpPrereqError("The specified node is the primary node of"
                                   " the instance.")
      elif remote_node == self.sec_node:
-      if self.op.mode == constants.REPLACE_DISK_SEC:
-        # this is for DRBD8, where we can't execute the same mode of
-        # replacement as for drbd7 (no different port allocated)
-        raise errors.OpPrereqError("Same secondary given, cannot execute"
-                                   " replacement")
-    if instance.disk_template == constants.DT_DRBD8:
-      if (self.op.mode == constants.REPLACE_DISK_ALL and
-          remote_node is not None):
-        # switch to replace secondary mode
-        self.op.mode = constants.REPLACE_DISK_SEC
-
-      if self.op.mode == constants.REPLACE_DISK_ALL:
-        raise errors.OpPrereqError("Template 'drbd' only allows primary or"
-                                   " secondary disk replacement, not"
-                                   " both at once")
-      elif self.op.mode == constants.REPLACE_DISK_PRI:
-        if remote_node is not None:
-          raise errors.OpPrereqError("Template 'drbd' does not allow changing"
-                                     " the secondary while doing a primary"
-                                     " node disk replacement")
-        self.tgt_node = instance.primary_node
-        self.oth_node = instance.secondary_nodes[0]
-        _CheckNodeOnline(self, self.tgt_node)
-        _CheckNodeOnline(self, self.oth_node)
-      elif self.op.mode == constants.REPLACE_DISK_SEC:
-        self.new_node = remote_node # this can be None, in which case
-                                    # we don't change the secondary
-        self.tgt_node = instance.secondary_nodes[0]
-        self.oth_node = instance.primary_node
-        _CheckNodeOnline(self, self.oth_node)
-        if self.new_node is not None:
-          _CheckNodeOnline(self, self.new_node)
-        else:
-          _CheckNodeOnline(self, self.tgt_node)
-      else:
-        raise errors.ProgrammerError("Unhandled disk replace mode")
+      raise errors.OpPrereqError("The specified node is already the"
+                                 " secondary node of the instance.")
+
+    if self.op.mode == constants.REPLACE_DISK_PRI:
+      n1 = self.tgt_node = instance.primary_node
+      n2 = self.oth_node = self.sec_node
+    elif self.op.mode == constants.REPLACE_DISK_SEC:
+      n1 = self.tgt_node = self.sec_node
+      n2 = self.oth_node = instance.primary_node
+    elif self.op.mode == constants.REPLACE_DISK_CHG:
+      n1 = self.new_node = remote_node
+      n2 = self.oth_node = instance.primary_node
+      self.tgt_node = self.sec_node
+      _CheckNodeNotDrained(self, remote_node)
+    else:
+      raise errors.ProgrammerError("Unhandled disk replace mode")
+
+    _CheckNodeOnline(self, n1)
+    _CheckNodeOnline(self, n2)
  
      if not self.op.disks:
        self.op.disks = range(len(instance.disks))
@@ -4457,9 +5355,13 @@ class LUReplaceDisks(LogicalUnit):
        for node in tgt_node, oth_node:
          info("checking disk/%d on %s" % (idx, node))
          cfg.SetDiskID(dev, node)
-        if not self.rpc.call_blockdev_find(node, dev):
-          raise errors.OpExecError("Can't find disk/%d on node %s" %
-                                   (idx, node))
+        result = self.rpc.call_blockdev_find(node, dev)
+        msg = result.RemoteFailMsg()
+        if not msg and not result.payload:
+          msg = "disk not found"
+        if msg:
+          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
+                                   (idx, node, msg))
  
      # Step: check other node consistency
      self.proc.LogStep(2, steps_total, "check peer consistency")
@@ -4492,15 +5394,10 @@ class LUReplaceDisks(LogicalUnit):
        iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
        info("creating new local storage on %s for %s" %
             (tgt_node, dev.iv_name))
-      # since we *always* want to create this LV, we use the
-      # _Create...OnPrimary (which forces the creation), even if we
-      # are talking about the secondary node
+      # we pass force_create=True to force the LVM creation
        for new_lv in new_lvs:
-        if not _CreateBlockDevOnPrimary(self, tgt_node, instance, new_lv,
-                                        _GetInstanceInfoText(instance)):
-          raise errors.OpExecError("Failed to create new LV named '%s' on"
-                                   " node '%s'" %
-                                   (new_lv.logical_id[1], tgt_node))
+        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
+                        _GetInstanceInfoText(instance), False)
  
      # Step: for each lv, detach+rename*2+attach
      self.proc.LogStep(4, steps_total, "change drbd configuration")
@@ -4527,8 +5424,9 @@ class LUReplaceDisks(LogicalUnit):
        # build the rename list based on what LVs exist on the node
        rlist = []
        for to_ren in old_lvs:
-        find_res = self.rpc.call_blockdev_find(tgt_node, to_ren)
-        if not find_res.failed and find_res.data is not None: # device exists
+        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
+        if not result.RemoteFailMsg() and result.payload:
+          # device exists
            rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
  
        info("renaming the old LVs on the target node")
@@ -4557,10 +5455,10 @@ class LUReplaceDisks(LogicalUnit):
        result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
        if result.failed or not result.data:
          for new_lv in new_lvs:
-          result = self.rpc.call_blockdev_remove(tgt_node, new_lv)
-          if result.failed or not result.data:
-            warning("Can't rollback device %s", hint="manually cleanup unused"
-                    " logical volumes")
+          msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
+          if msg:
+            warning("Can't rollback device %s: %s", dev, msg,
+                    hint="cleanup manually the unused logical volumes")
          raise errors.OpExecError("Can't add local storage to drbd")
  
        dev.children = new_lvs
@@ -4578,7 +5476,13 @@ class LUReplaceDisks(LogicalUnit):
      for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
        cfg.SetDiskID(dev, instance.primary_node)
        result = self.rpc.call_blockdev_find(instance.primary_node, dev)
-      if result.failed or result.data[5]:
+      msg = result.RemoteFailMsg()
+      if not msg and not result.payload:
+        msg = "disk not found"
+      if msg:
+        raise errors.OpExecError("Can't find DRBD device %s: %s" %
+                                 (name, msg))
+      if result.payload[5]:
          raise errors.OpExecError("DRBD device %s is degraded!" % name)
  
      # Step: remove old storage
@@ -4587,9 +5491,10 @@ class LUReplaceDisks(LogicalUnit):
        info("remove logical volumes for %s" % name)
        for lv in old_lvs:
          cfg.SetDiskID(lv, tgt_node)
-        result = self.rpc.call_blockdev_remove(tgt_node, lv)
-        if result.failed or not result.data:
-          warning("Can't remove old LV", hint="manually remove unused LVs")
+        msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
+        if msg:
+          warning("Can't remove old LV: %s" % msg,
+                  hint="manually remove unused LVs")
            continue
  
    def _ExecD8Secondary(self, feedback_fn):
@@ -4620,6 +5525,11 @@ class LUReplaceDisks(LogicalUnit):
      old_node = self.tgt_node
      new_node = self.new_node
      pri_node = instance.primary_node
+    nodes_ip = {
+      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
+      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
+      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
+      }
  
      # Step: check device activation
      self.proc.LogStep(1, steps_total, "check device existence")
@@ -4637,10 +5547,12 @@ class LUReplaceDisks(LogicalUnit):
        info("checking disk/%d on %s" % (idx, pri_node))
        cfg.SetDiskID(dev, pri_node)
        result = self.rpc.call_blockdev_find(pri_node, dev)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Can't find disk/%d on node %s" %
-                                 (idx, pri_node))
+      msg = result.RemoteFailMsg()
+      if not msg and not result.payload:
+        msg = "disk not found"
+      if msg:
+        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
+                                 (idx, pri_node, msg))
  
      # Step: check other node consistency
      self.proc.LogStep(2, steps_total, "check peer consistency")
@@ -4658,15 +5570,10 @@ class LUReplaceDisks(LogicalUnit):
      for idx, dev in enumerate(instance.disks):
        info("adding new local storage on %s for disk/%d" %
             (new_node, idx))
-      # since we *always* want to create this LV, we use the
-      # _Create...OnPrimary (which forces the creation), even if we
-      # are talking about the secondary node
+      # we pass force_create=True to force LVM creation
        for new_lv in dev.children:
-        if not _CreateBlockDevOnPrimary(self, new_node, instance, new_lv,
-                                        _GetInstanceInfoText(instance)):
-          raise errors.OpExecError("Failed to create new LV named '%s' on"
-                                   " node '%s'" %
-                                   (new_lv.logical_id[1], new_node))
+        _CreateBlockDev(self, new_node, instance, new_lv, True,
+                        _GetInstanceInfoText(instance), False)
  
      # Step 4: dbrd minors and drbd setups changes
      # after this, we must manually remove the drbd minors on both the
@@ -4676,59 +5583,54 @@ class LUReplaceDisks(LogicalUnit):
      logging.debug("Allocated minors %s" % (minors,))
      self.proc.LogStep(4, steps_total, "changing drbd configuration")
      for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
-      size = dev.size
        info("activating a new drbd on %s for disk/%d" % (new_node, idx))
-      # create new devices on new_node
-      if pri_node == dev.logical_id[0]:
-        new_logical_id = (pri_node, new_node,
-                          dev.logical_id[2], dev.logical_id[3], new_minor,
-                          dev.logical_id[5])
+      # create new devices on new_node; note that we create two IDs:
+      # one without port, so the drbd will be activated without
+      # networking information on the new node at this stage, and one
+      # with network, for the later activation in step 4
+      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
+      if pri_node == o_node1:
+        p_minor = o_minor1
        else:
-        new_logical_id = (new_node, pri_node,
-                          dev.logical_id[2], new_minor, dev.logical_id[4],
-                          dev.logical_id[5])
-      iv_names[idx] = (dev, dev.children, new_logical_id)
+        p_minor = o_minor2
+
+      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
+      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)
+
+      iv_names[idx] = (dev, dev.children, new_net_id)
        logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
-                    new_logical_id)
+                    new_net_id)
        new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
-                              logical_id=new_logical_id,
-                              children=dev.children)
-      if not _CreateBlockDevOnSecondary(self, new_node, instance,
-                                        new_drbd, False,
-                                        _GetInstanceInfoText(instance)):
+                              logical_id=new_alone_id,
+                              children=dev.children,
+                              size=dev.size)
+      try:
+        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
+                              _GetInstanceInfoText(instance), False)
+      except errors.GenericError:
          self.cfg.ReleaseDRBDMinors(instance.name)
-        raise errors.OpExecError("Failed to create new DRBD on"
-                                 " node '%s'" % new_node)
+        raise
  
      for idx, dev in enumerate(instance.disks):
        # we have new devices, shutdown the drbd on the old secondary
        info("shutting down drbd for disk/%d on old node" % idx)
        cfg.SetDiskID(dev, old_node)
-      result = self.rpc.call_blockdev_shutdown(old_node, dev)
-      if result.failed or not result.data:
-        warning("Failed to shutdown drbd for disk/%d on old node" % idx,
+      msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
+      if msg:
+        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
+                (idx, msg),
                  hint="Please cleanup this device manually as soon as possible")
  
      info("detaching primary drbds from the network (=> standalone)")
-    done = 0
-    for idx, dev in enumerate(instance.disks):
-      cfg.SetDiskID(dev, pri_node)
-      # set the network part of the physical (unique in bdev terms) id
-      # to None, meaning detach from network
-      dev.physical_id = (None, None, None, None) + dev.physical_id[4:]
-      # and 'find' the device, which will 'fix' it to match the
-      # standalone state
-      result = self.rpc.call_blockdev_find(pri_node, dev)
-      if not result.failed and result.data:
-        done += 1
-      else:
-        warning("Failed to detach drbd disk/%d from network, unusual case" %
-                idx)
+    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
+                                               instance.disks)[pri_node]
  
-    if not done:
-      # no detaches succeeded (very unlikely)
+    msg = result.RemoteFailMsg()
+    if msg:
+      # detaches didn't succeed (unlikely)
        self.cfg.ReleaseDRBDMinors(instance.name)
-      raise errors.OpExecError("Can't detach at least one DRBD from old node")
+      raise errors.OpExecError("Can't detach the disks from the network on"
+                               " old node: %s" % (msg,))
  
      # if we managed to detach at least one, we update all the disks of
      # the instance to point to the new secondary
@@ -4737,23 +5639,18 @@ class LUReplaceDisks(LogicalUnit):
        dev.logical_id = new_logical_id
        cfg.SetDiskID(dev, pri_node)
      cfg.Update(instance)
-    # we can remove now the temp minors as now the new values are
-    # written to the config file (and therefore stable)
-    self.cfg.ReleaseDRBDMinors(instance.name)
  
      # and now perform the drbd attach
      info("attaching primary drbds to new secondary (standalone => connected)")
-    for idx, dev in enumerate(instance.disks):
-      info("attaching primary drbd for disk/%d to new secondary node" % idx)
-      # since the attach is smart, it's enough to 'find' the device,
-      # it will automatically activate the network, if the physical_id
-      # is correct
-      cfg.SetDiskID(dev, pri_node)
-      logging.debug("Disk to attach: %s", dev)
-      result = self.rpc.call_blockdev_find(pri_node, dev)
-      if result.failed or not result.data:
-        warning("can't attach drbd disk/%d to new secondary!" % idx,
-                "please do a gnt-instance info to see the status of disks")
+    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
+                                           instance.disks, instance.name,
+                                           False)
+    for to_node, to_result in result.items():
+      msg = to_result.RemoteFailMsg()
+      if msg:
+        warning("can't attach drbd disks on node %s: %s", to_node, msg,
+                hint="please do a gnt-instance info to see the"
+                " status of disks")
  
      # this can fail as the old devices are degraded and _WaitForSync
      # does a combined result over all disks, so we don't check its
@@ -4765,8 +5662,13 @@ class LUReplaceDisks(LogicalUnit):
      for idx, (dev, old_lvs, _) in iv_names.iteritems():
        cfg.SetDiskID(dev, pri_node)
        result = self.rpc.call_blockdev_find(pri_node, dev)
-      result.Raise()
-      if result.data[5]:
+      msg = result.RemoteFailMsg()
+      if not msg and not result.payload:
+        msg = "disk not found"
+      if msg:
+        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
+                                 (idx, msg))
+      if result.payload[5]:
          raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
  
      self.proc.LogStep(6, steps_total, "removing old storage")
@@ -4774,9 +5676,9 @@ class LUReplaceDisks(LogicalUnit):
        info("remove logical volumes for disk/%d" % idx)
        for lv in old_lvs:
          cfg.SetDiskID(lv, old_node)
-        result = self.rpc.call_blockdev_remove(old_node, lv)
-        if result.failed or not result.data:
-          warning("Can't remove LV on old secondary",
+        msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
+        if msg:
+          warning("Can't remove LV on old secondary: %s", msg,
                    hint="Cleanup stale volumes by hand")
  
    def Exec(self, feedback_fn):
@@ -4788,21 +5690,18 @@ class LUReplaceDisks(LogicalUnit):
      instance = self.instance
  
      # Activate the instance disks if we're replacing them on a down instance
-    if instance.status == "down":
+    if not instance.admin_up:
        _StartInstanceDisks(self, instance, True)
  
-    if instance.disk_template == constants.DT_DRBD8:
-      if self.op.remote_node is None:
-        fn = self._ExecD8DiskOnly
-      else:
-        fn = self._ExecD8Secondary
+    if self.op.mode == constants.REPLACE_DISK_CHG:
+      fn = self._ExecD8Secondary
      else:
-      raise errors.ProgrammerError("Unhandled disk replacement case")
+      fn = self._ExecD8DiskOnly
  
      ret = fn(feedback_fn)
  
      # Deactivate the instance disks if we're replacing them on a down instance
-    if instance.status == "down":
+    if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance)
  
      return ret
@@ -4852,8 +5751,8 @@ class LUGrowDisk(LogicalUnit):
      instance = self.cfg.GetInstanceInfo(self.op.instance_name)
      assert instance is not None, \
        "Cannot retrieve locked instance %s" % self.op.instance_name
-    _CheckNodeOnline(self, instance.primary_node)
-    for node in instance.secondary_nodes:
+    nodenames = list(instance.all_nodes)
+    for node in nodenames:
        _CheckNodeOnline(self, node)
  
  
@@ -4865,7 +5764,6 @@ class LUGrowDisk(LogicalUnit):
  
      self.disk = instance.FindDisk(self.op.disk)
  
-    nodenames = [instance.primary_node] + list(instance.secondary_nodes)
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         instance.hypervisor)
      for node in nodenames:
@@ -4888,16 +5786,13 @@ class LUGrowDisk(LogicalUnit):
      """
      instance = self.instance
      disk = self.disk
-    for node in (instance.secondary_nodes + (instance.primary_node,)):
+    for node in instance.all_nodes:
        self.cfg.SetDiskID(disk, node)
        result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
-      result.Raise()
-      if (not result.data or not isinstance(result.data, (list, tuple)) or
-          len(result.data) != 2):
-        raise errors.OpExecError("Grow request failed to node %s" % node)
-      elif not result.data[0]:
+      msg = result.RemoteFailMsg()
+      if msg:
          raise errors.OpExecError("Grow request failed to node %s: %s" %
-                                 (node, result.data[1]))
+                                 (node, msg))
      disk.RecordGrow(self.op.amount)
      self.cfg.Update(instance)
      if self.op.wait_for_sync:
@@ -4926,8 +5821,7 @@ class LUQueryInstanceData(NoHooksLU):
        for name in self.op.instances:
          full_name = self.cfg.ExpandInstanceName(name)
          if full_name is None:
-          raise errors.OpPrereqError("Instance '%s' not known" %
-                                     self.op.instance_name)
+          raise errors.OpPrereqError("Instance '%s' not known" % name)
          self.wanted_names.append(full_name)
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
      else:
@@ -4962,8 +5856,14 @@ class LUQueryInstanceData(NoHooksLU):
      if not static:
        self.cfg.SetDiskID(dev, instance.primary_node)
        dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
-      dev_pstatus.Raise()
-      dev_pstatus = dev_pstatus.data
+      if dev_pstatus.offline:
+        dev_pstatus = None
+      else:
+        msg = dev_pstatus.RemoteFailMsg()
+        if msg:
+          raise errors.OpExecError("Can't compute disk status for %s: %s" %
+                                   (instance.name, msg))
+        dev_pstatus = dev_pstatus.payload
      else:
        dev_pstatus = None
  
@@ -4977,8 +5877,14 @@ class LUQueryInstanceData(NoHooksLU):
      if snode and not static:
        self.cfg.SetDiskID(dev, snode)
        dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
-      dev_sstatus.Raise()
-      dev_sstatus = dev_sstatus.data
+      if dev_sstatus.offline:
+        dev_sstatus = None
+      else:
+        msg = dev_sstatus.RemoteFailMsg()
+        if msg:
+          raise errors.OpExecError("Can't compute disk status for %s: %s" %
+                                   (instance.name, msg))
+        dev_sstatus = dev_sstatus.payload
      else:
        dev_sstatus = None
  
@@ -4997,6 +5903,7 @@ class LUQueryInstanceData(NoHooksLU):
        "sstatus": dev_sstatus,
        "children": dev_children,
        "mode": dev.mode,
+      "size": dev.size,
        }
  
      return data
@@ -5020,10 +5927,10 @@ class LUQueryInstanceData(NoHooksLU):
            remote_state = "down"
        else:
          remote_state = None
-      if instance.status == "down":
-        config_state = "down"
-      else:
+      if instance.admin_up:
          config_state = "up"
+      else:
+        config_state = "down"
  
        disks = [self._ComputeDiskStatus(instance, None, device)
                 for device in instance.disks]
@@ -5073,8 +5980,6 @@ class LUSetInstanceParams(LogicalUnit):
              self.op.hvparams or self.op.beparams):
        raise errors.OpPrereqError("No changes submitted")
  
-    utils.CheckBEParams(self.op.beparams)
-
      # Disk validation
      disk_addremove = 0
      for disk_op, disk_dict in self.op.disks:
@@ -5088,7 +5993,7 @@ class LUSetInstanceParams(LogicalUnit):
            raise errors.OpPrereqError("Invalid disk index")
        if disk_op == constants.DDM_ADD:
          mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
-        if mode not in (constants.DISK_RDONLY, constants.DISK_RDWR):
+        if mode not in constants.DISK_ACCESS_SET:
            raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
          size = disk_dict.get('size', None)
          if size is None:
@@ -5124,24 +6029,29 @@ class LUSetInstanceParams(LogicalUnit):
        # nic_dict should be a dict
        nic_ip = nic_dict.get('ip', None)
        if nic_ip is not None:
-        if nic_ip.lower() == "none":
+        if nic_ip.lower() == constants.VALUE_NONE:
            nic_dict['ip'] = None
          else:
            if not utils.IsValidIP(nic_ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
-      # we can only check None bridges and assign the default one
-      nic_bridge = nic_dict.get('bridge', None)
-      if nic_bridge is None:
-        nic_dict['bridge'] = self.cfg.GetDefBridge()
-      # but we can validate MACs
-      nic_mac = nic_dict.get('mac', None)
-      if nic_mac is not None:
-        if self.cfg.IsMacInUse(nic_mac):
-          raise errors.OpPrereqError("MAC address %s already in use"
-                                     " in cluster" % nic_mac)
+
+      if nic_op == constants.DDM_ADD:
+        nic_bridge = nic_dict.get('bridge', None)
+        if nic_bridge is None:
+          nic_dict['bridge'] = self.cfg.GetDefBridge()
+        nic_mac = nic_dict.get('mac', None)
+        if nic_mac is None:
+          nic_dict['mac'] = constants.VALUE_AUTO
+
+      if 'mac' in nic_dict:
+        nic_mac = nic_dict['mac']
          if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
            if not utils.IsValidMac(nic_mac):
              raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
+        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
+          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
+                                     " modifying an existing nic")
+
      if nic_addremove > 1:
        raise errors.OpPrereqError("Only one NIC add or remove operation"
                                   " supported at a time")
@@ -5166,10 +6076,39 @@ class LUSetInstanceParams(LogicalUnit):
        args['memory'] = self.be_new[constants.BE_MEMORY]
      if constants.BE_VCPUS in self.be_new:
        args['vcpus'] = self.be_new[constants.BE_VCPUS]
-    # FIXME: readd disk/nic changes
+    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
+    # information at all.
+    if self.op.nics:
+      args['nics'] = []
+      nic_override = dict(self.op.nics)
+      for idx, nic in enumerate(self.instance.nics):
+        if idx in nic_override:
+          this_nic_override = nic_override[idx]
+        else:
+          this_nic_override = {}
+        if 'ip' in this_nic_override:
+          ip = this_nic_override['ip']
+        else:
+          ip = nic.ip
+        if 'bridge' in this_nic_override:
+          bridge = this_nic_override['bridge']
+        else:
+          bridge = nic.bridge
+        if 'mac' in this_nic_override:
+          mac = this_nic_override['mac']
+        else:
+          mac = nic.mac
+        args['nics'].append((ip, bridge, mac))
+      if constants.DDM_ADD in nic_override:
+        ip = nic_override[constants.DDM_ADD].get('ip', None)
+        bridge = nic_override[constants.DDM_ADD]['bridge']
+        mac = nic_override[constants.DDM_ADD]['mac']
+        args['nics'].append((ip, bridge, mac))
+      elif constants.DDM_REMOVE in nic_override:
+        del args['nics'][-1]
+
      env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
-    nl = [self.cfg.GetMasterNode(),
-          self.instance.primary_node] + list(self.instance.secondary_nodes)
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
      return env, nl, nl
  
    def CheckPrereq(self):
@@ -5178,16 +6117,15 @@ class LUSetInstanceParams(LogicalUnit):
      This only checks the instance list against the existing names.
  
      """
-    force = self.force = self.op.force
+    self.force = self.op.force
  
      # checking the new params on the primary/secondary nodes
  
      instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
      assert self.instance is not None, \
        "Cannot retrieve locked instance %s" % self.op.instance_name
-    pnode = self.instance.primary_node
-    nodelist = [pnode]
-    nodelist.extend(instance.secondary_nodes)
+    pnode = instance.primary_node
+    nodelist = list(instance.all_nodes)
  
      # hvparams processing
      if self.op.hvparams:
@@ -5198,11 +6136,10 @@ class LUSetInstanceParams(LogicalUnit):
              del i_hvdict[key]
            except KeyError:
              pass
-        elif val == constants.VALUE_NONE:
-          i_hvdict[key] = None
          else:
            i_hvdict[key] = val
        cluster = self.cfg.GetClusterInfo()
+      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
        hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
                                  i_hvdict)
        # local check
@@ -5226,6 +6163,7 @@ class LUSetInstanceParams(LogicalUnit):
          else:
            i_bedict[key] = val
        cluster = self.cfg.GetClusterInfo()
+      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
        be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                  i_bedict)
        self.be_new = be_new # the new actual values
@@ -5249,7 +6187,7 @@ class LUSetInstanceParams(LogicalUnit):
          self.warn.append("Can't get info from primary node %s" % pnode)
        else:
          if not instance_info.failed and instance_info.data:
-          current_mem = instance_info.data['memory']
+          current_mem = int(instance_info.data['memory'])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very probable,
@@ -5263,7 +6201,9 @@ class LUSetInstanceParams(LogicalUnit):
                                       " missing on its primary node" % miss_mem)
  
        if be_new[constants.BE_AUTO_BALANCE]:
-        for node, nres in instance.secondary_nodes.iteritems():
+        for node, nres in nodeinfo.iteritems():
+          if node not in instance.secondary_nodes:
+            continue
            if nres.failed or not isinstance(nres.data, dict):
              self.warn.append("Can't get info from secondary node %s" % node)
            elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
@@ -5282,8 +6222,10 @@ class LUSetInstanceParams(LogicalUnit):
            raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                       " are 0 to %d" %
                                       (nic_op, len(instance.nics)))
-      nic_bridge = nic_dict.get('bridge', None)
-      if nic_bridge is not None:
+      if 'bridge' in nic_dict:
+        nic_bridge = nic_dict['bridge']
+        if nic_bridge is None:
+          raise errors.OpPrereqError('Cannot set the nic bridge to None')
          if not self.rpc.call_bridges_exist(pnode, [nic_bridge]):
            msg = ("Bridge '%s' doesn't exist on one of"
                   " the instance nodes" % nic_bridge)
@@ -5291,6 +6233,18 @@ class LUSetInstanceParams(LogicalUnit):
              self.warn.append(msg)
            else:
              raise errors.OpPrereqError(msg)
+      if 'mac' in nic_dict:
+        nic_mac = nic_dict['mac']
+        if nic_mac is None:
+          raise errors.OpPrereqError('Cannot set the nic mac to None')
+        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+          # otherwise generate the mac
+          nic_dict['mac'] = self.cfg.GenerateMAC()
+        else:
+          # or validate/reserve the current one
+          if self.cfg.IsMacInUse(nic_mac):
+            raise errors.OpPrereqError("MAC address %s already in use"
+                                       " in cluster" % nic_mac)
  
      # DISK processing
      if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
@@ -5303,9 +6257,9 @@ class LUSetInstanceParams(LogicalUnit):
                                       " an instance")
          ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
          ins_l = ins_l[pnode]
-        if not type(ins_l) is list:
+        if ins_l.failed or not isinstance(ins_l.data, list):
            raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
-        if instance.name in ins_l:
+        if instance.name in ins_l.data:
            raise errors.OpPrereqError("Instance is running, can't remove"
                                       " disks.")
  
@@ -5343,10 +6297,10 @@ class LUSetInstanceParams(LogicalUnit):
          device_idx = len(instance.disks)
          for node, disk in device.ComputeNodeTree(instance.primary_node):
            self.cfg.SetDiskID(disk, node)
-          result = self.rpc.call_blockdev_remove(node, disk)
-          if result.failed or not result.data:
-            self.proc.LogWarning("Could not remove disk/%d on node %s,"
-                                 " continuing anyway", device_idx, node)
+          msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
+          if msg:
+            self.LogWarning("Could not remove disk/%d on node %s: %s,"
+                            " continuing anyway", device_idx, node, msg)
          result.append(("disk/%d" % device_idx, "remove"))
        elif disk_op == constants.DDM_ADD:
          # add a new disk
@@ -5358,13 +6312,12 @@ class LUSetInstanceParams(LogicalUnit):
          disk_idx_base = len(instance.disks)
          new_disk = _GenerateDiskTemplate(self,
                                           instance.disk_template,
-                                         instance, instance.primary_node,
+                                         instance.name, instance.primary_node,
                                           instance.secondary_nodes,
                                           [disk_dict],
                                           file_path,
                                           file_driver,
                                           disk_idx_base)[0]
-        new_disk.mode = disk_dict['mode']
          instance.disks.append(new_disk)
          info = _GetInstanceInfoText(instance)
  
@@ -5372,17 +6325,15 @@ class LUSetInstanceParams(LogicalUnit):
                       new_disk.iv_name, instance.name)
          # Note: this needs to be kept in sync with _CreateDisks
          #HARDCODE
-        for secondary_node in instance.secondary_nodes:
-          if not _CreateBlockDevOnSecondary(self, secondary_node, instance,
-                                            new_disk, False, info):
+        for node in instance.all_nodes:
+          f_create = node == instance.primary_node
+          try:
+            _CreateBlockDev(self, node, instance, new_disk,
+                            f_create, info, f_create)
+          except errors.OpExecError, err:
              self.LogWarning("Failed to create volume %s (%s) on"
-                            " secondary node %s!",
-                            new_disk.iv_name, new_disk, secondary_node)
-        #HARDCODE
-        if not _CreateBlockDevOnPrimary(self, instance.primary_node,
-                                        instance, new_disk, info):
-          self.LogWarning("Failed to create volume %s on primary!",
-                          new_disk.iv_name)
+                            " node %s: %s",
+                            new_disk.iv_name, new_disk, node, err)
          result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                         (new_disk.size, new_disk.mode)))
        else:
@@ -5396,15 +6347,11 @@ class LUSetInstanceParams(LogicalUnit):
          del instance.nics[-1]
          result.append(("nic.%d" % len(instance.nics), "remove"))
        elif nic_op == constants.DDM_ADD:
-        # add a new nic
-        if 'mac' not in nic_dict:
-          mac = constants.VALUE_GENERATE
-        else:
-          mac = nic_dict['mac']
-        if mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
-          mac = self.cfg.GenerateMAC()
+        # mac and bridge should be set, by now
+        mac = nic_dict['mac']
+        bridge = nic_dict['bridge']
          new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
-                              bridge=nic_dict.get('bridge', None))
+                              bridge=bridge)
          instance.nics.append(new_nic)
          result.append(("nic.%d" % (len(instance.nics) - 1),
                         "add:mac=%s,ip=%s,bridge=%s" %
@@ -5418,7 +6365,7 @@ class LUSetInstanceParams(LogicalUnit):
  
      # hvparams changes
      if self.op.hvparams:
-      instance.hvparams = self.hv_new
+      instance.hvparams = self.hv_inst
        for key, val in self.op.hvparams.iteritems():
          result.append(("hv/%s" % key, val))
  
@@ -5493,7 +6440,7 @@ class LUExportInstance(LogicalUnit):
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the new one,
      #    then one to remove, after
-    #  - removing the removal operation altoghether
+    #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  
    def DeclareLocks(self, level):
@@ -5533,7 +6480,8 @@ class LUExportInstance(LogicalUnit):
      if self.dst_node is None:
        # This is wrong node name, not a non-locked node
        raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
-    _CheckNodeOnline(self, self.op.target_node)
+    _CheckNodeOnline(self, self.dst_node.name)
+    _CheckNodeNotDrained(self, self.dst_node.name)
  
      # instance disk type verification
      for disk in self.instance.disks:
@@ -5551,22 +6499,30 @@ class LUExportInstance(LogicalUnit):
      if self.op.shutdown:
        # shutdown the instance, but not the disks
        result = self.rpc.call_instance_shutdown(src_node, instance)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
-                                 (instance.name, src_node))
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not shutdown instance %s on"
+                                 " node %s: %s" %
+                                 (instance.name, src_node, msg))
  
      vgname = self.cfg.GetVGName()
  
      snap_disks = []
  
+    # set the disk IDs correctly since call_instance_start needs the
+    # correct drbd minor to create the symlinks
+    for disk in instance.disks:
+      self.cfg.SetDiskID(disk, src_node)
+
+    # per-disk results
+    dresults = []
      try:
-      for disk in instance.disks:
+      for idx, disk in enumerate(instance.disks):
          # new_dev_name will be a snapshot of an lvm leaf of the one we passed
          new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
          if new_dev_name.failed or not new_dev_name.data:
-          self.LogWarning("Could not snapshot block device %s on node %s",
-                          disk.logical_id[1], src_node)
+          self.LogWarning("Could not snapshot disk/%d on node %s",
+                          idx, src_node)
            snap_disks.append(False)
          else:
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
@@ -5576,11 +6532,12 @@ class LUExportInstance(LogicalUnit):
            snap_disks.append(new_dev)
  
      finally:
-      if self.op.shutdown and instance.status == "up":
-        result = self.rpc.call_instance_start(src_node, instance, None)
-        if result.failed or not result.data:
+      if self.op.shutdown and instance.admin_up:
+        result = self.rpc.call_instance_start(src_node, instance, None, None)
+        msg = result.RemoteFailMsg()
+        if msg:
            _ShutdownInstanceDisks(self, instance)
-          raise errors.OpExecError("Could not start instance")
+          raise errors.OpExecError("Could not start instance: %s" % msg)
  
      # TODO: check for size
  
@@ -5590,18 +6547,24 @@ class LUExportInstance(LogicalUnit):
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name, idx)
          if result.failed or not result.data:
-          self.LogWarning("Could not export block device %s from node %s to"
-                          " node %s", dev.logical_id[1], src_node,
-                          dst_node.name)
-        result = self.rpc.call_blockdev_remove(src_node, dev)
-        if result.failed or not result.data:
-          self.LogWarning("Could not remove snapshot block device %s from node"
-                          " %s", dev.logical_id[1], src_node)
+          self.LogWarning("Could not export disk/%d from node %s to"
+                          " node %s", idx, src_node, dst_node.name)
+          dresults.append(False)
+        else:
+          dresults.append(True)
+        msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
+        if msg:
+          self.LogWarning("Could not remove snapshot for disk/%d from node"
+                          " %s: %s", idx, src_node, msg)
+      else:
+        dresults.append(False)
  
      result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
+    fin_resu = True
      if result.failed or not result.data:
        self.LogWarning("Could not finalize export for instance %s on node %s",
                        instance.name, dst_node.name)
+      fin_resu = False
  
      nodelist = self.cfg.GetNodeList()
      nodelist.remove(dst_node.name)
@@ -5618,6 +6581,7 @@ class LUExportInstance(LogicalUnit):
            if not self.rpc.call_export_remove(node, instance.name):
              self.LogWarning("Could not remove older export for instance %s"
                              " on node %s", instance.name, node)
+    return fin_resu, dresults
  
  
  class LURemoveExport(NoHooksLU):
@@ -5948,10 +6912,10 @@ class IAllocator(object):
      cluster_info = cfg.GetClusterInfo()
      # cluster data
      data = {
-      "version": 1,
+      "version": constants.IALLOCATOR_VERSION,
        "cluster_name": cfg.GetClusterName(),
        "cluster_tags": list(cluster_info.GetTags()),
-      "enable_hypervisors": list(cluster_info.enabled_hypervisors),
+      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
        # we don't have job IDs
        }
      iinfo = cfg.GetAllInstancesInfo().values()
@@ -5970,52 +6934,61 @@ class IAllocator(object):
                                             hypervisor_name)
      node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                         cluster_info.enabled_hypervisors)
-    for nname in node_list:
+    for nname, nresult in node_data.items():
+      # first fill in static (config-based) values
        ninfo = cfg.GetNodeInfo(nname)
-      node_data[nname].Raise()
-      if not isinstance(node_data[nname].data, dict):
-        raise errors.OpExecError("Can't get data for node %s" % nname)
-      remote_info = node_data[nname].data
-      for attr in ['memory_total', 'memory_free', 'memory_dom0',
-                   'vg_size', 'vg_free', 'cpu_total']:
-        if attr not in remote_info:
-          raise errors.OpExecError("Node '%s' didn't return attribute '%s'" %
-                                   (nname, attr))
-        try:
-          remote_info[attr] = int(remote_info[attr])
-        except ValueError, err:
-          raise errors.OpExecError("Node '%s' returned invalid value for '%s':"
-                                   " %s" % (nname, attr, str(err)))
-      # compute memory used by primary instances
-      i_p_mem = i_p_up_mem = 0
-      for iinfo, beinfo in i_list:
-        if iinfo.primary_node == nname:
-          i_p_mem += beinfo[constants.BE_MEMORY]
-          if iinfo.name not in node_iinfo[nname]:
-            i_used_mem = 0
-          else:
-            i_used_mem = int(node_iinfo[nname][iinfo.name]['memory'])
-          i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
-          remote_info['memory_free'] -= max(0, i_mem_diff)
-
-          if iinfo.status == "up":
-            i_p_up_mem += beinfo[constants.BE_MEMORY]
-
-      # compute memory used by instances
        pnr = {
          "tags": list(ninfo.GetTags()),
-        "total_memory": remote_info['memory_total'],
-        "reserved_memory": remote_info['memory_dom0'],
-        "free_memory": remote_info['memory_free'],
-        "i_pri_memory": i_p_mem,
-        "i_pri_up_memory": i_p_up_mem,
-        "total_disk": remote_info['vg_size'],
-        "free_disk": remote_info['vg_free'],
          "primary_ip": ninfo.primary_ip,
          "secondary_ip": ninfo.secondary_ip,
-        "total_cpus": remote_info['cpu_total'],
          "offline": ninfo.offline,
+        "drained": ninfo.drained,
+        "master_candidate": ninfo.master_candidate,
          }
+
+      if not (ninfo.offline or ninfo.drained):
+        nresult.Raise()
+        if not isinstance(nresult.data, dict):
+          raise errors.OpExecError("Can't get data for node %s" % nname)
+        remote_info = nresult.data
+        for attr in ['memory_total', 'memory_free', 'memory_dom0',
+                     'vg_size', 'vg_free', 'cpu_total']:
+          if attr not in remote_info:
+            raise errors.OpExecError("Node '%s' didn't return attribute"
+                                     " '%s'" % (nname, attr))
+          try:
+            remote_info[attr] = int(remote_info[attr])
+          except ValueError, err:
+            raise errors.OpExecError("Node '%s' returned invalid value"
+                                     " for '%s': %s" % (nname, attr, err))
+        # compute memory used by primary instances
+        i_p_mem = i_p_up_mem = 0
+        for iinfo, beinfo in i_list:
+          if iinfo.primary_node == nname:
+            i_p_mem += beinfo[constants.BE_MEMORY]
+            if iinfo.name not in node_iinfo[nname].data:
+              i_used_mem = 0
+            else:
+              i_used_mem = int(node_iinfo[nname].data[iinfo.name]['memory'])
+            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
+            remote_info['memory_free'] -= max(0, i_mem_diff)
+
+            if iinfo.admin_up:
+              i_p_up_mem += beinfo[constants.BE_MEMORY]
+
+        # compute memory used by instances
+        pnr_dyn = {
+          "total_memory": remote_info['memory_total'],
+          "reserved_memory": remote_info['memory_dom0'],
+          "free_memory": remote_info['memory_free'],
+          "total_disk": remote_info['vg_size'],
+          "free_disk": remote_info['vg_free'],
+          "total_cpus": remote_info['cpu_total'],
+          "i_pri_memory": i_p_mem,
+          "i_pri_up_memory": i_p_up_mem,
+          }
+        pnr.update(pnr_dyn)
+
        node_results[nname] = pnr
      data["nodes"] = node_results
  
@@ -6026,16 +6999,18 @@ class IAllocator(object):
                    for n in iinfo.nics]
        pir = {
          "tags": list(iinfo.GetTags()),
-        "should_run": iinfo.status == "up",
+        "admin_up": iinfo.admin_up,
          "vcpus": beinfo[constants.BE_VCPUS],
          "memory": beinfo[constants.BE_MEMORY],
          "os": iinfo.os,
          "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
          "nics": nic_data,
-        "disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
+        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
          "disk_template": iinfo.disk_template,
          "hypervisor": iinfo.hypervisor,
          }
+      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
+                                                 pir["disks"])
        instance_data[iinfo.name] = pir
  
      data["instances"] = instance_data
@@ -6053,8 +7028,6 @@ class IAllocator(object):
  
      """
      data = self.in_data
-    if len(self.disks) != 2:
-      raise errors.OpExecError("Only two-disk configurations supported")
  
      disk_space = _ComputeDiskSize(self.disk_template, self.disks)
  
@@ -6130,7 +7103,6 @@ class IAllocator(object):
      """
      if call_fn is None:
        call_fn = self.lu.rpc.call_iallocator_runner
-    data = self.in_text
  
      result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
      result.Raise()
@@ -6211,8 +7183,6 @@ class LUTestAllocator(NoHooksLU):
                                       " 'nics' parameter")
        if not isinstance(self.op.disks, list):
          raise errors.OpPrereqError("Invalid parameter 'disks'")
-      if len(self.op.disks) != 2:
-        raise errors.OpPrereqError("Only two-disk configurations supported")
        for row in self.op.disks:
          if (not isinstance(row, dict) or
              "size" not in row or
@@ -6221,7 +7191,7 @@ class LUTestAllocator(NoHooksLU):
              row["mode"] not in ['r', 'w']):
            raise errors.OpPrereqError("Invalid contents of the"
                                       " 'disks' parameter")
-      if self.op.hypervisor is None:
+      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
          self.op.hypervisor = self.cfg.GetHypervisorType()
      elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
        if not hasattr(self.op, "name"):