Add utils.IsNormAbsPath function
[ganeti-local] / lib / cmdlib.py
index 4140623..27dfe5f 100644 (file)
@@ -25,7 +25,6 @@
 
 import os
 import os.path
 
 import os
 import os.path
-import sha
 import time
 import tempfile
 import re
 import time
 import tempfile
 import re
@@ -392,8 +391,8 @@ def _GetWantedInstances(lu, instances):
       wanted.append(instance)
 
   else:
       wanted.append(instance)
 
   else:
-    wanted = lu.cfg.GetInstanceList()
-  return utils.NiceSort(wanted)
+    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
+  return wanted
 
 
 def _CheckOutputFields(static, dynamic, selected):
 
 
 def _CheckOutputFields(static, dynamic, selected):
@@ -434,15 +433,27 @@ def _CheckNodeOnline(lu, node):
 
   @param lu: the LU on behalf of which we make the check
   @param node: the node to check
 
   @param lu: the LU on behalf of which we make the check
   @param node: the node to check
-  @raise errors.OpPrereqError: if the nodes is offline
+  @raise errors.OpPrereqError: if the node is offline
 
   """
   if lu.cfg.GetNodeInfo(node).offline:
     raise errors.OpPrereqError("Can't use offline node %s" % node)
 
 
 
   """
   if lu.cfg.GetNodeInfo(node).offline:
     raise errors.OpPrereqError("Can't use offline node %s" % node)
 
 
+def _CheckNodeNotDrained(lu, node):
+  """Ensure that a given node is not drained.
+
+  @param lu: the LU on behalf of which we make the check
+  @param node: the node to check
+  @raise errors.OpPrereqError: if the node is drained
+
+  """
+  if lu.cfg.GetNodeInfo(node).drained:
+    raise errors.OpPrereqError("Can't use drained node %s" % node)
+
+
 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
-                          memory, vcpus, nics):
+                          memory, vcpus, nics, disk_template, disks):
   """Builds instance related env variables for hooks
 
   This builds the hook environment from individual variables.
   """Builds instance related env variables for hooks
 
   This builds the hook environment from individual variables.
@@ -455,8 +466,8 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   @param secondary_nodes: list of secondary nodes as strings
   @type os_type: string
   @param os_type: the name of the instance's OS
   @param secondary_nodes: list of secondary nodes as strings
   @type os_type: string
   @param os_type: the name of the instance's OS
-  @type status: string
-  @param status: the desired status of the instances
+  @type status: boolean
+  @param status: the should_run status of the instance
   @type memory: string
   @param memory: the memory size of the instance
   @type vcpus: string
   @type memory: string
   @param memory: the memory size of the instance
   @type vcpus: string
@@ -464,19 +475,28 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   @type nics: list
   @param nics: list of tuples (ip, bridge, mac) representing
       the NICs the instance  has
   @type nics: list
   @param nics: list of tuples (ip, bridge, mac) representing
       the NICs the instance  has
+  @type disk_template: string
+  @param disk_template: the disk template of the instance
+  @type disks: list
+  @param disks: the list of (size, mode) pairs
   @rtype: dict
   @return: the hook environment for this instance
 
   """
   @rtype: dict
   @return: the hook environment for this instance
 
   """
+  if status:
+    str_status = "up"
+  else:
+    str_status = "down"
   env = {
     "OP_TARGET": name,
     "INSTANCE_NAME": name,
     "INSTANCE_PRIMARY": primary_node,
     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
     "INSTANCE_OS_TYPE": os_type,
   env = {
     "OP_TARGET": name,
     "INSTANCE_NAME": name,
     "INSTANCE_PRIMARY": primary_node,
     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
     "INSTANCE_OS_TYPE": os_type,
-    "INSTANCE_STATUS": status,
+    "INSTANCE_STATUS": str_status,
     "INSTANCE_MEMORY": memory,
     "INSTANCE_VCPUS": vcpus,
     "INSTANCE_MEMORY": memory,
     "INSTANCE_VCPUS": vcpus,
+    "INSTANCE_DISK_TEMPLATE": disk_template,
   }
 
   if nics:
   }
 
   if nics:
@@ -486,12 +506,22 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
         ip = ""
       env["INSTANCE_NIC%d_IP" % idx] = ip
       env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
         ip = ""
       env["INSTANCE_NIC%d_IP" % idx] = ip
       env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
-      env["INSTANCE_NIC%d_HWADDR" % idx] = mac
+      env["INSTANCE_NIC%d_MAC" % idx] = mac
   else:
     nic_count = 0
 
   env["INSTANCE_NIC_COUNT"] = nic_count
 
   else:
     nic_count = 0
 
   env["INSTANCE_NIC_COUNT"] = nic_count
 
+  if disks:
+    disk_count = len(disks)
+    for idx, (size, mode) in enumerate(disks):
+      env["INSTANCE_DISK%d_SIZE" % idx] = size
+      env["INSTANCE_DISK%d_MODE" % idx] = mode
+  else:
+    disk_count = 0
+
+  env["INSTANCE_DISK_COUNT"] = disk_count
+
   return env
 
 
   return env
 
 
@@ -516,10 +546,12 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
     'primary_node': instance.primary_node,
     'secondary_nodes': instance.secondary_nodes,
     'os_type': instance.os,
     'primary_node': instance.primary_node,
     'secondary_nodes': instance.secondary_nodes,
     'os_type': instance.os,
-    'status': instance.os,
+    'status': instance.admin_up,
     'memory': bep[constants.BE_MEMORY],
     'vcpus': bep[constants.BE_VCPUS],
     'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
     'memory': bep[constants.BE_MEMORY],
     'vcpus': bep[constants.BE_VCPUS],
     'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
+    'disk_template': instance.disk_template,
+    'disks': [(disk.size, disk.mode) for disk in instance.disks],
   }
   if override:
     args.update(override)
   }
   if override:
     args.update(override)
@@ -613,7 +645,8 @@ class LUVerifyCluster(LogicalUnit):
     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
 
   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
 
   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
-                  node_result, feedback_fn, master_files):
+                  node_result, feedback_fn, master_files,
+                  drbd_map, vg_name):
     """Run multiple tests against a node.
 
     Test list:
     """Run multiple tests against a node.
 
     Test list:
@@ -630,6 +663,10 @@ class LUVerifyCluster(LogicalUnit):
     @param node_result: the results from the node
     @param feedback_fn: function used to accumulate results
     @param master_files: list of files that only masters should have
     @param node_result: the results from the node
     @param feedback_fn: function used to accumulate results
     @param master_files: list of files that only masters should have
+    @param drbd_map: the used drbd minors for this node, in
+        form of minor: (instance, must_exist) which correspond to instances
+        and their running status
+    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
 
     """
     node = nodeinfo.name
 
     """
     node = nodeinfo.name
@@ -642,29 +679,39 @@ class LUVerifyCluster(LogicalUnit):
     # compares ganeti version
     local_version = constants.PROTOCOL_VERSION
     remote_version = node_result.get('version', None)
     # compares ganeti version
     local_version = constants.PROTOCOL_VERSION
     remote_version = node_result.get('version', None)
-    if not remote_version:
+    if not (remote_version and isinstance(remote_version, (list, tuple)) and
+            len(remote_version) == 2):
       feedback_fn("  - ERROR: connection to %s failed" % (node))
       return True
 
       feedback_fn("  - ERROR: connection to %s failed" % (node))
       return True
 
-    if local_version != remote_version:
-      feedback_fn("  - ERROR: sw version mismatch: master %s, node(%s) %s" %
-                      (local_version, node, remote_version))
+    if local_version != remote_version[0]:
+      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
+                  " node %s %s" % (local_version, node, remote_version[0]))
       return True
 
       return True
 
-    # checks vg existance and size > 20G
+    # node seems compatible, we can actually try to look into its results
 
     bad = False
 
     bad = False
-    vglist = node_result.get(constants.NV_VGLIST, None)
-    if not vglist:
-      feedback_fn("  - ERROR: unable to check volume groups on node %s." %
-                      (node,))
-      bad = True
-    else:
-      vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
-                                            constants.MIN_VG_SIZE)
-      if vgstatus:
-        feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
+
+    # full package version
+    if constants.RELEASE_VERSION != remote_version[1]:
+      feedback_fn("  - WARNING: software version mismatch: master %s,"
+                  " node %s %s" %
+                  (constants.RELEASE_VERSION, node, remote_version[1]))
+
+    # checks vg existence and size > 20G
+    if vg_name is not None:
+      vglist = node_result.get(constants.NV_VGLIST, None)
+      if not vglist:
+        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
+                        (node,))
         bad = True
         bad = True
+      else:
+        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
+                                              constants.MIN_VG_SIZE)
+        if vgstatus:
+          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
+          bad = True
 
     # checks config file checksum
 
 
     # checks config file checksum
 
@@ -724,6 +771,25 @@ class LUVerifyCluster(LogicalUnit):
         if hv_result is not None:
           feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                       (hv_name, hv_result))
         if hv_result is not None:
           feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                       (hv_name, hv_result))
+
+    # check used drbd list
+    if vg_name is not None:
+      used_minors = node_result.get(constants.NV_DRBDLIST, [])
+      if not isinstance(used_minors, (tuple, list)):
+        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
+                    str(used_minors))
+      else:
+        for minor, (iname, must_exist) in drbd_map.items():
+          if minor not in used_minors and must_exist:
+            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
+                        " not active" % (minor, iname))
+            bad = True
+        for minor in used_minors:
+          if minor not in drbd_map:
+            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
+                        minor)
+            bad = True
+
     return bad
 
   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
     return bad
 
   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
@@ -751,7 +817,7 @@ class LUVerifyCluster(LogicalUnit):
                           (volume, node))
           bad = True
 
                           (volume, node))
           bad = True
 
-    if not instanceconfig.status == 'down':
+    if instanceconfig.admin_up:
       if ((node_current not in node_instance or
           not instance in node_instance[node_current]) and
           node_current not in n_offline):
       if ((node_current not in node_instance or
           not instance in node_instance[node_current]) and
           node_current not in n_offline):
@@ -849,8 +915,12 @@ class LUVerifyCluster(LogicalUnit):
 
     """
     all_nodes = self.cfg.GetNodeList()
 
     """
     all_nodes = self.cfg.GetNodeList()
-    # TODO: populate the environment with useful information for verify hooks
-    env = {}
+    env = {
+      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
+      }
+    for node in self.cfg.GetAllNodesInfo().values():
+      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
+
     return env, [], all_nodes
 
   def Exec(self, feedback_fn):
     return env, [], all_nodes
 
   def Exec(self, feedback_fn):
@@ -867,9 +937,12 @@ class LUVerifyCluster(LogicalUnit):
     nodelist = utils.NiceSort(self.cfg.GetNodeList())
     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
     nodelist = utils.NiceSort(self.cfg.GetNodeList())
     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
+    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
+                        for iname in instancelist)
     i_non_redundant = [] # Non redundant instances
     i_non_a_balanced = [] # Non auto-balanced instances
     n_offline = [] # List of offline nodes
     i_non_redundant = [] # Non redundant instances
     i_non_a_balanced = [] # Non auto-balanced instances
     n_offline = [] # List of offline nodes
+    n_drained = [] # List of nodes being drained
     node_volume = {}
     node_instance = {}
     node_info = {}
     node_volume = {}
     node_instance = {}
     node_info = {}
@@ -895,17 +968,21 @@ class LUVerifyCluster(LogicalUnit):
       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                   node.secondary_ip) for node in nodeinfo
                                  if not node.offline],
       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                   node.secondary_ip) for node in nodeinfo
                                  if not node.offline],
-      constants.NV_LVLIST: vg_name,
       constants.NV_INSTANCELIST: hypervisors,
       constants.NV_INSTANCELIST: hypervisors,
-      constants.NV_VGLIST: None,
       constants.NV_VERSION: None,
       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
       }
       constants.NV_VERSION: None,
       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
       }
+    if vg_name is not None:
+      node_verify_param[constants.NV_VGLIST] = None
+      node_verify_param[constants.NV_LVLIST] = vg_name
+      node_verify_param[constants.NV_DRBDLIST] = None
     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                            self.cfg.GetClusterName())
 
     cluster = self.cfg.GetClusterInfo()
     master_node = self.cfg.GetMasterNode()
     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                            self.cfg.GetClusterName())
 
     cluster = self.cfg.GetClusterInfo()
     master_node = self.cfg.GetMasterNode()
+    all_drbd_map = self.cfg.ComputeDRBDMap()
+
     for node_i in nodeinfo:
       node = node_i.name
       nresult = all_nvinfo[node].data
     for node_i in nodeinfo:
       node = node_i.name
       nresult = all_nvinfo[node].data
@@ -919,6 +996,9 @@ class LUVerifyCluster(LogicalUnit):
         ntype = "master"
       elif node_i.master_candidate:
         ntype = "master candidate"
         ntype = "master"
       elif node_i.master_candidate:
         ntype = "master candidate"
+      elif node_i.drained:
+        ntype = "drained"
+        n_drained.append(node)
       else:
         ntype = "regular"
       feedback_fn("* Verifying node %s (%s)" % (node, ntype))
       else:
         ntype = "regular"
       feedback_fn("* Verifying node %s (%s)" % (node, ntype))
@@ -928,14 +1008,29 @@ class LUVerifyCluster(LogicalUnit):
         bad = True
         continue
 
         bad = True
         continue
 
+      node_drbd = {}
+      for minor, instance in all_drbd_map[node].items():
+        if instance not in instanceinfo:
+          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
+                      instance)
+          # ghost instance should not be running, but otherwise we
+          # don't give double warnings (both ghost instance and
+          # unallocated minor in use)
+          node_drbd[minor] = (instance, False)
+        else:
+          instance = instanceinfo[instance]
+          node_drbd[minor] = (instance.name, instance.admin_up)
       result = self._VerifyNode(node_i, file_names, local_checksums,
       result = self._VerifyNode(node_i, file_names, local_checksums,
-                                nresult, feedback_fn, master_files)
+                                nresult, feedback_fn, master_files,
+                                node_drbd, vg_name)
       bad = bad or result
 
       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
       bad = bad or result
 
       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
-      if isinstance(lvdata, basestring):
+      if vg_name is None:
+        node_volume[node] = {}
+      elif isinstance(lvdata, basestring):
         feedback_fn("  - ERROR: LVM problem on node %s: %s" %
         feedback_fn("  - ERROR: LVM problem on node %s: %s" %
-                    (node, lvdata.encode('string_escape')))
+                    (node, utils.SafeEncode(lvdata)))
         bad = True
         node_volume[node] = {}
       elif not isinstance(lvdata, dict):
         bad = True
         node_volume[node] = {}
       elif not isinstance(lvdata, dict):
@@ -965,7 +1060,6 @@ class LUVerifyCluster(LogicalUnit):
       try:
         node_info[node] = {
           "mfree": int(nodeinfo['memory_free']),
       try:
         node_info[node] = {
           "mfree": int(nodeinfo['memory_free']),
-          "dfree": int(nresult[constants.NV_VGLIST][vg_name]),
           "pinst": [],
           "sinst": [],
           # dictionary holding all instances this node is secondary for,
           "pinst": [],
           "sinst": [],
           # dictionary holding all instances this node is secondary for,
@@ -976,8 +1070,19 @@ class LUVerifyCluster(LogicalUnit):
           # secondary.
           "sinst-by-pnode": {},
         }
           # secondary.
           "sinst-by-pnode": {},
         }
-      except ValueError:
-        feedback_fn("  - ERROR: invalid value returned from node %s" % (node,))
+        # FIXME: devise a free space model for file based instances as well
+        if vg_name is not None:
+          if (constants.NV_VGLIST not in nresult or
+              vg_name not in nresult[constants.NV_VGLIST]):
+            feedback_fn("  - ERROR: node %s didn't return data for the"
+                        " volume group '%s' - it is either missing or broken" %
+                        (node, vg_name))
+            bad = True
+            continue
+          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
+      except (ValueError, KeyError):
+        feedback_fn("  - ERROR: invalid nodeinfo value returned"
+                    " from node %s" % (node,))
         bad = True
         continue
 
         bad = True
         continue
 
@@ -985,7 +1090,7 @@ class LUVerifyCluster(LogicalUnit):
 
     for instance in instancelist:
       feedback_fn("* Verifying instance %s" % instance)
 
     for instance in instancelist:
       feedback_fn("* Verifying instance %s" % instance)
-      inst_config = self.cfg.GetInstanceInfo(instance)
+      inst_config = instanceinfo[instance]
       result =  self._VerifyInstance(instance, inst_config, node_volume,
                                      node_instance, feedback_fn, n_offline)
       bad = bad or result
       result =  self._VerifyInstance(instance, inst_config, node_volume,
                                      node_instance, feedback_fn, n_offline)
       bad = bad or result
@@ -1066,6 +1171,9 @@ class LUVerifyCluster(LogicalUnit):
     if n_offline:
       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
 
     if n_offline:
       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
 
+    if n_drained:
+      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
+
     return not bad
 
   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
     return not bad
 
   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
@@ -1154,7 +1262,7 @@ class LUVerifyDisks(NoHooksLU):
     nv_dict = {}
     for inst in instances:
       inst_lvs = {}
     nv_dict = {}
     for inst in instances:
       inst_lvs = {}
-      if (inst.status != "up" or
+      if (not inst.admin_up or
           inst.disk_template not in constants.DTS_NET_MIRROR):
         continue
       inst.MapLVsByNode(inst_lvs)
           inst.disk_template not in constants.DTS_NET_MIRROR):
         continue
       inst.MapLVsByNode(inst_lvs)
@@ -1181,6 +1289,7 @@ class LUVerifyDisks(NoHooksLU):
       if isinstance(lvs, basestring):
         logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
         res_nlvm[node] = lvs
       if isinstance(lvs, basestring):
         logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
         res_nlvm[node] = lvs
+        continue
       elif not isinstance(lvs, dict):
         logging.warning("Connection to node %s failed or invalid data"
                         " returned", node)
       elif not isinstance(lvs, dict):
         logging.warning("Connection to node %s failed or invalid data"
                         " returned", node)
@@ -1308,7 +1417,7 @@ class LUSetClusterParams(LogicalUnit):
   _OP_REQP = []
   REQ_BGL = False
 
   _OP_REQP = []
   REQ_BGL = False
 
-  def CheckParameters(self):
+  def CheckArguments(self):
     """Check parameters
 
     """
     """Check parameters
 
     """
@@ -1317,7 +1426,7 @@ class LUSetClusterParams(LogicalUnit):
     if self.op.candidate_pool_size is not None:
       try:
         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
     if self.op.candidate_pool_size is not None:
       try:
         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
-      except ValueError, err:
+      except (ValueError, TypeError), err:
         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                    str(err))
       if self.op.candidate_pool_size < 1:
         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                    str(err))
       if self.op.candidate_pool_size < 1:
@@ -1349,8 +1458,6 @@ class LUSetClusterParams(LogicalUnit):
     if the given volume group is valid.
 
     """
     if the given volume group is valid.
 
     """
-    # FIXME: This only works because there is only one parameter that can be
-    # changed or removed.
     if self.op.vg_name is not None and not self.op.vg_name:
       instances = self.cfg.GetAllInstancesInfo().values()
       for inst in instances:
     if self.op.vg_name is not None and not self.op.vg_name:
       instances = self.cfg.GetAllInstancesInfo().values()
       for inst in instances:
@@ -1379,7 +1486,7 @@ class LUSetClusterParams(LogicalUnit):
     self.cluster = cluster = self.cfg.GetClusterInfo()
     # validate beparams changes
     if self.op.beparams:
     self.cluster = cluster = self.cfg.GetClusterInfo()
     # validate beparams changes
     if self.op.beparams:
-      utils.CheckBEParams(self.op.beparams)
+      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
       self.new_beparams = cluster.FillDict(
         cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
 
       self.new_beparams = cluster.FillDict(
         cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
 
@@ -1407,6 +1514,7 @@ class LUSetClusterParams(LogicalUnit):
              hv_name in self.op.enabled_hypervisors)):
           # either this is a new hypervisor, or its parameters have changed
           hv_class = hypervisor.GetHypervisor(hv_name)
              hv_name in self.op.enabled_hypervisors)):
           # either this is a new hypervisor, or its parameters have changed
           hv_class = hypervisor.GetHypervisor(hv_name)
+          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
           hv_class.CheckParameterSyntax(hv_params)
           _CheckHVParams(self, node_list, hv_name, hv_params)
 
           hv_class.CheckParameterSyntax(hv_params)
           _CheckHVParams(self, node_list, hv_name, hv_params)
 
@@ -1415,8 +1523,11 @@ class LUSetClusterParams(LogicalUnit):
 
     """
     if self.op.vg_name is not None:
 
     """
     if self.op.vg_name is not None:
-      if self.op.vg_name != self.cfg.GetVGName():
-        self.cfg.SetVGName(self.op.vg_name)
+      new_volume = self.op.vg_name
+      if not new_volume:
+        new_volume = None
+      if new_volume != self.cfg.GetVGName():
+        self.cfg.SetVGName(new_volume)
       else:
         feedback_fn("Cluster LVM configuration already in desired"
                     " state, not changing")
       else:
         feedback_fn("Cluster LVM configuration already in desired"
                     " state, not changing")
@@ -1437,6 +1548,45 @@ class LUSetClusterParams(LogicalUnit):
       _AdjustCandidatePool(self)
 
 
       _AdjustCandidatePool(self)
 
 
+def _RedistributeAncillaryFiles(lu, additional_nodes=None):
+  """Distribute additional files which are part of the cluster configuration.
+
+  ConfigWriter takes care of distributing the config and ssconf files, but
+  there are more files which should be distributed to all nodes. This function
+  makes sure those are copied.
+
+  @param lu: calling logical unit
+  @param additional_nodes: list of nodes not in the config to distribute to
+
+  """
+  # 1. Gather target nodes
+  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
+  dist_nodes = lu.cfg.GetNodeList()
+  if additional_nodes is not None:
+    dist_nodes.extend(additional_nodes)
+  if myself.name in dist_nodes:
+    dist_nodes.remove(myself.name)
+  # 2. Gather files to distribute
+  dist_files = set([constants.ETC_HOSTS,
+                    constants.SSH_KNOWN_HOSTS_FILE,
+                    constants.RAPI_CERT_FILE,
+                    constants.RAPI_USERS_FILE,
+                   ])
+
+  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
+  for hv_name in enabled_hypervisors:
+    hv_class = hypervisor.GetHypervisor(hv_name)
+    dist_files.update(hv_class.GetAncillaryFiles())
+
+  # 3. Perform the files upload
+  for fname in dist_files:
+    if os.path.exists(fname):
+      result = lu.rpc.call_upload_file(dist_nodes, fname)
+      for to_node, to_result in result.items():
+        if to_result.failed or not to_result.data:
+          logging.error("Copy of file %s to node %s failed", fname, to_node)
+
+
 class LURedistributeConfig(NoHooksLU):
   """Force the redistribution of cluster configuration.
 
 class LURedistributeConfig(NoHooksLU):
   """Force the redistribution of cluster configuration.
 
@@ -1462,6 +1612,7 @@ class LURedistributeConfig(NoHooksLU):
 
     """
     self.cfg.Update(self.cfg.GetClusterInfo())
 
     """
     self.cfg.Update(self.cfg.GetClusterInfo())
+    _RedistributeAncillaryFiles(self)
 
 
 def _WaitForSync(lu, instance, oneshot=False, unlock=False):
 
 
 def _WaitForSync(lu, instance, oneshot=False, unlock=False):
@@ -1495,8 +1646,7 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False):
       continue
     rstats = rstats.data
     retries = 0
       continue
     rstats = rstats.data
     retries = 0
-    for i in range(len(rstats)):
-      mstat = rstats[i]
+    for i, mstat in enumerate(rstats):
       if mstat is None:
         lu.LogWarning("Can't compute data for node %s/%s",
                            node, instance.disks[i].iv_name)
       if mstat is None:
         lu.LogWarning("Can't compute data for node %s/%s",
                            node, instance.disks[i].iv_name)
@@ -1540,11 +1690,15 @@ def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
   result = True
   if on_primary or dev.AssembleOnSecondary():
     rstats = lu.rpc.call_blockdev_find(node, dev)
   result = True
   if on_primary or dev.AssembleOnSecondary():
     rstats = lu.rpc.call_blockdev_find(node, dev)
-    if rstats.failed or not rstats.data:
-      logging.warning("Node %s: disk degraded, not found or node down", node)
+    msg = rstats.RemoteFailMsg()
+    if msg:
+      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
+      result = False
+    elif not rstats.payload:
+      lu.LogWarning("Can't find disk on node %s", node)
       result = False
     else:
       result = False
     else:
-      result = result and (not rstats.data[idx])
+      result = result and (not rstats.payload[idx])
   if dev.children:
     for child in dev.children:
       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
   if dev.children:
     for child in dev.children:
       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
@@ -1570,9 +1724,11 @@ class LUDiagnoseOS(NoHooksLU):
                        selected=self.op.output_fields)
 
     # Lock all nodes, in shared mode
                        selected=self.op.output_fields)
 
     # Lock all nodes, in shared mode
+    # Temporary removal of locks, should be reverted later
+    # TODO: reintroduce locks when they are lighter-weight
     self.needed_locks = {}
     self.needed_locks = {}
-    self.share_locks[locking.LEVEL_NODE] = 1
-    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+    #self.share_locks[locking.LEVEL_NODE] = 1
+    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
 
   def CheckPrereq(self):
     """Check prerequisites.
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -1587,7 +1743,7 @@ class LUDiagnoseOS(NoHooksLU):
     @param rlist: a map with node names as keys and OS objects as values
 
     @rtype: dict
     @param rlist: a map with node names as keys and OS objects as values
 
     @rtype: dict
-    @returns: a dictionary with osnames as keys and as value another map, with
+    @return: a dictionary with osnames as keys and as value another map, with
         nodes as keys and list of OS objects as values, eg::
 
           {"debian-etch": {"node1": [<object>,...],
         nodes as keys and list of OS objects as values, eg::
 
           {"debian-etch": {"node1": [<object>,...],
@@ -1596,6 +1752,11 @@ class LUDiagnoseOS(NoHooksLU):
 
     """
     all_os = {}
 
     """
     all_os = {}
+    # we build here the list of nodes that didn't fail the RPC (at RPC
+    # level), so that nodes with a non-responding node daemon don't
+    # make all OSes invalid
+    good_nodes = [node_name for node_name in rlist
+                  if not rlist[node_name].failed]
     for node_name, nr in rlist.iteritems():
       if nr.failed or not nr.data:
         continue
     for node_name, nr in rlist.iteritems():
       if nr.failed or not nr.data:
         continue
@@ -1604,7 +1765,7 @@ class LUDiagnoseOS(NoHooksLU):
           # build a list of nodes for this os containing empty lists
           # for each node in node_list
           all_os[os_obj.name] = {}
           # build a list of nodes for this os containing empty lists
           # for each node in node_list
           all_os[os_obj.name] = {}
-          for nname in node_list:
+          for nname in good_nodes:
             all_os[os_obj.name][nname] = []
         all_os[os_obj.name][node_name].append(os_obj)
     return all_os
             all_os[os_obj.name][nname] = []
         all_os[os_obj.name][node_name].append(os_obj)
     return all_os
@@ -1613,9 +1774,7 @@ class LUDiagnoseOS(NoHooksLU):
     """Compute the list of OSes.
 
     """
     """Compute the list of OSes.
 
     """
-    node_list = self.acquired_locks[locking.LEVEL_NODE]
-    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()
-                   if node in node_list]
+    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
     node_data = self.rpc.call_os_diagnose(valid_nodes)
     if node_data == False:
       raise errors.OpExecError("Can't gather the list of OSes")
     node_data = self.rpc.call_os_diagnose(valid_nodes)
     if node_data == False:
       raise errors.OpExecError("Can't gather the list of OSes")
@@ -1687,11 +1846,8 @@ class LURemoveNode(LogicalUnit):
 
     for instance_name in instance_list:
       instance = self.cfg.GetInstanceInfo(instance_name)
 
     for instance_name in instance_list:
       instance = self.cfg.GetInstanceInfo(instance_name)
-      if node.name == instance.primary_node:
-        raise errors.OpPrereqError("Instance %s still running on the node,"
-                                   " please remove first." % instance_name)
-      if node.name in instance.secondary_nodes:
-        raise errors.OpPrereqError("Instance %s has node as a secondary,"
+      if node.name in instance.all_nodes:
+        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                    " please remove first." % instance_name)
     self.op.node_name = node.name
     self.node = node
                                    " please remove first." % instance_name)
     self.op.node_name = node.name
     self.node = node
@@ -1716,13 +1872,13 @@ class LUQueryNodes(NoHooksLU):
   """Logical unit for querying nodes.
 
   """
   """Logical unit for querying nodes.
 
   """
-  _OP_REQP = ["output_fields", "names"]
+  _OP_REQP = ["output_fields", "names", "use_locking"]
   REQ_BGL = False
   _FIELDS_DYNAMIC = utils.FieldSet(
     "dtotal", "dfree",
     "mtotal", "mnode", "mfree",
     "bootid",
   REQ_BGL = False
   _FIELDS_DYNAMIC = utils.FieldSet(
     "dtotal", "dfree",
     "mtotal", "mnode", "mfree",
     "bootid",
-    "ctotal",
+    "ctotal", "cnodes", "csockets",
     )
 
   _FIELDS_STATIC = utils.FieldSet(
     )
 
   _FIELDS_STATIC = utils.FieldSet(
@@ -1733,6 +1889,7 @@ class LUQueryNodes(NoHooksLU):
     "master_candidate",
     "master",
     "offline",
     "master_candidate",
     "master",
     "offline",
+    "drained",
     )
 
   def ExpandNames(self):
     )
 
   def ExpandNames(self):
@@ -1748,7 +1905,8 @@ class LUQueryNodes(NoHooksLU):
     else:
       self.wanted = locking.ALL_SET
 
     else:
       self.wanted = locking.ALL_SET
 
-    self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_locking = self.do_node_query and self.op.use_locking
     if self.do_locking:
       # if we don't request only static fields, we need to lock the nodes
       self.needed_locks[locking.LEVEL_NODE] = self.wanted
     if self.do_locking:
       # if we don't request only static fields, we need to lock the nodes
       self.needed_locks[locking.LEVEL_NODE] = self.wanted
@@ -1783,7 +1941,7 @@ class LUQueryNodes(NoHooksLU):
 
     # begin data gathering
 
 
     # begin data gathering
 
-    if self.do_locking:
+    if self.do_node_query:
       live_data = {}
       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                           self.cfg.GetHypervisorType())
       live_data = {}
       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                           self.cfg.GetHypervisorType())
@@ -1800,6 +1958,8 @@ class LUQueryNodes(NoHooksLU):
             "dfree": fn(int, nodeinfo.get('vg_free', None)),
             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
             "bootid": nodeinfo.get('bootid', None),
             "dfree": fn(int, nodeinfo.get('vg_free', None)),
             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
             "bootid": nodeinfo.get('bootid', None),
+            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
+            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
             }
         else:
           live_data[name] = {}
             }
         else:
           live_data[name] = {}
@@ -1854,6 +2014,8 @@ class LUQueryNodes(NoHooksLU):
           val = node.name == master_node
         elif field == "offline":
           val = node.offline
           val = node.name == master_node
         elif field == "offline":
           val = node.offline
+        elif field == "drained":
+          val = node.drained
         elif self._FIELDS_DYNAMIC.Matches(field):
           val = live_data[node.name].get(field, None)
         else:
         elif self._FIELDS_DYNAMIC.Matches(field):
           val = live_data[node.name].get(field, None)
         else:
@@ -2050,7 +2212,7 @@ class LUAddNode(LogicalUnit):
                                  primary_ip=primary_ip,
                                  secondary_ip=secondary_ip,
                                  master_candidate=master_candidate,
                                  primary_ip=primary_ip,
                                  secondary_ip=secondary_ip,
                                  master_candidate=master_candidate,
-                                 offline=False)
+                                 offline=False, drained=False)
 
   def Exec(self, feedback_fn):
     """Adds the new node to the cluster.
 
   def Exec(self, feedback_fn):
     """Adds the new node to the cluster.
@@ -2092,8 +2254,10 @@ class LUAddNode(LogicalUnit):
                                     keyarray[2],
                                     keyarray[3], keyarray[4], keyarray[5])
 
                                     keyarray[2],
                                     keyarray[3], keyarray[4], keyarray[5])
 
-    if result.failed or not result.data:
-      raise errors.OpExecError("Cannot transfer ssh keys to the new node")
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Cannot transfer ssh keys to the"
+                               " new node: %s" % msg)
 
     # Add node to our /etc/hosts, and add key to known_hosts
     utils.AddHostToEtcHosts(new_node.name)
 
     # Add node to our /etc/hosts, and add key to known_hosts
     utils.AddHostToEtcHosts(new_node.name)
@@ -2121,36 +2285,14 @@ class LUAddNode(LogicalUnit):
       if result[verifier].data['nodelist']:
         for failed in result[verifier].data['nodelist']:
           feedback_fn("ssh/hostname verification failed %s -> %s" %
       if result[verifier].data['nodelist']:
         for failed in result[verifier].data['nodelist']:
           feedback_fn("ssh/hostname verification failed %s -> %s" %
-                      (verifier, result[verifier]['nodelist'][failed]))
+                      (verifier, result[verifier].data['nodelist'][failed]))
         raise errors.OpExecError("ssh/hostname verification failed.")
 
         raise errors.OpExecError("ssh/hostname verification failed.")
 
-    # Distribute updated /etc/hosts and known_hosts to all nodes,
-    # including the node just added
-    myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
-    dist_nodes = self.cfg.GetNodeList()
-    if not self.op.readd:
-      dist_nodes.append(node)
-    if myself.name in dist_nodes:
-      dist_nodes.remove(myself.name)
-
-    logging.debug("Copying hosts and known_hosts to all nodes")
-    for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
-      result = self.rpc.call_upload_file(dist_nodes, fname)
-      for to_node, to_result in result.iteritems():
-        if to_result.failed or not to_result.data:
-          logging.error("Copy of file %s to node %s failed", fname, to_node)
-
-    to_copy = []
-    if constants.HT_XEN_HVM in self.cfg.GetClusterInfo().enabled_hypervisors:
-      to_copy.append(constants.VNC_PASSWORD_FILE)
-    for fname in to_copy:
-      result = self.rpc.call_upload_file([node], fname)
-      if result[node].failed or not result[node]:
-        logging.error("Could not copy file %s to node %s", fname, node)
-
     if self.op.readd:
     if self.op.readd:
+      _RedistributeAncillaryFiles(self)
       self.context.ReaddNode(new_node)
     else:
       self.context.ReaddNode(new_node)
     else:
+      _RedistributeAncillaryFiles(self, additional_nodes=node)
       self.context.AddNode(new_node)
 
 
       self.context.AddNode(new_node)
 
 
@@ -2170,11 +2312,13 @@ class LUSetNodeParams(LogicalUnit):
     self.op.node_name = node_name
     _CheckBooleanOpField(self.op, 'master_candidate')
     _CheckBooleanOpField(self.op, 'offline')
     self.op.node_name = node_name
     _CheckBooleanOpField(self.op, 'master_candidate')
     _CheckBooleanOpField(self.op, 'offline')
-    if self.op.master_candidate is None and self.op.offline is None:
+    _CheckBooleanOpField(self.op, 'drained')
+    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
+    if all_mods.count(None) == 3:
       raise errors.OpPrereqError("Please pass at least one modification")
       raise errors.OpPrereqError("Please pass at least one modification")
-    if self.op.offline == True and self.op.master_candidate == True:
-      raise errors.OpPrereqError("Can't set the node into offline and"
-                                 " master_candidate at the same time")
+    if all_mods.count(True) > 1:
+      raise errors.OpPrereqError("Can't set the node into more than one"
+                                 " state at the same time")
 
   def ExpandNames(self):
     self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
 
   def ExpandNames(self):
     self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
@@ -2189,6 +2333,7 @@ class LUSetNodeParams(LogicalUnit):
       "OP_TARGET": self.op.node_name,
       "MASTER_CANDIDATE": str(self.op.master_candidate),
       "OFFLINE": str(self.op.offline),
       "OP_TARGET": self.op.node_name,
       "MASTER_CANDIDATE": str(self.op.master_candidate),
       "OFFLINE": str(self.op.offline),
+      "DRAINED": str(self.op.drained),
       }
     nl = [self.cfg.GetMasterNode(),
           self.op.node_name]
       }
     nl = [self.cfg.GetMasterNode(),
           self.op.node_name]
@@ -2202,12 +2347,12 @@ class LUSetNodeParams(LogicalUnit):
     """
     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
 
     """
     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
 
-    if ((self.op.master_candidate == False or self.op.offline == True)
-        and node.master_candidate):
+    if ((self.op.master_candidate == False or self.op.offline == True or
+         self.op.drained == True) and node.master_candidate):
       # we will demote the node from master_candidate
       if self.op.node_name == self.cfg.GetMasterNode():
         raise errors.OpPrereqError("The master node has to be a"
       # we will demote the node from master_candidate
       if self.op.node_name == self.cfg.GetMasterNode():
         raise errors.OpPrereqError("The master node has to be a"
-                                   " master candidate and online")
+                                   " master candidate, online and not drained")
       cp_size = self.cfg.GetClusterInfo().candidate_pool_size
       num_candidates, _ = self.cfg.GetMasterCandidateStats()
       if num_candidates <= cp_size:
       cp_size = self.cfg.GetClusterInfo().candidate_pool_size
       num_candidates, _ = self.cfg.GetMasterCandidateStats()
       if num_candidates <= cp_size:
@@ -2218,10 +2363,11 @@ class LUSetNodeParams(LogicalUnit):
         else:
           raise errors.OpPrereqError(msg)
 
         else:
           raise errors.OpPrereqError(msg)
 
-    if (self.op.master_candidate == True and node.offline and
-        not self.op.offline == False):
-      raise errors.OpPrereqError("Can't set an offline node to"
-                                 " master_candidate")
+    if (self.op.master_candidate == True and
+        ((node.offline and not self.op.offline == False) or
+         (node.drained and not self.op.drained == False))):
+      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
+                                 " to master_candidate" % node.name)
 
     return
 
 
     return
 
@@ -2232,29 +2378,46 @@ class LUSetNodeParams(LogicalUnit):
     node = self.node
 
     result = []
     node = self.node
 
     result = []
+    changed_mc = False
 
     if self.op.offline is not None:
       node.offline = self.op.offline
       result.append(("offline", str(self.op.offline)))
 
     if self.op.offline is not None:
       node.offline = self.op.offline
       result.append(("offline", str(self.op.offline)))
-      if self.op.offline == True and node.master_candidate:
-        node.master_candidate = False
-        result.append(("master_candidate", "auto-demotion due to offline"))
+      if self.op.offline == True:
+        if node.master_candidate:
+          node.master_candidate = False
+          changed_mc = True
+          result.append(("master_candidate", "auto-demotion due to offline"))
+        if node.drained:
+          node.drained = False
+          result.append(("drained", "clear drained status due to offline"))
 
     if self.op.master_candidate is not None:
       node.master_candidate = self.op.master_candidate
 
     if self.op.master_candidate is not None:
       node.master_candidate = self.op.master_candidate
+      changed_mc = True
       result.append(("master_candidate", str(self.op.master_candidate)))
       if self.op.master_candidate == False:
         rrc = self.rpc.call_node_demote_from_mc(node.name)
       result.append(("master_candidate", str(self.op.master_candidate)))
       if self.op.master_candidate == False:
         rrc = self.rpc.call_node_demote_from_mc(node.name)
-        if (rrc.failed or not isinstance(rrc.data, (tuple, list))
-            or len(rrc.data) != 2):
-          self.LogWarning("Node rpc error: %s" % rrc.error)
-        elif not rrc.data[0]:
-          self.LogWarning("Node failed to demote itself: %s" % rrc.data[1])
+        msg = rrc.RemoteFailMsg()
+        if msg:
+          self.LogWarning("Node failed to demote itself: %s" % msg)
+
+    if self.op.drained is not None:
+      node.drained = self.op.drained
+      result.append(("drained", str(self.op.drained)))
+      if self.op.drained == True:
+        if node.master_candidate:
+          node.master_candidate = False
+          changed_mc = True
+          result.append(("master_candidate", "auto-demotion due to drain"))
+        if node.offline:
+          node.offline = False
+          result.append(("offline", "clear offline status due to drain"))
 
     # this will trigger configuration file update, if needed
     self.cfg.Update(node)
     # this will trigger job queue propagation or cleanup
 
     # this will trigger configuration file update, if needed
     self.cfg.Update(node)
     # this will trigger job queue propagation or cleanup
-    if self.op.node_name != self.cfg.GetMasterNode():
+    if changed_mc:
       self.context.ReaddNode(node)
 
     return result
       self.context.ReaddNode(node)
 
     return result
@@ -2292,9 +2455,14 @@ class LUQueryClusterInfo(NoHooksLU):
       "master": cluster.master_node,
       "default_hypervisor": cluster.default_hypervisor,
       "enabled_hypervisors": cluster.enabled_hypervisors,
       "master": cluster.master_node,
       "default_hypervisor": cluster.default_hypervisor,
       "enabled_hypervisors": cluster.enabled_hypervisors,
-      "hvparams": cluster.hvparams,
+      "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
+                        for hypervisor in cluster.enabled_hypervisors]),
       "beparams": cluster.beparams,
       "candidate_pool_size": cluster.candidate_pool_size,
       "beparams": cluster.beparams,
       "candidate_pool_size": cluster.candidate_pool_size,
+      "default_bridge": cluster.default_bridge,
+      "master_netdev": cluster.master_netdev,
+      "volume_group_name": cluster.volume_group_name,
+      "file_storage_dir": cluster.file_storage_dir,
       }
 
     return result
       }
 
     return result
@@ -2412,10 +2580,11 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(node_disk, node)
       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(node_disk, node)
       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
-      if result.failed or not result:
+      msg = result.RemoteFailMsg()
+      if msg:
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
-                           " (is_primary=False, pass=1)",
-                           inst_disk.iv_name, node)
+                           " (is_primary=False, pass=1): %s",
+                           inst_disk.iv_name, node, msg)
         if not ignore_secondaries:
           disks_ok = False
 
         if not ignore_secondaries:
           disks_ok = False
 
@@ -2428,12 +2597,14 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
         continue
       lu.cfg.SetDiskID(node_disk, node)
       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
         continue
       lu.cfg.SetDiskID(node_disk, node)
       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
-      if result.failed or not result:
+      msg = result.RemoteFailMsg()
+      if msg:
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
-                           " (is_primary=True, pass=2)",
-                           inst_disk.iv_name, node)
+                           " (is_primary=True, pass=2): %s",
+                           inst_disk.iv_name, node, msg)
         disks_ok = False
         disks_ok = False
-    device_info.append((instance.primary_node, inst_disk.iv_name, result.data))
+    device_info.append((instance.primary_node, inst_disk.iv_name,
+                        result.payload))
 
   # leave the disks configured for the primary node
   # this is a workaround that would be fixed better by
 
   # leave the disks configured for the primary node
   # this is a workaround that would be fixed better by
@@ -2523,17 +2694,18 @@ def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
   ignored.
 
   """
   ignored.
 
   """
-  result = True
+  all_result = True
   for disk in instance.disks:
     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(top_disk, node)
       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
   for disk in instance.disks:
     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(top_disk, node)
       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
-      if result.failed or not result.data:
-        logging.error("Could not shutdown block device %s on node %s",
-                      disk.iv_name, node)
+      msg = result.RemoteFailMsg()
+      if msg:
+        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
+                      disk.iv_name, node, msg)
         if not ignore_primary or node != instance.primary_node:
         if not ignore_primary or node != instance.primary_node:
-          result = False
-  return result
+          all_result = False
+  return all_result
 
 
 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
 
 
 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
@@ -2592,8 +2764,7 @@ class LUStartupInstance(LogicalUnit):
       "FORCE": self.op.force,
       }
     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
       "FORCE": self.op.force,
       }
     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -2606,15 +2777,48 @@ class LUStartupInstance(LogicalUnit):
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
 
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
 
+    # extra beparams
+    self.beparams = getattr(self.op, "beparams", {})
+    if self.beparams:
+      if not isinstance(self.beparams, dict):
+        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
+                                   " dict" % (type(self.beparams), ))
+      # fill the beparams dict
+      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
+      self.op.beparams = self.beparams
+
+    # extra hvparams
+    self.hvparams = getattr(self.op, "hvparams", {})
+    if self.hvparams:
+      if not isinstance(self.hvparams, dict):
+        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
+                                   " dict" % (type(self.hvparams), ))
+
+      # check hypervisor parameter syntax (locally)
+      cluster = self.cfg.GetClusterInfo()
+      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
+      filled_hvp = cluster.FillDict(cluster.hvparams[instance.hypervisor],
+                                    instance.hvparams)
+      filled_hvp.update(self.hvparams)
+      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
+      hv_type.CheckParameterSyntax(filled_hvp)
+      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
+      self.op.hvparams = self.hvparams
+
     _CheckNodeOnline(self, instance.primary_node)
 
     bep = self.cfg.GetClusterInfo().FillBE(instance)
     # check bridges existence
     _CheckInstanceBridgesExist(self, instance)
 
     _CheckNodeOnline(self, instance.primary_node)
 
     bep = self.cfg.GetClusterInfo().FillBE(instance)
     # check bridges existence
     _CheckInstanceBridgesExist(self, instance)
 
-    _CheckNodeFreeMemory(self, instance.primary_node,
-                         "starting instance %s" % instance.name,
-                         bep[constants.BE_MEMORY], instance.hypervisor)
+    remote_info = self.rpc.call_instance_info(instance.primary_node,
+                                              instance.name,
+                                              instance.hypervisor)
+    remote_info.Raise()
+    if not remote_info.data:
+      _CheckNodeFreeMemory(self, instance.primary_node,
+                           "starting instance %s" % instance.name,
+                           bep[constants.BE_MEMORY], instance.hypervisor)
 
   def Exec(self, feedback_fn):
     """Start the instance.
 
   def Exec(self, feedback_fn):
     """Start the instance.
@@ -2622,7 +2826,6 @@ class LUStartupInstance(LogicalUnit):
     """
     instance = self.instance
     force = self.op.force
     """
     instance = self.instance
     force = self.op.force
-    extra_args = getattr(self.op, "extra_args", "")
 
     self.cfg.MarkInstanceUp(instance.name)
 
 
     self.cfg.MarkInstanceUp(instance.name)
 
@@ -2630,10 +2833,12 @@ class LUStartupInstance(LogicalUnit):
 
     _StartInstanceDisks(self, instance, force)
 
 
     _StartInstanceDisks(self, instance, force)
 
-    result = self.rpc.call_instance_start(node_current, instance, extra_args)
-    if result.failed or not result.data:
+    result = self.rpc.call_instance_start(node_current, instance,
+                                          self.hvparams, self.beparams)
+    msg = result.RemoteFailMsg()
+    if msg:
       _ShutdownInstanceDisks(self, instance)
       _ShutdownInstanceDisks(self, instance)
-      raise errors.OpExecError("Could not start instance")
+      raise errors.OpExecError("Could not start instance: %s" % msg)
 
 
 class LURebootInstance(LogicalUnit):
 
 
 class LURebootInstance(LogicalUnit):
@@ -2663,10 +2868,10 @@ class LURebootInstance(LogicalUnit):
     """
     env = {
       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
     """
     env = {
       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
+      "REBOOT_TYPE": self.op.reboot_type,
       }
     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
       }
     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -2691,25 +2896,32 @@ class LURebootInstance(LogicalUnit):
     instance = self.instance
     ignore_secondaries = self.op.ignore_secondaries
     reboot_type = self.op.reboot_type
     instance = self.instance
     ignore_secondaries = self.op.ignore_secondaries
     reboot_type = self.op.reboot_type
-    extra_args = getattr(self.op, "extra_args", "")
 
     node_current = instance.primary_node
 
     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                        constants.INSTANCE_REBOOT_HARD]:
 
     node_current = instance.primary_node
 
     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                        constants.INSTANCE_REBOOT_HARD]:
+      for disk in instance.disks:
+        self.cfg.SetDiskID(disk, node_current)
       result = self.rpc.call_instance_reboot(node_current, instance,
       result = self.rpc.call_instance_reboot(node_current, instance,
-                                             reboot_type, extra_args)
-      if result.failed or not result.data:
-        raise errors.OpExecError("Could not reboot instance")
+                                             reboot_type)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not reboot instance: %s" % msg)
     else:
     else:
-      if not self.rpc.call_instance_shutdown(node_current, instance):
-        raise errors.OpExecError("could not shutdown instance for full reboot")
+      result = self.rpc.call_instance_shutdown(node_current, instance)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not shutdown instance for"
+                                 " full reboot: %s" % msg)
       _ShutdownInstanceDisks(self, instance)
       _StartInstanceDisks(self, instance, ignore_secondaries)
       _ShutdownInstanceDisks(self, instance)
       _StartInstanceDisks(self, instance, ignore_secondaries)
-      result = self.rpc.call_instance_start(node_current, instance, extra_args)
-      if result.failed or not result.data:
+      result = self.rpc.call_instance_start(node_current, instance, None, None)
+      msg = result.RemoteFailMsg()
+      if msg:
         _ShutdownInstanceDisks(self, instance)
         _ShutdownInstanceDisks(self, instance)
-        raise errors.OpExecError("Could not start instance for full reboot")
+        raise errors.OpExecError("Could not start instance for"
+                                 " full reboot: %s" % msg)
 
     self.cfg.MarkInstanceUp(instance.name)
 
 
     self.cfg.MarkInstanceUp(instance.name)
 
@@ -2733,8 +2945,7 @@ class LUShutdownInstance(LogicalUnit):
 
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
 
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -2756,8 +2967,9 @@ class LUShutdownInstance(LogicalUnit):
     node_current = instance.primary_node
     self.cfg.MarkInstanceDown(instance.name)
     result = self.rpc.call_instance_shutdown(node_current, instance)
     node_current = instance.primary_node
     self.cfg.MarkInstanceDown(instance.name)
     result = self.rpc.call_instance_shutdown(node_current, instance)
-    if result.failed or not result.data:
-      self.proc.LogWarning("Could not shutdown instance")
+    msg = result.RemoteFailMsg()
+    if msg:
+      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
 
     _ShutdownInstanceDisks(self, instance)
 
 
     _ShutdownInstanceDisks(self, instance)
 
@@ -2781,8 +2993,7 @@ class LUReinstallInstance(LogicalUnit):
 
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
 
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -2799,13 +3010,14 @@ class LUReinstallInstance(LogicalUnit):
     if instance.disk_template == constants.DT_DISKLESS:
       raise errors.OpPrereqError("Instance '%s' has no disks" %
                                  self.op.instance_name)
     if instance.disk_template == constants.DT_DISKLESS:
       raise errors.OpPrereqError("Instance '%s' has no disks" %
                                  self.op.instance_name)
-    if instance.status != "down":
+    if instance.admin_up:
       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                  self.op.instance_name)
     remote_info = self.rpc.call_instance_info(instance.primary_node,
                                               instance.name,
                                               instance.hypervisor)
       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                  self.op.instance_name)
     remote_info = self.rpc.call_instance_info(instance.primary_node,
                                               instance.name,
                                               instance.hypervisor)
-    if remote_info.failed or remote_info.data:
+    remote_info.Raise()
+    if remote_info.data:
       raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                  (self.op.instance_name,
                                   instance.primary_node))
       raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                  (self.op.instance_name,
                                   instance.primary_node))
@@ -2840,12 +3052,12 @@ class LUReinstallInstance(LogicalUnit):
     _StartInstanceDisks(self, inst, None)
     try:
       feedback_fn("Running the instance OS create scripts...")
     _StartInstanceDisks(self, inst, None)
     try:
       feedback_fn("Running the instance OS create scripts...")
-      result = self.rpc.call_instance_os_add(inst.primary_node, inst)
-      result.Raise()
-      if not result.data:
+      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
+      msg = result.RemoteFailMsg()
+      if msg:
         raise errors.OpExecError("Could not install OS for instance %s"
         raise errors.OpExecError("Could not install OS for instance %s"
-                                 " on node %s" %
-                                 (inst.name, inst.primary_node))
+                                 " on node %s: %s" %
+                                 (inst.name, inst.primary_node, msg))
     finally:
       _ShutdownInstanceDisks(self, inst)
 
     finally:
       _ShutdownInstanceDisks(self, inst)
 
@@ -2866,8 +3078,7 @@ class LURenameInstance(LogicalUnit):
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
     env["INSTANCE_NEW_NAME"] = self.op.new_name
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
     env["INSTANCE_NEW_NAME"] = self.op.new_name
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -2883,7 +3094,7 @@ class LURenameInstance(LogicalUnit):
                                  self.op.instance_name)
     _CheckNodeOnline(self, instance.primary_node)
 
                                  self.op.instance_name)
     _CheckNodeOnline(self, instance.primary_node)
 
-    if instance.status != "down":
+    if instance.admin_up:
       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                  self.op.instance_name)
     remote_info = self.rpc.call_instance_info(instance.primary_node,
       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                  self.op.instance_name)
     remote_info = self.rpc.call_instance_info(instance.primary_node,
@@ -2952,10 +3163,11 @@ class LURenameInstance(LogicalUnit):
     try:
       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                  old_name)
     try:
       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                  old_name)
-      if result.failed or not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
         msg = ("Could not run OS rename script for instance %s on node %s"
         msg = ("Could not run OS rename script for instance %s on node %s"
-               " (but the instance has been renamed in Ganeti)" %
-               (inst.name, inst.primary_node))
+               " (but the instance has been renamed in Ganeti): %s" %
+               (inst.name, inst.primary_node, msg))
         self.proc.LogWarning(msg)
     finally:
       _ShutdownInstanceDisks(self, inst)
         self.proc.LogWarning(msg)
     finally:
       _ShutdownInstanceDisks(self, inst)
@@ -3008,12 +3220,14 @@ class LURemoveInstance(LogicalUnit):
                  instance.name, instance.primary_node)
 
     result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
                  instance.name, instance.primary_node)
 
     result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
-    if result.failed or not result.data:
+    msg = result.RemoteFailMsg()
+    if msg:
       if self.op.ignore_failures:
       if self.op.ignore_failures:
-        feedback_fn("Warning: can't shutdown instance")
+        feedback_fn("Warning: can't shutdown instance: %s" % msg)
       else:
       else:
-        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
-                                 (instance.name, instance.primary_node))
+        raise errors.OpExecError("Could not shutdown instance %s on"
+                                 " node %s: %s" %
+                                 (instance.name, instance.primary_node, msg))
 
     logging.info("Removing block devices for instance %s", instance.name)
 
 
     logging.info("Removing block devices for instance %s", instance.name)
 
@@ -3033,18 +3247,18 @@ class LUQueryInstances(NoHooksLU):
   """Logical unit for querying instances.
 
   """
   """Logical unit for querying instances.
 
   """
-  _OP_REQP = ["output_fields", "names"]
+  _OP_REQP = ["output_fields", "names", "use_locking"]
   REQ_BGL = False
   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
   REQ_BGL = False
   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
-                                    "admin_state", "admin_ram",
+                                    "admin_state",
                                     "disk_template", "ip", "mac", "bridge",
                                     "sda_size", "sdb_size", "vcpus", "tags",
                                     "network_port", "beparams",
                                     "disk_template", "ip", "mac", "bridge",
                                     "sda_size", "sdb_size", "vcpus", "tags",
                                     "network_port", "beparams",
-                                    "(disk).(size)/([0-9]+)",
-                                    "(disk).(sizes)",
-                                    "(nic).(mac|ip|bridge)/([0-9]+)",
-                                    "(nic).(macs|ips|bridges)",
-                                    "(disk|nic).(count)",
+                                    r"(disk)\.(size)/([0-9]+)",
+                                    r"(disk)\.(sizes)", "disk_usage",
+                                    r"(nic)\.(mac|ip|bridge)/([0-9]+)",
+                                    r"(nic)\.(macs|ips|bridges)",
+                                    r"(disk|nic)\.(count)",
                                     "serial_no", "hypervisor", "hvparams",] +
                                   ["hv/%s" % name
                                    for name in constants.HVS_PARAMETERS] +
                                     "serial_no", "hypervisor", "hvparams",] +
                                   ["hv/%s" % name
                                    for name in constants.HVS_PARAMETERS] +
@@ -3067,7 +3281,8 @@ class LUQueryInstances(NoHooksLU):
     else:
       self.wanted = locking.ALL_SET
 
     else:
       self.wanted = locking.ALL_SET
 
-    self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_locking = self.do_node_query and self.op.use_locking
     if self.do_locking:
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
       self.needed_locks[locking.LEVEL_NODE] = []
     if self.do_locking:
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
       self.needed_locks[locking.LEVEL_NODE] = []
@@ -3088,19 +3303,25 @@ class LUQueryInstances(NoHooksLU):
 
     """
     all_info = self.cfg.GetAllInstancesInfo()
 
     """
     all_info = self.cfg.GetAllInstancesInfo()
-    if self.do_locking:
-      instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
-    elif self.wanted != locking.ALL_SET:
-      instance_names = self.wanted
-      missing = set(instance_names).difference(all_info.keys())
-      if missing:
-        raise errors.OpExecError(
-          "Some instances were removed before retrieving their data: %s"
-          % missing)
+    if self.wanted == locking.ALL_SET:
+      # caller didn't specify instance names, so ordering is not important
+      if self.do_locking:
+        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
+      else:
+        instance_names = all_info.keys()
+      instance_names = utils.NiceSort(instance_names)
     else:
     else:
-      instance_names = all_info.keys()
+      # caller did specify names, so we must keep the ordering
+      if self.do_locking:
+        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
+      else:
+        tgt_set = all_info.keys()
+      missing = set(self.wanted).difference(tgt_set)
+      if missing:
+        raise errors.OpExecError("Some instances were removed before"
+                                 " retrieving their data: %s" % missing)
+      instance_names = self.wanted
 
 
-    instance_names = utils.NiceSort(instance_names)
     instance_list = [all_info[iname] for iname in instance_names]
 
     # begin data gathering
     instance_list = [all_info[iname] for iname in instance_names]
 
     # begin data gathering
@@ -3110,7 +3331,7 @@ class LUQueryInstances(NoHooksLU):
 
     bad_nodes = []
     off_nodes = []
 
     bad_nodes = []
     off_nodes = []
-    if self.do_locking:
+    if self.do_node_query:
       live_data = {}
       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
       for name in nodes:
       live_data = {}
       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
       for name in nodes:
@@ -3147,7 +3368,7 @@ class LUQueryInstances(NoHooksLU):
         elif field == "snodes":
           val = list(instance.secondary_nodes)
         elif field == "admin_state":
         elif field == "snodes":
           val = list(instance.secondary_nodes)
         elif field == "admin_state":
-          val = (instance.status != "down")
+          val = instance.admin_up
         elif field == "oper_state":
           if instance.primary_node in bad_nodes:
             val = None
         elif field == "oper_state":
           if instance.primary_node in bad_nodes:
             val = None
@@ -3161,12 +3382,12 @@ class LUQueryInstances(NoHooksLU):
           else:
             running = bool(live_data.get(instance.name))
             if running:
           else:
             running = bool(live_data.get(instance.name))
             if running:
-              if instance.status != "down":
+              if instance.admin_up:
                 val = "running"
               else:
                 val = "ERROR_up"
             else:
                 val = "running"
               else:
                 val = "ERROR_up"
             else:
-              if instance.status != "down":
+              if instance.admin_up:
                 val = "ERROR_down"
               else:
                 val = "ADMIN_down"
                 val = "ERROR_down"
               else:
                 val = "ADMIN_down"
@@ -3191,6 +3412,9 @@ class LUQueryInstances(NoHooksLU):
             val = instance.FindDisk(idx).size
           except errors.OpPrereqError:
             val = None
             val = instance.FindDisk(idx).size
           except errors.OpPrereqError:
             val = None
+        elif field == "disk_usage": # total disk usage per node
+          disk_sizes = [{'size': disk.size} for disk in instance.disks]
+          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
         elif field == "tags":
           val = list(instance.GetTags())
         elif field == "serial_no":
         elif field == "tags":
           val = list(instance.GetTags())
         elif field == "serial_no":
@@ -3310,6 +3534,7 @@ class LUFailoverInstance(LogicalUnit):
 
     target_node = secondary_nodes[0]
     _CheckNodeOnline(self, target_node)
 
     target_node = secondary_nodes[0]
     _CheckNodeOnline(self, target_node)
+    _CheckNodeNotDrained(self, target_node)
     # check memory requirements on the secondary node
     _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                          instance.name, bep[constants.BE_MEMORY],
     # check memory requirements on the secondary node
     _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                          instance.name, bep[constants.BE_MEMORY],
@@ -3340,7 +3565,7 @@ class LUFailoverInstance(LogicalUnit):
     for dev in instance.disks:
       # for drbd, these are drbd over lvm
       if not _CheckDiskConsistency(self, dev, target_node, False):
     for dev in instance.disks:
       # for drbd, these are drbd over lvm
       if not _CheckDiskConsistency(self, dev, target_node, False):
-        if instance.status == "up" and not self.op.ignore_consistency:
+        if instance.admin_up and not self.op.ignore_consistency:
           raise errors.OpExecError("Disk %s is degraded on target node,"
                                    " aborting failover." % dev.iv_name)
 
           raise errors.OpExecError("Disk %s is degraded on target node,"
                                    " aborting failover." % dev.iv_name)
 
@@ -3349,15 +3574,17 @@ class LUFailoverInstance(LogicalUnit):
                  instance.name, source_node)
 
     result = self.rpc.call_instance_shutdown(source_node, instance)
                  instance.name, source_node)
 
     result = self.rpc.call_instance_shutdown(source_node, instance)
-    if result.failed or not result.data:
+    msg = result.RemoteFailMsg()
+    if msg:
       if self.op.ignore_consistency:
         self.proc.LogWarning("Could not shutdown instance %s on node %s."
       if self.op.ignore_consistency:
         self.proc.LogWarning("Could not shutdown instance %s on node %s."
-                             " Proceeding"
-                             " anyway. Please make sure node %s is down",
-                             instance.name, source_node, source_node)
+                             " Proceeding anyway. Please make sure node"
+                             " %s is down. Error details: %s",
+                             instance.name, source_node, source_node, msg)
       else:
       else:
-        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
-                                 (instance.name, source_node))
+        raise errors.OpExecError("Could not shutdown instance %s on"
+                                 " node %s: %s" %
+                                 (instance.name, source_node, msg))
 
     feedback_fn("* deactivating the instance's disks on source node")
     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
 
     feedback_fn("* deactivating the instance's disks on source node")
     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
@@ -3368,7 +3595,7 @@ class LUFailoverInstance(LogicalUnit):
     self.cfg.Update(instance)
 
     # Only start the instance if it's marked as up
     self.cfg.Update(instance)
 
     # Only start the instance if it's marked as up
-    if instance.status == "up":
+    if instance.admin_up:
       feedback_fn("* activating the instance's disks on target node")
       logging.info("Starting instance %s on node %s",
                    instance.name, target_node)
       feedback_fn("* activating the instance's disks on target node")
       logging.info("Starting instance %s on node %s",
                    instance.name, target_node)
@@ -3380,11 +3607,12 @@ class LUFailoverInstance(LogicalUnit):
         raise errors.OpExecError("Can't activate the instance's disks")
 
       feedback_fn("* starting the instance on the target node")
         raise errors.OpExecError("Can't activate the instance's disks")
 
       feedback_fn("* starting the instance on the target node")
-      result = self.rpc.call_instance_start(target_node, instance, None)
-      if result.failed or not result.data:
+      result = self.rpc.call_instance_start(target_node, instance, None, None)
+      msg = result.RemoteFailMsg()
+      if msg:
         _ShutdownInstanceDisks(self, instance)
         _ShutdownInstanceDisks(self, instance)
-        raise errors.OpExecError("Could not start instance %s on node %s." %
-                                 (instance.name, target_node))
+        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
+                                 (instance.name, target_node, msg))
 
 
 class LUMigrateInstance(LogicalUnit):
 
 
 class LUMigrateInstance(LogicalUnit):
@@ -3416,6 +3644,8 @@ class LUMigrateInstance(LogicalUnit):
 
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
 
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
+    env["MIGRATE_LIVE"] = self.op.live
+    env["MIGRATE_CLEANUP"] = self.op.cleanup
     nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
     return env, nl, nl
 
     nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
     return env, nl, nl
 
@@ -3437,8 +3667,8 @@ class LUMigrateInstance(LogicalUnit):
 
     secondary_nodes = instance.secondary_nodes
     if not secondary_nodes:
 
     secondary_nodes = instance.secondary_nodes
     if not secondary_nodes:
-      raise errors.ProgrammerError("no secondary node but using "
-                                   "drbd8 disk template")
+      raise errors.ConfigurationError("No secondary node but using"
+                                      " drbd8 disk template")
 
     i_be = self.cfg.GetClusterInfo().FillBE(instance)
 
 
     i_be = self.cfg.GetClusterInfo().FillBE(instance)
 
@@ -3457,6 +3687,7 @@ class LUMigrateInstance(LogicalUnit):
                                  (brlist, target_node))
 
     if not self.op.cleanup:
                                  (brlist, target_node))
 
     if not self.op.cleanup:
+      _CheckNodeNotDrained(self, target_node)
       result = self.rpc.call_instance_migratable(instance.primary_node,
                                                  instance)
       msg = result.RemoteFailMsg()
       result = self.rpc.call_instance_migratable(instance.primary_node,
                                                  instance)
       msg = result.RemoteFailMsg()
@@ -3485,7 +3716,7 @@ class LUMigrateInstance(LogicalUnit):
         if msg:
           raise errors.OpExecError("Cannot resync disks on node %s: %s" %
                                    (node, msg))
         if msg:
           raise errors.OpExecError("Cannot resync disks on node %s: %s" %
                                    (node, msg))
-        node_done, node_percent = nres.data[1]
+        node_done, node_percent = nres.payload
         all_done = all_done and node_done
         if node_percent is not None:
           min_percent = min(min_percent, node_percent)
         all_done = all_done and node_done
         if node_percent is not None:
           min_percent = min(min_percent, node_percent)
@@ -3608,6 +3839,41 @@ class LUMigrateInstance(LogicalUnit):
 
     self.feedback_fn("* done")
 
 
     self.feedback_fn("* done")
 
+  def _RevertDiskStatus(self):
+    """Try to revert the disk status after a failed migration.
+
+    """
+    target_node = self.target_node
+    try:
+      self._EnsureSecondary(target_node)
+      self._GoStandalone()
+      self._GoReconnect(False)
+      self._WaitUntilSync()
+    except errors.OpExecError, err:
+      self.LogWarning("Migration failed and I can't reconnect the"
+                      " drives: error '%s'\n"
+                      "Please look and recover the instance status" %
+                      str(err))
+
+  def _AbortMigration(self):
+    """Call the hypervisor code to abort a started migration.
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    migration_info = self.migration_info
+
+    abort_result = self.rpc.call_finalize_migration(target_node,
+                                                    instance,
+                                                    migration_info,
+                                                    False)
+    abort_msg = abort_result.RemoteFailMsg()
+    if abort_msg:
+      logging.error("Aborting migration failed on target node %s: %s" %
+                    (target_node, abort_msg))
+      # Don't raise an exception here, as we still have to try to revert the
+      # disk status, even if this step failed.
+
   def _ExecMigration(self):
     """Migrate an instance.
 
   def _ExecMigration(self):
     """Migrate an instance.
 
@@ -3631,11 +3897,38 @@ class LUMigrateInstance(LogicalUnit):
                                  " synchronized on target node,"
                                  " aborting migrate." % dev.iv_name)
 
                                  " synchronized on target node,"
                                  " aborting migrate." % dev.iv_name)
 
+    # First get the migration information from the remote node
+    result = self.rpc.call_migration_info(source_node, instance)
+    msg = result.RemoteFailMsg()
+    if msg:
+      log_err = ("Failed fetching source migration information from %s: %s" %
+                 (source_node, msg))
+      logging.error(log_err)
+      raise errors.OpExecError(log_err)
+
+    self.migration_info = migration_info = result.payload
+
+    # Then switch the disks to master/master mode
     self._EnsureSecondary(target_node)
     self._GoStandalone()
     self._GoReconnect(True)
     self._WaitUntilSync()
 
     self._EnsureSecondary(target_node)
     self._GoStandalone()
     self._GoReconnect(True)
     self._WaitUntilSync()
 
+    self.feedback_fn("* preparing %s to accept the instance" % target_node)
+    result = self.rpc.call_accept_instance(target_node,
+                                           instance,
+                                           migration_info,
+                                           self.nodes_ip[target_node])
+
+    msg = result.RemoteFailMsg()
+    if msg:
+      logging.error("Instance pre-migration failed, trying to revert"
+                    " disk status: %s", msg)
+      self._AbortMigration()
+      self._RevertDiskStatus()
+      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
+                               (instance.name, msg))
+
     self.feedback_fn("* migrating instance to %s" % target_node)
     time.sleep(10)
     result = self.rpc.call_instance_migrate(source_node, instance,
     self.feedback_fn("* migrating instance to %s" % target_node)
     time.sleep(10)
     result = self.rpc.call_instance_migrate(source_node, instance,
@@ -3645,17 +3938,8 @@ class LUMigrateInstance(LogicalUnit):
     if msg:
       logging.error("Instance migration failed, trying to revert"
                     " disk status: %s", msg)
     if msg:
       logging.error("Instance migration failed, trying to revert"
                     " disk status: %s", msg)
-      try:
-        self._EnsureSecondary(target_node)
-        self._GoStandalone()
-        self._GoReconnect(False)
-        self._WaitUntilSync()
-      except errors.OpExecError, err:
-        self.LogWarning("Migration failed and I can't reconnect the"
-                        " drives: error '%s'\n"
-                        "Please look and recover the instance status" %
-                        str(err))
-
+      self._AbortMigration()
+      self._RevertDiskStatus()
       raise errors.OpExecError("Could not migrate instance %s: %s" %
                                (instance.name, msg))
     time.sleep(10)
       raise errors.OpExecError("Could not migrate instance %s: %s" %
                                (instance.name, msg))
     time.sleep(10)
@@ -3664,6 +3948,17 @@ class LUMigrateInstance(LogicalUnit):
     # distribute new instance config to the other nodes
     self.cfg.Update(instance)
 
     # distribute new instance config to the other nodes
     self.cfg.Update(instance)
 
+    result = self.rpc.call_finalize_migration(target_node,
+                                              instance,
+                                              migration_info,
+                                              True)
+    msg = result.RemoteFailMsg()
+    if msg:
+      logging.error("Instance migration succeeded, but finalization failed:"
+                    " %s" % msg)
+      raise errors.OpExecError("Could not finalize instance migration: %s" %
+                               msg)
+
     self._EnsureSecondary(source_node)
     self._WaitUntilSync()
     self._GoStandalone()
     self._EnsureSecondary(source_node)
     self._WaitUntilSync()
     self._GoStandalone()
@@ -3714,7 +4009,7 @@ def _CreateBlockDev(lu, node, instance, device, force_create,
       (this will be represented as a LVM tag)
   @type force_open: boolean
   @param force_open: this parameter will be passes to the
       (this will be represented as a LVM tag)
   @type force_open: boolean
   @param force_open: this parameter will be passes to the
-      L{backend.CreateBlockDevice} function where it specifies
+      L{backend.BlockdevCreate} function where it specifies
       whether we run on primary or not, and it affects both
       the child assembly and the device own Open() execution
 
       whether we run on primary or not, and it affects both
       the child assembly and the device own Open() execution
 
@@ -3730,14 +4025,40 @@ def _CreateBlockDev(lu, node, instance, device, force_create,
   if not force_create:
     return
 
   if not force_create:
     return
 
+  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
+
+
+def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
+  """Create a single block device on a given node.
+
+  This will not recurse over children of the device, so they must be
+  created in advance.
+
+  @param lu: the lu on whose behalf we execute
+  @param node: the node on which to create the device
+  @type instance: L{objects.Instance}
+  @param instance: the instance which owns the device
+  @type device: L{objects.Disk}
+  @param device: the device to create
+  @param info: the extra 'metadata' we should attach to the device
+      (this will be represented as a LVM tag)
+  @type force_open: boolean
+  @param force_open: this parameter will be passed to the
+      L{backend.BlockdevCreate} function where it specifies
+      whether we run on primary or not, and it affects both
+      the child assembly and the device own Open() execution
+
+  """
   lu.cfg.SetDiskID(device, node)
   lu.cfg.SetDiskID(device, node)
-  new_id = lu.rpc.call_blockdev_create(node, device, device.size,
+  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                        instance.name, force_open, info)
                                        instance.name, force_open, info)
-  if new_id.failed or not new_id.data:
+  msg = result.RemoteFailMsg()
+  if msg:
     raise errors.OpExecError("Can't create block device %s on"
     raise errors.OpExecError("Can't create block device %s on"
-                             " node %s" % (device, node))
+                             " node %s for instance %s: %s" %
+                             (device, node, instance.name, msg))
   if device.physical_id is None:
   if device.physical_id is None:
-    device.physical_id = new_id
+    device.physical_id = result.payload
 
 
 def _GenerateUniqueNames(lu, exts):
 
 
 def _GenerateUniqueNames(lu, exts):
@@ -3799,7 +4120,8 @@ def _GenerateDiskTemplate(lu, template_name,
       disk_index = idx + base_index
       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                               logical_id=(vgname, names[idx]),
       disk_index = idx + base_index
       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                               logical_id=(vgname, names[idx]),
-                              iv_name="disk/%d" % disk_index)
+                              iv_name="disk/%d" % disk_index,
+                              mode=disk["mode"])
       disks.append(disk_dev)
   elif template_name == constants.DT_DRBD8:
     if len(secondary_nodes) != 1:
       disks.append(disk_dev)
   elif template_name == constants.DT_DRBD8:
     if len(secondary_nodes) != 1:
@@ -3819,6 +4141,7 @@ def _GenerateDiskTemplate(lu, template_name,
                                       disk["size"], names[idx*2:idx*2+2],
                                       "disk/%d" % disk_index,
                                       minors[idx*2], minors[idx*2+1])
                                       disk["size"], names[idx*2:idx*2+2],
                                       "disk/%d" % disk_index,
                                       minors[idx*2], minors[idx*2+1])
+      disk_dev.mode = disk["mode"]
       disks.append(disk_dev)
   elif template_name == constants.DT_FILE:
     if len(secondary_nodes) != 0:
       disks.append(disk_dev)
   elif template_name == constants.DT_FILE:
     if len(secondary_nodes) != 0:
@@ -3830,7 +4153,8 @@ def _GenerateDiskTemplate(lu, template_name,
                               iv_name="disk/%d" % disk_index,
                               logical_id=(file_driver,
                                           "%s/disk%d" % (file_storage_dir,
                               iv_name="disk/%d" % disk_index,
                               logical_id=(file_driver,
                                           "%s/disk%d" % (file_storage_dir,
-                                                         idx)))
+                                                         disk_index)),
+                              mode=disk["mode"])
       disks.append(disk_dev)
   else:
     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
       disks.append(disk_dev)
   else:
     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
@@ -3900,15 +4224,15 @@ def _RemoveDisks(lu, instance):
   """
   logging.info("Removing block devices for instance %s", instance.name)
 
   """
   logging.info("Removing block devices for instance %s", instance.name)
 
-  result = True
+  all_result = True
   for device in instance.disks:
     for node, disk in device.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(disk, node)
   for device in instance.disks:
     for node, disk in device.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(disk, node)
-      result = lu.rpc.call_blockdev_remove(node, disk)
-      if result.failed or not result.data:
-        lu.proc.LogWarning("Could not remove block device %s on node %s,"
-                           " continuing anyway", device.iv_name, node)
-        result = False
+      msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
+      if msg:
+        lu.LogWarning("Could not remove block device %s on node %s,"
+                      " continuing anyway: %s", device.iv_name, node, msg)
+        all_result = False
 
   if instance.disk_template == constants.DT_FILE:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
 
   if instance.disk_template == constants.DT_FILE:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
@@ -3916,9 +4240,9 @@ def _RemoveDisks(lu, instance):
                                                  file_storage_dir)
     if result.failed or not result.data:
       logging.error("Could not remove directory '%s'", file_storage_dir)
                                                  file_storage_dir)
     if result.failed or not result.data:
       logging.error("Could not remove directory '%s'", file_storage_dir)
-      result = False
+      all_result = False
 
 
-  return result
+  return all_result
 
 
 def _ComputeDiskSize(disk_template, disks):
 
 
 def _ComputeDiskSize(disk_template, disks):
@@ -3963,13 +4287,12 @@ def _CheckHVParams(lu, nodenames, hvname, hvparams):
                                                   hvparams)
   for node in nodenames:
     info = hvinfo[node]
                                                   hvparams)
   for node in nodenames:
     info = hvinfo[node]
-    info.Raise()
-    if not info.data or not isinstance(info.data, (tuple, list)):
-      raise errors.OpPrereqError("Cannot get current information"
-                                 " from node '%s' (%s)" % (node, info.data))
-    if not info.data[0]:
-      raise errors.OpPrereqError("Hypervisor parameter validation failed:"
-                                 " %s" % info.data[1])
+    if info.offline:
+      continue
+    msg = info.RemoteFailMsg()
+    if msg:
+      raise errors.OpPrereqError("Hypervisor parameter validation"
+                                 " failed on node %s: %s" % (node, msg))
 
 
 class LUCreateInstance(LogicalUnit):
 
 
 class LUCreateInstance(LogicalUnit):
@@ -4029,14 +4352,14 @@ class LUCreateInstance(LogicalUnit):
                                   ",".join(enabled_hvs)))
 
     # check hypervisor parameter syntax (locally)
                                   ",".join(enabled_hvs)))
 
     # check hypervisor parameter syntax (locally)
-
+    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
     filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
                                   self.op.hvparams)
     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
     hv_type.CheckParameterSyntax(filled_hvp)
 
     # fill and remember the beparams dict
     filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
                                   self.op.hvparams)
     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
     hv_type.CheckParameterSyntax(filled_hvp)
 
     # fill and remember the beparams dict
-    utils.CheckBEParams(self.op.beparams)
+    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
     self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                     self.op.beparams)
 
     self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                     self.op.beparams)
 
@@ -4076,7 +4399,9 @@ class LUCreateInstance(LogicalUnit):
           raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                      mac)
       # bridge verification
           raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                      mac)
       # bridge verification
-      bridge = nic.get("bridge", self.cfg.GetDefBridge())
+      bridge = nic.get("bridge", None)
+      if bridge is None:
+        bridge = self.cfg.GetDefBridge()
       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
 
     # disk checks/pre-build
       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
 
     # disk checks/pre-build
@@ -4191,23 +4516,24 @@ class LUCreateInstance(LogicalUnit):
 
     """
     env = {
 
     """
     env = {
-      "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
-      "INSTANCE_DISK_SIZE": ",".join(str(d["size"]) for d in self.disks),
-      "INSTANCE_ADD_MODE": self.op.mode,
+      "ADD_MODE": self.op.mode,
       }
     if self.op.mode == constants.INSTANCE_IMPORT:
       }
     if self.op.mode == constants.INSTANCE_IMPORT:
-      env["INSTANCE_SRC_NODE"] = self.op.src_node
-      env["INSTANCE_SRC_PATH"] = self.op.src_path
-      env["INSTANCE_SRC_IMAGES"] = self.src_images
+      env["SRC_NODE"] = self.op.src_node
+      env["SRC_PATH"] = self.op.src_path
+      env["SRC_IMAGES"] = self.src_images
 
 
-    env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
+    env.update(_BuildInstanceHookEnv(
+      name=self.op.instance_name,
       primary_node=self.op.pnode,
       secondary_nodes=self.secondaries,
       primary_node=self.op.pnode,
       secondary_nodes=self.secondaries,
-      status=self.instance_status,
+      status=self.op.start,
       os_type=self.op.os_type,
       memory=self.be_full[constants.BE_MEMORY],
       vcpus=self.be_full[constants.BE_VCPUS],
       nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
       os_type=self.op.os_type,
       memory=self.be_full[constants.BE_MEMORY],
       vcpus=self.be_full[constants.BE_VCPUS],
       nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
+      disk_template=self.op.disk_template,
+      disks=[(d["size"], d["mode"]) for d in self.disks],
     ))
 
     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
     ))
 
     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
@@ -4224,7 +4550,6 @@ class LUCreateInstance(LogicalUnit):
       raise errors.OpPrereqError("Cluster does not support lvm-based"
                                  " instances")
 
       raise errors.OpPrereqError("Cluster does not support lvm-based"
                                  " instances")
 
-
     if self.op.mode == constants.INSTANCE_IMPORT:
       src_node = self.op.src_node
       src_path = self.op.src_path
     if self.op.mode == constants.INSTANCE_IMPORT:
       src_node = self.op.src_node
       src_path = self.op.src_path
@@ -4290,6 +4615,7 @@ class LUCreateInstance(LogicalUnit):
             nic_mac_ini = 'nic%d_mac' % idx
             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
 
             nic_mac_ini = 'nic%d_mac' % idx
             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
 
+    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
     # ip ping checks (we use the same ip that was resolved in ExpandNames)
     if self.op.start and not self.op.ip_check:
       raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
     # ip ping checks (we use the same ip that was resolved in ExpandNames)
     if self.op.start and not self.op.ip_check:
       raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
@@ -4300,6 +4626,18 @@ class LUCreateInstance(LogicalUnit):
         raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                    (self.check_ip, self.op.instance_name))
 
         raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                    (self.check_ip, self.op.instance_name))
 
+    #### mac address generation
+    # Generating the MAC address here lets both the allocator and the hooks
+    # see the real, final MAC address rather than the 'auto' or 'generate'
+    # placeholder value. There is a race between this generation and the
+    # creation of the instance object: the MAC is known to be valid now, but
+    # it may no longer be when the instance is actually added. If that
+    # happens, adding the instance aborts because of the duplicate MAC and
+    # the creation job fails.
+    for nic in self.nics:
+      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+        nic.mac = self.cfg.GenerateMAC()
+
     #### allocator run
 
     if self.op.iallocator is not None:
     #### allocator run
 
     if self.op.iallocator is not None:
@@ -4314,6 +4652,9 @@ class LUCreateInstance(LogicalUnit):
     if pnode.offline:
       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                  pnode.name)
     if pnode.offline:
       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                  pnode.name)
+    if pnode.drained:
+      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
+                                 pnode.name)
 
     self.secondaries = []
 
 
     self.secondaries = []
 
@@ -4325,8 +4666,9 @@ class LUCreateInstance(LogicalUnit):
       if self.op.snode == pnode.name:
         raise errors.OpPrereqError("The secondary node cannot be"
                                    " the primary node.")
       if self.op.snode == pnode.name:
         raise errors.OpPrereqError("The secondary node cannot be"
                                    " the primary node.")
-      self.secondaries.append(self.op.snode)
       _CheckNodeOnline(self, self.op.snode)
       _CheckNodeOnline(self, self.op.snode)
+      _CheckNodeNotDrained(self, self.op.snode)
+      self.secondaries.append(self.op.snode)
 
     nodenames = [pnode.name] + self.secondaries
 
 
     nodenames = [pnode.name] + self.secondaries
 
@@ -4378,11 +4720,6 @@ class LUCreateInstance(LogicalUnit):
                            self.be_full[constants.BE_MEMORY],
                            self.op.hypervisor)
 
                            self.be_full[constants.BE_MEMORY],
                            self.op.hypervisor)
 
-    if self.op.start:
-      self.instance_status = 'up'
-    else:
-      self.instance_status = 'down'
-
   def Exec(self, feedback_fn):
     """Create and add the instance to the cluster.
 
   def Exec(self, feedback_fn):
     """Create and add the instance to the cluster.
 
@@ -4390,10 +4727,6 @@ class LUCreateInstance(LogicalUnit):
     instance = self.op.instance_name
     pnode_name = self.pnode.name
 
     instance = self.op.instance_name
     pnode_name = self.pnode.name
 
-    for nic in self.nics:
-      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
-        nic.mac = self.cfg.GenerateMAC()
-
     ht_kind = self.op.hypervisor
     if ht_kind in constants.HTS_REQ_PORT:
       network_port = self.cfg.AllocatePort()
     ht_kind = self.op.hypervisor
     if ht_kind in constants.HTS_REQ_PORT:
       network_port = self.cfg.AllocatePort()
@@ -4428,7 +4761,7 @@ class LUCreateInstance(LogicalUnit):
                             primary_node=pnode_name,
                             nics=self.nics, disks=disks,
                             disk_template=self.op.disk_template,
                             primary_node=pnode_name,
                             nics=self.nics, disks=disks,
                             disk_template=self.op.disk_template,
-                            status=self.instance_status,
+                            admin_up=False,
                             network_port=network_port,
                             beparams=self.op.beparams,
                             hvparams=self.op.hvparams,
                             network_port=network_port,
                             beparams=self.op.beparams,
                             hvparams=self.op.hvparams,
@@ -4452,8 +4785,6 @@ class LUCreateInstance(LogicalUnit):
     # Declare that we don't want to remove the instance lock anymore, as we've
     # added the instance to the config
     del self.remove_locks[locking.LEVEL_INSTANCE]
     # Declare that we don't want to remove the instance lock anymore, as we've
     # added the instance to the config
     del self.remove_locks[locking.LEVEL_INSTANCE]
-    # Remove the temp. assignements for the instance's drbds
-    self.cfg.ReleaseDRBDMinors(instance)
     # Unlock all the nodes
     if self.op.mode == constants.INSTANCE_IMPORT:
       nodes_keep = [self.op.src_node]
     # Unlock all the nodes
     if self.op.mode == constants.INSTANCE_IMPORT:
       nodes_keep = [self.op.src_node]
@@ -4489,12 +4820,12 @@ class LUCreateInstance(LogicalUnit):
     if iobj.disk_template != constants.DT_DISKLESS:
       if self.op.mode == constants.INSTANCE_CREATE:
         feedback_fn("* running the instance OS create scripts...")
     if iobj.disk_template != constants.DT_DISKLESS:
       if self.op.mode == constants.INSTANCE_CREATE:
         feedback_fn("* running the instance OS create scripts...")
-        result = self.rpc.call_instance_os_add(pnode_name, iobj)
-        result.Raise()
-        if not result.data:
+        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
+        msg = result.RemoteFailMsg()
+        if msg:
           raise errors.OpExecError("Could not add os for instance %s"
           raise errors.OpExecError("Could not add os for instance %s"
-                                   " on node %s" %
-                                   (instance, pnode_name))
+                                   " on node %s: %s" %
+                                   (instance, pnode_name, msg))
 
       elif self.op.mode == constants.INSTANCE_IMPORT:
         feedback_fn("* running the instance OS import scripts...")
 
       elif self.op.mode == constants.INSTANCE_IMPORT:
         feedback_fn("* running the instance OS import scripts...")
@@ -4516,12 +4847,14 @@ class LUCreateInstance(LogicalUnit):
                                      % self.op.mode)
 
     if self.op.start:
                                      % self.op.mode)
 
     if self.op.start:
+      iobj.admin_up = True
+      self.cfg.Update(iobj)
       logging.info("Starting instance %s on node %s", instance, pnode_name)
       feedback_fn("* starting instance...")
       logging.info("Starting instance %s on node %s", instance, pnode_name)
       feedback_fn("* starting instance...")
-      result = self.rpc.call_instance_start(pnode_name, iobj, None)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Could not start instance")
+      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not start instance: %s" % msg)
 
 
 class LUConnectConsole(NoHooksLU):
 
 
 class LUConnectConsole(NoHooksLU):
@@ -4566,7 +4899,12 @@ class LUConnectConsole(NoHooksLU):
     logging.debug("Connecting to console of %s on %s", instance.name, node)
 
     hyper = hypervisor.GetHypervisor(instance.hypervisor)
     logging.debug("Connecting to console of %s on %s", instance.name, node)
 
     hyper = hypervisor.GetHypervisor(instance.hypervisor)
-    console_cmd = hyper.GetShellCommandForConsole(instance)
+    cluster = self.cfg.GetClusterInfo()
+    # beparams and hvparams are passed separately, to avoid editing the
+    # instance and then saving the defaults in the instance itself.
+    hvparams = cluster.FillHV(instance)
+    beparams = cluster.FillBE(instance)
+    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
 
     # build ssh cmdline
     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
 
     # build ssh cmdline
     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
@@ -4614,6 +4952,10 @@ class LUReplaceDisks(LogicalUnit):
         raise errors.OpPrereqError("Node '%s' not known" %
                                    self.op.remote_node)
       self.op.remote_node = remote_node
         raise errors.OpPrereqError("Node '%s' not known" %
                                    self.op.remote_node)
       self.op.remote_node = remote_node
+      # Warning: do not remove the locking of the new secondary here
+      # unless DRBD8.AddChildren is changed to work in parallel;
+      # currently it doesn't since parallel invocations of
+      # FindUnusedMinor will conflict
       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
     else:
       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
     else:
@@ -4719,6 +5061,7 @@ class LUReplaceDisks(LogicalUnit):
       n1 = self.new_node = remote_node
       n2 = self.oth_node = instance.primary_node
       self.tgt_node = self.sec_node
       n1 = self.new_node = remote_node
       n2 = self.oth_node = instance.primary_node
       self.tgt_node = self.sec_node
+      _CheckNodeNotDrained(self, remote_node)
     else:
       raise errors.ProgrammerError("Unhandled disk replace mode")
 
     else:
       raise errors.ProgrammerError("Unhandled disk replace mode")
 
@@ -4781,9 +5124,13 @@ class LUReplaceDisks(LogicalUnit):
       for node in tgt_node, oth_node:
         info("checking disk/%d on %s" % (idx, node))
         cfg.SetDiskID(dev, node)
       for node in tgt_node, oth_node:
         info("checking disk/%d on %s" % (idx, node))
         cfg.SetDiskID(dev, node)
-        if not self.rpc.call_blockdev_find(node, dev):
-          raise errors.OpExecError("Can't find disk/%d on node %s" %
-                                   (idx, node))
+        result = self.rpc.call_blockdev_find(node, dev)
+        msg = result.RemoteFailMsg()
+        if not msg and not result.payload:
+          msg = "disk not found"
+        if msg:
+          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
+                                   (idx, node, msg))
 
     # Step: check other node consistency
     self.proc.LogStep(2, steps_total, "check peer consistency")
 
     # Step: check other node consistency
     self.proc.LogStep(2, steps_total, "check peer consistency")
@@ -4846,8 +5193,9 @@ class LUReplaceDisks(LogicalUnit):
       # build the rename list based on what LVs exist on the node
       rlist = []
       for to_ren in old_lvs:
       # build the rename list based on what LVs exist on the node
       rlist = []
       for to_ren in old_lvs:
-        find_res = self.rpc.call_blockdev_find(tgt_node, to_ren)
-        if not find_res.failed and find_res.data is not None: # device exists
+        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
+        if not result.RemoteFailMsg() and result.payload:
+          # device exists
           rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
 
       info("renaming the old LVs on the target node")
           rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
 
       info("renaming the old LVs on the target node")
@@ -4876,10 +5224,10 @@ class LUReplaceDisks(LogicalUnit):
       result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
       if result.failed or not result.data:
         for new_lv in new_lvs:
       result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
       if result.failed or not result.data:
         for new_lv in new_lvs:
-          result = self.rpc.call_blockdev_remove(tgt_node, new_lv)
-          if result.failed or not result.data:
-            warning("Can't rollback device %s", hint="manually cleanup unused"
-                    " logical volumes")
+          msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
+          if msg:
+            warning("Can't rollback device %s: %s", dev, msg,
+                    hint="cleanup manually the unused logical volumes")
         raise errors.OpExecError("Can't add local storage to drbd")
 
       dev.children = new_lvs
         raise errors.OpExecError("Can't add local storage to drbd")
 
       dev.children = new_lvs
@@ -4897,7 +5245,13 @@ class LUReplaceDisks(LogicalUnit):
     for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
       cfg.SetDiskID(dev, instance.primary_node)
       result = self.rpc.call_blockdev_find(instance.primary_node, dev)
     for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
       cfg.SetDiskID(dev, instance.primary_node)
       result = self.rpc.call_blockdev_find(instance.primary_node, dev)
-      if result.failed or result.data[5]:
+      msg = result.RemoteFailMsg()
+      if not msg and not result.payload:
+        msg = "disk not found"
+      if msg:
+        raise errors.OpExecError("Can't find DRBD device %s: %s" %
+                                 (name, msg))
+      if result.payload[5]:
         raise errors.OpExecError("DRBD device %s is degraded!" % name)
 
     # Step: remove old storage
         raise errors.OpExecError("DRBD device %s is degraded!" % name)
 
     # Step: remove old storage
@@ -4906,9 +5260,10 @@ class LUReplaceDisks(LogicalUnit):
       info("remove logical volumes for %s" % name)
       for lv in old_lvs:
         cfg.SetDiskID(lv, tgt_node)
       info("remove logical volumes for %s" % name)
       for lv in old_lvs:
         cfg.SetDiskID(lv, tgt_node)
-        result = self.rpc.call_blockdev_remove(tgt_node, lv)
-        if result.failed or not result.data:
-          warning("Can't remove old LV", hint="manually remove unused LVs")
+        msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
+        if msg:
+          warning("Can't remove old LV: %s" % msg,
+                  hint="manually remove unused LVs")
           continue
 
   def _ExecD8Secondary(self, feedback_fn):
           continue
 
   def _ExecD8Secondary(self, feedback_fn):
@@ -4961,10 +5316,12 @@ class LUReplaceDisks(LogicalUnit):
       info("checking disk/%d on %s" % (idx, pri_node))
       cfg.SetDiskID(dev, pri_node)
       result = self.rpc.call_blockdev_find(pri_node, dev)
       info("checking disk/%d on %s" % (idx, pri_node))
       cfg.SetDiskID(dev, pri_node)
       result = self.rpc.call_blockdev_find(pri_node, dev)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Can't find disk/%d on node %s" %
-                                 (idx, pri_node))
+      msg = result.RemoteFailMsg()
+      if not msg and not result.payload:
+        msg = "disk not found"
+      if msg:
+        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
+                                 (idx, pri_node, msg))
 
     # Step: check other node consistency
     self.proc.LogStep(2, steps_total, "check peer consistency")
 
     # Step: check other node consistency
     self.proc.LogStep(2, steps_total, "check peer consistency")
@@ -5017,9 +5374,9 @@ class LUReplaceDisks(LogicalUnit):
                               logical_id=new_alone_id,
                               children=dev.children)
       try:
                               logical_id=new_alone_id,
                               children=dev.children)
       try:
-        _CreateBlockDev(self, new_node, instance, new_drbd, False,
-                        _GetInstanceInfoText(instance), False)
-      except error.BlockDeviceError:
+        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
+                              _GetInstanceInfoText(instance), False)
+      except errors.GenericError:
         self.cfg.ReleaseDRBDMinors(instance.name)
         raise
 
         self.cfg.ReleaseDRBDMinors(instance.name)
         raise
 
@@ -5027,9 +5384,10 @@ class LUReplaceDisks(LogicalUnit):
       # we have new devices, shutdown the drbd on the old secondary
       info("shutting down drbd for disk/%d on old node" % idx)
       cfg.SetDiskID(dev, old_node)
       # we have new devices, shutdown the drbd on the old secondary
       info("shutting down drbd for disk/%d on old node" % idx)
       cfg.SetDiskID(dev, old_node)
-      result = self.rpc.call_blockdev_shutdown(old_node, dev)
-      if result.failed or not result.data:
-        warning("Failed to shutdown drbd for disk/%d on old node" % idx,
+      msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
+      if msg:
+        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
+                (idx, msg),
                 hint="Please cleanup this device manually as soon as possible")
 
     info("detaching primary drbds from the network (=> standalone)")
                 hint="Please cleanup this device manually as soon as possible")
 
     info("detaching primary drbds from the network (=> standalone)")
@@ -5050,9 +5408,6 @@ class LUReplaceDisks(LogicalUnit):
       dev.logical_id = new_logical_id
       cfg.SetDiskID(dev, pri_node)
     cfg.Update(instance)
       dev.logical_id = new_logical_id
       cfg.SetDiskID(dev, pri_node)
     cfg.Update(instance)
-    # we can remove now the temp minors as now the new values are
-    # written to the config file (and therefore stable)
-    self.cfg.ReleaseDRBDMinors(instance.name)
 
     # and now perform the drbd attach
     info("attaching primary drbds to new secondary (standalone => connected)")
 
     # and now perform the drbd attach
     info("attaching primary drbds to new secondary (standalone => connected)")
@@ -5076,8 +5431,13 @@ class LUReplaceDisks(LogicalUnit):
     for idx, (dev, old_lvs, _) in iv_names.iteritems():
       cfg.SetDiskID(dev, pri_node)
       result = self.rpc.call_blockdev_find(pri_node, dev)
     for idx, (dev, old_lvs, _) in iv_names.iteritems():
       cfg.SetDiskID(dev, pri_node)
       result = self.rpc.call_blockdev_find(pri_node, dev)
-      result.Raise()
-      if result.data[5]:
+      msg = result.RemoteFailMsg()
+      if not msg and not result.payload:
+        msg = "disk not found"
+      if msg:
+        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
+                                 (idx, msg))
+      if result.payload[5]:
         raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
 
     self.proc.LogStep(6, steps_total, "removing old storage")
         raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
 
     self.proc.LogStep(6, steps_total, "removing old storage")
@@ -5085,9 +5445,9 @@ class LUReplaceDisks(LogicalUnit):
       info("remove logical volumes for disk/%d" % idx)
       for lv in old_lvs:
         cfg.SetDiskID(lv, old_node)
       info("remove logical volumes for disk/%d" % idx)
       for lv in old_lvs:
         cfg.SetDiskID(lv, old_node)
-        result = self.rpc.call_blockdev_remove(old_node, lv)
-        if result.failed or not result.data:
-          warning("Can't remove LV on old secondary",
+        msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
+        if msg:
+          warning("Can't remove LV on old secondary: %s", msg,
                   hint="Cleanup stale volumes by hand")
 
   def Exec(self, feedback_fn):
                   hint="Cleanup stale volumes by hand")
 
   def Exec(self, feedback_fn):
@@ -5099,7 +5459,7 @@ class LUReplaceDisks(LogicalUnit):
     instance = self.instance
 
     # Activate the instance disks if we're replacing them on a down instance
     instance = self.instance
 
     # Activate the instance disks if we're replacing them on a down instance
-    if instance.status == "down":
+    if not instance.admin_up:
       _StartInstanceDisks(self, instance, True)
 
     if self.op.mode == constants.REPLACE_DISK_CHG:
       _StartInstanceDisks(self, instance, True)
 
     if self.op.mode == constants.REPLACE_DISK_CHG:
@@ -5110,7 +5470,7 @@ class LUReplaceDisks(LogicalUnit):
     ret = fn(feedback_fn)
 
     # Deactivate the instance disks if we're replacing them on a down instance
     ret = fn(feedback_fn)
 
     # Deactivate the instance disks if we're replacing them on a down instance
-    if instance.status == "down":
+    if not instance.admin_up:
       _SafeShutdownInstanceDisks(self, instance)
 
     return ret
       _SafeShutdownInstanceDisks(self, instance)
 
     return ret
@@ -5160,8 +5520,8 @@ class LUGrowDisk(LogicalUnit):
     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
-    _CheckNodeOnline(self, instance.primary_node)
-    for node in instance.secondary_nodes:
+    nodenames = list(instance.all_nodes)
+    for node in nodenames:
       _CheckNodeOnline(self, node)
 
 
       _CheckNodeOnline(self, node)
 
 
@@ -5173,7 +5533,6 @@ class LUGrowDisk(LogicalUnit):
 
     self.disk = instance.FindDisk(self.op.disk)
 
 
     self.disk = instance.FindDisk(self.op.disk)
 
-    nodenames = [instance.primary_node] + list(instance.secondary_nodes)
     nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                        instance.hypervisor)
     for node in nodenames:
     nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                        instance.hypervisor)
     for node in nodenames:
@@ -5196,16 +5555,13 @@ class LUGrowDisk(LogicalUnit):
     """
     instance = self.instance
     disk = self.disk
     """
     instance = self.instance
     disk = self.disk
-    for node in (instance.secondary_nodes + (instance.primary_node,)):
+    for node in instance.all_nodes:
       self.cfg.SetDiskID(disk, node)
       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
       self.cfg.SetDiskID(disk, node)
       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
-      result.Raise()
-      if (not result.data or not isinstance(result.data, (list, tuple)) or
-          len(result.data) != 2):
-        raise errors.OpExecError("Grow request failed to node %s" % node)
-      elif not result.data[0]:
+      msg = result.RemoteFailMsg()
+      if msg:
         raise errors.OpExecError("Grow request failed to node %s: %s" %
         raise errors.OpExecError("Grow request failed to node %s: %s" %
-                                 (node, result.data[1]))
+                                 (node, msg))
     disk.RecordGrow(self.op.amount)
     self.cfg.Update(instance)
     if self.op.wait_for_sync:
     disk.RecordGrow(self.op.amount)
     self.cfg.Update(instance)
     if self.op.wait_for_sync:
@@ -5269,8 +5625,14 @@ class LUQueryInstanceData(NoHooksLU):
     if not static:
       self.cfg.SetDiskID(dev, instance.primary_node)
       dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
     if not static:
       self.cfg.SetDiskID(dev, instance.primary_node)
       dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
-      dev_pstatus.Raise()
-      dev_pstatus = dev_pstatus.data
+      if dev_pstatus.offline:
+        dev_pstatus = None
+      else:
+        msg = dev_pstatus.RemoteFailMsg()
+        if msg:
+          raise errors.OpExecError("Can't compute disk status for %s: %s" %
+                                   (instance.name, msg))
+        dev_pstatus = dev_pstatus.payload
     else:
       dev_pstatus = None
 
     else:
       dev_pstatus = None
 
@@ -5284,8 +5646,14 @@ class LUQueryInstanceData(NoHooksLU):
     if snode and not static:
       self.cfg.SetDiskID(dev, snode)
       dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
     if snode and not static:
       self.cfg.SetDiskID(dev, snode)
       dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
-      dev_sstatus.Raise()
-      dev_sstatus = dev_sstatus.data
+      if dev_sstatus.offline:
+        dev_sstatus = None
+      else:
+        msg = dev_sstatus.RemoteFailMsg()
+        if msg:
+          raise errors.OpExecError("Can't compute disk status for %s: %s" %
+                                   (instance.name, msg))
+        dev_sstatus = dev_sstatus.payload
     else:
       dev_sstatus = None
 
     else:
       dev_sstatus = None
 
@@ -5327,10 +5695,10 @@ class LUQueryInstanceData(NoHooksLU):
           remote_state = "down"
       else:
         remote_state = None
           remote_state = "down"
       else:
         remote_state = None
-      if instance.status == "down":
-        config_state = "down"
-      else:
+      if instance.admin_up:
         config_state = "up"
         config_state = "up"
+      else:
+        config_state = "down"
 
       disks = [self._ComputeDiskStatus(instance, None, device)
                for device in instance.disks]
 
       disks = [self._ComputeDiskStatus(instance, None, device)
                for device in instance.disks]
@@ -5380,8 +5748,6 @@ class LUSetInstanceParams(LogicalUnit):
             self.op.hvparams or self.op.beparams):
       raise errors.OpPrereqError("No changes submitted")
 
             self.op.hvparams or self.op.beparams):
       raise errors.OpPrereqError("No changes submitted")
 
-    utils.CheckBEParams(self.op.beparams)
-
     # Disk validation
     disk_addremove = 0
     for disk_op, disk_dict in self.op.disks:
     # Disk validation
     disk_addremove = 0
     for disk_op, disk_dict in self.op.disks:
@@ -5395,7 +5761,7 @@ class LUSetInstanceParams(LogicalUnit):
           raise errors.OpPrereqError("Invalid disk index")
       if disk_op == constants.DDM_ADD:
         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
           raise errors.OpPrereqError("Invalid disk index")
       if disk_op == constants.DDM_ADD:
         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
-        if mode not in (constants.DISK_RDONLY, constants.DISK_RDWR):
+        if mode not in constants.DISK_ACCESS_SET:
           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
         size = disk_dict.get('size', None)
         if size is None:
           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
         size = disk_dict.get('size', None)
         if size is None:
@@ -5431,24 +5797,29 @@ class LUSetInstanceParams(LogicalUnit):
       # nic_dict should be a dict
       nic_ip = nic_dict.get('ip', None)
       if nic_ip is not None:
       # nic_dict should be a dict
       nic_ip = nic_dict.get('ip', None)
       if nic_ip is not None:
-        if nic_ip.lower() == "none":
+        if nic_ip.lower() == constants.VALUE_NONE:
           nic_dict['ip'] = None
         else:
           if not utils.IsValidIP(nic_ip):
             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
           nic_dict['ip'] = None
         else:
           if not utils.IsValidIP(nic_ip):
             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
-      # we can only check None bridges and assign the default one
-      nic_bridge = nic_dict.get('bridge', None)
-      if nic_bridge is None:
-        nic_dict['bridge'] = self.cfg.GetDefBridge()
-      # but we can validate MACs
-      nic_mac = nic_dict.get('mac', None)
-      if nic_mac is not None:
-        if self.cfg.IsMacInUse(nic_mac):
-          raise errors.OpPrereqError("MAC address %s already in use"
-                                     " in cluster" % nic_mac)
+
+      if nic_op == constants.DDM_ADD:
+        nic_bridge = nic_dict.get('bridge', None)
+        if nic_bridge is None:
+          nic_dict['bridge'] = self.cfg.GetDefBridge()
+        nic_mac = nic_dict.get('mac', None)
+        if nic_mac is None:
+          nic_dict['mac'] = constants.VALUE_AUTO
+
+      if 'mac' in nic_dict:
+        nic_mac = nic_dict['mac']
         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
           if not utils.IsValidMac(nic_mac):
             raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
           if not utils.IsValidMac(nic_mac):
             raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
+        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
+          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
+                                     " modifying an existing nic")
+
     if nic_addremove > 1:
       raise errors.OpPrereqError("Only one NIC add or remove operation"
                                  " supported at a time")
     if nic_addremove > 1:
       raise errors.OpPrereqError("Only one NIC add or remove operation"
                                  " supported at a time")
@@ -5473,10 +5844,39 @@ class LUSetInstanceParams(LogicalUnit):
       args['memory'] = self.be_new[constants.BE_MEMORY]
     if constants.BE_VCPUS in self.be_new:
       args['vcpus'] = self.be_new[constants.BE_VCPUS]
       args['memory'] = self.be_new[constants.BE_MEMORY]
     if constants.BE_VCPUS in self.be_new:
       args['vcpus'] = self.be_new[constants.BE_VCPUS]
-    # FIXME: readd disk/nic changes
+    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
+    # information at all.
+    if self.op.nics:
+      args['nics'] = []
+      nic_override = dict(self.op.nics)
+      for idx, nic in enumerate(self.instance.nics):
+        if idx in nic_override:
+          this_nic_override = nic_override[idx]
+        else:
+          this_nic_override = {}
+        if 'ip' in this_nic_override:
+          ip = this_nic_override['ip']
+        else:
+          ip = nic.ip
+        if 'bridge' in this_nic_override:
+          bridge = this_nic_override['bridge']
+        else:
+          bridge = nic.bridge
+        if 'mac' in this_nic_override:
+          mac = this_nic_override['mac']
+        else:
+          mac = nic.mac
+        args['nics'].append((ip, bridge, mac))
+      if constants.DDM_ADD in nic_override:
+        ip = nic_override[constants.DDM_ADD].get('ip', None)
+        bridge = nic_override[constants.DDM_ADD]['bridge']
+        mac = nic_override[constants.DDM_ADD]['mac']
+        args['nics'].append((ip, bridge, mac))
+      elif constants.DDM_REMOVE in nic_override:
+        del args['nics'][-1]
+
     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
-    nl = [self.cfg.GetMasterNode(),
-          self.instance.primary_node] + list(self.instance.secondary_nodes)
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -5492,9 +5892,8 @@ class LUSetInstanceParams(LogicalUnit):
     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
-    pnode = self.instance.primary_node
-    nodelist = [pnode]
-    nodelist.extend(instance.secondary_nodes)
+    pnode = instance.primary_node
+    nodelist = list(instance.all_nodes)
 
     # hvparams processing
     if self.op.hvparams:
 
     # hvparams processing
     if self.op.hvparams:
@@ -5505,11 +5904,10 @@ class LUSetInstanceParams(LogicalUnit):
             del i_hvdict[key]
           except KeyError:
             pass
             del i_hvdict[key]
           except KeyError:
             pass
-        elif val == constants.VALUE_NONE:
-          i_hvdict[key] = None
         else:
           i_hvdict[key] = val
       cluster = self.cfg.GetClusterInfo()
         else:
           i_hvdict[key] = val
       cluster = self.cfg.GetClusterInfo()
+      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
       hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
                                 i_hvdict)
       # local check
       hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
                                 i_hvdict)
       # local check
@@ -5533,6 +5931,7 @@ class LUSetInstanceParams(LogicalUnit):
         else:
           i_bedict[key] = val
       cluster = self.cfg.GetClusterInfo()
         else:
           i_bedict[key] = val
       cluster = self.cfg.GetClusterInfo()
+      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
       be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                 i_bedict)
       self.be_new = be_new # the new actual values
       be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                 i_bedict)
       self.be_new = be_new # the new actual values
@@ -5556,7 +5955,7 @@ class LUSetInstanceParams(LogicalUnit):
         self.warn.append("Can't get info from primary node %s" % pnode)
       else:
         if not instance_info.failed and instance_info.data:
         self.warn.append("Can't get info from primary node %s" % pnode)
       else:
         if not instance_info.failed and instance_info.data:
-          current_mem = instance_info.data['memory']
+          current_mem = int(instance_info.data['memory'])
         else:
           # Assume instance not running
           # (there is a slight race condition here, but it's not very probable,
         else:
           # Assume instance not running
           # (there is a slight race condition here, but it's not very probable,
@@ -5591,8 +5990,10 @@ class LUSetInstanceParams(LogicalUnit):
           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                      " are 0 to %d" %
                                      (nic_op, len(instance.nics)))
           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                      " are 0 to %d" %
                                      (nic_op, len(instance.nics)))
-      nic_bridge = nic_dict.get('bridge', None)
-      if nic_bridge is not None:
+      if 'bridge' in nic_dict:
+        nic_bridge = nic_dict['bridge']
+        if nic_bridge is None:
+          raise errors.OpPrereqError('Cannot set the nic bridge to None')
         if not self.rpc.call_bridges_exist(pnode, [nic_bridge]):
           msg = ("Bridge '%s' doesn't exist on one of"
                  " the instance nodes" % nic_bridge)
         if not self.rpc.call_bridges_exist(pnode, [nic_bridge]):
           msg = ("Bridge '%s' doesn't exist on one of"
                  " the instance nodes" % nic_bridge)
@@ -5600,6 +6001,18 @@ class LUSetInstanceParams(LogicalUnit):
             self.warn.append(msg)
           else:
             raise errors.OpPrereqError(msg)
             self.warn.append(msg)
           else:
             raise errors.OpPrereqError(msg)
+      if 'mac' in nic_dict:
+        nic_mac = nic_dict['mac']
+        if nic_mac is None:
+          raise errors.OpPrereqError('Cannot set the nic mac to None')
+        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+          # otherwise generate the mac
+          nic_dict['mac'] = self.cfg.GenerateMAC()
+        else:
+          # or validate/reserve the current one
+          if self.cfg.IsMacInUse(nic_mac):
+            raise errors.OpPrereqError("MAC address %s already in use"
+                                       " in cluster" % nic_mac)
 
     # DISK processing
     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
 
     # DISK processing
     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
@@ -5652,10 +6065,10 @@ class LUSetInstanceParams(LogicalUnit):
         device_idx = len(instance.disks)
         for node, disk in device.ComputeNodeTree(instance.primary_node):
           self.cfg.SetDiskID(disk, node)
         device_idx = len(instance.disks)
         for node, disk in device.ComputeNodeTree(instance.primary_node):
           self.cfg.SetDiskID(disk, node)
-          rpc_result = self.rpc.call_blockdev_remove(node, disk)
-          if rpc_result.failed or not rpc_result.data:
-            self.proc.LogWarning("Could not remove disk/%d on node %s,"
-                                 " continuing anyway", device_idx, node)
+          msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
+          if msg:
+            self.LogWarning("Could not remove disk/%d on node %s: %s,"
+                            " continuing anyway", device_idx, node, msg)
         result.append(("disk/%d" % device_idx, "remove"))
       elif disk_op == constants.DDM_ADD:
         # add a new disk
         result.append(("disk/%d" % device_idx, "remove"))
       elif disk_op == constants.DDM_ADD:
         # add a new disk
@@ -5667,13 +6080,12 @@ class LUSetInstanceParams(LogicalUnit):
         disk_idx_base = len(instance.disks)
         new_disk = _GenerateDiskTemplate(self,
                                          instance.disk_template,
         disk_idx_base = len(instance.disks)
         new_disk = _GenerateDiskTemplate(self,
                                          instance.disk_template,
-                                         instance, instance.primary_node,
+                                         instance.name, instance.primary_node,
                                          instance.secondary_nodes,
                                          [disk_dict],
                                          file_path,
                                          file_driver,
                                          disk_idx_base)[0]
                                          instance.secondary_nodes,
                                          [disk_dict],
                                          file_path,
                                          file_driver,
                                          disk_idx_base)[0]
-        new_disk.mode = disk_dict['mode']
         instance.disks.append(new_disk)
         info = _GetInstanceInfoText(instance)
 
         instance.disks.append(new_disk)
         info = _GetInstanceInfoText(instance)
 
@@ -5686,7 +6098,7 @@ class LUSetInstanceParams(LogicalUnit):
           try:
             _CreateBlockDev(self, node, instance, new_disk,
                             f_create, info, f_create)
           try:
             _CreateBlockDev(self, node, instance, new_disk,
                             f_create, info, f_create)
-          except error.OpExecError, err:
+          except errors.OpExecError, err:
             self.LogWarning("Failed to create volume %s (%s) on"
                             " node %s: %s",
                             new_disk.iv_name, new_disk, node, err)
             self.LogWarning("Failed to create volume %s (%s) on"
                             " node %s: %s",
                             new_disk.iv_name, new_disk, node, err)
@@ -5703,15 +6115,11 @@ class LUSetInstanceParams(LogicalUnit):
         del instance.nics[-1]
         result.append(("nic.%d" % len(instance.nics), "remove"))
       elif nic_op == constants.DDM_ADD:
         del instance.nics[-1]
         result.append(("nic.%d" % len(instance.nics), "remove"))
       elif nic_op == constants.DDM_ADD:
-        # add a new nic
-        if 'mac' not in nic_dict:
-          mac = constants.VALUE_GENERATE
-        else:
-          mac = nic_dict['mac']
-        if mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
-          mac = self.cfg.GenerateMAC()
+        # mac and bridge should be set, by now
+        mac = nic_dict['mac']
+        bridge = nic_dict['bridge']
         new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
         new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
-                              bridge=nic_dict.get('bridge', None))
+                              bridge=bridge)
         instance.nics.append(new_nic)
         result.append(("nic.%d" % (len(instance.nics) - 1),
                        "add:mac=%s,ip=%s,bridge=%s" %
         instance.nics.append(new_nic)
         result.append(("nic.%d" % (len(instance.nics) - 1),
                        "add:mac=%s,ip=%s,bridge=%s" %
@@ -5725,7 +6133,7 @@ class LUSetInstanceParams(LogicalUnit):
 
     # hvparams changes
     if self.op.hvparams:
 
     # hvparams changes
     if self.op.hvparams:
-      instance.hvparams = self.hv_new
+      instance.hvparams = self.hv_inst
       for key, val in self.op.hvparams.iteritems():
         result.append(("hv/%s" % key, val))
 
       for key, val in self.op.hvparams.iteritems():
         result.append(("hv/%s" % key, val))
 
@@ -5841,6 +6249,7 @@ class LUExportInstance(LogicalUnit):
       # This is wrong node name, not a non-locked node
       raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
     _CheckNodeOnline(self, self.dst_node.name)
       # This is wrong node name, not a non-locked node
       raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
     _CheckNodeOnline(self, self.dst_node.name)
+    _CheckNodeNotDrained(self, self.dst_node.name)
 
     # instance disk type verification
     for disk in self.instance.disks:
 
     # instance disk type verification
     for disk in self.instance.disks:
@@ -5858,10 +6267,11 @@ class LUExportInstance(LogicalUnit):
     if self.op.shutdown:
       # shutdown the instance, but not the disks
       result = self.rpc.call_instance_shutdown(src_node, instance)
     if self.op.shutdown:
       # shutdown the instance, but not the disks
       result = self.rpc.call_instance_shutdown(src_node, instance)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
-                                 (instance.name, src_node))
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not shutdown instance %s on"
+                                 " node %s: %s" %
+                                 (instance.name, src_node, msg))
 
     vgname = self.cfg.GetVGName()
 
 
     vgname = self.cfg.GetVGName()
 
@@ -5888,11 +6298,12 @@ class LUExportInstance(LogicalUnit):
           snap_disks.append(new_dev)
 
     finally:
           snap_disks.append(new_dev)
 
     finally:
-      if self.op.shutdown and instance.status == "up":
-        result = self.rpc.call_instance_start(src_node, instance, None)
-        if result.failed or not result.data:
+      if self.op.shutdown and instance.admin_up:
+        result = self.rpc.call_instance_start(src_node, instance, None, None)
+        msg = result.RemoteFailMsg()
+        if msg:
           _ShutdownInstanceDisks(self, instance)
           _ShutdownInstanceDisks(self, instance)
-          raise errors.OpExecError("Could not start instance")
+          raise errors.OpExecError("Could not start instance: %s" % msg)
 
     # TODO: check for size
 
 
     # TODO: check for size
 
@@ -5905,10 +6316,10 @@ class LUExportInstance(LogicalUnit):
           self.LogWarning("Could not export block device %s from node %s to"
                           " node %s", dev.logical_id[1], src_node,
                           dst_node.name)
           self.LogWarning("Could not export block device %s from node %s to"
                           " node %s", dev.logical_id[1], src_node,
                           dst_node.name)
-        result = self.rpc.call_blockdev_remove(src_node, dev)
-        if result.failed or not result.data:
+        msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
+        if msg:
           self.LogWarning("Could not remove snapshot block device %s from node"
           self.LogWarning("Could not remove snapshot block device %s from node"
-                          " %s", dev.logical_id[1], src_node)
+                          " %s: %s", dev.logical_id[1], src_node, msg)
 
     result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
     if result.failed or not result.data:
 
     result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
     if result.failed or not result.data:
@@ -6260,10 +6671,10 @@ class IAllocator(object):
     cluster_info = cfg.GetClusterInfo()
     # cluster data
     data = {
     cluster_info = cfg.GetClusterInfo()
     # cluster data
     data = {
-      "version": 1,
+      "version": constants.IALLOCATOR_VERSION,
       "cluster_name": cfg.GetClusterName(),
       "cluster_tags": list(cluster_info.GetTags()),
       "cluster_name": cfg.GetClusterName(),
       "cluster_tags": list(cluster_info.GetTags()),
-      "enable_hypervisors": list(cluster_info.enabled_hypervisors),
+      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
       # we don't have job IDs
       }
     iinfo = cfg.GetAllInstancesInfo().values()
       # we don't have job IDs
       }
     iinfo = cfg.GetAllInstancesInfo().values()
@@ -6282,52 +6693,61 @@ class IAllocator(object):
                                            hypervisor_name)
     node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                        cluster_info.enabled_hypervisors)
                                            hypervisor_name)
     node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                        cluster_info.enabled_hypervisors)
-    for nname in node_list:
+    for nname, nresult in node_data.items():
+      # first fill in static (config-based) values
       ninfo = cfg.GetNodeInfo(nname)
       ninfo = cfg.GetNodeInfo(nname)
-      node_data[nname].Raise()
-      if not isinstance(node_data[nname].data, dict):
-        raise errors.OpExecError("Can't get data for node %s" % nname)
-      remote_info = node_data[nname].data
-      for attr in ['memory_total', 'memory_free', 'memory_dom0',
-                   'vg_size', 'vg_free', 'cpu_total']:
-        if attr not in remote_info:
-          raise errors.OpExecError("Node '%s' didn't return attribute '%s'" %
-                                   (nname, attr))
-        try:
-          remote_info[attr] = int(remote_info[attr])
-        except ValueError, err:
-          raise errors.OpExecError("Node '%s' returned invalid value for '%s':"
-                                   " %s" % (nname, attr, str(err)))
-      # compute memory used by primary instances
-      i_p_mem = i_p_up_mem = 0
-      for iinfo, beinfo in i_list:
-        if iinfo.primary_node == nname:
-          i_p_mem += beinfo[constants.BE_MEMORY]
-          if iinfo.name not in node_iinfo[nname]:
-            i_used_mem = 0
-          else:
-            i_used_mem = int(node_iinfo[nname][iinfo.name]['memory'])
-          i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
-          remote_info['memory_free'] -= max(0, i_mem_diff)
-
-          if iinfo.status == "up":
-            i_p_up_mem += beinfo[constants.BE_MEMORY]
-
-      # compute memory used by instances
       pnr = {
         "tags": list(ninfo.GetTags()),
       pnr = {
         "tags": list(ninfo.GetTags()),
-        "total_memory": remote_info['memory_total'],
-        "reserved_memory": remote_info['memory_dom0'],
-        "free_memory": remote_info['memory_free'],
-        "i_pri_memory": i_p_mem,
-        "i_pri_up_memory": i_p_up_mem,
-        "total_disk": remote_info['vg_size'],
-        "free_disk": remote_info['vg_free'],
         "primary_ip": ninfo.primary_ip,
         "secondary_ip": ninfo.secondary_ip,
         "primary_ip": ninfo.primary_ip,
         "secondary_ip": ninfo.secondary_ip,
-        "total_cpus": remote_info['cpu_total'],
         "offline": ninfo.offline,
         "offline": ninfo.offline,
+        "drained": ninfo.drained,
+        "master_candidate": ninfo.master_candidate,
         }
         }
+
+      if not ninfo.offline:
+        nresult.Raise()
+        if not isinstance(nresult.data, dict):
+          raise errors.OpExecError("Can't get data for node %s" % nname)
+        remote_info = nresult.data
+        for attr in ['memory_total', 'memory_free', 'memory_dom0',
+                     'vg_size', 'vg_free', 'cpu_total']:
+          if attr not in remote_info:
+            raise errors.OpExecError("Node '%s' didn't return attribute"
+                                     " '%s'" % (nname, attr))
+          try:
+            remote_info[attr] = int(remote_info[attr])
+          except ValueError, err:
+            raise errors.OpExecError("Node '%s' returned invalid value"
+                                     " for '%s': %s" % (nname, attr, err))
+        # compute memory used by primary instances
+        i_p_mem = i_p_up_mem = 0
+        for iinfo, beinfo in i_list:
+          if iinfo.primary_node == nname:
+            i_p_mem += beinfo[constants.BE_MEMORY]
+            if iinfo.name not in node_iinfo[nname].data:
+              i_used_mem = 0
+            else:
+              i_used_mem = int(node_iinfo[nname].data[iinfo.name]['memory'])
+            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
+            remote_info['memory_free'] -= max(0, i_mem_diff)
+
+            if iinfo.admin_up:
+              i_p_up_mem += beinfo[constants.BE_MEMORY]
+
+        # compute memory used by instances
+        pnr_dyn = {
+          "total_memory": remote_info['memory_total'],
+          "reserved_memory": remote_info['memory_dom0'],
+          "free_memory": remote_info['memory_free'],
+          "total_disk": remote_info['vg_size'],
+          "free_disk": remote_info['vg_free'],
+          "total_cpus": remote_info['cpu_total'],
+          "i_pri_memory": i_p_mem,
+          "i_pri_up_memory": i_p_up_mem,
+          }
+        pnr.update(pnr_dyn)
+
       node_results[nname] = pnr
     data["nodes"] = node_results
 
       node_results[nname] = pnr
     data["nodes"] = node_results
 
@@ -6338,16 +6758,18 @@ class IAllocator(object):
                   for n in iinfo.nics]
       pir = {
         "tags": list(iinfo.GetTags()),
                   for n in iinfo.nics]
       pir = {
         "tags": list(iinfo.GetTags()),
-        "should_run": iinfo.status == "up",
+        "admin_up": iinfo.admin_up,
         "vcpus": beinfo[constants.BE_VCPUS],
         "memory": beinfo[constants.BE_MEMORY],
         "os": iinfo.os,
         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
         "nics": nic_data,
         "vcpus": beinfo[constants.BE_VCPUS],
         "memory": beinfo[constants.BE_MEMORY],
         "os": iinfo.os,
         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
         "nics": nic_data,
-        "disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
+        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
         "disk_template": iinfo.disk_template,
         "hypervisor": iinfo.hypervisor,
         }
         "disk_template": iinfo.disk_template,
         "hypervisor": iinfo.hypervisor,
         }
+      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
+                                                 pir["disks"])
       instance_data[iinfo.name] = pir
 
     data["instances"] = instance_data
       instance_data[iinfo.name] = pir
 
     data["instances"] = instance_data
@@ -6365,8 +6787,6 @@ class IAllocator(object):
 
     """
     data = self.in_data
 
     """
     data = self.in_data
-    if len(self.disks) != 2:
-      raise errors.OpExecError("Only two-disk configurations supported")
 
     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
 
 
     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
 
@@ -6523,8 +6943,6 @@ class LUTestAllocator(NoHooksLU):
                                      " 'nics' parameter")
       if not isinstance(self.op.disks, list):
         raise errors.OpPrereqError("Invalid parameter 'disks'")
                                      " 'nics' parameter")
       if not isinstance(self.op.disks, list):
         raise errors.OpPrereqError("Invalid parameter 'disks'")
-      if len(self.op.disks) != 2:
-        raise errors.OpPrereqError("Only two-disk configurations supported")
       for row in self.op.disks:
         if (not isinstance(row, dict) or
             "size" not in row or
       for row in self.op.disks:
         if (not isinstance(row, dict) or
             "size" not in row or
@@ -6533,7 +6951,7 @@ class LUTestAllocator(NoHooksLU):
             row["mode"] not in ['r', 'w']):
           raise errors.OpPrereqError("Invalid contents of the"
                                      " 'disks' parameter")
             row["mode"] not in ['r', 'w']):
           raise errors.OpPrereqError("Invalid contents of the"
                                      " 'disks' parameter")
-      if self.op.hypervisor is None:
+      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
         self.op.hypervisor = self.cfg.GetHypervisorType()
     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
       if not hasattr(self.op, "name"):
         self.op.hypervisor = self.cfg.GetHypervisorType()
     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
       if not hasattr(self.op, "name"):