cluster verify: show correctly drained nodes
[ganeti-local] / lib / cmdlib.py
index 82bbdf9..2434a5b 100644 (file)
@@ -392,8 +392,8 @@ def _GetWantedInstances(lu, instances):
       wanted.append(instance)
 
   else:
       wanted.append(instance)
 
   else:
-    wanted = lu.cfg.GetInstanceList()
-  return utils.NiceSort(wanted)
+    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
+  return wanted
 
 
 def _CheckOutputFields(static, dynamic, selected):
 
 
 def _CheckOutputFields(static, dynamic, selected):
@@ -455,8 +455,8 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   @param secondary_nodes: list of secondary nodes as strings
   @type os_type: string
   @param os_type: the name of the instance's OS
   @param secondary_nodes: list of secondary nodes as strings
   @type os_type: string
   @param os_type: the name of the instance's OS
-  @type status: string
-  @param status: the desired status of the instances
+  @type status: boolean
+  @param status: the should_run status of the instance
   @type memory: string
   @param memory: the memory size of the instance
   @type vcpus: string
   @type memory: string
   @param memory: the memory size of the instance
   @type vcpus: string
@@ -468,13 +468,17 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
   @return: the hook environment for this instance
 
   """
   @return: the hook environment for this instance
 
   """
+  if status:
+    str_status = "up"
+  else:
+    str_status = "down"
   env = {
     "OP_TARGET": name,
     "INSTANCE_NAME": name,
     "INSTANCE_PRIMARY": primary_node,
     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
     "INSTANCE_OS_TYPE": os_type,
   env = {
     "OP_TARGET": name,
     "INSTANCE_NAME": name,
     "INSTANCE_PRIMARY": primary_node,
     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
     "INSTANCE_OS_TYPE": os_type,
-    "INSTANCE_STATUS": status,
+    "INSTANCE_STATUS": str_status,
     "INSTANCE_MEMORY": memory,
     "INSTANCE_VCPUS": vcpus,
   }
     "INSTANCE_MEMORY": memory,
     "INSTANCE_VCPUS": vcpus,
   }
@@ -516,7 +520,7 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
     'primary_node': instance.primary_node,
     'secondary_nodes': instance.secondary_nodes,
     'os_type': instance.os,
     'primary_node': instance.primary_node,
     'secondary_nodes': instance.secondary_nodes,
     'os_type': instance.os,
-    'status': instance.os,
+    'status': instance.admin_up,
     'memory': bep[constants.BE_MEMORY],
     'vcpus': bep[constants.BE_VCPUS],
     'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
     'memory': bep[constants.BE_MEMORY],
     'vcpus': bep[constants.BE_VCPUS],
     'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
@@ -613,7 +617,8 @@ class LUVerifyCluster(LogicalUnit):
     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
 
   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
 
   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
-                  node_result, feedback_fn, master_files):
+                  node_result, feedback_fn, master_files,
+                  drbd_map):
     """Run multiple tests against a node.
 
     Test list:
     """Run multiple tests against a node.
 
     Test list:
@@ -630,6 +635,9 @@ class LUVerifyCluster(LogicalUnit):
     @param node_result: the results from the node
     @param feedback_fn: function used to accumulate results
     @param master_files: list of files that only masters should have
     @param node_result: the results from the node
     @param feedback_fn: function used to accumulate results
     @param master_files: list of files that only masters should have
+    @param drbd_map: the useddrbd minors for this node, in
+        form of minor: (instance, must_exist) which correspond to instances
+        and their running status
 
     """
     node = nodeinfo.name
 
     """
     node = nodeinfo.name
@@ -642,18 +650,28 @@ class LUVerifyCluster(LogicalUnit):
     # compares ganeti version
     local_version = constants.PROTOCOL_VERSION
     remote_version = node_result.get('version', None)
     # compares ganeti version
     local_version = constants.PROTOCOL_VERSION
     remote_version = node_result.get('version', None)
-    if not remote_version:
+    if not (remote_version and isinstance(remote_version, (list, tuple)) and
+            len(remote_version) == 2):
       feedback_fn("  - ERROR: connection to %s failed" % (node))
       return True
 
       feedback_fn("  - ERROR: connection to %s failed" % (node))
       return True
 
-    if local_version != remote_version:
-      feedback_fn("  - ERROR: sw version mismatch: master %s, node(%s) %s" %
-                      (local_version, node, remote_version))
+    if local_version != remote_version[0]:
+      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
+                  " node %s %s" % (local_version, node, remote_version[0]))
       return True
 
       return True
 
-    # checks vg existance and size > 20G
+    # node seems compatible, we can actually try to look into its results
 
     bad = False
 
     bad = False
+
+    # full package version
+    if constants.RELEASE_VERSION != remote_version[1]:
+      feedback_fn("  - WARNING: software version mismatch: master %s,"
+                  " node %s %s" %
+                  (constants.RELEASE_VERSION, node, remote_version[1]))
+
+    # checks vg existence and size > 20G
+
     vglist = node_result.get(constants.NV_VGLIST, None)
     if not vglist:
       feedback_fn("  - ERROR: unable to check volume groups on node %s." %
     vglist = node_result.get(constants.NV_VGLIST, None)
     if not vglist:
       feedback_fn("  - ERROR: unable to check volume groups on node %s." %
@@ -724,6 +742,19 @@ class LUVerifyCluster(LogicalUnit):
         if hv_result is not None:
           feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                       (hv_name, hv_result))
         if hv_result is not None:
           feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                       (hv_name, hv_result))
+
+    # check used drbd list
+    used_minors = node_result.get(constants.NV_DRBDLIST, [])
+    for minor, (iname, must_exist) in drbd_map.items():
+      if minor not in used_minors and must_exist:
+        feedback_fn("  - ERROR: drbd minor %d of instance %s is not active" %
+                    (minor, iname))
+        bad = True
+    for minor in used_minors:
+      if minor not in drbd_map:
+        feedback_fn("  - ERROR: unallocated drbd minor %d is in use" % minor)
+        bad = True
+
     return bad
 
   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
     return bad
 
   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
@@ -751,7 +782,7 @@ class LUVerifyCluster(LogicalUnit):
                           (volume, node))
           bad = True
 
                           (volume, node))
           bad = True
 
-    if not instanceconfig.status == 'down':
+    if instanceconfig.admin_up:
       if ((node_current not in node_instance or
           not instance in node_instance[node_current]) and
           node_current not in n_offline):
       if ((node_current not in node_instance or
           not instance in node_instance[node_current]) and
           node_current not in n_offline):
@@ -867,9 +898,12 @@ class LUVerifyCluster(LogicalUnit):
     nodelist = utils.NiceSort(self.cfg.GetNodeList())
     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
     nodelist = utils.NiceSort(self.cfg.GetNodeList())
     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
+    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
+                        for iname in instancelist)
     i_non_redundant = [] # Non redundant instances
     i_non_a_balanced = [] # Non auto-balanced instances
     n_offline = [] # List of offline nodes
     i_non_redundant = [] # Non redundant instances
     i_non_a_balanced = [] # Non auto-balanced instances
     n_offline = [] # List of offline nodes
+    n_drained = [] # List of nodes being drained
     node_volume = {}
     node_instance = {}
     node_info = {}
     node_volume = {}
     node_instance = {}
     node_info = {}
@@ -900,12 +934,15 @@ class LUVerifyCluster(LogicalUnit):
       constants.NV_VGLIST: None,
       constants.NV_VERSION: None,
       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
       constants.NV_VGLIST: None,
       constants.NV_VERSION: None,
       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
+      constants.NV_DRBDLIST: None,
       }
     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                            self.cfg.GetClusterName())
 
     cluster = self.cfg.GetClusterInfo()
     master_node = self.cfg.GetMasterNode()
       }
     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                            self.cfg.GetClusterName())
 
     cluster = self.cfg.GetClusterInfo()
     master_node = self.cfg.GetMasterNode()
+    all_drbd_map = self.cfg.ComputeDRBDMap()
+
     for node_i in nodeinfo:
       node = node_i.name
       nresult = all_nvinfo[node].data
     for node_i in nodeinfo:
       node = node_i.name
       nresult = all_nvinfo[node].data
@@ -919,6 +956,9 @@ class LUVerifyCluster(LogicalUnit):
         ntype = "master"
       elif node_i.master_candidate:
         ntype = "master candidate"
         ntype = "master"
       elif node_i.master_candidate:
         ntype = "master candidate"
+      elif node_i.drained:
+        ntype = "drained"
+        n_drained.append(node)
       else:
         ntype = "regular"
       feedback_fn("* Verifying node %s (%s)" % (node, ntype))
       else:
         ntype = "regular"
       feedback_fn("* Verifying node %s (%s)" % (node, ntype))
@@ -928,14 +968,19 @@ class LUVerifyCluster(LogicalUnit):
         bad = True
         continue
 
         bad = True
         continue
 
+      node_drbd = {}
+      for minor, instance in all_drbd_map[node].items():
+        instance = instanceinfo[instance]
+        node_drbd[minor] = (instance.name, instance.admin_up)
       result = self._VerifyNode(node_i, file_names, local_checksums,
       result = self._VerifyNode(node_i, file_names, local_checksums,
-                                nresult, feedback_fn, master_files)
+                                nresult, feedback_fn, master_files,
+                                node_drbd)
       bad = bad or result
 
       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
       if isinstance(lvdata, basestring):
         feedback_fn("  - ERROR: LVM problem on node %s: %s" %
       bad = bad or result
 
       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
       if isinstance(lvdata, basestring):
         feedback_fn("  - ERROR: LVM problem on node %s: %s" %
-                    (node, lvdata.encode('string_escape')))
+                    (node, utils.SafeEncode(lvdata)))
         bad = True
         node_volume[node] = {}
       elif not isinstance(lvdata, dict):
         bad = True
         node_volume[node] = {}
       elif not isinstance(lvdata, dict):
@@ -985,7 +1030,7 @@ class LUVerifyCluster(LogicalUnit):
 
     for instance in instancelist:
       feedback_fn("* Verifying instance %s" % instance)
 
     for instance in instancelist:
       feedback_fn("* Verifying instance %s" % instance)
-      inst_config = self.cfg.GetInstanceInfo(instance)
+      inst_config = instanceinfo[instance]
       result =  self._VerifyInstance(instance, inst_config, node_volume,
                                      node_instance, feedback_fn, n_offline)
       bad = bad or result
       result =  self._VerifyInstance(instance, inst_config, node_volume,
                                      node_instance, feedback_fn, n_offline)
       bad = bad or result
@@ -1066,6 +1111,9 @@ class LUVerifyCluster(LogicalUnit):
     if n_offline:
       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
 
     if n_offline:
       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
 
+    if n_drained:
+      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
+
     return not bad
 
   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
     return not bad
 
   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
@@ -1154,7 +1202,7 @@ class LUVerifyDisks(NoHooksLU):
     nv_dict = {}
     for inst in instances:
       inst_lvs = {}
     nv_dict = {}
     for inst in instances:
       inst_lvs = {}
-      if (inst.status != "up" or
+      if (not inst.admin_up or
           inst.disk_template not in constants.DTS_NET_MIRROR):
         continue
       inst.MapLVsByNode(inst_lvs)
           inst.disk_template not in constants.DTS_NET_MIRROR):
         continue
       inst.MapLVsByNode(inst_lvs)
@@ -1495,8 +1543,7 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False):
       continue
     rstats = rstats.data
     retries = 0
       continue
     rstats = rstats.data
     retries = 0
-    for i in range(len(rstats)):
-      mstat = rstats[i]
+    for i, mstat in enumerate(rstats):
       if mstat is None:
         lu.LogWarning("Can't compute data for node %s/%s",
                            node, instance.disks[i].iv_name)
       if mstat is None:
         lu.LogWarning("Can't compute data for node %s/%s",
                            node, instance.disks[i].iv_name)
@@ -1540,11 +1587,15 @@ def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
   result = True
   if on_primary or dev.AssembleOnSecondary():
     rstats = lu.rpc.call_blockdev_find(node, dev)
   result = True
   if on_primary or dev.AssembleOnSecondary():
     rstats = lu.rpc.call_blockdev_find(node, dev)
-    if rstats.failed or not rstats.data:
-      logging.warning("Node %s: disk degraded, not found or node down", node)
+    msg = rstats.RemoteFailMsg()
+    if msg:
+      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
+      result = False
+    elif not rstats.payload:
+      lu.LogWarning("Can't find disk on node %s", node)
       result = False
     else:
       result = False
     else:
-      result = result and (not rstats.data[idx])
+      result = result and (not rstats.payload[idx])
   if dev.children:
     for child in dev.children:
       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
   if dev.children:
     for child in dev.children:
       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
@@ -1687,11 +1738,8 @@ class LURemoveNode(LogicalUnit):
 
     for instance_name in instance_list:
       instance = self.cfg.GetInstanceInfo(instance_name)
 
     for instance_name in instance_list:
       instance = self.cfg.GetInstanceInfo(instance_name)
-      if node.name == instance.primary_node:
-        raise errors.OpPrereqError("Instance %s still running on the node,"
-                                   " please remove first." % instance_name)
-      if node.name in instance.secondary_nodes:
-        raise errors.OpPrereqError("Instance %s has node as a secondary,"
+      if node.name in instance.all_nodes:
+        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                    " please remove first." % instance_name)
     self.op.node_name = node.name
     self.node = node
                                    " please remove first." % instance_name)
     self.op.node_name = node.name
     self.node = node
@@ -1716,13 +1764,13 @@ class LUQueryNodes(NoHooksLU):
   """Logical unit for querying nodes.
 
   """
   """Logical unit for querying nodes.
 
   """
-  _OP_REQP = ["output_fields", "names"]
+  _OP_REQP = ["output_fields", "names", "use_locking"]
   REQ_BGL = False
   _FIELDS_DYNAMIC = utils.FieldSet(
     "dtotal", "dfree",
     "mtotal", "mnode", "mfree",
     "bootid",
   REQ_BGL = False
   _FIELDS_DYNAMIC = utils.FieldSet(
     "dtotal", "dfree",
     "mtotal", "mnode", "mfree",
     "bootid",
-    "ctotal",
+    "ctotal", "cnodes", "csockets",
     )
 
   _FIELDS_STATIC = utils.FieldSet(
     )
 
   _FIELDS_STATIC = utils.FieldSet(
@@ -1733,6 +1781,7 @@ class LUQueryNodes(NoHooksLU):
     "master_candidate",
     "master",
     "offline",
     "master_candidate",
     "master",
     "offline",
+    "drained",
     )
 
   def ExpandNames(self):
     )
 
   def ExpandNames(self):
@@ -1748,7 +1797,8 @@ class LUQueryNodes(NoHooksLU):
     else:
       self.wanted = locking.ALL_SET
 
     else:
       self.wanted = locking.ALL_SET
 
-    self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_locking = self.do_node_query and self.op.use_locking
     if self.do_locking:
       # if we don't request only static fields, we need to lock the nodes
       self.needed_locks[locking.LEVEL_NODE] = self.wanted
     if self.do_locking:
       # if we don't request only static fields, we need to lock the nodes
       self.needed_locks[locking.LEVEL_NODE] = self.wanted
@@ -1783,7 +1833,7 @@ class LUQueryNodes(NoHooksLU):
 
     # begin data gathering
 
 
     # begin data gathering
 
-    if self.do_locking:
+    if self.do_node_query:
       live_data = {}
       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                           self.cfg.GetHypervisorType())
       live_data = {}
       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                           self.cfg.GetHypervisorType())
@@ -1800,6 +1850,8 @@ class LUQueryNodes(NoHooksLU):
             "dfree": fn(int, nodeinfo.get('vg_free', None)),
             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
             "bootid": nodeinfo.get('bootid', None),
             "dfree": fn(int, nodeinfo.get('vg_free', None)),
             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
             "bootid": nodeinfo.get('bootid', None),
+            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
+            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
             }
         else:
           live_data[name] = {}
             }
         else:
           live_data[name] = {}
@@ -1854,6 +1906,8 @@ class LUQueryNodes(NoHooksLU):
           val = node.name == master_node
         elif field == "offline":
           val = node.offline
           val = node.name == master_node
         elif field == "offline":
           val = node.offline
+        elif field == "drained":
+          val = node.drained
         elif self._FIELDS_DYNAMIC.Matches(field):
           val = live_data[node.name].get(field, None)
         else:
         elif self._FIELDS_DYNAMIC.Matches(field):
           val = live_data[node.name].get(field, None)
         else:
@@ -2050,7 +2104,7 @@ class LUAddNode(LogicalUnit):
                                  primary_ip=primary_ip,
                                  secondary_ip=secondary_ip,
                                  master_candidate=master_candidate,
                                  primary_ip=primary_ip,
                                  secondary_ip=secondary_ip,
                                  master_candidate=master_candidate,
-                                 offline=False)
+                                 offline=False, drained=False)
 
   def Exec(self, feedback_fn):
     """Adds the new node to the cluster.
 
   def Exec(self, feedback_fn):
     """Adds the new node to the cluster.
@@ -2092,8 +2146,10 @@ class LUAddNode(LogicalUnit):
                                     keyarray[2],
                                     keyarray[3], keyarray[4], keyarray[5])
 
                                     keyarray[2],
                                     keyarray[3], keyarray[4], keyarray[5])
 
-    if result.failed or not result.data:
-      raise errors.OpExecError("Cannot transfer ssh keys to the new node")
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Cannot transfer ssh keys to the"
+                               " new node: %s" % msg)
 
     # Add node to our /etc/hosts, and add key to known_hosts
     utils.AddHostToEtcHosts(new_node.name)
 
     # Add node to our /etc/hosts, and add key to known_hosts
     utils.AddHostToEtcHosts(new_node.name)
@@ -2121,7 +2177,7 @@ class LUAddNode(LogicalUnit):
       if result[verifier].data['nodelist']:
         for failed in result[verifier].data['nodelist']:
           feedback_fn("ssh/hostname verification failed %s -> %s" %
       if result[verifier].data['nodelist']:
         for failed in result[verifier].data['nodelist']:
           feedback_fn("ssh/hostname verification failed %s -> %s" %
-                      (verifier, result[verifier]['nodelist'][failed]))
+                      (verifier, result[verifier].data['nodelist'][failed]))
         raise errors.OpExecError("ssh/hostname verification failed.")
 
     # Distribute updated /etc/hosts and known_hosts to all nodes,
         raise errors.OpExecError("ssh/hostname verification failed.")
 
     # Distribute updated /etc/hosts and known_hosts to all nodes,
@@ -2141,8 +2197,10 @@ class LUAddNode(LogicalUnit):
           logging.error("Copy of file %s to node %s failed", fname, to_node)
 
     to_copy = []
           logging.error("Copy of file %s to node %s failed", fname, to_node)
 
     to_copy = []
-    if constants.HT_XEN_HVM in self.cfg.GetClusterInfo().enabled_hypervisors:
+    enabled_hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
+    if constants.HTS_USE_VNC.intersection(enabled_hypervisors):
       to_copy.append(constants.VNC_PASSWORD_FILE)
       to_copy.append(constants.VNC_PASSWORD_FILE)
+
     for fname in to_copy:
       result = self.rpc.call_upload_file([node], fname)
       if result[node].failed or not result[node]:
     for fname in to_copy:
       result = self.rpc.call_upload_file([node], fname)
       if result[node].failed or not result[node]:
@@ -2245,11 +2303,9 @@ class LUSetNodeParams(LogicalUnit):
       result.append(("master_candidate", str(self.op.master_candidate)))
       if self.op.master_candidate == False:
         rrc = self.rpc.call_node_demote_from_mc(node.name)
       result.append(("master_candidate", str(self.op.master_candidate)))
       if self.op.master_candidate == False:
         rrc = self.rpc.call_node_demote_from_mc(node.name)
-        if (rrc.failed or not isinstance(rrc.data, (tuple, list))
-            or len(rrc.data) != 2):
-          self.LogWarning("Node rpc error: %s" % rrc.error)
-        elif not rrc.data[0]:
-          self.LogWarning("Node failed to demote itself: %s" % rrc.data[1])
+        msg = rrc.RemoteFailMsg()
+        if msg:
+          self.LogWarning("Node failed to demote itself: %s" % msg)
 
     # this will trigger configuration file update, if needed
     self.cfg.Update(node)
 
     # this will trigger configuration file update, if needed
     self.cfg.Update(node)
@@ -2292,7 +2348,8 @@ class LUQueryClusterInfo(NoHooksLU):
       "master": cluster.master_node,
       "default_hypervisor": cluster.default_hypervisor,
       "enabled_hypervisors": cluster.enabled_hypervisors,
       "master": cluster.master_node,
       "default_hypervisor": cluster.default_hypervisor,
       "enabled_hypervisors": cluster.enabled_hypervisors,
-      "hvparams": cluster.hvparams,
+      "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
+                        for hypervisor in cluster.enabled_hypervisors]),
       "beparams": cluster.beparams,
       "candidate_pool_size": cluster.candidate_pool_size,
       }
       "beparams": cluster.beparams,
       "candidate_pool_size": cluster.candidate_pool_size,
       }
@@ -2412,10 +2469,11 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(node_disk, node)
       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(node_disk, node)
       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
-      if result.failed or not result:
+      msg = result.RemoteFailMsg()
+      if msg:
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
-                           " (is_primary=False, pass=1)",
-                           inst_disk.iv_name, node)
+                           " (is_primary=False, pass=1): %s",
+                           inst_disk.iv_name, node, msg)
         if not ignore_secondaries:
           disks_ok = False
 
         if not ignore_secondaries:
           disks_ok = False
 
@@ -2428,10 +2486,11 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
         continue
       lu.cfg.SetDiskID(node_disk, node)
       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
         continue
       lu.cfg.SetDiskID(node_disk, node)
       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
-      if result.failed or not result:
+      msg = result.RemoteFailMsg()
+      if msg:
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
-                           " (is_primary=True, pass=2)",
-                           inst_disk.iv_name, node)
+                           " (is_primary=True, pass=2): %s",
+                           inst_disk.iv_name, node, msg)
         disks_ok = False
     device_info.append((instance.primary_node, inst_disk.iv_name, result.data))
 
         disks_ok = False
     device_info.append((instance.primary_node, inst_disk.iv_name, result.data))
 
@@ -2523,17 +2582,18 @@ def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
   ignored.
 
   """
   ignored.
 
   """
-  result = True
+  all_result = True
   for disk in instance.disks:
     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(top_disk, node)
       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
   for disk in instance.disks:
     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(top_disk, node)
       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
-      if result.failed or not result.data:
-        logging.error("Could not shutdown block device %s on node %s",
-                      disk.iv_name, node)
+      msg = result.RemoteFailMsg()
+      if msg:
+        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
+                      disk.iv_name, node, msg)
         if not ignore_primary or node != instance.primary_node:
         if not ignore_primary or node != instance.primary_node:
-          result = False
-  return result
+          all_result = False
+  return all_result
 
 
 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
 
 
 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
@@ -2592,8 +2652,7 @@ class LUStartupInstance(LogicalUnit):
       "FORCE": self.op.force,
       }
     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
       "FORCE": self.op.force,
       }
     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -2631,9 +2690,10 @@ class LUStartupInstance(LogicalUnit):
     _StartInstanceDisks(self, instance, force)
 
     result = self.rpc.call_instance_start(node_current, instance, extra_args)
     _StartInstanceDisks(self, instance, force)
 
     result = self.rpc.call_instance_start(node_current, instance, extra_args)
-    if result.failed or not result.data:
+    msg = result.RemoteFailMsg()
+    if msg:
       _ShutdownInstanceDisks(self, instance)
       _ShutdownInstanceDisks(self, instance)
-      raise errors.OpExecError("Could not start instance")
+      raise errors.OpExecError("Could not start instance: %s" % msg)
 
 
 class LURebootInstance(LogicalUnit):
 
 
 class LURebootInstance(LogicalUnit):
@@ -2665,8 +2725,7 @@ class LURebootInstance(LogicalUnit):
       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
       }
     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
       }
     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -2707,9 +2766,11 @@ class LURebootInstance(LogicalUnit):
       _ShutdownInstanceDisks(self, instance)
       _StartInstanceDisks(self, instance, ignore_secondaries)
       result = self.rpc.call_instance_start(node_current, instance, extra_args)
       _ShutdownInstanceDisks(self, instance)
       _StartInstanceDisks(self, instance, ignore_secondaries)
       result = self.rpc.call_instance_start(node_current, instance, extra_args)
-      if result.failed or not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
         _ShutdownInstanceDisks(self, instance)
         _ShutdownInstanceDisks(self, instance)
-        raise errors.OpExecError("Could not start instance for full reboot")
+        raise errors.OpExecError("Could not start instance for"
+                                 " full reboot: %s" % msg)
 
     self.cfg.MarkInstanceUp(instance.name)
 
 
     self.cfg.MarkInstanceUp(instance.name)
 
@@ -2733,8 +2794,7 @@ class LUShutdownInstance(LogicalUnit):
 
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
 
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -2781,8 +2841,7 @@ class LUReinstallInstance(LogicalUnit):
 
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
 
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -2799,7 +2858,7 @@ class LUReinstallInstance(LogicalUnit):
     if instance.disk_template == constants.DT_DISKLESS:
       raise errors.OpPrereqError("Instance '%s' has no disks" %
                                  self.op.instance_name)
     if instance.disk_template == constants.DT_DISKLESS:
       raise errors.OpPrereqError("Instance '%s' has no disks" %
                                  self.op.instance_name)
-    if instance.status != "down":
+    if instance.admin_up:
       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                  self.op.instance_name)
     remote_info = self.rpc.call_instance_info(instance.primary_node,
       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                  self.op.instance_name)
     remote_info = self.rpc.call_instance_info(instance.primary_node,
@@ -2841,11 +2900,11 @@ class LUReinstallInstance(LogicalUnit):
     try:
       feedback_fn("Running the instance OS create scripts...")
       result = self.rpc.call_instance_os_add(inst.primary_node, inst)
     try:
       feedback_fn("Running the instance OS create scripts...")
       result = self.rpc.call_instance_os_add(inst.primary_node, inst)
-      result.Raise()
-      if not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
         raise errors.OpExecError("Could not install OS for instance %s"
         raise errors.OpExecError("Could not install OS for instance %s"
-                                 " on node %s" %
-                                 (inst.name, inst.primary_node))
+                                 " on node %s: %s" %
+                                 (inst.name, inst.primary_node, msg))
     finally:
       _ShutdownInstanceDisks(self, inst)
 
     finally:
       _ShutdownInstanceDisks(self, inst)
 
@@ -2866,8 +2925,7 @@ class LURenameInstance(LogicalUnit):
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
     env["INSTANCE_NEW_NAME"] = self.op.new_name
     """
     env = _BuildInstanceHookEnvByObject(self, self.instance)
     env["INSTANCE_NEW_NAME"] = self.op.new_name
-    nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
-          list(self.instance.secondary_nodes))
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -2883,7 +2941,7 @@ class LURenameInstance(LogicalUnit):
                                  self.op.instance_name)
     _CheckNodeOnline(self, instance.primary_node)
 
                                  self.op.instance_name)
     _CheckNodeOnline(self, instance.primary_node)
 
-    if instance.status != "down":
+    if instance.admin_up:
       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                  self.op.instance_name)
     remote_info = self.rpc.call_instance_info(instance.primary_node,
       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                  self.op.instance_name)
     remote_info = self.rpc.call_instance_info(instance.primary_node,
@@ -2952,10 +3010,11 @@ class LURenameInstance(LogicalUnit):
     try:
       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                  old_name)
     try:
       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                  old_name)
-      if result.failed or not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
         msg = ("Could not run OS rename script for instance %s on node %s"
         msg = ("Could not run OS rename script for instance %s on node %s"
-               " (but the instance has been renamed in Ganeti)" %
-               (inst.name, inst.primary_node))
+               " (but the instance has been renamed in Ganeti): %s" %
+               (inst.name, inst.primary_node, msg))
         self.proc.LogWarning(msg)
     finally:
       _ShutdownInstanceDisks(self, inst)
         self.proc.LogWarning(msg)
     finally:
       _ShutdownInstanceDisks(self, inst)
@@ -3033,7 +3092,7 @@ class LUQueryInstances(NoHooksLU):
   """Logical unit for querying instances.
 
   """
   """Logical unit for querying instances.
 
   """
-  _OP_REQP = ["output_fields", "names"]
+  _OP_REQP = ["output_fields", "names", "use_locking"]
   REQ_BGL = False
   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                     "admin_state", "admin_ram",
   REQ_BGL = False
   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                     "admin_state", "admin_ram",
@@ -3041,7 +3100,7 @@ class LUQueryInstances(NoHooksLU):
                                     "sda_size", "sdb_size", "vcpus", "tags",
                                     "network_port", "beparams",
                                     "(disk).(size)/([0-9]+)",
                                     "sda_size", "sdb_size", "vcpus", "tags",
                                     "network_port", "beparams",
                                     "(disk).(size)/([0-9]+)",
-                                    "(disk).(sizes)",
+                                    "(disk).(sizes)", "disk_usage",
                                     "(nic).(mac|ip|bridge)/([0-9]+)",
                                     "(nic).(macs|ips|bridges)",
                                     "(disk|nic).(count)",
                                     "(nic).(mac|ip|bridge)/([0-9]+)",
                                     "(nic).(macs|ips|bridges)",
                                     "(disk|nic).(count)",
@@ -3067,7 +3126,8 @@ class LUQueryInstances(NoHooksLU):
     else:
       self.wanted = locking.ALL_SET
 
     else:
       self.wanted = locking.ALL_SET
 
-    self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+    self.do_locking = self.do_node_query and self.op.use_locking
     if self.do_locking:
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
       self.needed_locks[locking.LEVEL_NODE] = []
     if self.do_locking:
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
       self.needed_locks[locking.LEVEL_NODE] = []
@@ -3088,19 +3148,25 @@ class LUQueryInstances(NoHooksLU):
 
     """
     all_info = self.cfg.GetAllInstancesInfo()
 
     """
     all_info = self.cfg.GetAllInstancesInfo()
-    if self.do_locking:
-      instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
-    elif self.wanted != locking.ALL_SET:
-      instance_names = self.wanted
-      missing = set(instance_names).difference(all_info.keys())
-      if missing:
-        raise errors.OpExecError(
-          "Some instances were removed before retrieving their data: %s"
-          % missing)
+    if self.wanted == locking.ALL_SET:
+      # caller didn't specify instance names, so ordering is not important
+      if self.do_locking:
+        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
+      else:
+        instance_names = all_info.keys()
+      instance_names = utils.NiceSort(instance_names)
     else:
     else:
-      instance_names = all_info.keys()
+      # caller did specify names, so we must keep the ordering
+      if self.do_locking:
+        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
+      else:
+        tgt_set = all_info.keys()
+      missing = set(self.wanted).difference(tgt_set)
+      if missing:
+        raise errors.OpExecError("Some instances were removed before"
+                                 " retrieving their data: %s" % missing)
+      instance_names = self.wanted
 
 
-    instance_names = utils.NiceSort(instance_names)
     instance_list = [all_info[iname] for iname in instance_names]
 
     # begin data gathering
     instance_list = [all_info[iname] for iname in instance_names]
 
     # begin data gathering
@@ -3110,7 +3176,7 @@ class LUQueryInstances(NoHooksLU):
 
     bad_nodes = []
     off_nodes = []
 
     bad_nodes = []
     off_nodes = []
-    if self.do_locking:
+    if self.do_node_query:
       live_data = {}
       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
       for name in nodes:
       live_data = {}
       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
       for name in nodes:
@@ -3147,7 +3213,7 @@ class LUQueryInstances(NoHooksLU):
         elif field == "snodes":
           val = list(instance.secondary_nodes)
         elif field == "admin_state":
         elif field == "snodes":
           val = list(instance.secondary_nodes)
         elif field == "admin_state":
-          val = (instance.status != "down")
+          val = instance.admin_up
         elif field == "oper_state":
           if instance.primary_node in bad_nodes:
             val = None
         elif field == "oper_state":
           if instance.primary_node in bad_nodes:
             val = None
@@ -3161,12 +3227,12 @@ class LUQueryInstances(NoHooksLU):
           else:
             running = bool(live_data.get(instance.name))
             if running:
           else:
             running = bool(live_data.get(instance.name))
             if running:
-              if instance.status != "down":
+              if instance.admin_up:
                 val = "running"
               else:
                 val = "ERROR_up"
             else:
                 val = "running"
               else:
                 val = "ERROR_up"
             else:
-              if instance.status != "down":
+              if instance.admin_up:
                 val = "ERROR_down"
               else:
                 val = "ADMIN_down"
                 val = "ERROR_down"
               else:
                 val = "ADMIN_down"
@@ -3191,6 +3257,9 @@ class LUQueryInstances(NoHooksLU):
             val = instance.FindDisk(idx).size
           except errors.OpPrereqError:
             val = None
             val = instance.FindDisk(idx).size
           except errors.OpPrereqError:
             val = None
+        elif field == "disk_usage": # total disk usage per node
+          disk_sizes = [{'size': disk.size} for disk in instance.disks]
+          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
         elif field == "tags":
           val = list(instance.GetTags())
         elif field == "serial_no":
         elif field == "tags":
           val = list(instance.GetTags())
         elif field == "serial_no":
@@ -3340,7 +3409,7 @@ class LUFailoverInstance(LogicalUnit):
     for dev in instance.disks:
       # for drbd, these are drbd over lvm
       if not _CheckDiskConsistency(self, dev, target_node, False):
     for dev in instance.disks:
       # for drbd, these are drbd over lvm
       if not _CheckDiskConsistency(self, dev, target_node, False):
-        if instance.status == "up" and not self.op.ignore_consistency:
+        if instance.admin_up and not self.op.ignore_consistency:
           raise errors.OpExecError("Disk %s is degraded on target node,"
                                    " aborting failover." % dev.iv_name)
 
           raise errors.OpExecError("Disk %s is degraded on target node,"
                                    " aborting failover." % dev.iv_name)
 
@@ -3368,7 +3437,7 @@ class LUFailoverInstance(LogicalUnit):
     self.cfg.Update(instance)
 
     # Only start the instance if it's marked as up
     self.cfg.Update(instance)
 
     # Only start the instance if it's marked as up
-    if instance.status == "up":
+    if instance.admin_up:
       feedback_fn("* activating the instance's disks on target node")
       logging.info("Starting instance %s on node %s",
                    instance.name, target_node)
       feedback_fn("* activating the instance's disks on target node")
       logging.info("Starting instance %s on node %s",
                    instance.name, target_node)
@@ -3381,60 +3450,454 @@ class LUFailoverInstance(LogicalUnit):
 
       feedback_fn("* starting the instance on the target node")
       result = self.rpc.call_instance_start(target_node, instance, None)
 
       feedback_fn("* starting the instance on the target node")
       result = self.rpc.call_instance_start(target_node, instance, None)
-      if result.failed or not result.data:
+      msg = result.RemoteFailMsg()
+      if msg:
         _ShutdownInstanceDisks(self, instance)
         _ShutdownInstanceDisks(self, instance)
-        raise errors.OpExecError("Could not start instance %s on node %s." %
-                                 (instance.name, target_node))
+        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
+                                 (instance.name, target_node, msg))
 
 
 
 
-def _CreateBlockDevOnPrimary(lu, node, instance, device, info):
-  """Create a tree of block devices on the primary node.
+class LUMigrateInstance(LogicalUnit):
+  """Migrate an instance.
 
 
-  This always creates all devices.
+  This is migration without shutting down, compared to the failover,
+  which is done with shutdown.
 
   """
 
   """
-  if device.children:
-    for child in device.children:
-      if not _CreateBlockDevOnPrimary(lu, node, instance, child, info):
-        return False
+  HPATH = "instance-migrate"
+  HTYPE = constants.HTYPE_INSTANCE
+  _OP_REQP = ["instance_name", "live", "cleanup"]
 
 
-  lu.cfg.SetDiskID(device, node)
-  new_id = lu.rpc.call_blockdev_create(node, device, device.size,
-                                       instance.name, True, info)
-  if new_id.failed or not new_id.data:
-    return False
-  if device.physical_id is None:
-    device.physical_id = new_id
-  return True
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    self._ExpandAndLockInstance()
+    self.needed_locks[locking.LEVEL_NODE] = []
+    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODE:
+      self._LockInstancesNodes()
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    This runs on master, primary and secondary nodes of the instance.
+
+    """
+    env = _BuildInstanceHookEnvByObject(self, self.instance)
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
+    return env, nl, nl
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This checks that the instance is in the cluster.
+
+    """
+    instance = self.cfg.GetInstanceInfo(
+      self.cfg.ExpandInstanceName(self.op.instance_name))
+    if instance is None:
+      raise errors.OpPrereqError("Instance '%s' not known" %
+                                 self.op.instance_name)
+
+    if instance.disk_template != constants.DT_DRBD8:
+      raise errors.OpPrereqError("Instance's disk layout is not"
+                                 " drbd8, cannot migrate.")
+
+    secondary_nodes = instance.secondary_nodes
+    if not secondary_nodes:
+      raise errors.ProgrammerError("no secondary node but using "
+                                   "drbd8 disk template")
+
+    i_be = self.cfg.GetClusterInfo().FillBE(instance)
+
+    target_node = secondary_nodes[0]
+    # check memory requirements on the secondary node
+    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
+                         instance.name, i_be[constants.BE_MEMORY],
+                         instance.hypervisor)
+
+    # check bridge existance
+    brlist = [nic.bridge for nic in instance.nics]
+    result = self.rpc.call_bridges_exist(target_node, brlist)
+    if result.failed or not result.data:
+      raise errors.OpPrereqError("One or more target bridges %s does not"
+                                 " exist on destination node '%s'" %
+                                 (brlist, target_node))
+
+    if not self.op.cleanup:
+      result = self.rpc.call_instance_migratable(instance.primary_node,
+                                                 instance)
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
+                                   msg)
+
+    self.instance = instance
+
+  def _WaitUntilSync(self):
+    """Poll with custom rpc for disk sync.
+
+    This uses our own step-based rpc call.
+
+    """
+    self.feedback_fn("* wait until resync is done")
+    all_done = False
+    while not all_done:
+      all_done = True
+      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
+                                            self.nodes_ip,
+                                            self.instance.disks)
+      min_percent = 100
+      for node, nres in result.items():
+        msg = nres.RemoteFailMsg()
+        if msg:
+          raise errors.OpExecError("Cannot resync disks on node %s: %s" %
+                                   (node, msg))
+        node_done, node_percent = nres.payload
+        all_done = all_done and node_done
+        if node_percent is not None:
+          min_percent = min(min_percent, node_percent)
+      if not all_done:
+        if min_percent < 100:
+          self.feedback_fn("   - progress: %.1f%%" % min_percent)
+        time.sleep(2)
+
+  def _EnsureSecondary(self, node):
+    """Demote a node to secondary.
+
+    """
+    self.feedback_fn("* switching node %s to secondary mode" % node)
+
+    for dev in self.instance.disks:
+      self.cfg.SetDiskID(dev, node)
+
+    result = self.rpc.call_blockdev_close(node, self.instance.name,
+                                          self.instance.disks)
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
+                               " error %s" % (node, msg))
+
+  def _GoStandalone(self):
+    """Disconnect from the network.
+
+    """
+    self.feedback_fn("* changing into standalone mode")
+    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
+                                               self.instance.disks)
+    for node, nres in result.items():
+      msg = nres.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Cannot disconnect disks node %s,"
+                                 " error %s" % (node, msg))
+
+  def _GoReconnect(self, multimaster):
+    """Reconnect to the network.
+
+    """
+    if multimaster:
+      msg = "dual-master"
+    else:
+      msg = "single-master"
+    self.feedback_fn("* changing disks into %s mode" % msg)
+    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
+                                           self.instance.disks,
+                                           self.instance.name, multimaster)
+    for node, nres in result.items():
+      msg = nres.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Cannot change disks config on node %s,"
+                                 " error: %s" % (node, msg))
+
+  def _ExecCleanup(self):
+    """Try to cleanup after a failed migration.
+
+    The cleanup is done by:
+      - check that the instance is running only on one node
+        (and update the config if needed)
+      - change disks on its secondary node to secondary
+      - wait until disks are fully synchronized
+      - disconnect from the network
+      - change disks into single-master mode
+      - wait again until disks are fully synchronized
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    source_node = self.source_node
+
+    # check running on only one node
+    self.feedback_fn("* checking where the instance actually runs"
+                     " (if this hangs, the hypervisor might be in"
+                     " a bad state)")
+    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
+    for node, result in ins_l.items():
+      result.Raise()
+      if not isinstance(result.data, list):
+        raise errors.OpExecError("Can't contact node '%s'" % node)
+
+    runningon_source = instance.name in ins_l[source_node].data
+    runningon_target = instance.name in ins_l[target_node].data
+
+    if runningon_source and runningon_target:
+      raise errors.OpExecError("Instance seems to be running on two nodes,"
+                               " or the hypervisor is confused. You will have"
+                               " to ensure manually that it runs only on one"
+                               " and restart this operation.")
+
+    if not (runningon_source or runningon_target):
+      raise errors.OpExecError("Instance does not seem to be running at all."
+                               " In this case, it's safer to repair by"
+                               " running 'gnt-instance stop' to ensure disk"
+                               " shutdown, and then restarting it.")
+
+    if runningon_target:
+      # the migration has actually succeeded, we need to update the config
+      self.feedback_fn("* instance running on secondary node (%s),"
+                       " updating config" % target_node)
+      instance.primary_node = target_node
+      self.cfg.Update(instance)
+      demoted_node = source_node
+    else:
+      self.feedback_fn("* instance confirmed to be running on its"
+                       " primary node (%s)" % source_node)
+      demoted_node = target_node
+
+    self._EnsureSecondary(demoted_node)
+    try:
+      self._WaitUntilSync()
+    except errors.OpExecError:
+      # we ignore here errors, since if the device is standalone, it
+      # won't be able to sync
+      pass
+    self._GoStandalone()
+    self._GoReconnect(False)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* done")
+
+  def _RevertDiskStatus(self):
+    """Try to revert the disk status after a failed migration.
+
+    """
+    target_node = self.target_node
+    try:
+      self._EnsureSecondary(target_node)
+      self._GoStandalone()
+      self._GoReconnect(False)
+      self._WaitUntilSync()
+    except errors.OpExecError, err:
+      self.LogWarning("Migration failed and I can't reconnect the"
+                      " drives: error '%s'\n"
+                      "Please look and recover the instance status" %
+                      str(err))
+
+  def _AbortMigration(self):
+    """Call the hypervisor code to abort a started migration.
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    migration_info = self.migration_info
+
+    abort_result = self.rpc.call_finalize_migration(target_node,
+                                                    instance,
+                                                    migration_info,
+                                                    False)
+    abort_msg = abort_result.RemoteFailMsg()
+    if abort_msg:
+      logging.error("Aborting migration failed on target node %s: %s" %
+                    (target_node, abort_msg))
+      # Don't raise an exception here, as we stil have to try to revert the
+      # disk status, even if this step failed.
+
+  def _ExecMigration(self):
+    """Migrate an instance.
+
+    The migrate is done by:
+      - change the disks into dual-master mode
+      - wait until disks are fully synchronized again
+      - migrate the instance
+      - change disks on the new secondary node (the old primary) to secondary
+      - wait until disks are fully synchronized
+      - change disks into single-master mode
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    source_node = self.source_node
+
+    self.feedback_fn("* checking disk consistency between source and target")
+    for dev in instance.disks:
+      if not _CheckDiskConsistency(self, dev, target_node, False):
+        raise errors.OpExecError("Disk %s is degraded or not fully"
+                                 " synchronized on target node,"
+                                 " aborting migrate." % dev.iv_name)
+
+    # First get the migration information from the remote node
+    result = self.rpc.call_migration_info(source_node, instance)
+    msg = result.RemoteFailMsg()
+    if msg:
+      log_err = ("Failed fetching source migration information from %s: %s" %
+                 (source_node, msg))
+      logging.error(log_err)
+      raise errors.OpExecError(log_err)
+
+    self.migration_info = migration_info = result.payload
+
+    # Then switch the disks to master/master mode
+    self._EnsureSecondary(target_node)
+    self._GoStandalone()
+    self._GoReconnect(True)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* preparing %s to accept the instance" % target_node)
+    result = self.rpc.call_accept_instance(target_node,
+                                           instance,
+                                           migration_info,
+                                           self.nodes_ip[target_node])
+
+    msg = result.RemoteFailMsg()
+    if msg:
+      logging.error("Instance pre-migration failed, trying to revert"
+                    " disk status: %s", msg)
+      self._AbortMigration()
+      self._RevertDiskStatus()
+      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
+                               (instance.name, msg))
+
+    self.feedback_fn("* migrating instance to %s" % target_node)
+    time.sleep(10)
+    result = self.rpc.call_instance_migrate(source_node, instance,
+                                            self.nodes_ip[target_node],
+                                            self.op.live)
+    msg = result.RemoteFailMsg()
+    if msg:
+      logging.error("Instance migration failed, trying to revert"
+                    " disk status: %s", msg)
+      self._AbortMigration()
+      self._RevertDiskStatus()
+      raise errors.OpExecError("Could not migrate instance %s: %s" %
+                               (instance.name, msg))
+    time.sleep(10)
+
+    instance.primary_node = target_node
+    # distribute new instance config to the other nodes
+    self.cfg.Update(instance)
+
+    result = self.rpc.call_finalize_migration(target_node,
+                                              instance,
+                                              migration_info,
+                                              True)
+    msg = result.RemoteFailMsg()
+    if msg:
+      logging.error("Instance migration succeeded, but finalization failed:"
+                    " %s" % msg)
+      raise errors.OpExecError("Could not finalize instance migration: %s" %
+                               msg)
+
+    self._EnsureSecondary(source_node)
+    self._WaitUntilSync()
+    self._GoStandalone()
+    self._GoReconnect(False)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* done")
+
+  def Exec(self, feedback_fn):
+    """Perform the migration.
+
+    """
+    self.feedback_fn = feedback_fn
+
+    self.source_node = self.instance.primary_node
+    self.target_node = self.instance.secondary_nodes[0]
+    self.all_nodes = [self.source_node, self.target_node]
+    self.nodes_ip = {
+      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
+      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
+      }
+    if self.op.cleanup:
+      return self._ExecCleanup()
+    else:
+      return self._ExecMigration()
 
 
 
 
-def _CreateBlockDevOnSecondary(lu, node, instance, device, force, info):
-  """Create a tree of block devices on a secondary node.
+def _CreateBlockDev(lu, node, instance, device, force_create,
+                    info, force_open):
+  """Create a tree of block devices on a given node.
 
   If this device type has to be created on secondaries, create it and
   all its children.
 
   If not, just recurse to children keeping the same 'force' value.
 
 
   If this device type has to be created on secondaries, create it and
   all its children.
 
   If not, just recurse to children keeping the same 'force' value.
 
+  @param lu: the lu on whose behalf we execute
+  @param node: the node on which to create the device
+  @type instance: L{objects.Instance}
+  @param instance: the instance which owns the device
+  @type device: L{objects.Disk}
+  @param device: the device to create
+  @type force_create: boolean
+  @param force_create: whether to force creation of this device; this
+      will be change to True whenever we find a device which has
+      CreateOnSecondary() attribute
+  @param info: the extra 'metadata' we should attach to the device
+      (this will be represented as a LVM tag)
+  @type force_open: boolean
+  @param force_open: this parameter will be passes to the
+      L{backend.BlockdevCreate} function where it specifies
+      whether we run on primary or not, and it affects both
+      the child assembly and the device own Open() execution
+
   """
   if device.CreateOnSecondary():
   """
   if device.CreateOnSecondary():
-    force = True
+    force_create = True
+
   if device.children:
     for child in device.children:
   if device.children:
     for child in device.children:
-      if not _CreateBlockDevOnSecondary(lu, node, instance,
-                                        child, force, info):
-        return False
+      _CreateBlockDev(lu, node, instance, child, force_create,
+                      info, force_open)
 
 
-  if not force:
-    return True
+  if not force_create:
+    return
+
+  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
+
+
+def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
+  """Create a single block device on a given node.
+
+  This will not recurse over children of the device, so they must be
+  created in advance.
+
+  @param lu: the lu on whose behalf we execute
+  @param node: the node on which to create the device
+  @type instance: L{objects.Instance}
+  @param instance: the instance which owns the device
+  @type device: L{objects.Disk}
+  @param device: the device to create
+  @param info: the extra 'metadata' we should attach to the device
+      (this will be represented as a LVM tag)
+  @type force_open: boolean
+  @param force_open: this parameter will be passes to the
+      L{backend.BlockdevCreate} function where it specifies
+      whether we run on primary or not, and it affects both
+      the child assembly and the device own Open() execution
+
+  """
   lu.cfg.SetDiskID(device, node)
   lu.cfg.SetDiskID(device, node)
-  new_id = lu.rpc.call_blockdev_create(node, device, device.size,
-                                       instance.name, False, info)
-  if new_id.failed or not new_id.data:
-    return False
+  result = lu.rpc.call_blockdev_create(node, device, device.size,
+                                       instance.name, force_open, info)
+  msg = result.RemoteFailMsg()
+  if msg:
+    raise errors.OpExecError("Can't create block device %s on"
+                             " node %s for instance %s: %s" %
+                             (device, node, instance.name, msg))
   if device.physical_id is None:
   if device.physical_id is None:
-    device.physical_id = new_id
-  return True
+    device.physical_id = result.payload
 
 
 def _GenerateUniqueNames(lu, exts):
 
 
 def _GenerateUniqueNames(lu, exts):
@@ -3496,7 +3959,8 @@ def _GenerateDiskTemplate(lu, template_name,
       disk_index = idx + base_index
       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                               logical_id=(vgname, names[idx]),
       disk_index = idx + base_index
       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                               logical_id=(vgname, names[idx]),
-                              iv_name="disk/%d" % disk_index)
+                              iv_name="disk/%d" % disk_index,
+                              mode=disk["mode"])
       disks.append(disk_dev)
   elif template_name == constants.DT_DRBD8:
     if len(secondary_nodes) != 1:
       disks.append(disk_dev)
   elif template_name == constants.DT_DRBD8:
     if len(secondary_nodes) != 1:
@@ -3505,17 +3969,18 @@ def _GenerateDiskTemplate(lu, template_name,
     minors = lu.cfg.AllocateDRBDMinor(
       [primary_node, remote_node] * len(disk_info), instance_name)
 
     minors = lu.cfg.AllocateDRBDMinor(
       [primary_node, remote_node] * len(disk_info), instance_name)
 
-    names = _GenerateUniqueNames(lu,
-                                 [".disk%d_%s" % (i, s)
-                                  for i in range(disk_count)
-                                  for s in ("data", "meta")
-                                  ])
+    names = []
+    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
+                                               for i in range(disk_count)]):
+      names.append(lv_prefix + "_data")
+      names.append(lv_prefix + "_meta")
     for idx, disk in enumerate(disk_info):
       disk_index = idx + base_index
       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                       disk["size"], names[idx*2:idx*2+2],
                                       "disk/%d" % disk_index,
                                       minors[idx*2], minors[idx*2+1])
     for idx, disk in enumerate(disk_info):
       disk_index = idx + base_index
       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                       disk["size"], names[idx*2:idx*2+2],
                                       "disk/%d" % disk_index,
                                       minors[idx*2], minors[idx*2+1])
+      disk_dev.mode = disk["mode"]
       disks.append(disk_dev)
   elif template_name == constants.DT_FILE:
     if len(secondary_nodes) != 0:
       disks.append(disk_dev)
   elif template_name == constants.DT_FILE:
     if len(secondary_nodes) != 0:
@@ -3527,7 +3992,8 @@ def _GenerateDiskTemplate(lu, template_name,
                               iv_name="disk/%d" % disk_index,
                               logical_id=(file_driver,
                                           "%s/disk%d" % (file_storage_dir,
                               iv_name="disk/%d" % disk_index,
                               logical_id=(file_driver,
                                           "%s/disk%d" % (file_storage_dir,
-                                                         idx)))
+                                                         idx)),
+                              mode=disk["mode"])
       disks.append(disk_dev)
   else:
     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
       disks.append(disk_dev)
   else:
     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
@@ -3555,19 +4021,18 @@ def _CreateDisks(lu, instance):
 
   """
   info = _GetInstanceInfoText(instance)
 
   """
   info = _GetInstanceInfoText(instance)
+  pnode = instance.primary_node
 
   if instance.disk_template == constants.DT_FILE:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
 
   if instance.disk_template == constants.DT_FILE:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
-    result = lu.rpc.call_file_storage_dir_create(instance.primary_node,
-                                                 file_storage_dir)
+    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
 
     if result.failed or not result.data:
 
     if result.failed or not result.data:
-      logging.error("Could not connect to node '%s'", instance.primary_node)
-      return False
+      raise errors.OpExecError("Could not connect to node '%s'" % pnode)
 
     if not result.data[0]:
 
     if not result.data[0]:
-      logging.error("Failed to create directory '%s'", file_storage_dir)
-      return False
+      raise errors.OpExecError("Failed to create directory '%s'" %
+                               file_storage_dir)
 
   # Note: this needs to be kept in sync with adding of disks in
   # LUSetInstanceParams
 
   # Note: this needs to be kept in sync with adding of disks in
   # LUSetInstanceParams
@@ -3575,19 +4040,9 @@ def _CreateDisks(lu, instance):
     logging.info("Creating volume %s for instance %s",
                  device.iv_name, instance.name)
     #HARDCODE
     logging.info("Creating volume %s for instance %s",
                  device.iv_name, instance.name)
     #HARDCODE
-    for secondary_node in instance.secondary_nodes:
-      if not _CreateBlockDevOnSecondary(lu, secondary_node, instance,
-                                        device, False, info):
-        logging.error("Failed to create volume %s (%s) on secondary node %s!",
-                      device.iv_name, device, secondary_node)
-        return False
-    #HARDCODE
-    if not _CreateBlockDevOnPrimary(lu, instance.primary_node,
-                                    instance, device, info):
-      logging.error("Failed to create volume %s on primary!", device.iv_name)
-      return False
-
-  return True
+    for node in instance.all_nodes:
+      f_create = node == pnode
+      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
 
 
 def _RemoveDisks(lu, instance):
 
 
 def _RemoveDisks(lu, instance):
@@ -3608,15 +4063,15 @@ def _RemoveDisks(lu, instance):
   """
   logging.info("Removing block devices for instance %s", instance.name)
 
   """
   logging.info("Removing block devices for instance %s", instance.name)
 
-  result = True
+  all_result = True
   for device in instance.disks:
     for node, disk in device.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(disk, node)
   for device in instance.disks:
     for node, disk in device.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(disk, node)
-      result = lu.rpc.call_blockdev_remove(node, disk)
-      if result.failed or not result.data:
-        lu.proc.LogWarning("Could not remove block device %s on node %s,"
-                           " continuing anyway", device.iv_name, node)
-        result = False
+      msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
+      if msg:
+        lu.LogWarning("Could not remove block device %s on node %s,"
+                      " continuing anyway: %s", device.iv_name, node, msg)
+        all_result = False
 
   if instance.disk_template == constants.DT_FILE:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
 
   if instance.disk_template == constants.DT_FILE:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
@@ -3624,9 +4079,9 @@ def _RemoveDisks(lu, instance):
                                                  file_storage_dir)
     if result.failed or not result.data:
       logging.error("Could not remove directory '%s'", file_storage_dir)
                                                  file_storage_dir)
     if result.failed or not result.data:
       logging.error("Could not remove directory '%s'", file_storage_dir)
-      result = False
+      all_result = False
 
 
-  return result
+  return all_result
 
 
 def _ComputeDiskSize(disk_template, disks):
 
 
 def _ComputeDiskSize(disk_template, disks):
@@ -3671,13 +4126,12 @@ def _CheckHVParams(lu, nodenames, hvname, hvparams):
                                                   hvparams)
   for node in nodenames:
     info = hvinfo[node]
                                                   hvparams)
   for node in nodenames:
     info = hvinfo[node]
-    info.Raise()
-    if not info.data or not isinstance(info.data, (tuple, list)):
-      raise errors.OpPrereqError("Cannot get current information"
-                                 " from node '%s' (%s)" % (node, info.data))
-    if not info.data[0]:
+    if info.offline:
+      continue
+    msg = info.RemoteFailMsg()
+    if msg:
       raise errors.OpPrereqError("Hypervisor parameter validation failed:"
       raise errors.OpPrereqError("Hypervisor parameter validation failed:"
-                                 " %s" % info.data[1])
+                                 " %s" % msg)
 
 
 class LUCreateInstance(LogicalUnit):
 
 
 class LUCreateInstance(LogicalUnit):
@@ -3784,7 +4238,9 @@ class LUCreateInstance(LogicalUnit):
           raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                      mac)
       # bridge verification
           raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                      mac)
       # bridge verification
-      bridge = nic.get("bridge", self.cfg.GetDefBridge())
+      bridge = nic.get("bridge", None)
+      if bridge is None:
+        bridge = self.cfg.GetDefBridge()
       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
 
     # disk checks/pre-build
       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
 
     # disk checks/pre-build
@@ -3911,7 +4367,7 @@ class LUCreateInstance(LogicalUnit):
     env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
       primary_node=self.op.pnode,
       secondary_nodes=self.secondaries,
     env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
       primary_node=self.op.pnode,
       secondary_nodes=self.secondaries,
-      status=self.instance_status,
+      status=self.op.start,
       os_type=self.op.os_type,
       memory=self.be_full[constants.BE_MEMORY],
       vcpus=self.be_full[constants.BE_VCPUS],
       os_type=self.op.os_type,
       memory=self.be_full[constants.BE_MEMORY],
       vcpus=self.be_full[constants.BE_VCPUS],
@@ -4086,11 +4542,6 @@ class LUCreateInstance(LogicalUnit):
                            self.be_full[constants.BE_MEMORY],
                            self.op.hypervisor)
 
                            self.be_full[constants.BE_MEMORY],
                            self.op.hypervisor)
 
-    if self.op.start:
-      self.instance_status = 'up'
-    else:
-      self.instance_status = 'down'
-
   def Exec(self, feedback_fn):
     """Create and add the instance to the cluster.
 
   def Exec(self, feedback_fn):
     """Create and add the instance to the cluster.
 
@@ -4136,7 +4587,7 @@ class LUCreateInstance(LogicalUnit):
                             primary_node=pnode_name,
                             nics=self.nics, disks=disks,
                             disk_template=self.op.disk_template,
                             primary_node=pnode_name,
                             nics=self.nics, disks=disks,
                             disk_template=self.op.disk_template,
-                            status=self.instance_status,
+                            admin_up=False,
                             network_port=network_port,
                             beparams=self.op.beparams,
                             hvparams=self.op.hvparams,
                             network_port=network_port,
                             beparams=self.op.beparams,
                             hvparams=self.op.hvparams,
@@ -4144,10 +4595,15 @@ class LUCreateInstance(LogicalUnit):
                             )
 
     feedback_fn("* creating instance disks...")
                             )
 
     feedback_fn("* creating instance disks...")
-    if not _CreateDisks(self, iobj):
-      _RemoveDisks(self, iobj)
-      self.cfg.ReleaseDRBDMinors(instance)
-      raise errors.OpExecError("Device creation failed, reverting...")
+    try:
+      _CreateDisks(self, iobj)
+    except errors.OpExecError:
+      self.LogWarning("Device creation failed, reverting...")
+      try:
+        _RemoveDisks(self, iobj)
+      finally:
+        self.cfg.ReleaseDRBDMinors(instance)
+        raise
 
     feedback_fn("adding instance %s to cluster config" % instance)
 
 
     feedback_fn("adding instance %s to cluster config" % instance)
 
@@ -4155,8 +4611,6 @@ class LUCreateInstance(LogicalUnit):
     # Declare that we don't want to remove the instance lock anymore, as we've
     # added the instance to the config
     del self.remove_locks[locking.LEVEL_INSTANCE]
     # Declare that we don't want to remove the instance lock anymore, as we've
     # added the instance to the config
     del self.remove_locks[locking.LEVEL_INSTANCE]
-    # Remove the temp. assignements for the instance's drbds
-    self.cfg.ReleaseDRBDMinors(instance)
     # Unlock all the nodes
     if self.op.mode == constants.INSTANCE_IMPORT:
       nodes_keep = [self.op.src_node]
     # Unlock all the nodes
     if self.op.mode == constants.INSTANCE_IMPORT:
       nodes_keep = [self.op.src_node]
@@ -4193,11 +4647,11 @@ class LUCreateInstance(LogicalUnit):
       if self.op.mode == constants.INSTANCE_CREATE:
         feedback_fn("* running the instance OS create scripts...")
         result = self.rpc.call_instance_os_add(pnode_name, iobj)
       if self.op.mode == constants.INSTANCE_CREATE:
         feedback_fn("* running the instance OS create scripts...")
         result = self.rpc.call_instance_os_add(pnode_name, iobj)
-        result.Raise()
-        if not result.data:
+        msg = result.RemoteFailMsg()
+        if msg:
           raise errors.OpExecError("Could not add os for instance %s"
           raise errors.OpExecError("Could not add os for instance %s"
-                                   " on node %s" %
-                                   (instance, pnode_name))
+                                   " on node %s: %s" %
+                                   (instance, pnode_name, msg))
 
       elif self.op.mode == constants.INSTANCE_IMPORT:
         feedback_fn("* running the instance OS import scripts...")
 
       elif self.op.mode == constants.INSTANCE_IMPORT:
         feedback_fn("* running the instance OS import scripts...")
@@ -4219,12 +4673,14 @@ class LUCreateInstance(LogicalUnit):
                                      % self.op.mode)
 
     if self.op.start:
                                      % self.op.mode)
 
     if self.op.start:
+      iobj.admin_up = True
+      self.cfg.Update(iobj)
       logging.info("Starting instance %s on node %s", instance, pnode_name)
       feedback_fn("* starting instance...")
       result = self.rpc.call_instance_start(pnode_name, iobj, None)
       logging.info("Starting instance %s on node %s", instance, pnode_name)
       feedback_fn("* starting instance...")
       result = self.rpc.call_instance_start(pnode_name, iobj, None)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Could not start instance")
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Could not start instance: %s" % msg)
 
 
 class LUConnectConsole(NoHooksLU):
 
 
 class LUConnectConsole(NoHooksLU):
@@ -4269,7 +4725,12 @@ class LUConnectConsole(NoHooksLU):
     logging.debug("Connecting to console of %s on %s", instance.name, node)
 
     hyper = hypervisor.GetHypervisor(instance.hypervisor)
     logging.debug("Connecting to console of %s on %s", instance.name, node)
 
     hyper = hypervisor.GetHypervisor(instance.hypervisor)
-    console_cmd = hyper.GetShellCommandForConsole(instance)
+    cluster = self.cfg.GetClusterInfo()
+    # beparams and hvparams are passed separately, to avoid editing the
+    # instance and then saving the defaults in the instance itself.
+    hvparams = cluster.FillHV(instance)
+    beparams = cluster.FillBE(instance)
+    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
 
     # build ssh cmdline
     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
 
     # build ssh cmdline
     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
@@ -4317,6 +4778,10 @@ class LUReplaceDisks(LogicalUnit):
         raise errors.OpPrereqError("Node '%s' not known" %
                                    self.op.remote_node)
       self.op.remote_node = remote_node
         raise errors.OpPrereqError("Node '%s' not known" %
                                    self.op.remote_node)
       self.op.remote_node = remote_node
+      # Warning: do not remove the locking of the new secondary here
+      # unless DRBD8.AddChildren is changed to work in parallel;
+      # currently it doesn't since parallel invocations of
+      # FindUnusedMinor will conflict
       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
     else:
       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
     else:
@@ -4484,9 +4949,13 @@ class LUReplaceDisks(LogicalUnit):
       for node in tgt_node, oth_node:
         info("checking disk/%d on %s" % (idx, node))
         cfg.SetDiskID(dev, node)
       for node in tgt_node, oth_node:
         info("checking disk/%d on %s" % (idx, node))
         cfg.SetDiskID(dev, node)
-        if not self.rpc.call_blockdev_find(node, dev):
-          raise errors.OpExecError("Can't find disk/%d on node %s" %
-                                   (idx, node))
+        result = self.rpc.call_blockdev_find(node, dev)
+        msg = result.RemoteFailMsg()
+        if not msg and not result.payload:
+          msg = "disk not found"
+        if msg:
+          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
+                                   (idx, node, msg))
 
     # Step: check other node consistency
     self.proc.LogStep(2, steps_total, "check peer consistency")
 
     # Step: check other node consistency
     self.proc.LogStep(2, steps_total, "check peer consistency")
@@ -4519,15 +4988,10 @@ class LUReplaceDisks(LogicalUnit):
       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
       info("creating new local storage on %s for %s" %
            (tgt_node, dev.iv_name))
       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
       info("creating new local storage on %s for %s" %
            (tgt_node, dev.iv_name))
-      # since we *always* want to create this LV, we use the
-      # _Create...OnPrimary (which forces the creation), even if we
-      # are talking about the secondary node
+      # we pass force_create=True to force the LVM creation
       for new_lv in new_lvs:
       for new_lv in new_lvs:
-        if not _CreateBlockDevOnPrimary(self, tgt_node, instance, new_lv,
-                                        _GetInstanceInfoText(instance)):
-          raise errors.OpExecError("Failed to create new LV named '%s' on"
-                                   " node '%s'" %
-                                   (new_lv.logical_id[1], tgt_node))
+        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
+                        _GetInstanceInfoText(instance), False)
 
     # Step: for each lv, detach+rename*2+attach
     self.proc.LogStep(4, steps_total, "change drbd configuration")
 
     # Step: for each lv, detach+rename*2+attach
     self.proc.LogStep(4, steps_total, "change drbd configuration")
@@ -4554,8 +5018,9 @@ class LUReplaceDisks(LogicalUnit):
       # build the rename list based on what LVs exist on the node
       rlist = []
       for to_ren in old_lvs:
       # build the rename list based on what LVs exist on the node
       rlist = []
       for to_ren in old_lvs:
-        find_res = self.rpc.call_blockdev_find(tgt_node, to_ren)
-        if not find_res.failed and find_res.data is not None: # device exists
+        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
+        if not result.RemoteFailMsg() and result.payload:
+          # device exists
           rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
 
       info("renaming the old LVs on the target node")
           rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
 
       info("renaming the old LVs on the target node")
@@ -4584,10 +5049,10 @@ class LUReplaceDisks(LogicalUnit):
       result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
       if result.failed or not result.data:
         for new_lv in new_lvs:
       result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
       if result.failed or not result.data:
         for new_lv in new_lvs:
-          result = self.rpc.call_blockdev_remove(tgt_node, new_lv)
-          if result.failed or not result.data:
-            warning("Can't rollback device %s", hint="manually cleanup unused"
-                    " logical volumes")
+          msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
+          if msg:
+            warning("Can't rollback device %s: %s", dev, msg,
+                    hint="cleanup manually the unused logical volumes")
         raise errors.OpExecError("Can't add local storage to drbd")
 
       dev.children = new_lvs
         raise errors.OpExecError("Can't add local storage to drbd")
 
       dev.children = new_lvs
@@ -4605,7 +5070,13 @@ class LUReplaceDisks(LogicalUnit):
     for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
       cfg.SetDiskID(dev, instance.primary_node)
       result = self.rpc.call_blockdev_find(instance.primary_node, dev)
     for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
       cfg.SetDiskID(dev, instance.primary_node)
       result = self.rpc.call_blockdev_find(instance.primary_node, dev)
-      if result.failed or result.data[5]:
+      msg = result.RemoteFailMsg()
+      if not msg and not result.payload:
+        msg = "disk not found"
+      if msg:
+        raise errors.OpExecError("Can't find DRBD device %s: %s" %
+                                 (name, msg))
+      if result.payload[5]:
         raise errors.OpExecError("DRBD device %s is degraded!" % name)
 
     # Step: remove old storage
         raise errors.OpExecError("DRBD device %s is degraded!" % name)
 
     # Step: remove old storage
@@ -4614,9 +5085,10 @@ class LUReplaceDisks(LogicalUnit):
       info("remove logical volumes for %s" % name)
       for lv in old_lvs:
         cfg.SetDiskID(lv, tgt_node)
       info("remove logical volumes for %s" % name)
       for lv in old_lvs:
         cfg.SetDiskID(lv, tgt_node)
-        result = self.rpc.call_blockdev_remove(tgt_node, lv)
-        if result.failed or not result.data:
-          warning("Can't remove old LV", hint="manually remove unused LVs")
+        msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
+        if msg:
+          warning("Can't remove old LV: %s" % msg,
+                  hint="manually remove unused LVs")
           continue
 
   def _ExecD8Secondary(self, feedback_fn):
           continue
 
   def _ExecD8Secondary(self, feedback_fn):
@@ -4647,6 +5119,11 @@ class LUReplaceDisks(LogicalUnit):
     old_node = self.tgt_node
     new_node = self.new_node
     pri_node = instance.primary_node
     old_node = self.tgt_node
     new_node = self.new_node
     pri_node = instance.primary_node
+    nodes_ip = {
+      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
+      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
+      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
+      }
 
     # Step: check device activation
     self.proc.LogStep(1, steps_total, "check device existence")
 
     # Step: check device activation
     self.proc.LogStep(1, steps_total, "check device existence")
@@ -4664,10 +5141,12 @@ class LUReplaceDisks(LogicalUnit):
       info("checking disk/%d on %s" % (idx, pri_node))
       cfg.SetDiskID(dev, pri_node)
       result = self.rpc.call_blockdev_find(pri_node, dev)
       info("checking disk/%d on %s" % (idx, pri_node))
       cfg.SetDiskID(dev, pri_node)
       result = self.rpc.call_blockdev_find(pri_node, dev)
-      result.Raise()
-      if not result.data:
-        raise errors.OpExecError("Can't find disk/%d on node %s" %
-                                 (idx, pri_node))
+      msg = result.RemoteFailMsg()
+      if not msg and not result.payload:
+        msg = "disk not found"
+      if msg:
+        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
+                                 (idx, pri_node, msg))
 
     # Step: check other node consistency
     self.proc.LogStep(2, steps_total, "check peer consistency")
 
     # Step: check other node consistency
     self.proc.LogStep(2, steps_total, "check peer consistency")
@@ -4685,15 +5164,10 @@ class LUReplaceDisks(LogicalUnit):
     for idx, dev in enumerate(instance.disks):
       info("adding new local storage on %s for disk/%d" %
            (new_node, idx))
     for idx, dev in enumerate(instance.disks):
       info("adding new local storage on %s for disk/%d" %
            (new_node, idx))
-      # since we *always* want to create this LV, we use the
-      # _Create...OnPrimary (which forces the creation), even if we
-      # are talking about the secondary node
+      # we pass force_create=True to force LVM creation
       for new_lv in dev.children:
       for new_lv in dev.children:
-        if not _CreateBlockDevOnPrimary(self, new_node, instance, new_lv,
-                                        _GetInstanceInfoText(instance)):
-          raise errors.OpExecError("Failed to create new LV named '%s' on"
-                                   " node '%s'" %
-                                   (new_lv.logical_id[1], new_node))
+        _CreateBlockDev(self, new_node, instance, new_lv, True,
+                        _GetInstanceInfoText(instance), False)
 
     # Step 4: dbrd minors and drbd setups changes
     # after this, we must manually remove the drbd minors on both the
 
     # Step 4: dbrd minors and drbd setups changes
     # after this, we must manually remove the drbd minors on both the
@@ -4705,57 +5179,52 @@ class LUReplaceDisks(LogicalUnit):
     for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
       size = dev.size
       info("activating a new drbd on %s for disk/%d" % (new_node, idx))
     for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
       size = dev.size
       info("activating a new drbd on %s for disk/%d" % (new_node, idx))
-      # create new devices on new_node
-      if pri_node == dev.logical_id[0]:
-        new_logical_id = (pri_node, new_node,
-                          dev.logical_id[2], dev.logical_id[3], new_minor,
-                          dev.logical_id[5])
+      # create new devices on new_node; note that we create two IDs:
+      # one without port, so the drbd will be activated without
+      # networking information on the new node at this stage, and one
+      # with network, for the latter activation in step 4
+      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
+      if pri_node == o_node1:
+        p_minor = o_minor1
       else:
       else:
-        new_logical_id = (new_node, pri_node,
-                          dev.logical_id[2], new_minor, dev.logical_id[4],
-                          dev.logical_id[5])
-      iv_names[idx] = (dev, dev.children, new_logical_id)
+        p_minor = o_minor2
+
+      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
+      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)
+
+      iv_names[idx] = (dev, dev.children, new_net_id)
       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
-                    new_logical_id)
+                    new_net_id)
       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
-                              logical_id=new_logical_id,
+                              logical_id=new_alone_id,
                               children=dev.children)
                               children=dev.children)
-      if not _CreateBlockDevOnSecondary(self, new_node, instance,
-                                        new_drbd, False,
-                                        _GetInstanceInfoText(instance)):
+      try:
+        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
+                              _GetInstanceInfoText(instance), False)
+      except errors.BlockDeviceError:
         self.cfg.ReleaseDRBDMinors(instance.name)
         self.cfg.ReleaseDRBDMinors(instance.name)
-        raise errors.OpExecError("Failed to create new DRBD on"
-                                 " node '%s'" % new_node)
+        raise
 
     for idx, dev in enumerate(instance.disks):
       # we have new devices, shutdown the drbd on the old secondary
       info("shutting down drbd for disk/%d on old node" % idx)
       cfg.SetDiskID(dev, old_node)
 
     for idx, dev in enumerate(instance.disks):
       # we have new devices, shutdown the drbd on the old secondary
       info("shutting down drbd for disk/%d on old node" % idx)
       cfg.SetDiskID(dev, old_node)
-      result = self.rpc.call_blockdev_shutdown(old_node, dev)
-      if result.failed or not result.data:
-        warning("Failed to shutdown drbd for disk/%d on old node" % idx,
+      msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
+      if msg:
+        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
+                (idx, msg),
                 hint="Please cleanup this device manually as soon as possible")
 
     info("detaching primary drbds from the network (=> standalone)")
                 hint="Please cleanup this device manually as soon as possible")
 
     info("detaching primary drbds from the network (=> standalone)")
-    done = 0
-    for idx, dev in enumerate(instance.disks):
-      cfg.SetDiskID(dev, pri_node)
-      # set the network part of the physical (unique in bdev terms) id
-      # to None, meaning detach from network
-      dev.physical_id = (None, None, None, None) + dev.physical_id[4:]
-      # and 'find' the device, which will 'fix' it to match the
-      # standalone state
-      result = self.rpc.call_blockdev_find(pri_node, dev)
-      if not result.failed and result.data:
-        done += 1
-      else:
-        warning("Failed to detach drbd disk/%d from network, unusual case" %
-                idx)
+    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
+                                               instance.disks)[pri_node]
 
 
-    if not done:
-      # no detaches succeeded (very unlikely)
+    msg = result.RemoteFailMsg()
+    if msg:
+      # detaches didn't succeed (unlikely)
       self.cfg.ReleaseDRBDMinors(instance.name)
       self.cfg.ReleaseDRBDMinors(instance.name)
-      raise errors.OpExecError("Can't detach at least one DRBD from old node")
+      raise errors.OpExecError("Can't detach the disks from the network on"
+                               " old node: %s" % (msg,))
 
     # if we managed to detach at least one, we update all the disks of
     # the instance to point to the new secondary
 
     # if we managed to detach at least one, we update all the disks of
     # the instance to point to the new secondary
@@ -4764,23 +5233,18 @@ class LUReplaceDisks(LogicalUnit):
       dev.logical_id = new_logical_id
       cfg.SetDiskID(dev, pri_node)
     cfg.Update(instance)
       dev.logical_id = new_logical_id
       cfg.SetDiskID(dev, pri_node)
     cfg.Update(instance)
-    # we can remove now the temp minors as now the new values are
-    # written to the config file (and therefore stable)
-    self.cfg.ReleaseDRBDMinors(instance.name)
 
     # and now perform the drbd attach
     info("attaching primary drbds to new secondary (standalone => connected)")
 
     # and now perform the drbd attach
     info("attaching primary drbds to new secondary (standalone => connected)")
-    for idx, dev in enumerate(instance.disks):
-      info("attaching primary drbd for disk/%d to new secondary node" % idx)
-      # since the attach is smart, it's enough to 'find' the device,
-      # it will automatically activate the network, if the physical_id
-      # is correct
-      cfg.SetDiskID(dev, pri_node)
-      logging.debug("Disk to attach: %s", dev)
-      result = self.rpc.call_blockdev_find(pri_node, dev)
-      if result.failed or not result.data:
-        warning("can't attach drbd disk/%d to new secondary!" % idx,
-                "please do a gnt-instance info to see the status of disks")
+    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
+                                           instance.disks, instance.name,
+                                           False)
+    for to_node, to_result in result.items():
+      msg = to_result.RemoteFailMsg()
+      if msg:
+        warning("can't attach drbd disks on node %s: %s", to_node, msg,
+                hint="please do a gnt-instance info to see the"
+                " status of disks")
 
     # this can fail as the old devices are degraded and _WaitForSync
     # does a combined result over all disks, so we don't check its
 
     # this can fail as the old devices are degraded and _WaitForSync
     # does a combined result over all disks, so we don't check its
@@ -4792,8 +5256,13 @@ class LUReplaceDisks(LogicalUnit):
     for idx, (dev, old_lvs, _) in iv_names.iteritems():
       cfg.SetDiskID(dev, pri_node)
       result = self.rpc.call_blockdev_find(pri_node, dev)
     for idx, (dev, old_lvs, _) in iv_names.iteritems():
       cfg.SetDiskID(dev, pri_node)
       result = self.rpc.call_blockdev_find(pri_node, dev)
-      result.Raise()
-      if result.data[5]:
+      msg = result.RemoteFailMsg()
+      if not msg and not result.payload:
+        msg = "disk not found"
+      if msg:
+        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
+                                 (idx, msg))
+      if result.payload[5]:
         raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
 
     self.proc.LogStep(6, steps_total, "removing old storage")
         raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
 
     self.proc.LogStep(6, steps_total, "removing old storage")
@@ -4801,9 +5270,9 @@ class LUReplaceDisks(LogicalUnit):
       info("remove logical volumes for disk/%d" % idx)
       for lv in old_lvs:
         cfg.SetDiskID(lv, old_node)
       info("remove logical volumes for disk/%d" % idx)
       for lv in old_lvs:
         cfg.SetDiskID(lv, old_node)
-        result = self.rpc.call_blockdev_remove(old_node, lv)
-        if result.failed or not result.data:
-          warning("Can't remove LV on old secondary",
+        msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
+        if msg:
+          warning("Can't remove LV on old secondary: %s", msg,
                   hint="Cleanup stale volumes by hand")
 
   def Exec(self, feedback_fn):
                   hint="Cleanup stale volumes by hand")
 
   def Exec(self, feedback_fn):
@@ -4815,7 +5284,7 @@ class LUReplaceDisks(LogicalUnit):
     instance = self.instance
 
     # Activate the instance disks if we're replacing them on a down instance
     instance = self.instance
 
     # Activate the instance disks if we're replacing them on a down instance
-    if instance.status == "down":
+    if not instance.admin_up:
       _StartInstanceDisks(self, instance, True)
 
     if self.op.mode == constants.REPLACE_DISK_CHG:
       _StartInstanceDisks(self, instance, True)
 
     if self.op.mode == constants.REPLACE_DISK_CHG:
@@ -4826,7 +5295,7 @@ class LUReplaceDisks(LogicalUnit):
     ret = fn(feedback_fn)
 
     # Deactivate the instance disks if we're replacing them on a down instance
     ret = fn(feedback_fn)
 
     # Deactivate the instance disks if we're replacing them on a down instance
-    if instance.status == "down":
+    if not instance.admin_up:
       _SafeShutdownInstanceDisks(self, instance)
 
     return ret
       _SafeShutdownInstanceDisks(self, instance)
 
     return ret
@@ -4876,8 +5345,8 @@ class LUGrowDisk(LogicalUnit):
     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
-    _CheckNodeOnline(self, instance.primary_node)
-    for node in instance.secondary_nodes:
+    nodenames = list(instance.all_nodes)
+    for node in nodenames:
       _CheckNodeOnline(self, node)
 
 
       _CheckNodeOnline(self, node)
 
 
@@ -4889,7 +5358,6 @@ class LUGrowDisk(LogicalUnit):
 
     self.disk = instance.FindDisk(self.op.disk)
 
 
     self.disk = instance.FindDisk(self.op.disk)
 
-    nodenames = [instance.primary_node] + list(instance.secondary_nodes)
     nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                        instance.hypervisor)
     for node in nodenames:
     nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                        instance.hypervisor)
     for node in nodenames:
@@ -4912,16 +5380,13 @@ class LUGrowDisk(LogicalUnit):
     """
     instance = self.instance
     disk = self.disk
     """
     instance = self.instance
     disk = self.disk
-    for node in (instance.secondary_nodes + (instance.primary_node,)):
+    for node in instance.all_nodes:
       self.cfg.SetDiskID(disk, node)
       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
       self.cfg.SetDiskID(disk, node)
       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
-      result.Raise()
-      if (not result.data or not isinstance(result.data, (list, tuple)) or
-          len(result.data) != 2):
-        raise errors.OpExecError("Grow request failed to node %s" % node)
-      elif not result.data[0]:
+      msg = result.RemoteFailMsg()
+      if msg:
         raise errors.OpExecError("Grow request failed to node %s: %s" %
         raise errors.OpExecError("Grow request failed to node %s: %s" %
-                                 (node, result.data[1]))
+                                 (node, msg))
     disk.RecordGrow(self.op.amount)
     self.cfg.Update(instance)
     if self.op.wait_for_sync:
     disk.RecordGrow(self.op.amount)
     self.cfg.Update(instance)
     if self.op.wait_for_sync:
@@ -4950,8 +5415,7 @@ class LUQueryInstanceData(NoHooksLU):
       for name in self.op.instances:
         full_name = self.cfg.ExpandInstanceName(name)
         if full_name is None:
       for name in self.op.instances:
         full_name = self.cfg.ExpandInstanceName(name)
         if full_name is None:
-          raise errors.OpPrereqError("Instance '%s' not known" %
-                                     self.op.instance_name)
+          raise errors.OpPrereqError("Instance '%s' not known" % name)
         self.wanted_names.append(full_name)
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
     else:
         self.wanted_names.append(full_name)
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
     else:
@@ -4986,8 +5450,11 @@ class LUQueryInstanceData(NoHooksLU):
     if not static:
       self.cfg.SetDiskID(dev, instance.primary_node)
       dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
     if not static:
       self.cfg.SetDiskID(dev, instance.primary_node)
       dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
-      dev_pstatus.Raise()
-      dev_pstatus = dev_pstatus.data
+      msg = dev_pstatus.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Can't compute disk status for %s: %s" %
+                                 (instance.name, msg))
+      dev_pstatus = dev_pstatus.payload
     else:
       dev_pstatus = None
 
     else:
       dev_pstatus = None
 
@@ -5001,8 +5468,11 @@ class LUQueryInstanceData(NoHooksLU):
     if snode and not static:
       self.cfg.SetDiskID(dev, snode)
       dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
     if snode and not static:
       self.cfg.SetDiskID(dev, snode)
       dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
-      dev_sstatus.Raise()
-      dev_sstatus = dev_sstatus.data
+      msg = dev_sstatus.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Can't compute disk status for %s: %s" %
+                                 (instance.name, msg))
+      dev_sstatus = dev_sstatus.payload
     else:
       dev_sstatus = None
 
     else:
       dev_sstatus = None
 
@@ -5044,10 +5514,10 @@ class LUQueryInstanceData(NoHooksLU):
           remote_state = "down"
       else:
         remote_state = None
           remote_state = "down"
       else:
         remote_state = None
-      if instance.status == "down":
-        config_state = "down"
-      else:
+      if instance.admin_up:
         config_state = "up"
         config_state = "up"
+      else:
+        config_state = "down"
 
       disks = [self._ComputeDiskStatus(instance, None, device)
                for device in instance.disks]
 
       disks = [self._ComputeDiskStatus(instance, None, device)
                for device in instance.disks]
@@ -5112,7 +5582,7 @@ class LUSetInstanceParams(LogicalUnit):
           raise errors.OpPrereqError("Invalid disk index")
       if disk_op == constants.DDM_ADD:
         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
           raise errors.OpPrereqError("Invalid disk index")
       if disk_op == constants.DDM_ADD:
         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
-        if mode not in (constants.DISK_RDONLY, constants.DISK_RDWR):
+        if mode not in constants.DISK_ACCESS_SET:
           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
         size = disk_dict.get('size', None)
         if size is None:
           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
         size = disk_dict.get('size', None)
         if size is None:
@@ -5192,8 +5662,7 @@ class LUSetInstanceParams(LogicalUnit):
       args['vcpus'] = self.be_new[constants.BE_VCPUS]
     # FIXME: readd disk/nic changes
     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
       args['vcpus'] = self.be_new[constants.BE_VCPUS]
     # FIXME: readd disk/nic changes
     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
-    nl = [self.cfg.GetMasterNode(),
-          self.instance.primary_node] + list(self.instance.secondary_nodes)
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
     return env, nl, nl
 
   def CheckPrereq(self):
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -5209,9 +5678,8 @@ class LUSetInstanceParams(LogicalUnit):
     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
-    pnode = self.instance.primary_node
-    nodelist = [pnode]
-    nodelist.extend(instance.secondary_nodes)
+    pnode = instance.primary_node
+    nodelist = list(instance.all_nodes)
 
     # hvparams processing
     if self.op.hvparams:
 
     # hvparams processing
     if self.op.hvparams:
@@ -5287,7 +5755,9 @@ class LUSetInstanceParams(LogicalUnit):
                                      " missing on its primary node" % miss_mem)
 
       if be_new[constants.BE_AUTO_BALANCE]:
                                      " missing on its primary node" % miss_mem)
 
       if be_new[constants.BE_AUTO_BALANCE]:
-        for node, nres in instance.secondary_nodes.iteritems():
+        for node, nres in nodeinfo.iteritems():
+          if node not in instance.secondary_nodes:
+            continue
           if nres.failed or not isinstance(nres.data, dict):
             self.warn.append("Can't get info from secondary node %s" % node)
           elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
           if nres.failed or not isinstance(nres.data, dict):
             self.warn.append("Can't get info from secondary node %s" % node)
           elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
@@ -5327,9 +5797,9 @@ class LUSetInstanceParams(LogicalUnit):
                                      " an instance")
         ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
         ins_l = ins_l[pnode]
                                      " an instance")
         ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
         ins_l = ins_l[pnode]
-        if not type(ins_l) is list:
+        if ins_l.failed or not isinstance(ins_l.data, list):
           raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
           raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
-        if instance.name in ins_l:
+        if instance.name in ins_l.data:
           raise errors.OpPrereqError("Instance is running, can't remove"
                                      " disks.")
 
           raise errors.OpPrereqError("Instance is running, can't remove"
                                      " disks.")
 
@@ -5367,10 +5837,10 @@ class LUSetInstanceParams(LogicalUnit):
         device_idx = len(instance.disks)
         for node, disk in device.ComputeNodeTree(instance.primary_node):
           self.cfg.SetDiskID(disk, node)
         device_idx = len(instance.disks)
         for node, disk in device.ComputeNodeTree(instance.primary_node):
           self.cfg.SetDiskID(disk, node)
-          result = self.rpc.call_blockdev_remove(node, disk)
-          if result.failed or not result.data:
-            self.proc.LogWarning("Could not remove disk/%d on node %s,"
-                                 " continuing anyway", device_idx, node)
+          msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
+          if msg:
+            self.LogWarning("Could not remove disk/%d on node %s: %s,"
+                            " continuing anyway", device_idx, node, msg)
         result.append(("disk/%d" % device_idx, "remove"))
       elif disk_op == constants.DDM_ADD:
         # add a new disk
         result.append(("disk/%d" % device_idx, "remove"))
       elif disk_op == constants.DDM_ADD:
         # add a new disk
@@ -5382,13 +5852,12 @@ class LUSetInstanceParams(LogicalUnit):
         disk_idx_base = len(instance.disks)
         new_disk = _GenerateDiskTemplate(self,
                                          instance.disk_template,
         disk_idx_base = len(instance.disks)
         new_disk = _GenerateDiskTemplate(self,
                                          instance.disk_template,
-                                         instance, instance.primary_node,
+                                         instance.name, instance.primary_node,
                                          instance.secondary_nodes,
                                          [disk_dict],
                                          file_path,
                                          file_driver,
                                          disk_idx_base)[0]
                                          instance.secondary_nodes,
                                          [disk_dict],
                                          file_path,
                                          file_driver,
                                          disk_idx_base)[0]
-        new_disk.mode = disk_dict['mode']
         instance.disks.append(new_disk)
         info = _GetInstanceInfoText(instance)
 
         instance.disks.append(new_disk)
         info = _GetInstanceInfoText(instance)
 
@@ -5396,17 +5865,15 @@ class LUSetInstanceParams(LogicalUnit):
                      new_disk.iv_name, instance.name)
         # Note: this needs to be kept in sync with _CreateDisks
         #HARDCODE
                      new_disk.iv_name, instance.name)
         # Note: this needs to be kept in sync with _CreateDisks
         #HARDCODE
-        for secondary_node in instance.secondary_nodes:
-          if not _CreateBlockDevOnSecondary(self, secondary_node, instance,
-                                            new_disk, False, info):
+        for node in instance.all_nodes:
+          f_create = node == instance.primary_node
+          try:
+            _CreateBlockDev(self, node, instance, new_disk,
+                            f_create, info, f_create)
+          except errors.OpExecError, err:
             self.LogWarning("Failed to create volume %s (%s) on"
             self.LogWarning("Failed to create volume %s (%s) on"
-                            " secondary node %s!",
-                            new_disk.iv_name, new_disk, secondary_node)
-        #HARDCODE
-        if not _CreateBlockDevOnPrimary(self, instance.primary_node,
-                                        instance, new_disk, info):
-          self.LogWarning("Failed to create volume %s on primary!",
-                          new_disk.iv_name)
+                            " node %s: %s",
+                            new_disk.iv_name, new_disk, node, err)
         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                        (new_disk.size, new_disk.mode)))
       else:
         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                        (new_disk.size, new_disk.mode)))
       else:
@@ -5442,7 +5909,7 @@ class LUSetInstanceParams(LogicalUnit):
 
     # hvparams changes
     if self.op.hvparams:
 
     # hvparams changes
     if self.op.hvparams:
-      instance.hvparams = self.hv_new
+      instance.hvparams = self.hv_inst
       for key, val in self.op.hvparams.iteritems():
         result.append(("hv/%s" % key, val))
 
       for key, val in self.op.hvparams.iteritems():
         result.append(("hv/%s" % key, val))
 
@@ -5557,7 +6024,7 @@ class LUExportInstance(LogicalUnit):
     if self.dst_node is None:
       # This is wrong node name, not a non-locked node
       raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
     if self.dst_node is None:
       # This is wrong node name, not a non-locked node
       raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
-    _CheckNodeOnline(self, self.op.target_node)
+    _CheckNodeOnline(self, self.dst_node.name)
 
     # instance disk type verification
     for disk in self.instance.disks:
 
     # instance disk type verification
     for disk in self.instance.disks:
@@ -5584,6 +6051,11 @@ class LUExportInstance(LogicalUnit):
 
     snap_disks = []
 
 
     snap_disks = []
 
+    # set the disks ID correctly since call_instance_start needs the
+    # correct drbd minor to create the symlinks
+    for disk in instance.disks:
+      self.cfg.SetDiskID(disk, src_node)
+
     try:
       for disk in instance.disks:
         # new_dev_name will be a snapshot of an lvm leaf of the one we passed
     try:
       for disk in instance.disks:
         # new_dev_name will be a snapshot of an lvm leaf of the one we passed
@@ -5600,11 +6072,12 @@ class LUExportInstance(LogicalUnit):
           snap_disks.append(new_dev)
 
     finally:
           snap_disks.append(new_dev)
 
     finally:
-      if self.op.shutdown and instance.status == "up":
+      if self.op.shutdown and instance.admin_up:
         result = self.rpc.call_instance_start(src_node, instance, None)
         result = self.rpc.call_instance_start(src_node, instance, None)
-        if result.failed or not result.data:
+        msg = result.RemoteFailMsg()
+        if msg:
           _ShutdownInstanceDisks(self, instance)
           _ShutdownInstanceDisks(self, instance)
-          raise errors.OpExecError("Could not start instance")
+          raise errors.OpExecError("Could not start instance: %s" % msg)
 
     # TODO: check for size
 
 
     # TODO: check for size
 
@@ -5617,10 +6090,10 @@ class LUExportInstance(LogicalUnit):
           self.LogWarning("Could not export block device %s from node %s to"
                           " node %s", dev.logical_id[1], src_node,
                           dst_node.name)
           self.LogWarning("Could not export block device %s from node %s to"
                           " node %s", dev.logical_id[1], src_node,
                           dst_node.name)
-        result = self.rpc.call_blockdev_remove(src_node, dev)
-        if result.failed or not result.data:
+        msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
+        if msg:
           self.LogWarning("Could not remove snapshot block device %s from node"
           self.LogWarning("Could not remove snapshot block device %s from node"
-                          " %s", dev.logical_id[1], src_node)
+                          " %s: %s", dev.logical_id[1], src_node, msg)
 
     result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
     if result.failed or not result.data:
 
     result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
     if result.failed or not result.data:
@@ -5975,7 +6448,7 @@ class IAllocator(object):
       "version": 1,
       "cluster_name": cfg.GetClusterName(),
       "cluster_tags": list(cluster_info.GetTags()),
       "version": 1,
       "cluster_name": cfg.GetClusterName(),
       "cluster_tags": list(cluster_info.GetTags()),
-      "enable_hypervisors": list(cluster_info.enabled_hypervisors),
+      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
       # we don't have job IDs
       }
     iinfo = cfg.GetAllInstancesInfo().values()
       # we don't have job IDs
       }
     iinfo = cfg.GetAllInstancesInfo().values()
@@ -5994,52 +6467,61 @@ class IAllocator(object):
                                            hypervisor_name)
     node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                        cluster_info.enabled_hypervisors)
                                            hypervisor_name)
     node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                        cluster_info.enabled_hypervisors)
-    for nname in node_list:
+    for nname, nresult in node_data.items():
+      # first fill in static (config-based) values
       ninfo = cfg.GetNodeInfo(nname)
       ninfo = cfg.GetNodeInfo(nname)
-      node_data[nname].Raise()
-      if not isinstance(node_data[nname].data, dict):
-        raise errors.OpExecError("Can't get data for node %s" % nname)
-      remote_info = node_data[nname].data
-      for attr in ['memory_total', 'memory_free', 'memory_dom0',
-                   'vg_size', 'vg_free', 'cpu_total']:
-        if attr not in remote_info:
-          raise errors.OpExecError("Node '%s' didn't return attribute '%s'" %
-                                   (nname, attr))
-        try:
-          remote_info[attr] = int(remote_info[attr])
-        except ValueError, err:
-          raise errors.OpExecError("Node '%s' returned invalid value for '%s':"
-                                   " %s" % (nname, attr, str(err)))
-      # compute memory used by primary instances
-      i_p_mem = i_p_up_mem = 0
-      for iinfo, beinfo in i_list:
-        if iinfo.primary_node == nname:
-          i_p_mem += beinfo[constants.BE_MEMORY]
-          if iinfo.name not in node_iinfo[nname]:
-            i_used_mem = 0
-          else:
-            i_used_mem = int(node_iinfo[nname][iinfo.name]['memory'])
-          i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
-          remote_info['memory_free'] -= max(0, i_mem_diff)
-
-          if iinfo.status == "up":
-            i_p_up_mem += beinfo[constants.BE_MEMORY]
-
-      # compute memory used by instances
       pnr = {
         "tags": list(ninfo.GetTags()),
       pnr = {
         "tags": list(ninfo.GetTags()),
-        "total_memory": remote_info['memory_total'],
-        "reserved_memory": remote_info['memory_dom0'],
-        "free_memory": remote_info['memory_free'],
-        "i_pri_memory": i_p_mem,
-        "i_pri_up_memory": i_p_up_mem,
-        "total_disk": remote_info['vg_size'],
-        "free_disk": remote_info['vg_free'],
         "primary_ip": ninfo.primary_ip,
         "secondary_ip": ninfo.secondary_ip,
         "primary_ip": ninfo.primary_ip,
         "secondary_ip": ninfo.secondary_ip,
-        "total_cpus": remote_info['cpu_total'],
         "offline": ninfo.offline,
         "offline": ninfo.offline,
+        "drained": ninfo.drained,
+        "master_candidate": ninfo.master_candidate,
         }
         }
+
+      if not ninfo.offline:
+        nresult.Raise()
+        if not isinstance(nresult.data, dict):
+          raise errors.OpExecError("Can't get data for node %s" % nname)
+        remote_info = nresult.data
+        for attr in ['memory_total', 'memory_free', 'memory_dom0',
+                     'vg_size', 'vg_free', 'cpu_total']:
+          if attr not in remote_info:
+            raise errors.OpExecError("Node '%s' didn't return attribute"
+                                     " '%s'" % (nname, attr))
+          try:
+            remote_info[attr] = int(remote_info[attr])
+          except ValueError, err:
+            raise errors.OpExecError("Node '%s' returned invalid value"
+                                     " for '%s': %s" % (nname, attr, err))
+        # compute memory used by primary instances
+        i_p_mem = i_p_up_mem = 0
+        for iinfo, beinfo in i_list:
+          if iinfo.primary_node == nname:
+            i_p_mem += beinfo[constants.BE_MEMORY]
+            if iinfo.name not in node_iinfo[nname].data:
+              i_used_mem = 0
+            else:
+              i_used_mem = int(node_iinfo[nname].data[iinfo.name]['memory'])
+            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
+            remote_info['memory_free'] -= max(0, i_mem_diff)
+
+            if iinfo.admin_up:
+              i_p_up_mem += beinfo[constants.BE_MEMORY]
+
+        # compute memory used by instances
+        pnr_dyn = {
+          "total_memory": remote_info['memory_total'],
+          "reserved_memory": remote_info['memory_dom0'],
+          "free_memory": remote_info['memory_free'],
+          "total_disk": remote_info['vg_size'],
+          "free_disk": remote_info['vg_free'],
+          "total_cpus": remote_info['cpu_total'],
+          "i_pri_memory": i_p_mem,
+          "i_pri_up_memory": i_p_up_mem,
+          }
+        pnr.update(pnr_dyn)
+
       node_results[nname] = pnr
     data["nodes"] = node_results
 
       node_results[nname] = pnr
     data["nodes"] = node_results
 
@@ -6050,13 +6532,13 @@ class IAllocator(object):
                   for n in iinfo.nics]
       pir = {
         "tags": list(iinfo.GetTags()),
                   for n in iinfo.nics]
       pir = {
         "tags": list(iinfo.GetTags()),
-        "should_run": iinfo.status == "up",
+        "admin_up": iinfo.admin_up,
         "vcpus": beinfo[constants.BE_VCPUS],
         "memory": beinfo[constants.BE_MEMORY],
         "os": iinfo.os,
         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
         "nics": nic_data,
         "vcpus": beinfo[constants.BE_VCPUS],
         "memory": beinfo[constants.BE_MEMORY],
         "os": iinfo.os,
         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
         "nics": nic_data,
-        "disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
+        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
         "disk_template": iinfo.disk_template,
         "hypervisor": iinfo.hypervisor,
         }
         "disk_template": iinfo.disk_template,
         "hypervisor": iinfo.hypervisor,
         }
@@ -6077,8 +6559,6 @@ class IAllocator(object):
 
     """
     data = self.in_data
 
     """
     data = self.in_data
-    if len(self.disks) != 2:
-      raise errors.OpExecError("Only two-disk configurations supported")
 
     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
 
 
     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
 
@@ -6235,8 +6715,6 @@ class LUTestAllocator(NoHooksLU):
                                      " 'nics' parameter")
       if not isinstance(self.op.disks, list):
         raise errors.OpPrereqError("Invalid parameter 'disks'")
                                      " 'nics' parameter")
       if not isinstance(self.op.disks, list):
         raise errors.OpPrereqError("Invalid parameter 'disks'")
-      if len(self.op.disks) != 2:
-        raise errors.OpPrereqError("Only two-disk configurations supported")
       for row in self.op.disks:
         if (not isinstance(row, dict) or
             "size" not in row or
       for row in self.op.disks:
         if (not isinstance(row, dict) or
             "size" not in row or
@@ -6245,7 +6723,7 @@ class LUTestAllocator(NoHooksLU):
             row["mode"] not in ['r', 'w']):
           raise errors.OpPrereqError("Invalid contents of the"
                                      " 'disks' parameter")
             row["mode"] not in ['r', 'w']):
           raise errors.OpPrereqError("Invalid contents of the"
                                      " 'disks' parameter")
-      if self.op.hypervisor is None:
+      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
         self.op.hypervisor = self.cfg.GetHypervisorType()
     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
       if not hasattr(self.op, "name"):
         self.op.hypervisor = self.cfg.GetHypervisorType()
     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
       if not hasattr(self.op, "name"):