Small change in the instance disk creation path
[ganeti-local] lib/cmdlib.py
index 3e62dd1..6ddb1f3 100644
@@ -32,9 +32,9 @@ import re
 import platform
 import logging
 import copy
+import random
 
 from ganeti import ssh
-from ganeti import logger
 from ganeti import utils
 from ganeti import errors
 from ganeti import hypervisor
@@ -43,6 +43,7 @@ from ganeti import constants
 from ganeti import objects
 from ganeti import opcodes
 from ganeti import serializer
+from ganeti import ssconf
 
 
 class LogicalUnit(object):
@@ -55,7 +56,6 @@ class LogicalUnit(object):
     - implement BuildHooksEnv
     - redefine HPATH and HTYPE
     - optionally redefine their run requirements:
-        REQ_MASTER: the LU needs to run on the master node
         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 
   Note that all commands require root permissions.
@@ -64,7 +64,6 @@ class LogicalUnit(object):
   HPATH = None
   HTYPE = None
   _OP_REQP = []
-  REQ_MASTER = True
   REQ_BGL = True
 
   def __init__(self, processor, op, context, rpc):
@@ -88,21 +87,16 @@ class LogicalUnit(object):
     # Used to force good behavior when calling helper functions
     self.recalculate_locks = {}
     self.__ssh = None
+    # logging
+    self.LogWarning = processor.LogWarning
+    self.LogInfo = processor.LogInfo
 
     for attr_name in self._OP_REQP:
       attr_val = getattr(op, attr_name, None)
       if attr_val is None:
         raise errors.OpPrereqError("Required parameter '%s' missing" %
                                    attr_name)
-
-    if not self.cfg.IsCluster():
-      raise errors.OpPrereqError("Cluster not initialized yet,"
-                                 " use 'gnt-cluster init' first.")
-    if self.REQ_MASTER:
-      master = self.cfg.GetMasterNode()
-      if master != utils.HostInfo().name:
-        raise errors.OpPrereqError("Commands must be run on the master"
-                                   " node %s" % master)
+    self.CheckArguments()
 
   def __GetSSH(self):
     """Returns the SshRunner object
@@ -114,6 +108,24 @@ class LogicalUnit(object):
 
   ssh = property(fget=__GetSSH)
 
 
   ssh = property(fget=__GetSSH)
 
+  def CheckArguments(self):
+    """Check syntactic validity for the opcode arguments.
+
+    This method is for doing a simple syntactic check and ensuring the
+    validity of opcode parameters, without any cluster-related
+    checks. While the same can be accomplished in ExpandNames and/or
+    CheckPrereq, doing these separately is better because:
+
+      - ExpandNames is left as purely a lock-related function
+      - CheckPrereq is run after we have acquired locks (and possibly
+        waited for them)
+
+    The function is allowed to change the self.op attribute so that
+    later methods need no longer worry about missing parameters.
+
+    """
+    pass
+
   def ExpandNames(self):
     """Expand names for this LU.
 
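A minimal sketch of how an LU might use the new hook (the LU name and its "mode" and "force" parameters are illustrative, not part of this patch)::

    class LUExample(NoHooksLU):
      _OP_REQP = ["mode"]
      REQ_BGL = False

      def CheckArguments(self):
        # purely syntactic checks: no cluster data and no locks needed here
        if self.op.mode not in ("full", "quick"):
          raise errors.OpPrereqError("Invalid mode '%s'" % self.op.mode)
        # normalize an optional parameter so later phases can rely on it
        self.op.force = getattr(self.op, "force", False)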
@@ -125,27 +137,29 @@ class LogicalUnit(object):
     LUs which implement this method must also populate the self.needed_locks
     member, as a dict with lock levels as keys, and a list of needed lock names
     as values. Rules:
-      - Use an empty dict if you don't need any lock
-      - If you don't need any lock at a particular level omit that level
-      - Don't put anything for the BGL level
-      - If you want all locks at a level use locking.ALL_SET as a value
+
+      - use an empty dict if you don't need any lock
+      - if you don't need any lock at a particular level omit that level
+      - don't put anything for the BGL level
+      - if you want all locks at a level use locking.ALL_SET as a value
 
     If you need to share locks (rather than acquire them exclusively) at one
     level you can modify self.share_locks, setting a true value (usually 1) for
     that level. By default locks are not shared.
 
-    Examples:
-    # Acquire all nodes and one instance
-    self.needed_locks = {
-      locking.LEVEL_NODE: locking.ALL_SET,
-      locking.LEVEL_INSTANCE: ['instance1.example.tld'],
-    }
-    # Acquire just two nodes
-    self.needed_locks = {
-      locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
-    }
-    # Acquire no locks
-    self.needed_locks = {} # No, you can't leave it to the default value None
+    Examples::
+
+      # Acquire all nodes and one instance
+      self.needed_locks = {
+        locking.LEVEL_NODE: locking.ALL_SET,
+        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
+      }
+      # Acquire just two nodes
+      self.needed_locks = {
+        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
+      }
+      # Acquire no locks
+      self.needed_locks = {} # No, you can't leave it to the default value None
 
     """
     # The implementation of this method is mandatory only if the new LU is
@@ -232,11 +246,14 @@ class LogicalUnit(object):
     previous result is passed back unchanged but any LU can define it if it
     wants to use the local cluster hook-scripts somehow.
 
-    Args:
-      phase: the hooks phase that has just been run
-      hooks_results: the results of the multi-node hooks rpc call
-      feedback_fn: function to send feedback back to the caller
-      lu_result: the previous result this LU had, or None in the PRE phase.
+    @param phase: one of L{constants.HOOKS_PHASE_POST} or
+        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
+    @param hooks_results: the results of the multi-node hooks rpc call
+    @param feedback_fn: function used to send feedback back to the caller
+    @param lu_result: the previous Exec result this LU had, or None
+        in the PRE phase
+    @return: the new Exec result, based on the previous result
+        and hook results
 
     """
     return lu_result
@@ -277,10 +294,10 @@ class LogicalUnit(object):
     In the future it may grow parameters to just lock some instance's nodes, or
     to just lock primaries or secondary nodes, if needed.
 
     In the future it may grow parameters to just lock some instance's nodes, or
     to just lock primaries or secondary nodes, if needed.
 
-    It should be called in DeclareLocks in a way similar to:
+    It should be called in DeclareLocks in a way similar to::
 
-    if level == locking.LEVEL_NODE:
-      self._LockInstancesNodes()
+      if level == locking.LEVEL_NODE:
+        self._LockInstancesNodes()
 
     @type primary_only: boolean
     @param primary_only: only lock primary nodes of locked instances
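For example, a hypothetical DeclareLocks built on this helper, locking only the primary nodes of the instances already locked::

    def DeclareLocks(self, level):
      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes(primary_only=True)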
@@ -323,8 +340,13 @@ class NoHooksLU(LogicalUnit):
 def _GetWantedNodes(lu, nodes):
   """Returns list of checked and expanded node names.
 
-  Args:
-    nodes: List of nodes (strings) or None for all
+  @type lu: L{LogicalUnit}
+  @param lu: the logical unit on whose behalf we execute
+  @type nodes: list
+  @param nodes: list of node names or None for all nodes
+  @rtype: list
+  @return: the list of nodes, sorted
+  @raise errors.OpProgrammerError: if the nodes parameter is of the wrong type
 
   """
   if not isinstance(nodes, list):
@@ -347,8 +369,14 @@ def _GetWantedNodes(lu, nodes):
 def _GetWantedInstances(lu, instances):
   """Returns list of checked and expanded instance names.
 
-  Args:
-    instances: List of instances (strings) or None for all
+  @type lu: L{LogicalUnit}
+  @param lu: the logical unit on whose behalf we execute
+  @type instances: list
+  @param instances: list of instance names or None for all instances
+  @rtype: list
+  @return: the list of instances, sorted
+  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
+  @raise errors.OpPrereqError: if any of the passed instances is not found
 
   """
   if not isinstance(instances, list):
@@ -371,28 +399,74 @@ def _GetWantedInstances(lu, instances):
 def _CheckOutputFields(static, dynamic, selected):
   """Checks whether all selected fields are valid.
 
-  Args:
-    static: Static fields
-    dynamic: Dynamic fields
+  @type static: L{utils.FieldSet}
+  @param static: static fields set
+  @type dynamic: L{utils.FieldSet}
+  @param dynamic: dynamic fields set
 
   """
 
   """
-  static_fields = frozenset(static)
-  dynamic_fields = frozenset(dynamic)
-
-  all_fields = static_fields | dynamic_fields
+  f = utils.FieldSet()
+  f.Extend(static)
+  f.Extend(dynamic)
 
-  if not all_fields.issuperset(selected):
+  delta = f.NonMatching(selected)
+  if delta:
     raise errors.OpPrereqError("Unknown output fields selected: %s"
-                               % ",".join(frozenset(selected).
-                                          difference(all_fields)))
+                               % ",".join(delta))
+
+
+def _CheckBooleanOpField(op, name):
+  """Validates boolean opcode parameters.
+
+  This will ensure that an opcode parameter is either a boolean value,
+  or None (but that it always exists).
+
+  """
+  val = getattr(op, name, None)
+  if not (val is None or isinstance(val, bool)):
+    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
+                               (name, str(val)))
+  setattr(op, name, val)
+
+
+def _CheckNodeOnline(lu, node):
+  """Ensure that a given node is online.
+
+  @param lu: the LU on behalf of which we make the check
+  @param node: the node to check
+  @raise errors.OpPrereqError: if the node is offline
+
+  """
+  if lu.cfg.GetNodeInfo(node).offline:
+    raise errors.OpPrereqError("Can't use offline node %s" % node)
 
 
 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                           memory, vcpus, nics):
-  """Builds instance related env variables for hooks from single variables.
+  """Builds instance related env variables for hooks
+
+  This builds the hook environment from individual variables.
+
+  @type name: string
+  @param name: the name of the instance
+  @type primary_node: string
+  @param primary_node: the name of the instance's primary node
+  @type secondary_nodes: list
+  @param secondary_nodes: list of secondary nodes as strings
+  @type os_type: string
+  @param os_type: the name of the instance's OS
+  @type status: string
+  @param status: the desired status of the instance
+  @type memory: string
+  @param memory: the memory size of the instance
+  @type vcpus: string
+  @param vcpus: the count of VCPUs the instance has
+  @type nics: list
+  @param nics: list of tuples (ip, bridge, mac) representing
+      the NICs the instance has
+  @rtype: dict
+  @return: the hook environment for this instance
 
-  Args:
-    secondary_nodes: List of secondary nodes as strings
   """
   env = {
     "OP_TARGET": name,
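A sketch of how the new boolean validator is meant to be used from an LU's CheckArguments (the "ignore_failures" field is illustrative)::

    def CheckArguments(self):
      _CheckBooleanOpField(self.op, "ignore_failures")
      # the attribute now always exists and is either None or a bool
      if self.op.ignore_failures is None:
        self.op.ignore_failures = False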
@@ -424,9 +498,17 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
   """Builds instance related env variables for hooks from an object.
 
-  Args:
-    instance: objects.Instance object of instance
-    override: dict of values to override
+  @type lu: L{LogicalUnit}
+  @param lu: the logical unit on whose behalf we execute
+  @type instance: L{objects.Instance}
+  @param instance: the instance for which we should build the
+      environment
+  @type override: dict
+  @param override: dictionary with key/values that will override
+      our values
+  @rtype: dict
+  @return: the hook environment dictionary
+
   """
   bep = lu.cfg.GetClusterInfo().FillBE(instance)
   args = {
@@ -444,14 +526,32 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
   return _BuildInstanceHookEnv(**args)
 
 
+def _AdjustCandidatePool(lu):
+  """Adjust the candidate pool after node operations.
+
+  """
+  mod_list = lu.cfg.MaintainCandidatePool()
+  if mod_list:
+    lu.LogInfo("Promoted nodes to master candidate role: %s",
+               ", ".join(node.name for node in mod_list))
+    for name in mod_list:
+      lu.context.ReaddNode(name)
+  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
+  if mc_now > mc_max:
+    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
+               (mc_now, mc_max))
+
+
 def _CheckInstanceBridgesExist(lu, instance):
   """Check that the brigdes needed by an instance exist.
 
   """
   # check bridges existence
   brlist = [nic.bridge for nic in instance.nics]
-  if not lu.rpc.call_bridges_exist(instance.primary_node, brlist):
-    raise errors.OpPrereqError("one or more target bridges %s does not"
+  result = lu.rpc.call_bridges_exist(instance.primary_node, brlist)
+  result.Raise()
+  if not result.data:
+    raise errors.OpPrereqError("One or more target bridges %s does not"
                                " exist on destination node '%s'" %
                                (brlist, instance.primary_node))
 
                                " exist on destination node '%s'" %
                                (brlist, instance.primary_node))
 
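The "result.Raise(); then test result.data" sequence above is the new RPC convention used throughout this patch: calls return a result object instead of the bare payload. A stand-in sketch of the behavior relied on here (only the .data, .failed and .offline attributes and Raise() are inferred from usage in this file)::

    class RpcResultSketch(object):
      def __init__(self, data, failed=False, offline=False):
        self.data = data        # the remote call's actual return value
        self.failed = failed    # the RPC layer itself failed
        self.offline = offline  # the target node is marked offline

      def Raise(self):
        # the real method raises an error describing the failed call;
        # OpExecError is a plausible choice, not confirmed by the patch
        if self.failed:
          raise errors.OpExecError("RPC call failed")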
@@ -486,7 +586,9 @@ class LUDestroyCluster(NoHooksLU):
 
     """
     master = self.cfg.GetMasterNode()
-    if not self.rpc.call_node_stop_master(master, False):
+    result = self.rpc.call_node_stop_master(master, False)
+    result.Raise()
+    if not result.data:
       raise errors.OpExecError("Could not disable the master role")
     priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
     utils.CreateBackup(priv_key)
@@ -510,24 +612,36 @@ class LUVerifyCluster(LogicalUnit):
     }
     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
 
-  def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
-                  remote_version, feedback_fn):
+  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
+                  node_result, feedback_fn, master_files):
     """Run multiple tests against a node.
 
     Test list:
+
       - compares ganeti version
       - checks vg existence and size > 20G
       - checks config file checksum
       - checks ssh to other nodes
 
-    Args:
-      node: name of the node to check
-      file_list: required list of files
-      local_cksum: dictionary of local files and their checksums
+    @type nodeinfo: L{objects.Node}
+    @param nodeinfo: the node to check
+    @param file_list: required list of files
+    @param local_cksum: dictionary of local files and their checksums
+    @param node_result: the results from the node
+    @param feedback_fn: function used to accumulate results
+    @param master_files: list of files that only masters should have
 
     """
 
     """
+    node = nodeinfo.name
+
+    # main result, node_result should be a non-empty dict
+    if not node_result or not isinstance(node_result, dict):
+      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
+      return True
+
     # compares ganeti version
     local_version = constants.PROTOCOL_VERSION
+    remote_version = node_result.get('version', None)
     if not remote_version:
       feedback_fn("  - ERROR: connection to %s failed" % (node))
       return True
@@ -540,6 +654,7 @@ class LUVerifyCluster(LogicalUnit):
     # checks vg existence and size > 20G
 
     bad = False
+    vglist = node_result.get(constants.NV_VGLIST, None)
     if not vglist:
       feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                       (node,))
@@ -551,47 +666,59 @@ class LUVerifyCluster(LogicalUnit):
         feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
         bad = True
 
-    if not node_result:
-      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
-      return True
-
     # checks config file checksum
-    # checks ssh to any
 
-    if 'filelist' not in node_result:
+    remote_cksum = node_result.get(constants.NV_FILELIST, None)
+    if not isinstance(remote_cksum, dict):
       bad = True
       feedback_fn("  - ERROR: node hasn't returned file checksum data")
     else:
-      remote_cksum = node_result['filelist']
       for file_name in file_list:
+        node_is_mc = nodeinfo.master_candidate
+        must_have_file = file_name not in master_files
         if file_name not in remote_cksum:
-          bad = True
-          feedback_fn("  - ERROR: file '%s' missing" % file_name)
+          if node_is_mc or must_have_file:
+            bad = True
+            feedback_fn("  - ERROR: file '%s' missing" % file_name)
         elif remote_cksum[file_name] != local_cksum[file_name]:
-          bad = True
-          feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
+          if node_is_mc or must_have_file:
+            bad = True
+            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
+          else:
+            # not candidate and this is not a must-have file
+            bad = True
+            feedback_fn("  - ERROR: non master-candidate has old/wrong file"
+                        " '%s'" % file_name)
+        else:
+          # all good, except non-master/non-must have combination
+          if not node_is_mc and not must_have_file:
+            feedback_fn("  - ERROR: file '%s' should not exist on non master"
+                        " candidates" % file_name)
+
+    # checks ssh to any
 
-    if 'nodelist' not in node_result:
+    if constants.NV_NODELIST not in node_result:
       bad = True
       feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
     else:
-      if node_result['nodelist']:
+      if node_result[constants.NV_NODELIST]:
         bad = True
-        for node in node_result['nodelist']:
+        for node in node_result[constants.NV_NODELIST]:
           feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
-                          (node, node_result['nodelist'][node]))
-    if 'node-net-test' not in node_result:
+                          (node, node_result[constants.NV_NODELIST][node]))
+
+    if constants.NV_NODENETTEST not in node_result:
       bad = True
       feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
     else:
-      if node_result['node-net-test']:
+      if node_result[constants.NV_NODENETTEST]:
         bad = True
-        nlist = utils.NiceSort(node_result['node-net-test'].keys())
+        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
         for node in nlist:
           feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
-                          (node, node_result['node-net-test'][node]))
+                          (node, node_result[constants.NV_NODENETTEST][node]))
 
-    hyp_result = node_result.get('hypervisor', None)
+    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
     if isinstance(hyp_result, dict):
       for hv_name, hv_result in hyp_result.iteritems():
         if hv_result is not None:
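Condensed, the per-file verification policy introduced above; note that must_have_file is true for files every node needs, i.e. files *not* listed in master_files. A sketch, not the patch's literal code::

    def _FileVerdictSketch(node_is_mc, must_have_file, present, cksum_ok):
      # returns the error to report, or None if the file state is fine
      if not present:
        if node_is_mc or must_have_file:
          return "file missing"
        return None  # master-only file absent from a plain node: fine
      if not cksum_ok:
        if node_is_mc or must_have_file:
          return "file has wrong checksum"
        return "non master-candidate has old/wrong file"
      if not node_is_mc and not must_have_file:
        # reported by the patch without setting the bad flag
        return "file should not exist on non master candidates"
      return None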
@@ -600,7 +727,7 @@ class LUVerifyCluster(LogicalUnit):
     return bad
 
   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
-                      node_instance, feedback_fn):
+                      node_instance, feedback_fn, n_offline):
     """Verify an instance.
 
     This function checks to see if the required block devices are
@@ -615,6 +742,9 @@ class LUVerifyCluster(LogicalUnit):
     instanceconfig.MapLVsByNode(node_vol_should)
 
     for node in node_vol_should:
+      if node in n_offline:
+        # ignore missing volumes on offline nodes
+        continue
       for volume in node_vol_should[node]:
         if node not in node_vol_is or volume not in node_vol_is[node]:
           feedback_fn("  - ERROR: volume %s missing on node %s" %
@@ -622,8 +752,9 @@ class LUVerifyCluster(LogicalUnit):
           bad = True
 
     if not instanceconfig.status == 'down':
-      if (node_current not in node_instance or
-          not instance in node_instance[node_current]):
+      if ((node_current not in node_instance or
+          not instance in node_instance[node_current]) and
+          node_current not in n_offline):
         feedback_fn("  - ERROR: instance %s not running on node %s" %
                         (instance, node_current))
         bad = True
@@ -738,6 +869,7 @@ class LUVerifyCluster(LogicalUnit):
     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
     i_non_redundant = [] # Non redundant instances
     i_non_a_balanced = [] # Non auto-balanced instances
+    n_offline = [] # List of offline nodes
     node_volume = {}
     node_instance = {}
     node_info = {}
@@ -745,71 +877,95 @@ class LUVerifyCluster(LogicalUnit):
 
     # FIXME: verify OS list
     # do local checksums
-    file_names = []
+    master_files = [constants.CLUSTER_CONF_FILE]
+
+    file_names = ssconf.SimpleStore().GetFileList()
     file_names.append(constants.SSL_CERT_FILE)
-    file_names.append(constants.CLUSTER_CONF_FILE)
+    file_names.append(constants.RAPI_CERT_FILE)
+    file_names.extend(master_files)
+
     local_checksums = utils.FingerprintFiles(file_names)
 
     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
-    all_volumeinfo = self.rpc.call_volume_list(nodelist, vg_name)
-    all_instanceinfo = self.rpc.call_instance_list(nodelist, hypervisors)
-    all_vglist = self.rpc.call_vg_list(nodelist)
     node_verify_param = {
-      'filelist': file_names,
-      'nodelist': nodelist,
-      'hypervisor': hypervisors,
-      'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
-                        for node in nodeinfo]
+      constants.NV_FILELIST: file_names,
+      constants.NV_NODELIST: [node.name for node in nodeinfo
+                              if not node.offline],
+      constants.NV_HYPERVISOR: hypervisors,
+      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
+                                  node.secondary_ip) for node in nodeinfo
+                                 if not node.offline],
+      constants.NV_LVLIST: vg_name,
+      constants.NV_INSTANCELIST: hypervisors,
+      constants.NV_VGLIST: None,
+      constants.NV_VERSION: None,
+      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
       }
     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                            self.cfg.GetClusterName())
-    all_rversion = self.rpc.call_version(nodelist)
-    all_ninfo = self.rpc.call_node_info(nodelist, self.cfg.GetVGName(),
-                                        self.cfg.GetHypervisorType())
 
     cluster = self.cfg.GetClusterInfo()
-    for node in nodelist:
-      feedback_fn("* Verifying node %s" % node)
-      result = self._VerifyNode(node, file_names, local_checksums,
-                                all_vglist[node], all_nvinfo[node],
-                                all_rversion[node], feedback_fn)
-      bad = bad or result
+    master_node = self.cfg.GetMasterNode()
+    for node_i in nodeinfo:
+      node = node_i.name
+      nresult = all_nvinfo[node].data
+
+      if node_i.offline:
+        feedback_fn("* Skipping offline node %s" % (node,))
+        n_offline.append(node)
+        continue
 
-      # node_volume
-      volumeinfo = all_volumeinfo[node]
+      if node == master_node:
+        ntype = "master"
+      elif node_i.master_candidate:
+        ntype = "master candidate"
+      else:
+        ntype = "regular"
+      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
 
-      if isinstance(volumeinfo, basestring):
+      if all_nvinfo[node].failed or not isinstance(nresult, dict):
+        feedback_fn("  - ERROR: connection to %s failed" % (node,))
+        bad = True
+        continue
+
+      result = self._VerifyNode(node_i, file_names, local_checksums,
+                                nresult, feedback_fn, master_files)
+      bad = bad or result
+
+      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
+      if isinstance(lvdata, basestring):
         feedback_fn("  - ERROR: LVM problem on node %s: %s" %
-                    (node, volumeinfo[-400:].encode('string_escape')))
+                    (node, lvdata.encode('string_escape')))
         bad = True
         node_volume[node] = {}
-      elif not isinstance(volumeinfo, dict):
-        feedback_fn("  - ERROR: connection to %s failed" % (node,))
+      elif not isinstance(lvdata, dict):
+        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
         bad = True
         continue
       else:
-        node_volume[node] = volumeinfo
+        node_volume[node] = lvdata
 
       # node_instance
-      nodeinstance = all_instanceinfo[node]
-      if type(nodeinstance) != list:
-        feedback_fn("  - ERROR: connection to %s failed" % (node,))
+      idata = nresult.get(constants.NV_INSTANCELIST, None)
+      if not isinstance(idata, list):
+        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
+                    (node,))
         bad = True
         continue
 
-      node_instance[node] = nodeinstance
+      node_instance[node] = idata
 
       # node_info
-      nodeinfo = all_ninfo[node]
+      nodeinfo = nresult.get(constants.NV_HVINFO, None)
       if not isinstance(nodeinfo, dict):
-        feedback_fn("  - ERROR: connection to %s failed" % (node,))
+        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
         bad = True
         continue
 
       try:
         node_info[node] = {
           "mfree": int(nodeinfo['memory_free']),
-          "dfree": int(nodeinfo['vg_free']),
+          "dfree": int(nresult[constants.NV_VGLIST][vg_name]),
           "pinst": [],
           "sinst": [],
           # dictionary holding all instances this node is secondary for,
           "pinst": [],
           "sinst": [],
           # dictionary holding all instances this node is secondary for,
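With the separate volume/instance/vg/version RPCs folded into one, each node now answers the single call_node_verify with one dictionary. A sketch of the payload shape, using stand-ins for the constants.NV_* keys (their real string values live in ganeti/constants.py; the file path and numbers are illustrative)::

    NV_FILELIST, NV_NODELIST, NV_LVLIST = "filelist", "nodelist", "lvlist"

    nresult = {
      NV_FILELIST: {"/var/lib/ganeti/config.data": "8f2da1c0"},
      NV_NODELIST: {},    # per-node ssh failures; empty means all good
      NV_LVLIST: {},      # logical volumes, or an error string
      "version": 13,      # the remote PROTOCOL_VERSION
    }
    remote_version = nresult.get("version", None)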
@@ -831,8 +987,9 @@ class LUVerifyCluster(LogicalUnit):
       feedback_fn("* Verifying instance %s" % instance)
       inst_config = self.cfg.GetInstanceInfo(instance)
       result =  self._VerifyInstance(instance, inst_config, node_volume,
-                                     node_instance, feedback_fn)
+                                     node_instance, feedback_fn, n_offline)
       bad = bad or result
+      inst_nodes_offline = []
 
       inst_config.MapLVsByNode(node_vol_should)
 
@@ -841,11 +998,14 @@ class LUVerifyCluster(LogicalUnit):
       pnode = inst_config.primary_node
       if pnode in node_info:
         node_info[pnode]['pinst'].append(instance)
-      else:
+      elif pnode not in n_offline:
         feedback_fn("  - ERROR: instance %s, connection to primary node"
                     " %s failed" % (instance, pnode))
         bad = True
 
+      if pnode in n_offline:
+        inst_nodes_offline.append(pnode)
+
       # If the instance is non-redundant we cannot survive losing its primary
       # node, so we are not N+1 compliant. On the other hand we have no disk
       # templates with more than one secondary so that situation is not well
@@ -866,9 +1026,18 @@ class LUVerifyCluster(LogicalUnit):
           if pnode not in node_info[snode]['sinst-by-pnode']:
             node_info[snode]['sinst-by-pnode'][pnode] = []
           node_info[snode]['sinst-by-pnode'][pnode].append(instance)
-        else:
+        elif snode not in n_offline:
           feedback_fn("  - ERROR: instance %s, connection to secondary node"
                       " %s failed" % (instance, snode))
+          bad = True
+        if snode in n_offline:
+          inst_nodes_offline.append(snode)
+
+      if inst_nodes_offline:
+        # warn that the instance lives on offline nodes, and set bad=True
+        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
+                    ", ".join(inst_nodes_offline))
+        bad = True
 
     feedback_fn("* Verifying orphan volumes")
     result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
@@ -894,17 +1063,24 @@ class LUVerifyCluster(LogicalUnit):
       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                   % len(i_non_a_balanced))
 
+    if n_offline:
+      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
+
     return not bad
 
   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
-    """Analize the post-hooks' result, handle it, and send some
+    """Analize the post-hooks' result
+
+    This method analyses the hook result, handles it, and sends some
     nicely-formatted feedback back to the user.
 
-    Args:
-      phase: the hooks phase that has just been run
-      hooks_results: the results of the multi-node hooks rpc call
-      feedback_fn: function to send feedback back to the caller
-      lu_result: previous Exec result
+    @param phase: one of L{constants.HOOKS_PHASE_POST} or
+        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
+    @param hooks_results: the results of the multi-node hooks rpc call
+    @param feedback_fn: function used to send feedback back to the caller
+    @param lu_result: previous Exec result
+    @return: the new Exec result, based on the previous result
+        and hook results
 
     """
     # We only really run POST phase hooks, and are only interested in
@@ -920,11 +1096,14 @@ class LUVerifyCluster(LogicalUnit):
         for node_name in hooks_results:
           show_node_header = True
           res = hooks_results[node_name]
-          if res is False or not isinstance(res, list):
-            feedback_fn("    Communication failure")
+          if res.failed or res.data is False or not isinstance(res.data, list):
+            if res.offline:
+              # no need to warn or set fail return value
+              continue
+            feedback_fn("    Communication failure in hooks execution")
             lu_result = 1
             continue
-          for script, hkr, output in res:
+          for script, hkr, output in res.data:
             if hkr == constants.HKR_FAIL:
               # The node header is only shown once, if there are
               # failing hooks on that node
@@ -993,13 +1172,18 @@ class LUVerifyDisks(NoHooksLU):
     for node in nodes:
       # node_volume
       lvs = node_lvs[node]
-
+      if lvs.failed:
+        if not lvs.offline:
+          self.LogWarning("Connection to node %s failed: %s" %
+                          (node, lvs.data))
+        continue
+      lvs = lvs.data
       if isinstance(lvs, basestring):
-        logger.Info("error enumerating LVs on node %s: %s" % (node, lvs))
+        logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
         res_nlvm[node] = lvs
       elif not isinstance(lvs, dict):
-        logger.Info("connection to node %s failed or invalid data returned" %
-                    (node,))
+        logging.warning("Connection to node %s failed or invalid data"
+                        " returned", node)
         res_nodes.append(node)
         continue
 
@@ -1068,43 +1252,44 @@ class LURenameCluster(LogicalUnit):
 
     # shutdown the master IP
     master = self.cfg.GetMasterNode()
-    if not self.rpc.call_node_stop_master(master, False):
+    result = self.rpc.call_node_stop_master(master, False)
+    if result.failed or not result.data:
       raise errors.OpExecError("Could not disable the master role")
 
     try:
-      # modify the sstore
-      # TODO: sstore
-      ss.SetKey(ss.SS_MASTER_IP, ip)
-      ss.SetKey(ss.SS_CLUSTER_NAME, clustername)
-
-      # Distribute updated ss config to all nodes
-      myself = self.cfg.GetNodeInfo(master)
-      dist_nodes = self.cfg.GetNodeList()
-      if myself.name in dist_nodes:
-        dist_nodes.remove(myself.name)
-
-      logger.Debug("Copying updated ssconf data to all nodes")
-      for keyname in [ss.SS_CLUSTER_NAME, ss.SS_MASTER_IP]:
-        fname = ss.KeyToFilename(keyname)
-        result = self.rpc.call_upload_file(dist_nodes, fname)
-        for to_node in dist_nodes:
-          if not result[to_node]:
-            logger.Error("copy of file %s to node %s failed" %
-                         (fname, to_node))
+      cluster = self.cfg.GetClusterInfo()
+      cluster.cluster_name = clustername
+      cluster.master_ip = ip
+      self.cfg.Update(cluster)
+
+      # update the known hosts file
+      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
+      node_list = self.cfg.GetNodeList()
+      try:
+        node_list.remove(master)
+      except ValueError:
+        pass
+      result = self.rpc.call_upload_file(node_list,
+                                         constants.SSH_KNOWN_HOSTS_FILE)
+      for to_node, to_result in result.iteritems():
+        if to_result.failed or not to_result.data:
+          logging.error("Copy of file %s to node %s failed",
+                        constants.SSH_KNOWN_HOSTS_FILE, to_node)
+
     finally:
-      if not self.rpc.call_node_start_master(master, False):
-        logger.Error("Could not re-enable the master role on the master,"
-                     " please restart manually.")
+      result = self.rpc.call_node_start_master(master, False)
+      if result.failed or not result.data:
+        self.LogWarning("Could not re-enable the master role on"
+                        " the master, please restart manually.")
 
 
 def _RecursiveCheckIfLVMBased(disk):
   """Check if the given disk or its children are lvm-based.
 
-  Args:
-    disk: ganeti.objects.Disk object
-
-  Returns:
-    boolean indicating whether a LD_LV dev_type was found or not
+  @type disk: L{objects.Disk}
+  @param disk: the disk to check
+  @rtype: boolean
+  @return: boolean indicating whether a LD_LV dev_type was found or not
 
   """
   if disk.children:
@@ -1123,6 +1308,21 @@ class LUSetClusterParams(LogicalUnit):
   _OP_REQP = []
   REQ_BGL = False
 
+  def CheckArguments(self):
+    """Check parameters.
+
+    """
+    if not hasattr(self.op, "candidate_pool_size"):
+      self.op.candidate_pool_size = None
+    if self.op.candidate_pool_size is not None:
+      try:
+        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
+      except ValueError, err:
+        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
+                                   str(err))
+      if self.op.candidate_pool_size < 1:
+        raise errors.OpPrereqError("At least one master candidate needed")
+
   def ExpandNames(self):
     # FIXME: in the future maybe other cluster params won't require checking on
     # all nodes to be modified.
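The coercion pattern above, isolated as plain Python: default a missing optional attribute to None, then convert and range-check only when a value was given (the class here is a stand-in for the real opcode object)::

    class _FakeOp(object):
      candidate_pool_size = "3"

    op = _FakeOp()
    if not hasattr(op, "candidate_pool_size"):
      op.candidate_pool_size = None
    if op.candidate_pool_size is not None:
      # may raise ValueError, which the LU reports as OpPrereqError
      op.candidate_pool_size = int(op.candidate_pool_size)
      if op.candidate_pool_size < 1:
        raise ValueError("at least one master candidate is needed")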
@@ -1151,7 +1351,7 @@ class LUSetClusterParams(LogicalUnit):
     """
     # FIXME: This only works because there is only one parameter that can be
     # changed or removed.
-    if not self.op.vg_name:
+    if self.op.vg_name is not None and not self.op.vg_name:
       instances = self.cfg.GetAllInstancesInfo().values()
       for inst in instances:
         for disk in inst.disks:
@@ -1159,26 +1359,109 @@ class LUSetClusterParams(LogicalUnit):
             raise errors.OpPrereqError("Cannot disable lvm storage while"
                                        " lvm-based instances exist")
 
+    node_list = self.acquired_locks[locking.LEVEL_NODE]
+
     # if vg_name not None, checks given volume group on all nodes
     if self.op.vg_name:
-      node_list = self.acquired_locks[locking.LEVEL_NODE]
       vglist = self.rpc.call_vg_list(node_list)
       for node in node_list:
-        vgstatus = utils.CheckVolumeGroupSize(vglist[node], self.op.vg_name,
+        if vglist[node].failed:
+          # ignoring down node
+          self.LogWarning("Node %s unreachable/error, ignoring" % node)
+          continue
+        vgstatus = utils.CheckVolumeGroupSize(vglist[node].data,
+                                              self.op.vg_name,
                                               constants.MIN_VG_SIZE)
         if vgstatus:
           raise errors.OpPrereqError("Error on node '%s': %s" %
                                      (node, vgstatus))
 
+    self.cluster = cluster = self.cfg.GetClusterInfo()
+    # validate beparams changes
+    if self.op.beparams:
+      utils.CheckBEParams(self.op.beparams)
+      self.new_beparams = cluster.FillDict(
+        cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
+
+    # hypervisor list/parameters
+    self.new_hvparams = cluster.FillDict(cluster.hvparams, {})
+    if self.op.hvparams:
+      if not isinstance(self.op.hvparams, dict):
+        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
+      for hv_name, hv_dict in self.op.hvparams.items():
+        if hv_name not in self.new_hvparams:
+          self.new_hvparams[hv_name] = hv_dict
+        else:
+          self.new_hvparams[hv_name].update(hv_dict)
+
+    if self.op.enabled_hypervisors is not None:
+      self.hv_list = self.op.enabled_hypervisors
+    else:
+      self.hv_list = cluster.enabled_hypervisors
+
+    if self.op.hvparams or self.op.enabled_hypervisors is not None:
+      # either the enabled list has changed, or the parameters have, validate
+      for hv_name, hv_params in self.new_hvparams.items():
+        if ((self.op.hvparams and hv_name in self.op.hvparams) or
+            (self.op.enabled_hypervisors and
+             hv_name in self.op.enabled_hypervisors)):
+          # either this is a new hypervisor, or its parameters have changed
+          hv_class = hypervisor.GetHypervisor(hv_name)
+          hv_class.CheckParameterSyntax(hv_params)
+          _CheckHVParams(self, node_list, hv_name, hv_params)
+
   def Exec(self, feedback_fn):
     """Change the parameters of the cluster.
 
     """
-    if self.op.vg_name != self.cfg.GetVGName():
-      self.cfg.SetVGName(self.op.vg_name)
-    else:
-      feedback_fn("Cluster LVM configuration already in desired"
-                  " state, not changing")
+    if self.op.vg_name is not None:
+      if self.op.vg_name != self.cfg.GetVGName():
+        self.cfg.SetVGName(self.op.vg_name)
+      else:
+        feedback_fn("Cluster LVM configuration already in desired"
+                    " state, not changing")
+    if self.op.hvparams:
+      self.cluster.hvparams = self.new_hvparams
+    if self.op.enabled_hypervisors is not None:
+      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
+    if self.op.beparams:
+      self.cluster.beparams[constants.BEGR_DEFAULT] = self.new_beparams
+    if self.op.candidate_pool_size is not None:
+      self.cluster.candidate_pool_size = self.op.candidate_pool_size
+
+    self.cfg.Update(self.cluster)
+
+    # we want to update nodes after the cluster so that if any errors
+    # happen, we have recorded and saved the cluster info
+    if self.op.candidate_pool_size is not None:
+      _AdjustCandidatePool(self)
+
+
+class LURedistributeConfig(NoHooksLU):
+  """Force the redistribution of cluster configuration.
+
+  This is a very simple LU.
+
+  """
+  _OP_REQP = []
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    self.needed_locks = {
+      locking.LEVEL_NODE: locking.ALL_SET,
+    }
+    self.share_locks[locking.LEVEL_NODE] = 1
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    """
+
+  def Exec(self, feedback_fn):
+    """Redistribute the configuration.
+
+    """
+    self.cfg.Update(self.cfg.GetClusterInfo())
 
 
 def _WaitForSync(lu, instance, oneshot=False, unlock=False):
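The hypervisor-parameter handling in the LUSetClusterParams hunk above performs a two-level merge: per hypervisor, keys from the opcode override stored ones while untouched keys survive. In isolation, with plain dicts standing in for the cluster's FillDict copies (hypervisor names and paths are illustrative)::

    new_hvparams = {"xen-pvm": {"kernel_path": "/boot/vmlinuz"}}
    op_hvparams = {"xen-pvm": {"root_path": "/dev/sda1"},
                   "fake": {}}
    for hv_name, hv_dict in op_hvparams.items():
      if hv_name not in new_hvparams:
        new_hvparams[hv_name] = hv_dict        # entirely new hypervisor
      else:
        new_hvparams[hv_name].update(hv_dict)  # merge only changed keys
    # new_hvparams["xen-pvm"] now holds kernel_path and root_path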
@@ -1202,20 +1485,21 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False):
     done = True
     cumul_degraded = False
     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
-    if not rstats:
-      lu.proc.LogWarning("Can't get any data from node %s" % node)
+    if rstats.failed or not rstats.data:
+      lu.LogWarning("Can't get any data from node %s", node)
       retries += 1
       if retries >= 10:
         raise errors.RemoteError("Can't contact node %s for mirror data,"
                                  " aborting." % node)
       time.sleep(6)
       continue
+    rstats = rstats.data
     retries = 0
     for i in range(len(rstats)):
       mstat = rstats[i]
       if mstat is None:
-        lu.proc.LogWarning("Can't compute data for node %s/%s" %
-                           (node, instance.disks[i].iv_name))
+        lu.LogWarning("Can't compute data for node %s/%s",
+                           node, instance.disks[i].iv_name)
         continue
       # we ignore the ldisk parameter
       perc_done, est_time, is_degraded, _ = mstat
@@ -1256,11 +1540,11 @@ def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
   result = True
   if on_primary or dev.AssembleOnSecondary():
     rstats = lu.rpc.call_blockdev_find(node, dev)
-    if not rstats:
-      logger.ToStderr("Node %s: Disk degraded, not found or node down" % node)
+    if rstats.failed or not rstats.data:
+      logging.warning("Node %s: disk degraded, not found or node down", node)
       result = False
     else:
-      result = result and (not rstats[idx])
+      result = result and (not rstats.data[idx])
   if dev.children:
     for child in dev.children:
       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
@@ -1274,14 +1558,15 @@ class LUDiagnoseOS(NoHooksLU):
   """
   _OP_REQP = ["output_fields", "names"]
   REQ_BGL = False
+  _FIELDS_STATIC = utils.FieldSet()
+  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
 
   def ExpandNames(self):
     if self.op.names:
       raise errors.OpPrereqError("Selective OS query not supported")
 
-    self.dynamic_fields = frozenset(["name", "valid", "node_status"])
-    _CheckOutputFields(static=[],
-                       dynamic=self.dynamic_fields,
+    _CheckOutputFields(static=self._FIELDS_STATIC,
+                       dynamic=self._FIELDS_DYNAMIC,
                        selected=self.op.output_fields)
 
     # Lock all nodes, in shared mode
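The frozenset-based field validation gives way to utils.FieldSet here and in the query LUs below. A stand-in with just the behavior this patch relies on (only the constructor, Extend and NonMatching are inferred from usage; the real class may match field names more flexibly)::

    class FieldSetSketch(object):
      def __init__(self, *items):
        self._items = set(items)

      def Extend(self, other):
        self._items.update(other._items)

      def NonMatching(self, selected):
        # the requested names this set does not know about
        return [f for f in selected if f not in self._items]

    fs = FieldSetSketch("name", "valid", "node_status")
    fs.NonMatching(["name", "bogus"])  # -> ["bogus"]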
@@ -1298,24 +1583,23 @@ class LUDiagnoseOS(NoHooksLU):
   def _DiagnoseByOS(node_list, rlist):
     """Remaps a per-node return list into an a per-os per-node dictionary
 
-      Args:
-        node_list: a list with the names of all nodes
-        rlist: a map with node names as keys and OS objects as values
+    @param node_list: a list with the names of all nodes
+    @param rlist: a map with node names as keys and OS objects as values
+
+    @rtype: dict
+    @return: a dictionary with osnames as keys and as value another map, with
+        nodes as keys and list of OS objects as values, e.g.::
 
 
-      Returns:
-        map: a map with osnames as keys and as value another map, with
-             nodes as
-             keys and list of OS objects as values
-             e.g. {"debian-etch": {"node1": [<object>,...],
-                                   "node2": [<object>,]}
-                  }
+          {"debian-etch": {"node1": [<object>,...],
+                           "node2": [<object>,]}
+          }
 
     """
     all_os = {}
     for node_name, nr in rlist.iteritems():
-      if not nr:
+      if nr.failed or not nr.data:
         continue
-      for os_obj in nr:
+      for os_obj in nr.data:
         if os_obj.name not in all_os:
           # build a list of nodes for this os containing empty lists
           # for each node in node_list
@@ -1330,10 +1614,12 @@ class LUDiagnoseOS(NoHooksLU):
 
     """
     node_list = self.acquired_locks[locking.LEVEL_NODE]
-    node_data = self.rpc.call_os_diagnose(node_list)
+    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()
+                   if node in node_list]
+    node_data = self.rpc.call_os_diagnose(valid_nodes)
     if node_data == False:
       raise errors.OpExecError("Can't gather the list of OSes")
-    pol = self._DiagnoseByOS(node_list, node_data)
+    pol = self._DiagnoseByOS(valid_nodes, node_data)
     output = []
     for os_name, os_data in pol.iteritems():
       row = []
@@ -1415,13 +1701,16 @@ class LURemoveNode(LogicalUnit):
 
     """
     node = self.node
-    logger.Info("stopping the node daemon and removing configs from node %s" %
-                node.name)
+    logging.info("Stopping the node daemon and removing configs from node %s",
+                 node.name)
 
     self.context.RemoveNode(node.name)
 
     self.rpc.call_node_leave_cluster(node.name)
 
+    # Promote nodes to master candidate as needed
+    _AdjustCandidatePool(self)
+
 
 class LUQueryNodes(NoHooksLU):
   """Logical unit for querying nodes.
@@ -1429,24 +1718,26 @@ class LUQueryNodes(NoHooksLU):
   """
   _OP_REQP = ["output_fields", "names"]
   REQ_BGL = False
+  _FIELDS_DYNAMIC = utils.FieldSet(
+    "dtotal", "dfree",
+    "mtotal", "mnode", "mfree",
+    "bootid",
+    "ctotal",
+    )
+
+  _FIELDS_STATIC = utils.FieldSet(
+    "name", "pinst_cnt", "sinst_cnt",
+    "pinst_list", "sinst_list",
+    "pip", "sip", "tags",
+    "serial_no",
+    "master_candidate",
+    "master",
+    "offline",
+    )
 
   def ExpandNames(self):
-    self.dynamic_fields = frozenset([
-      "dtotal", "dfree",
-      "mtotal", "mnode", "mfree",
-      "bootid",
-      "ctotal",
-      ])
-
-    self.static_fields = frozenset([
-      "name", "pinst_cnt", "sinst_cnt",
-      "pinst_list", "sinst_list",
-      "pip", "sip", "tags",
-      "serial_no",
-      ])
-
-    _CheckOutputFields(static=self.static_fields,
-                       dynamic=self.dynamic_fields,
+    _CheckOutputFields(static=self._FIELDS_STATIC,
+                       dynamic=self._FIELDS_DYNAMIC,
                        selected=self.op.output_fields)
 
     self.needed_locks = {}
@@ -1457,7 +1748,7 @@ class LUQueryNodes(NoHooksLU):
     else:
       self.wanted = locking.ALL_SET
 
-    self.do_locking = not self.static_fields.issuperset(self.op.output_fields)
+    self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
     if self.do_locking:
       # if we don't request only static fields, we need to lock the nodes
       self.needed_locks[locking.LEVEL_NODE] = self.wanted
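Note: the locking switch above assumes FieldSet semantics in which
NonMatching() returns the requested fields not covered by any static pattern,
so an empty result means the query can be served from the configuration alone
and no node locks are needed. A standalone sketch of these assumed semantics:

  import re

  class FieldSetSketch(object):
    # mirrors the assumed utils.FieldSet behaviour: anchored full-match regexes
    def __init__(self, *items):
      self.items = [re.compile("^%s$" % value) for value in items]

    def Matches(self, field):
      for item in self.items:
        m = item.match(field)
        if m:
          return m
      return None

    def NonMatching(self, fields):
      return [f for f in fields if not self.Matches(f)]

  static = FieldSetSketch("name", "pip", "serial_no")
  # only static fields requested: NonMatching() is empty, no locking
  assert not static.NonMatching(["name", "pip"])
  # a dynamic field such as "mfree" is not covered: locking is needed
  assert static.NonMatching(["name", "mfree"]) == ["mfree"]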
@@ -1492,21 +1783,23 @@ class LUQueryNodes(NoHooksLU):
 
     # begin data gathering
 
-    if self.dynamic_fields.intersection(self.op.output_fields):
+    if self.do_locking:
       live_data = {}
       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                           self.cfg.GetHypervisorType())
       for name in nodenames:
-        nodeinfo = node_data.get(name, None)
-        if nodeinfo:
+        nodeinfo = node_data[name]
+        if not nodeinfo.failed and nodeinfo.data:
+          nodeinfo = nodeinfo.data
+          fn = utils.TryConvert
           live_data[name] = {
-            "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
-            "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
-            "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
-            "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
-            "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
-            "ctotal": utils.TryConvert(int, nodeinfo['cpu_total']),
-            "bootid": nodeinfo['bootid'],
+            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
+            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
+            "mfree": fn(int, nodeinfo.get('memory_free', None)),
+            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
+            "dfree": fn(int, nodeinfo.get('vg_free', None)),
+            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
+            "bootid": nodeinfo.get('bootid', None),
             }
         else:
           live_data[name] = {}
@@ -1529,6 +1822,8 @@ class LUQueryNodes(NoHooksLU):
           if secnode in node_to_secondary:
             node_to_secondary[secnode].add(inst.name)
 
+    master_node = self.cfg.GetMasterNode()
+
     # end data gathering
 
     output = []
@@ -1553,7 +1848,13 @@ class LUQueryNodes(NoHooksLU):
           val = list(node.GetTags())
         elif field == "serial_no":
           val = node.serial_no
-        elif field in self.dynamic_fields:
+        elif field == "master_candidate":
+          val = node.master_candidate
+        elif field == "master":
+          val = node.name == master_node
+        elif field == "offline":
+          val = node.offline
+        elif self._FIELDS_DYNAMIC.Matches(field):
           val = live_data[node.name].get(field, None)
         else:
           raise errors.ParameterError(field)
@@ -1569,10 +1870,12 @@ class LUQueryNodeVolumes(NoHooksLU):
   """
   _OP_REQP = ["nodes", "output_fields"]
   REQ_BGL = False
+  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
+  _FIELDS_STATIC = utils.FieldSet("node")
 
   def ExpandNames(self):
-    _CheckOutputFields(static=["node"],
-                       dynamic=["phys", "vg", "name", "size", "instance"],
+    _CheckOutputFields(static=self._FIELDS_STATIC,
+                       dynamic=self._FIELDS_DYNAMIC,
                        selected=self.op.output_fields)
 
     self.needed_locks = {}
@@ -1605,10 +1908,10 @@ class LUQueryNodeVolumes(NoHooksLU):
 
     output = []
     for node in nodenames:
-      if node not in volumes or not volumes[node]:
+      if node not in volumes or volumes[node].failed or not volumes[node].data:
         continue
 
-      node_vols = volumes[node][:]
+      node_vols = volumes[node].data[:]
       node_vols.sort(key=lambda vol: vol['dev'])
 
       for vol in node_vols:
@@ -1739,9 +2042,15 @@ class LUAddNode(LogicalUnit):
         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                    " based ping to noded port")
 
+    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
+    mc_now, _ = self.cfg.GetMasterCandidateStats()
+    master_candidate = mc_now < cp_size
+
     self.new_node = objects.Node(name=node,
                                  primary_ip=primary_ip,
-                                 secondary_ip=secondary_ip)
+                                 secondary_ip=secondary_ip,
+                                 master_candidate=master_candidate,
+                                 offline=False)
 
   def Exec(self, feedback_fn):
     """Adds the new node to the cluster.
@@ -1752,19 +2061,20 @@ class LUAddNode(LogicalUnit):
 
     # check connectivity
     result = self.rpc.call_version([node])[node]
-    if result:
-      if constants.PROTOCOL_VERSION == result:
-        logger.Info("communication to node %s fine, sw version %s match" %
-                    (node, result))
+    result.Raise()
+    if result.data:
+      if constants.PROTOCOL_VERSION == result.data:
+        logging.info("Communication to node %s fine, sw version %s match",
+                     node, result.data)
       else:
         raise errors.OpExecError("Version mismatch master version %s,"
                                  " node version %s" %
-                                 (constants.PROTOCOL_VERSION, result))
+                                 (constants.PROTOCOL_VERSION, result.data))
     else:
       raise errors.OpExecError("Cannot get version from the new node")
 
     # setup ssh on node
-    logger.Info("copy ssh key to node %s" % node)
+    logging.info("Copy ssh key to node %s", node)
     priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
     keyarray = []
     keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
@@ -1782,15 +2092,16 @@ class LUAddNode(LogicalUnit):
                                     keyarray[2],
                                     keyarray[3], keyarray[4], keyarray[5])
 
-    if not result:
+    if result.failed or not result.data:
       raise errors.OpExecError("Cannot transfer ssh keys to the new node")
 
     # Add node to our /etc/hosts, and add key to known_hosts
     utils.AddHostToEtcHosts(new_node.name)
 
     if new_node.secondary_ip != new_node.primary_ip:
-      if not self.rpc.call_node_has_ip_address(new_node.name,
-                                               new_node.secondary_ip):
+      result = self.rpc.call_node_has_ip_address(new_node.name,
+                                                 new_node.secondary_ip)
+      if result.failed or not result.data:
         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                  " you gave (%s). Please fix and re-run this"
                                  " command." % new_node.secondary_ip)
@@ -1804,11 +2115,11 @@ class LUAddNode(LogicalUnit):
     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                        self.cfg.GetClusterName())
     for verifier in node_verify_list:
-      if not result[verifier]:
+      if result[verifier].failed or not result[verifier].data:
         raise errors.OpExecError("Cannot communicate with %s's node daemon"
                                  " for remote verification" % verifier)
-      if result[verifier]['nodelist']:
-        for failed in result[verifier]['nodelist']:
+      if result[verifier].data['nodelist']:
+        for failed in result[verifier].data['nodelist']:
           feedback_fn("ssh/hostname verification failed %s -> %s" %
                       (verifier, result[verifier]['nodelist'][failed]))
         raise errors.OpExecError("ssh/hostname verification failed.")
@@ -1822,21 +2133,20 @@ class LUAddNode(LogicalUnit):
     if myself.name in dist_nodes:
       dist_nodes.remove(myself.name)
 
-    logger.Debug("Copying hosts and known_hosts to all nodes")
+    logging.debug("Copying hosts and known_hosts to all nodes")
     for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
       result = self.rpc.call_upload_file(dist_nodes, fname)
-      for to_node in dist_nodes:
-        if not result[to_node]:
-          logger.Error("copy of file %s to node %s failed" %
-                       (fname, to_node))
+      for to_node, to_result in result.iteritems():
+        if to_result.failed or not to_result.data:
+          logging.error("Copy of file %s to node %s failed", fname, to_node)
 
     to_copy = []
     if constants.HT_XEN_HVM in self.cfg.GetClusterInfo().enabled_hypervisors:
       to_copy.append(constants.VNC_PASSWORD_FILE)
     for fname in to_copy:
       result = self.rpc.call_upload_file([node], fname)
-      if not result[node]:
-        logger.Error("could not copy file %s to node %s" % (fname, node))
+      if result[node].failed or not result[node]:
+        logging.error("Could not copy file %s to node %s", fname, node)
 
     if self.op.readd:
       self.context.ReaddNode(new_node)
 
@@ -1844,12 +2154,117 @@ class LUAddNode(LogicalUnit):
       self.context.AddNode(new_node)
 
 
+class LUSetNodeParams(LogicalUnit):
+  """Modifies the parameters of a node.
+
+  """
+  HPATH = "node-modify"
+  HTYPE = constants.HTYPE_NODE
+  _OP_REQP = ["node_name"]
+  REQ_BGL = False
+
+  def CheckArguments(self):
+    node_name = self.cfg.ExpandNodeName(self.op.node_name)
+    if node_name is None:
+      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
+    self.op.node_name = node_name
+    _CheckBooleanOpField(self.op, 'master_candidate')
+    _CheckBooleanOpField(self.op, 'offline')
+    if self.op.master_candidate is None and self.op.offline is None:
+      raise errors.OpPrereqError("Please pass at least one modification")
+    if self.op.offline == True and self.op.master_candidate == True:
+      raise errors.OpPrereqError("Can't set the node into offline and"
+                                 " master_candidate at the same time")
+
+  def ExpandNames(self):
+    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    This runs on the master node.
+
+    """
+    env = {
+      "OP_TARGET": self.op.node_name,
+      "MASTER_CANDIDATE": str(self.op.master_candidate),
+      "OFFLINE": str(self.op.offline),
+      }
+    nl = [self.cfg.GetMasterNode(),
+          self.op.node_name]
+    return env, nl, nl
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This only checks the instance list against the existing names.
+
+    """
+    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
+
+    if ((self.op.master_candidate == False or self.op.offline == True)
+        and node.master_candidate):
+      # we will demote the node from master_candidate
+      if self.op.node_name == self.cfg.GetMasterNode():
+        raise errors.OpPrereqError("The master node has to be a"
+                                   " master candidate and online")
+      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
+      num_candidates, _ = self.cfg.GetMasterCandidateStats()
+      if num_candidates <= cp_size:
+        msg = ("Not enough master candidates (desired"
+               " %d, new value will be %d)" % (cp_size, num_candidates-1))
+        if self.op.force:
+          self.LogWarning(msg)
+        else:
+          raise errors.OpPrereqError(msg)
+
+    if (self.op.master_candidate == True and node.offline and
+        not self.op.offline == False):
+      raise errors.OpPrereqError("Can't set an offline node to"
+                                 " master_candidate")
+
+    return
+
+  def Exec(self, feedback_fn):
+    """Modifies a node.
+
+    """
+    node = self.node
+
+    result = []
+
+    if self.op.offline is not None:
+      node.offline = self.op.offline
+      result.append(("offline", str(self.op.offline)))
+      if self.op.offline == True and node.master_candidate:
+        node.master_candidate = False
+        result.append(("master_candidate", "auto-demotion due to offline"))
+
+    if self.op.master_candidate is not None:
+      node.master_candidate = self.op.master_candidate
+      result.append(("master_candidate", str(self.op.master_candidate)))
+      if self.op.master_candidate == False:
+        rrc = self.rpc.call_node_demote_from_mc(node.name)
+        if (rrc.failed or not isinstance(rrc.data, (tuple, list))
+            or len(rrc.data) != 2):
+          self.LogWarning("Node rpc error: %s" % rrc.error)
+        elif not rrc.data[0]:
+          self.LogWarning("Node failed to demote itself: %s" % rrc.data[1])
+
+    # this will trigger configuration file update, if needed
+    self.cfg.Update(node)
+    # this will trigger job queue propagation or cleanup
+    if self.op.node_name != self.cfg.GetMasterNode():
+      self.context.ReaddNode(node)
+
+    return result
+
+
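Note: the demotion path in CheckPrereq above enforces one invariant: demoting
a master candidate must not shrink the pool below candidate_pool_size unless
the operation is forced. A minimal sketch with illustrative numbers (the real
code raises errors.OpPrereqError and logs through self.LogWarning):

  def check_demotion_sketch(num_candidates, cp_size, force, log_warning):
    # Demoting one candidate leaves num_candidates - 1 in the pool; refuse
    # to go below the configured size unless forced.
    if num_candidates <= cp_size:
      msg = ("Not enough master candidates (desired %d, new value will be %d)"
             % (cp_size, num_candidates - 1))
      if force:
        log_warning(msg)
      else:
        raise ValueError(msg)

  check_demotion_sketch(5, 3, False, lambda msg: None)  # ok: 4 >= 3 remain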
 class LUQueryClusterInfo(NoHooksLU):
   """Query cluster configuration.
 
   """
   _OP_REQP = []
-  REQ_MASTER = False
   REQ_BGL = False
 
   def ExpandNames(self):
@@ -1875,10 +2290,11 @@ class LUQueryClusterInfo(NoHooksLU):
       "architecture": (platform.architecture()[0], platform.machine()),
       "name": cluster.cluster_name,
       "master": cluster.master_node,
       "architecture": (platform.architecture()[0], platform.machine()),
       "name": cluster.cluster_name,
       "master": cluster.master_node,
-      "hypervisor_type": cluster.hypervisor,
+      "default_hypervisor": cluster.default_hypervisor,
       "enabled_hypervisors": cluster.enabled_hypervisors,
       "hvparams": cluster.hvparams,
       "beparams": cluster.beparams,
       "enabled_hypervisors": cluster.enabled_hypervisors,
       "hvparams": cluster.hvparams,
       "beparams": cluster.beparams,
+      "candidate_pool_size": cluster.candidate_pool_size,
       }
 
     return result
@@ -1890,13 +2306,14 @@ class LUQueryConfigValues(NoHooksLU):
   """
   _OP_REQP = []
   REQ_BGL = False
+  _FIELDS_DYNAMIC = utils.FieldSet()
+  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
 
   def ExpandNames(self):
     self.needed_locks = {}
 
-    static_fields = ["cluster_name", "master_node", "drain_flag"]
-    _CheckOutputFields(static=static_fields,
-                       dynamic=[],
+    _CheckOutputFields(static=self._FIELDS_STATIC,
+                       dynamic=self._FIELDS_DYNAMIC,
                        selected=self.op.output_fields)
 
   def CheckPrereq(self):
@@ -1948,6 +2365,7 @@ class LUActivateInstanceDisks(NoHooksLU):
     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
+    _CheckNodeOnline(self, self.instance.primary_node)
 
   def Exec(self, feedback_fn):
     """Activate the disks.
 
@@ -1965,15 +2383,17 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
 
   This sets up the block devices on all nodes.
 
-  Args:
-    instance: a ganeti.objects.Instance object
-    ignore_secondaries: if true, errors on secondary nodes won't result
-                        in an error return from the function
+  @type lu: L{LogicalUnit}
+  @param lu: the logical unit on whose behalf we execute
+  @type instance: L{objects.Instance}
+  @param instance: the instance for whose disks we assemble
+  @type ignore_secondaries: boolean
+  @param ignore_secondaries: if true, errors on secondary nodes
+      won't result in an error return from the function
+  @return: False if the operation failed, otherwise a list of
+      (host, instance_visible_name, node_visible_name)
+      with the mapping from node devices to instance devices
 
 
-  Returns:
-    false if the operation failed
-    list of (host, instance_visible_name, node_visible_name) if the operation
-         suceeded with the mapping from node devices to instance devices
   """
   device_info = []
   disks_ok = True
@@ -1992,9 +2412,10 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(node_disk, node)
       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
-      if not result:
-        logger.Error("could not prepare block device %s on node %s"
-                     " (is_primary=False, pass=1)" % (inst_disk.iv_name, node))
+      if result.failed or not result:
+        lu.proc.LogWarning("Could not prepare block device %s on node %s"
+                           " (is_primary=False, pass=1)",
+                           inst_disk.iv_name, node)
         if not ignore_secondaries:
           disks_ok = False
 
@@ -2007,11 +2428,12 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
         continue
       lu.cfg.SetDiskID(node_disk, node)
       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
-      if not result:
-        logger.Error("could not prepare block device %s on node %s"
-                     " (is_primary=True, pass=2)" % (inst_disk.iv_name, node))
+      if result.failed or not result:
+        lu.proc.LogWarning("Could not prepare block device %s on node %s"
+                           " (is_primary=True, pass=2)",
+                           inst_disk.iv_name, node)
         disks_ok = False
-    device_info.append((instance.primary_node, inst_disk.iv_name, result))
+    device_info.append((instance.primary_node, inst_disk.iv_name, result.data))
 
   # leave the disks configured for the primary node
   # this is a workaround that would be fixed better by
 
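Note on the two passes above: every node is first assembled with
is_primary=False, then the primary node is re-assembled with is_primary=True,
so mirrored devices find their peers already up when the primary activates.
A sketch of the ordering only (helper name hypothetical):

  def assembly_order_sketch(primary_node, all_nodes):
    # pass 1: every node in secondary mode (secondary failures may be
    # ignored); pass 2: the primary again, now in primary mode.
    order = [(node, False) for node in all_nodes]
    order.append((primary_node, True))
    return order

  assert assembly_order_sketch("node1", ["node1", "node2"]) == \
      [("node1", False), ("node2", False), ("node1", True)]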
@@ -2031,8 +2453,9 @@ def _StartInstanceDisks(lu, instance, force):
   if not disks_ok:
     _ShutdownInstanceDisks(lu, instance)
     if force is not None and not force:
-      logger.Error("If the message above refers to a secondary node,"
-                   " you can retry the operation using '--force'.")
+      lu.proc.LogWarning("", hint="If the message above refers to a"
+                         " secondary node,"
+                         " you can retry the operation using '--force'.")
     raise errors.OpExecError("Disk consistency error")
 
 
@@ -2080,11 +2503,11 @@ def _SafeShutdownInstanceDisks(lu, instance):
   ins_l = lu.rpc.call_instance_list([instance.primary_node],
                                       [instance.hypervisor])
   ins_l = ins_l[instance.primary_node]
-  if not type(ins_l) is list:
+  if ins_l.failed or not isinstance(ins_l.data, list):
     raise errors.OpExecError("Can't contact node '%s'" %
                              instance.primary_node)
 
-  if instance.name in ins_l:
+  if instance.name in ins_l.data:
     raise errors.OpExecError("Instance is running, can't shutdown"
                              " block devices.")
 
@@ -2104,15 +2527,16 @@ def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
   for disk in instance.disks:
     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(top_disk, node)
-      if not lu.rpc.call_blockdev_shutdown(node, top_disk):
-        logger.Error("could not shutdown block device %s on node %s" %
-                     (disk.iv_name, node))
+      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
+      if result.failed or not result.data:
+        logging.error("Could not shutdown block device %s on node %s",
+                      disk.iv_name, node)
         if not ignore_primary or node != instance.primary_node:
           result = False
   return result
 
 
-def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor):
+def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
   """Checks if a node has enough free memory.
 
   This function check if a given node has the needed amount of free
   """Checks if a node has enough free memory.
 
   This function check if a given node has the needed amount of free
@@ -2128,18 +2552,15 @@ def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor):
   @param reason: string to use in the error message
   @type requested: C{int}
   @param requested: the amount of memory in MiB to check for
-  @type hypervisor: C{str}
-  @param hypervisor: the hypervisor to ask for memory stats
+  @type hypervisor_name: C{str}
+  @param hypervisor_name: the hypervisor to ask for memory stats
   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
       we cannot check the node
 
   """
-  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor)
-  if not nodeinfo or not isinstance(nodeinfo, dict):
-    raise errors.OpPrereqError("Could not contact node %s for resource"
-                             " information" % (node,))
-
-  free_mem = nodeinfo[node].get('memory_free')
+  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
+  nodeinfo[node].Raise()
+  free_mem = nodeinfo[node].data.get('memory_free')
   if not isinstance(free_mem, int):
     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                              " was '%s'" % (node, free_mem))
@@ -2160,12 +2581,6 @@ class LUStartupInstance(LogicalUnit):
 
   def ExpandNames(self):
     self._ExpandAndLockInstance()
-    self.needed_locks[locking.LEVEL_NODE] = []
-    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
-  def DeclareLocks(self, level):
-    if level == locking.LEVEL_NODE:
-      self._LockInstancesNodes()
 
   def BuildHooksEnv(self):
     """Build hooks env.
 
@@ -2191,6 +2606,8 @@ class LUStartupInstance(LogicalUnit):
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
 
+    _CheckNodeOnline(self, instance.primary_node)
+
    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)
@@ -2213,7 +2630,8 @@ class LUStartupInstance(LogicalUnit):
 
     _StartInstanceDisks(self, instance, force)
 
-    if not self.rpc.call_instance_start(node_current, instance, extra_args):
+    result = self.rpc.call_instance_start(node_current, instance, extra_args)
+    if result.failed or not result.data:
       _ShutdownInstanceDisks(self, instance)
       raise errors.OpExecError("Could not start instance")
 
@@ -2236,13 +2654,6 @@ class LURebootInstance(LogicalUnit):
                                    constants.INSTANCE_REBOOT_HARD,
                                    constants.INSTANCE_REBOOT_FULL))
     self._ExpandAndLockInstance()
-    self.needed_locks[locking.LEVEL_NODE] = []
-    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
-  def DeclareLocks(self, level):
-    if level == locking.LEVEL_NODE:
-      primary_only = not constants.INSTANCE_REBOOT_FULL
-      self._LockInstancesNodes(primary_only=primary_only)
 
   def BuildHooksEnv(self):
     """Build hooks env.
 
@@ -2268,6 +2679,8 @@ class LURebootInstance(LogicalUnit):
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
 
+    _CheckNodeOnline(self, instance.primary_node)
+
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

@@ -2284,15 +2697,17 @@ class LURebootInstance(LogicalUnit):
 
     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                        constants.INSTANCE_REBOOT_HARD]:
-      if not self.rpc.call_instance_reboot(node_current, instance,
-                                           reboot_type, extra_args):
+      result = self.rpc.call_instance_reboot(node_current, instance,
+                                             reboot_type, extra_args)
+      if result.failed or not result.data:
         raise errors.OpExecError("Could not reboot instance")
     else:
       if not self.rpc.call_instance_shutdown(node_current, instance):
         raise errors.OpExecError("could not shutdown instance for full reboot")
       _ShutdownInstanceDisks(self, instance)
       _StartInstanceDisks(self, instance, ignore_secondaries)
-      if not self.rpc.call_instance_start(node_current, instance, extra_args):
+      result = self.rpc.call_instance_start(node_current, instance, extra_args)
+      if result.failed or not result.data:
         _ShutdownInstanceDisks(self, instance)
         raise errors.OpExecError("Could not start instance for full reboot")
 
@@ -2310,12 +2725,6 @@ class LUShutdownInstance(LogicalUnit):
 
   def ExpandNames(self):
     self._ExpandAndLockInstance()
-    self.needed_locks[locking.LEVEL_NODE] = []
-    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
-  def DeclareLocks(self, level):
-    if level == locking.LEVEL_NODE:
-      self._LockInstancesNodes()
 
   def BuildHooksEnv(self):
     """Build hooks env.
 
@@ -2337,6 +2746,7 @@ class LUShutdownInstance(LogicalUnit):
     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
+    _CheckNodeOnline(self, self.instance.primary_node)
 
   def Exec(self, feedback_fn):
     """Shutdown the instance.
 
@@ -2345,8 +2755,9 @@ class LUShutdownInstance(LogicalUnit):
     instance = self.instance
     node_current = instance.primary_node
     self.cfg.MarkInstanceDown(instance.name)
-    if not self.rpc.call_instance_shutdown(node_current, instance):
-      logger.Error("could not shutdown instance")
+    result = self.rpc.call_instance_shutdown(node_current, instance)
+    if result.failed or not result.data:
+      self.proc.LogWarning("Could not shutdown instance")
 
     _ShutdownInstanceDisks(self, instance)
 
 
@@ -2362,12 +2773,6 @@ class LUReinstallInstance(LogicalUnit):
 
   def ExpandNames(self):
     self._ExpandAndLockInstance()
-    self.needed_locks[locking.LEVEL_NODE] = []
-    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
-  def DeclareLocks(self, level):
-    if level == locking.LEVEL_NODE:
-      self._LockInstancesNodes()
 
   def BuildHooksEnv(self):
     """Build hooks env.
 
@@ -2389,6 +2794,7 @@ class LUReinstallInstance(LogicalUnit):
     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
+    _CheckNodeOnline(self, instance.primary_node)
 
     if instance.disk_template == constants.DT_DISKLESS:
       raise errors.OpPrereqError("Instance '%s' has no disks" %
 
@@ -2399,7 +2805,7 @@ class LUReinstallInstance(LogicalUnit):
     remote_info = self.rpc.call_instance_info(instance.primary_node,
                                               instance.name,
                                               instance.hypervisor)
-    if remote_info:
+    if remote_info.failed or remote_info.data:
       raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                  (self.op.instance_name,
                                   instance.primary_node))
@@ -2412,8 +2818,9 @@ class LUReinstallInstance(LogicalUnit):
       if pnode is None:
         raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                    self.op.pnode)
-      os_obj = self.rpc.call_os_get(pnode.name, self.op.os_type)
-      if not os_obj:
+      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
+      result.Raise()
+      if not isinstance(result.data, objects.OS):
         raise errors.OpPrereqError("OS '%s' not in supported OS list for"
                                    " primary node"  % self.op.os_type)
 
@@ -2433,8 +2840,9 @@ class LUReinstallInstance(LogicalUnit):
     _StartInstanceDisks(self, inst, None)
     try:
       feedback_fn("Running the instance OS create scripts...")
-      if not self.rpc.call_instance_os_add(inst.primary_node, inst,
-                                           "sda", "sdb"):
+      result = self.rpc.call_instance_os_add(inst.primary_node, inst)
+      result.Raise()
+      if not result.data:
         raise errors.OpExecError("Could not install OS for instance %s"
                                  " on node %s" %
                                  (inst.name, inst.primary_node))
@@ -2473,13 +2881,16 @@ class LURenameInstance(LogicalUnit):
     if instance is None:
       raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
+    _CheckNodeOnline(self, instance.primary_node)
+
     if instance.status != "down":
       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                  self.op.instance_name)
     remote_info = self.rpc.call_instance_info(instance.primary_node,
                                               instance.name,
                                               instance.hypervisor)
-    if remote_info:
+    remote_info.Raise()
+    if remote_info.data:
       raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                  (self.op.instance_name,
                                   instance.primary_node))
@@ -2512,7 +2923,7 @@ class LURenameInstance(LogicalUnit):
 
     self.cfg.RenameInstance(inst.name, self.op.new_name)
     # Change the instance lock. This is definitely safe while we hold the BGL
-    self.context.glm.remove(locking.LEVEL_INSTANCE, inst.name)
+    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
 
     # re-read the instance from the configuration after rename
@@ -2523,15 +2934,15 @@ class LURenameInstance(LogicalUnit):
       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                      old_file_storage_dir,
                                                      new_file_storage_dir)
-
-      if not result:
+      result.Raise()
+      if not result.data:
         raise errors.OpExecError("Could not connect to node '%s' to rename"
                                  " directory '%s' to '%s' (but the instance"
                                  " has been renamed in Ganeti)" % (
                                  inst.primary_node, old_file_storage_dir,
                                  new_file_storage_dir))
 
-      if not result[0]:
+      if not result.data[0]:
         raise errors.OpExecError("Could not rename directory '%s' to '%s'"
                                  " (but the instance has been renamed in"
                                  " Ganeti)" % (old_file_storage_dir,
@@ -2539,13 +2950,13 @@ class LURenameInstance(LogicalUnit):
 
     _StartInstanceDisks(self, inst, None)
     try:
-      if not self.rpc.call_instance_run_rename(inst.primary_node, inst,
-                                               old_name,
-                                               "sda", "sdb"):
+      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
+                                                 old_name)
+      if result.failed or not result.data:
         msg = ("Could not run OS rename script for instance %s on node %s"
                " (but the instance has been renamed in Ganeti)" %
                (inst.name, inst.primary_node))
-        logger.Error(msg)
+        self.proc.LogWarning(msg)
     finally:
       _ShutdownInstanceDisks(self, inst)
 
@@ -2593,17 +3004,18 @@ class LURemoveInstance(LogicalUnit):
 
     """
     instance = self.instance
-    logger.Info("shutting down instance %s on node %s" %
-                (instance.name, instance.primary_node))
+    logging.info("Shutting down instance %s on node %s",
+                 instance.name, instance.primary_node)
 
 
-    if not self.rpc.call_instance_shutdown(instance.primary_node, instance):
+    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
+    if result.failed or not result.data:
       if self.op.ignore_failures:
         feedback_fn("Warning: can't shutdown instance")
       else:
         raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                  (instance.name, instance.primary_node))
 
-    logger.Info("removing block devices for instance %s" % instance.name)
+    logging.info("Removing block devices for instance %s", instance.name)
 
     if not _RemoveDisks(self, instance):
       if self.op.ignore_failures:
 
@@ -2611,7 +3023,7 @@ class LURemoveInstance(LogicalUnit):
       else:
         raise errors.OpExecError("Can't remove instance's disks")
 
-    logger.Info("removing instance %s out of cluster config" % instance.name)
+    logging.info("Removing instance %s out of cluster config", instance.name)
 
     self.cfg.RemoveInstance(instance.name)
     self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
 
@@ -2623,22 +3035,27 @@ class LUQueryInstances(NoHooksLU):
   """
   _OP_REQP = ["output_fields", "names"]
   REQ_BGL = False
+  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
+                                    "admin_state", "admin_ram",
+                                    "disk_template", "ip", "mac", "bridge",
+                                    "sda_size", "sdb_size", "vcpus", "tags",
+                                    "network_port", "beparams",
+                                    "(disk).(size)/([0-9]+)",
+                                    "(disk).(sizes)",
+                                    "(nic).(mac|ip|bridge)/([0-9]+)",
+                                    "(nic).(macs|ips|bridges)",
+                                    "(disk|nic).(count)",
+                                    "serial_no", "hypervisor", "hvparams",] +
+                                  ["hv/%s" % name
+                                   for name in constants.HVS_PARAMETERS] +
+                                  ["be/%s" % name
+                                   for name in constants.BES_PARAMETERS])
+  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
+
 
   def ExpandNames(self):
 
   def ExpandNames(self):
-    self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"])
-    hvp = ["hv/%s" % name for name in constants.HVS_PARAMETERS]
-    bep = ["be/%s" % name for name in constants.BES_PARAMETERS]
-    self.static_fields = frozenset([
-      "name", "os", "pnode", "snodes",
-      "admin_state", "admin_ram",
-      "disk_template", "ip", "mac", "bridge",
-      "sda_size", "sdb_size", "vcpus", "tags",
-      "network_port",
-      "serial_no", "hypervisor", "hvparams",
-      ] + hvp + bep)
-
-    _CheckOutputFields(static=self.static_fields,
-                       dynamic=self.dynamic_fields,
+    _CheckOutputFields(static=self._FIELDS_STATIC,
+                       dynamic=self._FIELDS_DYNAMIC,
                        selected=self.op.output_fields)
 
     self.needed_locks = {}
                        selected=self.op.output_fields)
 
     self.needed_locks = {}
@@ -2650,7 +3067,7 @@ class LUQueryInstances(NoHooksLU):
     else:
       self.wanted = locking.ALL_SET
 
     else:
       self.wanted = locking.ALL_SET
 
-    self.do_locking = not self.static_fields.issuperset(self.op.output_fields)
+    self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
     if self.do_locking:
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
       self.needed_locks[locking.LEVEL_NODE] = []
     if self.do_locking:
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
       self.needed_locks[locking.LEVEL_NODE] = []
@@ -2692,16 +3109,21 @@ class LUQueryInstances(NoHooksLU):
     hv_list = list(set([inst.hypervisor for inst in instance_list]))
 
     bad_nodes = []
     hv_list = list(set([inst.hypervisor for inst in instance_list]))
 
     bad_nodes = []
-    if self.dynamic_fields.intersection(self.op.output_fields):
+    off_nodes = []
+    if self.do_locking:
       live_data = {}
       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
       for name in nodes:
         result = node_data[name]
       live_data = {}
       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
       for name in nodes:
         result = node_data[name]
-        if result:
-          live_data.update(result)
-        elif result == False:
+        if result.offline:
+          # offline nodes will be in both lists
+          off_nodes.append(name)
+        if result.failed:
           bad_nodes.append(name)
           bad_nodes.append(name)
-        # else no instance is alive
+        else:
+          if result.data:
+            live_data.update(result.data)
+            # else no instance is alive
     else:
       live_data = dict([(name, {}) for name in instance_names])
 
     else:
       live_data = dict([(name, {}) for name in instance_names])
 
@@ -2715,6 +3137,7 @@ class LUQueryInstances(NoHooksLU):
       i_hv = self.cfg.GetClusterInfo().FillHV(instance)
       i_be = self.cfg.GetClusterInfo().FillBE(instance)
       for field in self.op.output_fields:
       i_hv = self.cfg.GetClusterInfo().FillHV(instance)
       i_be = self.cfg.GetClusterInfo().FillBE(instance)
       for field in self.op.output_fields:
+        st_match = self._FIELDS_STATIC.Matches(field)
         if field == "name":
           val = instance.name
         elif field == "os":
         if field == "name":
           val = instance.name
         elif field == "os":
@@ -2731,7 +3154,9 @@ class LUQueryInstances(NoHooksLU):
           else:
             val = bool(live_data.get(instance.name))
         elif field == "status":
           else:
             val = bool(live_data.get(instance.name))
         elif field == "status":
-          if instance.primary_node in bad_nodes:
+          if instance.primary_node in off_nodes:
+            val = "ERROR_nodeoffline"
+          elif instance.primary_node in bad_nodes:
             val = "ERROR_nodedown"
           else:
             running = bool(live_data.get(instance.name))
@@ -2761,11 +3186,11 @@ class LUQueryInstances(NoHooksLU):
         elif field == "mac":
           val = instance.nics[0].mac
         elif field == "sda_size" or field == "sdb_size":
-          disk = instance.FindDisk(field[:3])
-          if disk is None:
+          idx = ord(field[2]) - ord('a')
+          try:
+            val = instance.FindDisk(idx).size
+          except errors.OpPrereqError:
             val = None
-          else:
-            val = disk.size
         elif field == "tags":
           val = list(instance.GetTags())
         elif field == "serial_no":
@@ -2784,6 +3209,46 @@ class LUQueryInstances(NoHooksLU):
         elif (field.startswith(BEPREFIX) and
               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
           val = i_be.get(field[len(BEPREFIX):], None)
+        elif st_match and st_match.groups():
+          # matches a variable list
+          st_groups = st_match.groups()
+          if st_groups and st_groups[0] == "disk":
+            if st_groups[1] == "count":
+              val = len(instance.disks)
+            elif st_groups[1] == "sizes":
+              val = [disk.size for disk in instance.disks]
+            elif st_groups[1] == "size":
+              try:
+                val = instance.FindDisk(st_groups[2]).size
+              except errors.OpPrereqError:
+                val = None
+            else:
+              assert False, "Unhandled disk parameter"
+          elif st_groups[0] == "nic":
+            if st_groups[1] == "count":
+              val = len(instance.nics)
+            elif st_groups[1] == "macs":
+              val = [nic.mac for nic in instance.nics]
+            elif st_groups[1] == "ips":
+              val = [nic.ip for nic in instance.nics]
+            elif st_groups[1] == "bridges":
+              val = [nic.bridge for nic in instance.nics]
+            else:
+              # index-based item
+              nic_idx = int(st_groups[2])
+              if nic_idx >= len(instance.nics):
+                val = None
+              else:
+                if st_groups[1] == "mac":
+                  val = instance.nics[nic_idx].mac
+                elif st_groups[1] == "ip":
+                  val = instance.nics[nic_idx].ip
+                elif st_groups[1] == "bridge":
+                  val = instance.nics[nic_idx].bridge
+                else:
+                  assert False, "Unhandled NIC parameter"
+          else:
+            assert False, "Unhandled variable parameter"
         else:
           raise errors.ParameterError(field)
         iout.append(val)
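Note: the variable-list fields in _FIELDS_STATIC are regular expressions;
assuming FieldSet anchors each pattern as a full match, the captured groups
become the st_groups tuple dispatched on above. A minimal illustration:

  import re

  # FieldSet is assumed to compile each pattern as "^pattern$"
  pat = re.compile("^%s$" % "(nic).(mac|ip|bridge)/([0-9]+)")
  assert pat.match("nic.mac/0").groups() == ("nic", "mac", "0")
  assert pat.match("nic.model/0") is None  # unknown sub-field: no match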
@@ -2844,6 +3309,7 @@ class LUFailoverInstance(LogicalUnit):
                                    "a mirrored disk template")
 
     target_node = secondary_nodes[0]
                                    "a mirrored disk template")
 
     target_node = secondary_nodes[0]
+    _CheckNodeOnline(self, target_node)
     # check memory requirements on the secondary node
     _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                          instance.name, bep[constants.BE_MEMORY],
@@ -2851,7 +3317,9 @@ class LUFailoverInstance(LogicalUnit):
 
    # check bridge existence
    brlist = [nic.bridge for nic in instance.nics]
-    if not self.rpc.call_bridges_exist(target_node, brlist):
+    result = self.rpc.call_bridges_exist(target_node, brlist)
+    result.Raise()
+    if not result.data:
       raise errors.OpPrereqError("One or more target bridges %s does not"
                                  " exist on destination node '%s'" %
                                  (brlist, target_node))
@@ -2877,14 +3345,16 @@ class LUFailoverInstance(LogicalUnit):
                                    " aborting failover." % dev.iv_name)
 
     feedback_fn("* shutting down instance on source node")
                                    " aborting failover." % dev.iv_name)
 
     feedback_fn("* shutting down instance on source node")
-    logger.Info("Shutting down instance %s on node %s" %
-                (instance.name, source_node))
+    logging.info("Shutting down instance %s on node %s",
+                 instance.name, source_node)
 
 
-    if not self.rpc.call_instance_shutdown(source_node, instance):
+    result = self.rpc.call_instance_shutdown(source_node, instance)
+    if result.failed or not result.data:
       if self.op.ignore_consistency:
-        logger.Error("Could not shutdown instance %s on node %s. Proceeding"
-                     " anyway. Please make sure node %s is down"  %
-                     (instance.name, source_node, source_node))
+        self.proc.LogWarning("Could not shutdown instance %s on node %s."
+                             " Proceeding"
+                             " anyway. Please make sure node %s is down",
+                             instance.name, source_node, source_node)
       else:
         raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                  (instance.name, source_node))
@@ -2900,8 +3370,8 @@ class LUFailoverInstance(LogicalUnit):
     # Only start the instance if it's marked as up
     if instance.status == "up":
       feedback_fn("* activating the instance's disks on target node")
-      logger.Info("Starting instance %s on node %s" %
-                  (instance.name, target_node))
+      logging.info("Starting instance %s on node %s",
+                   instance.name, target_node)
 
       disks_ok, dummy = _AssembleInstanceDisks(self, instance,
                                                ignore_secondaries=True)
@@ -2910,12 +3380,317 @@ class LUFailoverInstance(LogicalUnit):
         raise errors.OpExecError("Can't activate the instance's disks")
 
       feedback_fn("* starting the instance on the target node")
         raise errors.OpExecError("Can't activate the instance's disks")
 
       feedback_fn("* starting the instance on the target node")
-      if not self.rpc.call_instance_start(target_node, instance, None):
+      result = self.rpc.call_instance_start(target_node, instance, None)
+      if result.failed or not result.data:
         _ShutdownInstanceDisks(self, instance)
         raise errors.OpExecError("Could not start instance %s on node %s." %
                                  (instance.name, target_node))
 
 
+class LUMigrateInstance(LogicalUnit):
+  """Migrate an instance.
+
+  This is migration without shutting down the instance, as opposed to
+  failover, which requires the instance to be shut down first.
+
+  """
+  HPATH = "instance-migrate"
+  HTYPE = constants.HTYPE_INSTANCE
+  _OP_REQP = ["instance_name", "live", "cleanup"]
+
+  REQ_BGL = False
+
+  def ExpandNames(self):
+    self._ExpandAndLockInstance()
+    self.needed_locks[locking.LEVEL_NODE] = []
+    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+
+  def DeclareLocks(self, level):
+    if level == locking.LEVEL_NODE:
+      self._LockInstancesNodes()
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    This runs on master, primary and secondary nodes of the instance.
+
+    """
+    env = _BuildInstanceHookEnvByObject(self, self.instance)
+    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
+    return env, nl, nl
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This checks that the instance is in the cluster.
+
+    """
+    instance = self.cfg.GetInstanceInfo(
+      self.cfg.ExpandInstanceName(self.op.instance_name))
+    if instance is None:
+      raise errors.OpPrereqError("Instance '%s' not known" %
+                                 self.op.instance_name)
+
+    if instance.disk_template != constants.DT_DRBD8:
+      raise errors.OpPrereqError("Instance's disk layout is not"
+                                 " drbd8, cannot migrate.")
+
+    secondary_nodes = instance.secondary_nodes
+    if not secondary_nodes:
+      raise errors.ProgrammerError("no secondary node but using "
+                                   "drbd8 disk template")
+
+    i_be = self.cfg.GetClusterInfo().FillBE(instance)
+
+    target_node = secondary_nodes[0]
+    # check memory requirements on the secondary node
+    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
+                         instance.name, i_be[constants.BE_MEMORY],
+                         instance.hypervisor)
+
+    # check bridge existence
+    brlist = [nic.bridge for nic in instance.nics]
+    result = self.rpc.call_bridges_exist(target_node, brlist)
+    if result.failed or not result.data:
+      raise errors.OpPrereqError("One or more target bridges %s does not"
+                                 " exist on destination node '%s'" %
+                                 (brlist, target_node))
+
+    if not self.op.cleanup:
+      result = self.rpc.call_instance_migratable(instance.primary_node,
+                                                 instance)
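+      # a non-empty RemoteFailMsg() denotes a failure on the remote side
+      # and carries the error message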
+      msg = result.RemoteFailMsg()
+      if msg:
+        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
+                                   msg)
+
+    self.instance = instance
+
+  def _WaitUntilSync(self):
+    """Poll with custom rpc for disk sync.
+
+    This uses our own step-based rpc call.
+
+    """
+    self.feedback_fn("* wait until resync is done")
+    all_done = False
+    while not all_done:
+      all_done = True
+      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
+                                            self.nodes_ip,
+                                            self.instance.disks)
+      min_percent = 100
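+      # each node reports (done, sync_percent) in nres.data[1]; we are
+      # finished only when every node is done, and we show the lowest
+      # percentage as the overall progress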
+      for node, nres in result.items():
+        msg = nres.RemoteFailMsg()
+        if msg:
+          raise errors.OpExecError("Cannot resync disks on node %s: %s" %
+                                   (node, msg))
+        node_done, node_percent = nres.data[1]
+        all_done = all_done and node_done
+        if node_percent is not None:
+          min_percent = min(min_percent, node_percent)
+      if not all_done:
+        if min_percent < 100:
+          self.feedback_fn("   - progress: %.1f%%" % min_percent)
+        time.sleep(2)
+
+  def _EnsureSecondary(self, node):
+    """Demote a node to secondary.
+
+    """
+    self.feedback_fn("* switching node %s to secondary mode" % node)
+
+    for dev in self.instance.disks:
+      self.cfg.SetDiskID(dev, node)
+
+    result = self.rpc.call_blockdev_close(node, self.instance.name,
+                                          self.instance.disks)
+    msg = result.RemoteFailMsg()
+    if msg:
+      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
+                               " error %s" % (node, msg))
+
+  def _GoStandalone(self):
+    """Disconnect from the network.
+
+    """
+    self.feedback_fn("* changing into standalone mode")
+    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
+                                               self.instance.disks)
+    for node, nres in result.items():
+      msg = nres.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Cannot disconnect disks node %s,"
+                                 " error %s" % (node, msg))
+
+  def _GoReconnect(self, multimaster):
+    """Reconnect to the network.
+
+    """
+    if multimaster:
+      msg = "dual-master"
+    else:
+      msg = "single-master"
+    self.feedback_fn("* changing disks into %s mode" % msg)
+    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
+                                           self.instance.disks,
+                                           self.instance.name, multimaster)
+    for node, nres in result.items():
+      msg = nres.RemoteFailMsg()
+      if msg:
+        raise errors.OpExecError("Cannot change disks config on node %s,"
+                                 " error: %s" % (node, msg))
+
+  def _ExecCleanup(self):
+    """Try to cleanup after a failed migration.
+
+    The cleanup is done by:
+      - check that the instance is running only on one node
+        (and update the config if needed)
+      - change disks on its secondary node to secondary
+      - wait until disks are fully synchronized
+      - disconnect from the network
+      - change disks into single-master mode
+      - wait again until disks are fully synchronized
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    source_node = self.source_node
+
+    # check running on only one node
+    self.feedback_fn("* checking where the instance actually runs"
+                     " (if this hangs, the hypervisor might be in"
+                     " a bad state)")
+    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
+    for node, result in ins_l.items():
+      result.Raise()
+      if not isinstance(result.data, list):
+        raise errors.OpExecError("Can't contact node '%s'" % node)
+
+    runningon_source = instance.name in ins_l[source_node].data
+    runningon_target = instance.name in ins_l[target_node].data
+
+    if runningon_source and runningon_target:
+      raise errors.OpExecError("Instance seems to be running on two nodes,"
+                               " or the hypervisor is confused. You will have"
+                               " to ensure manually that it runs only on one"
+                               " and restart this operation.")
+
+    if not (runningon_source or runningon_target):
+      raise errors.OpExecError("Instance does not seem to be running at all."
+                               " In this case, it's safer to repair by"
+                               " running 'gnt-instance stop' to ensure disk"
+                               " shutdown, and then restarting it.")
+
+    if runningon_target:
+      # the migration has actually succeeded, we need to update the config
+      self.feedback_fn("* instance running on secondary node (%s),"
+                       " updating config" % target_node)
+      instance.primary_node = target_node
+      self.cfg.Update(instance)
+      demoted_node = source_node
+    else:
+      self.feedback_fn("* instance confirmed to be running on its"
+                       " primary node (%s)" % source_node)
+      demoted_node = target_node
+
+    self._EnsureSecondary(demoted_node)
+    try:
+      self._WaitUntilSync()
+    except errors.OpExecError:
+      # we ignore here errors, since if the device is standalone, it
+      # won't be able to sync
+      pass
+    self._GoStandalone()
+    self._GoReconnect(False)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* done")
+
+  def _ExecMigration(self):
+    """Migrate an instance.
+
+    The migrate is done by:
+      - change the disks into dual-master mode
+      - wait until disks are fully synchronized again
+      - migrate the instance
+      - change disks on the new secondary node (the old primary) to secondary
+      - wait until disks are fully synchronized
+      - change disks into single-master mode
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    source_node = self.source_node
+
+    self.feedback_fn("* checking disk consistency between source and target")
+    for dev in instance.disks:
+      if not _CheckDiskConsistency(self, dev, target_node, False):
+        raise errors.OpExecError("Disk %s is degraded or not fully"
+                                 " synchronized on target node,"
+                                 " aborting migrate." % dev.iv_name)
+
+    self._EnsureSecondary(target_node)
+    self._GoStandalone()
+    self._GoReconnect(True)
+    self._WaitUntilSync()
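+    # the disks are now connected in dual-master mode and fully synced,
+    # so the instance can be active on either node while it migrates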
+
+    self.feedback_fn("* migrating instance to %s" % target_node)
+    time.sleep(10)
+    result = self.rpc.call_instance_migrate(source_node, instance,
+                                            self.nodes_ip[target_node],
+                                            self.op.live)
+    msg = result.RemoteFailMsg()
+    if msg:
+      logging.error("Instance migration failed, trying to revert"
+                    " disk status: %s", msg)
+      try:
+        self._EnsureSecondary(target_node)
+        self._GoStandalone()
+        self._GoReconnect(False)
+        self._WaitUntilSync()
+      except errors.OpExecError, err:
+        self.LogWarning("Migration failed and I can't reconnect the"
+                        " drives: error '%s'\n"
+                        "Please look and recover the instance status" %
+                        str(err))
+
+      raise errors.OpExecError("Could not migrate instance %s: %s" %
+                               (instance.name, msg))
+    time.sleep(10)
+
+    instance.primary_node = target_node
+    # distribute new instance config to the other nodes
+    self.cfg.Update(instance)
+
+    self._EnsureSecondary(source_node)
+    self._WaitUntilSync()
+    self._GoStandalone()
+    self._GoReconnect(False)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* done")
+
+  def Exec(self, feedback_fn):
+    """Perform the migration.
+
+    """
+    self.feedback_fn = feedback_fn
+
+    self.source_node = self.instance.primary_node
+    self.target_node = self.instance.secondary_nodes[0]
+    self.all_nodes = [self.source_node, self.target_node]
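+    # the disks are addressed via the nodes' secondary IPs, which carry
+    # the DRBD replication traffic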
+    self.nodes_ip = {
+      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
+      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
+      }
+    if self.op.cleanup:
+      return self._ExecCleanup()
+    else:
+      return self._ExecMigration()
+
+
 def _CreateBlockDevOnPrimary(lu, node, instance, device, info):
   """Create a tree of block devices on the primary node.
 
@@ -2924,17 +3699,16 @@ def _CreateBlockDevOnPrimary(lu, node, instance, device, info):
   """
   if device.children:
     for child in device.children:
   """
   if device.children:
     for child in device.children:
-      if not _CreateBlockDevOnPrimary(lu, node, instance, child, info):
-        return False
+      _CreateBlockDevOnPrimary(lu, node, instance, child, info)
 
   lu.cfg.SetDiskID(device, node)
   new_id = lu.rpc.call_blockdev_create(node, device, device.size,
                                        instance.name, True, info)
-  if not new_id:
-    return False
+  if new_id.failed or not new_id.data:
+    raise errors.OpExecError("Can't create block device %s on primary"
+                             " node %s" % (device, node))
   if device.physical_id is None:
     device.physical_id = new_id
-  return True
 
 
 def _CreateBlockDevOnSecondary(lu, node, instance, device, force, info):
@@ -2948,22 +3722,22 @@ def _CreateBlockDevOnSecondary(lu, node, instance, device, force, info):
   """
   if device.CreateOnSecondary():
     force = True
   """
   if device.CreateOnSecondary():
     force = True
+
   if device.children:
     for child in device.children:
-      if not _CreateBlockDevOnSecondary(lu, node, instance,
-                                        child, force, info):
-        return False
+      _CreateBlockDevOnSecondary(lu, node, instance, child, force, info)
 
   if not force:
-    return True
+    return
+
   lu.cfg.SetDiskID(device, node)
   new_id = lu.rpc.call_blockdev_create(node, device, device.size,
                                        instance.name, False, info)
-  if not new_id:
-    return False
+  if new_id.failed or not new_id.data:
+    raise errors.OpExecError("Can't create block device %s on secondary"
+                             " node %s" % (device, node))
   if device.physical_id is None:
     device.physical_id = new_id
-  return True
 
 
 def _GenerateUniqueNames(lu, exts):
@@ -3002,56 +3776,62 @@ def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
 
 def _GenerateDiskTemplate(lu, template_name,
                           instance_name, primary_node,
-                          secondary_nodes, disk_sz, swap_sz,
-                          file_storage_dir, file_driver):
+                          secondary_nodes, disk_info,
+                          file_storage_dir, file_driver,
+                          base_index):
   """Generate the entire disk layout for a given template type.
 
   """
   #TODO: compute space requirements
 
   vgname = lu.cfg.GetVGName()
   """Generate the entire disk layout for a given template type.
 
   """
   #TODO: compute space requirements
 
   vgname = lu.cfg.GetVGName()
+  disk_count = len(disk_info)
+  disks = []
   if template_name == constants.DT_DISKLESS:
-    disks = []
+    pass
   elif template_name == constants.DT_PLAIN:
     if len(secondary_nodes) != 0:
       raise errors.ProgrammerError("Wrong template configuration")
 
-    names = _GenerateUniqueNames(lu, [".sda", ".sdb"])
-    sda_dev = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
-                           logical_id=(vgname, names[0]),
-                           iv_name = "sda")
-    sdb_dev = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
-                           logical_id=(vgname, names[1]),
-                           iv_name = "sdb")
-    disks = [sda_dev, sdb_dev]
+    names = _GenerateUniqueNames(lu, [".disk%d" % i
+                                      for i in range(disk_count)])
+    for idx, disk in enumerate(disk_info):
+      disk_index = idx + base_index
+      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
+                              logical_id=(vgname, names[idx]),
+                              iv_name="disk/%d" % disk_index)
+      disks.append(disk_dev)
   elif template_name == constants.DT_DRBD8:
     if len(secondary_nodes) != 1:
       raise errors.ProgrammerError("Wrong template configuration")
     remote_node = secondary_nodes[0]
-    (minor_pa, minor_pb,
-     minor_sa, minor_sb) = lu.cfg.AllocateDRBDMinor(
-      [primary_node, primary_node, remote_node, remote_node], instance_name)
-
-    names = _GenerateUniqueNames(lu, [".sda_data", ".sda_meta",
-                                      ".sdb_data", ".sdb_meta"])
-    drbd_sda_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
-                                        disk_sz, names[0:2], "sda",
-                                        minor_pa, minor_sa)
-    drbd_sdb_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
-                                        swap_sz, names[2:4], "sdb",
-                                        minor_pb, minor_sb)
-    disks = [drbd_sda_dev, drbd_sdb_dev]
+    minors = lu.cfg.AllocateDRBDMinor(
+      [primary_node, remote_node] * len(disk_info), instance_name)
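+    # two minors are allocated per disk (primary and secondary node), so
+    # the flat list holds the pair for disk "idx" at 2*idx and 2*idx+1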
+
+    names = []
+    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
+                                               for i in range(disk_count)]):
+      names.append(lv_prefix + "_data")
+      names.append(lv_prefix + "_meta")
+    for idx, disk in enumerate(disk_info):
+      disk_index = idx + base_index
+      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
+                                      disk["size"], names[idx*2:idx*2+2],
+                                      "disk/%d" % disk_index,
+                                      minors[idx*2], minors[idx*2+1])
+      disks.append(disk_dev)
   elif template_name == constants.DT_FILE:
     if len(secondary_nodes) != 0:
       raise errors.ProgrammerError("Wrong template configuration")
 
-    file_sda_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk_sz,
-                                iv_name="sda", logical_id=(file_driver,
-                                "%s/sda" % file_storage_dir))
-    file_sdb_dev = objects.Disk(dev_type=constants.LD_FILE, size=swap_sz,
-                                iv_name="sdb", logical_id=(file_driver,
-                                "%s/sdb" % file_storage_dir))
-    disks = [file_sda_dev, file_sdb_dev]
+    for idx, disk in enumerate(disk_info):
+      disk_index = idx + base_index
+      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
+                              iv_name="disk/%d" % disk_index,
+                              logical_id=(file_driver,
+                                          "%s/disk%d" % (file_storage_dir,
+                                                         idx)))
+      disks.append(disk_dev)
   else:
     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
   return disks
@@ -3069,11 +3849,12 @@ def _CreateDisks(lu, instance):
 
   This abstracts away some work from AddInstance.
 
-  Args:
-    instance: the instance object
-
-  Returns:
-    True or False showing the success of the creation process
+  @type lu: L{LogicalUnit}
+  @param lu: the logical unit on whose behalf we execute
+  @type instance: L{objects.Instance}
+  @param instance: the instance whose disks we should create
+  @rtype: None
+  @return: nothing; on failure an L{errors.OpExecError} is raised
 
   """
   info = _GetInstanceInfoText(instance)
@@ -3083,32 +3864,26 @@ def _CreateDisks(lu, instance):
     result = lu.rpc.call_file_storage_dir_create(instance.primary_node,
                                                  file_storage_dir)
 
-    if not result:
-      logger.Error("Could not connect to node '%s'" % instance.primary_node)
-      return False
+    if result.failed or not result.data:
+      raise errors.OpExecError("Could not connect to node '%s'" %
+                               instance.primary_node)
 
-    if not result[0]:
-      logger.Error("failed to create directory '%s'" % file_storage_dir)
-      return False
+    if not result.data[0]:
+      raise errors.OpExecError("Failed to create directory '%s'" %
+                               file_storage_dir)
 
+  # Note: this needs to be kept in sync with adding of disks in
+  # LUSetInstanceParams
   for device in instance.disks:
-    logger.Info("creating volume %s for instance %s" %
-                (device.iv_name, instance.name))
+    logging.info("Creating volume %s for instance %s",
+                 device.iv_name, instance.name)
     #HARDCODE
     for secondary_node in instance.secondary_nodes:
-      if not _CreateBlockDevOnSecondary(lu, secondary_node, instance,
-                                        device, False, info):
-        logger.Error("failed to create volume %s (%s) on secondary node %s!" %
-                     (device.iv_name, device, secondary_node))
-        return False
+      _CreateBlockDevOnSecondary(lu, secondary_node, instance,
+                                 device, False, info)
     #HARDCODE
-    if not _CreateBlockDevOnPrimary(lu, instance.primary_node,
-                                    instance, device, info):
-      logger.Error("failed to create volume %s on primary!" %
-                   device.iv_name)
-      return False
-
-  return True
+    _CreateBlockDevOnPrimary(lu, instance.primary_node,
+                             instance, device, info)
 
 
 def _RemoveDisks(lu, instance):
@@ -3119,47 +3894,47 @@ def _RemoveDisks(lu, instance):
   be removed, the removal will continue with the other ones (compare
   with `_CreateDisks()`).
 
-  Args:
-    instance: the instance object
-
-  Returns:
-    True or False showing the success of the removal proces
+  @type lu: L{LogicalUnit}
+  @param lu: the logical unit on whose behalf we execute
+  @type instance: L{objects.Instance}
+  @param instance: the instance whose disks we should remove
+  @rtype: boolean
+  @return: the success of the removal
 
   """
-  logger.Info("removing block devices for instance %s" % instance.name)
+  logging.info("Removing block devices for instance %s", instance.name)
 
   result = True
   for device in instance.disks:
     for node, disk in device.ComputeNodeTree(instance.primary_node):
       lu.cfg.SetDiskID(disk, node)
-      if not lu.rpc.call_blockdev_remove(node, disk):
-        logger.Error("could not remove block device %s on node %s,"
-                     " continuing anyway" %
-                     (device.iv_name, node))
+      rpc_result = lu.rpc.call_blockdev_remove(node, disk)
+      if rpc_result.failed or not rpc_result.data:
+        lu.proc.LogWarning("Could not remove block device %s on node %s,"
+                           " continuing anyway", device.iv_name, node)
         result = False
 
   if instance.disk_template == constants.DT_FILE:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
-    if not lu.rpc.call_file_storage_dir_remove(instance.primary_node,
-                                               file_storage_dir):
-      logger.Error("could not remove directory '%s'" % file_storage_dir)
+    rpc_result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
+                                                     file_storage_dir)
+    if rpc_result.failed or not rpc_result.data:
+      logging.error("Could not remove directory '%s'", file_storage_dir)
       result = False
 
   return result
 
 
-def _ComputeDiskSize(disk_template, disk_size, swap_size):
+def _ComputeDiskSize(disk_template, disks):
   """Compute disk size requirements in the volume group
 
   """Compute disk size requirements in the volume group
 
-  This is currently hard-coded for the two-drive layout.
-
   """
   # Required free disk space as a function of disk and swap space
   req_size_dict = {
     constants.DT_DISKLESS: None,
   """
   # Required free disk space as a function of disk and swap space
   req_size_dict = {
     constants.DT_DISKLESS: None,
-    constants.DT_PLAIN: disk_size + swap_size,
-    # 256 MB are added for drbd metadata, 128MB for each drbd device
-    constants.DT_DRBD8: disk_size + swap_size + 256,
+    constants.DT_PLAIN: sum(d["size"] for d in disks),
+    # 128 MB are added for drbd metadata for each disk
+    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
     constants.DT_FILE: None,
   }
 
@@ -3191,13 +3966,14 @@ def _CheckHVParams(lu, nodenames, hvname, hvparams):
                                                   hvname,
                                                   hvparams)
   for node in nodenames:
-    info = hvinfo.get(node, None)
-    if not info or not isinstance(info, (tuple, list)):
+    info = hvinfo[node]
+    info.Raise()
+    if not info.data or not isinstance(info.data, (tuple, list)):
       raise errors.OpPrereqError("Cannot get current information"
       raise errors.OpPrereqError("Cannot get current information"
-                                 " from node '%s' (%s)" % (node, info))
-    if not info[0]:
+                                 " from node '%s' (%s)" % (node, info.data))
+    if not info.data[0]:
       raise errors.OpPrereqError("Hypervisor parameter validation failed:"
       raise errors.OpPrereqError("Hypervisor parameter validation failed:"
-                                 " %s" % info[1])
+                                 " %s" % info.data[1])
 
 
 class LUCreateInstance(LogicalUnit):
@@ -3206,9 +3982,9 @@ class LUCreateInstance(LogicalUnit):
   """
   HPATH = "instance-add"
   HTYPE = constants.HTYPE_INSTANCE
   """
   HPATH = "instance-add"
   HTYPE = constants.HTYPE_INSTANCE
-  _OP_REQP = ["instance_name", "disk_size",
-              "disk_template", "swap_size", "mode", "start",
-              "wait_for_sync", "ip_check", "mac",
+  _OP_REQP = ["instance_name", "disks", "disk_template",
+              "mode", "start",
+              "wait_for_sync", "ip_check", "nics",
               "hvparams", "beparams"]
   REQ_BGL = False
 
               "hvparams", "beparams"]
   REQ_BGL = False
 
@@ -3264,6 +4040,7 @@ class LUCreateInstance(LogicalUnit):
     hv_type.CheckParameterSyntax(filled_hvp)
 
     # fill and remember the beparams dict
+    utils.CheckBEParams(self.op.beparams)
     self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                     self.op.beparams)
 
@@ -3281,27 +4058,50 @@ class LUCreateInstance(LogicalUnit):
 
     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
 
-    # ip validity checks
-    ip = getattr(self.op, "ip", None)
-    if ip is None or ip.lower() == "none":
-      inst_ip = None
-    elif ip.lower() == "auto":
-      inst_ip = hostname1.ip
-    else:
-      if not utils.IsValidIP(ip):
-        raise errors.OpPrereqError("given IP address '%s' doesn't look"
-                                   " like a valid IP" % ip)
-      inst_ip = ip
-    self.inst_ip = self.op.ip = inst_ip
+    # NIC buildup
+    self.nics = []
+    for nic in self.op.nics:
+      # ip validity checks
+      ip = nic.get("ip", None)
+      if ip is None or ip.lower() == "none":
+        nic_ip = None
+      elif ip.lower() == constants.VALUE_AUTO:
+        nic_ip = hostname1.ip
+      else:
+        if not utils.IsValidIP(ip):
+          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
+                                     " like a valid IP" % ip)
+        nic_ip = ip
+
+      # MAC address verification
+      mac = nic.get("mac", constants.VALUE_AUTO)
+      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+        if not utils.IsValidMac(mac.lower()):
+          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
+                                     mac)
+      # bridge verification
+      bridge = nic.get("bridge", self.cfg.GetDefBridge())
+      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
+
+    # disk checks/pre-build
+    self.disks = []
+    for disk in self.op.disks:
+      mode = disk.get("mode", constants.DISK_RDWR)
+      if mode not in constants.DISK_ACCESS_SET:
+        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
+                                   mode)
+      size = disk.get("size", None)
+      if size is None:
+        raise errors.OpPrereqError("Missing disk size")
+      try:
+        size = int(size)
+      except ValueError:
+        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
+      self.disks.append({"size": size, "mode": mode})
+
     # used in CheckPrereq for ip ping check
     self.check_ip = hostname1.ip
 
-    # MAC address verification
-    if self.op.mac != "auto":
-      if not utils.IsValidMac(self.op.mac.lower()):
-        raise errors.OpPrereqError("invalid MAC address specified: %s" %
-                                   self.op.mac)
-
     # file storage checks
     if (self.op.file_driver and
         not self.op.file_driver in constants.FILE_DRIVER):
@@ -3331,16 +4131,22 @@ class LUCreateInstance(LogicalUnit):
       src_node = getattr(self.op, "src_node", None)
       src_path = getattr(self.op, "src_path", None)
 
       src_node = getattr(self.op, "src_node", None)
       src_path = getattr(self.op, "src_path", None)
 
-      if src_node is None or src_path is None:
-        raise errors.OpPrereqError("Importing an instance requires source"
-                                   " node and path options")
-
-      if not os.path.isabs(src_path):
-        raise errors.OpPrereqError("The source path must be absolute")
+      if src_path is None:
+        self.op.src_path = src_path = self.op.instance_name
 
-      self.op.src_node = src_node = self._ExpandNode(src_node)
-      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
-        self.needed_locks[locking.LEVEL_NODE].append(src_node)
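+      # without a source node we will search all the locked nodes' export
+      # directories for the (relative) source path, hence the all-nodes
+      # lock set below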
+      if src_node is None:
+        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+        self.op.src_node = None
+        if os.path.isabs(src_path):
+          raise errors.OpPrereqError("Importing an instance from an absolute"
+                                     " path requires a source node option.")
+      else:
+        self.op.src_node = src_node = self._ExpandNode(src_node)
+        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
+          self.needed_locks[locking.LEVEL_NODE].append(src_node)
+        if not os.path.isabs(src_path):
+          self.op.src_path = src_path = \
+            os.path.join(constants.EXPORT_DIR, src_path)
 
     else: # INSTANCE_CREATE
       if getattr(self.op, "os_type", None) is None:
@@ -3350,10 +4156,7 @@ class LUCreateInstance(LogicalUnit):
     """Run the allocator based on input opcode.
 
     """
     """Run the allocator based on input opcode.
 
     """
-    disks = [{"size": self.op.disk_size, "mode": "w"},
-             {"size": self.op.swap_size, "mode": "w"}]
-    nics = [{"mac": self.op.mac, "ip": getattr(self.op, "ip", None),
-             "bridge": self.op.bridge}]
+    nics = [n.ToDict() for n in self.nics]
     ial = IAllocator(self,
                      mode=constants.IALLOCATOR_MODE_ALLOC,
                      name=self.op.instance_name,
@@ -3362,8 +4165,9 @@ class LUCreateInstance(LogicalUnit):
                      os=self.op.os_type,
                      vcpus=self.be_full[constants.BE_VCPUS],
                      mem_size=self.be_full[constants.BE_MEMORY],
-                     disks=disks,
+                     disks=self.disks,
                      nics=nics,
+                     hypervisor=self.op.hypervisor,
                      )
 
     ial.Run(self.op.iallocator)
@@ -3378,10 +4182,9 @@ class LUCreateInstance(LogicalUnit):
                                  (self.op.iallocator, len(ial.nodes),
                                   ial.required_nodes))
     self.op.pnode = ial.nodes[0]
-    logger.ToStdout("Selected nodes for the instance: %s" %
-                    (", ".join(ial.nodes),))
-    logger.Info("Selected nodes for instance %s via iallocator %s: %s" %
-                (self.op.instance_name, self.op.iallocator, ial.nodes))
+    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
+                 self.op.instance_name, self.op.iallocator,
+                 ", ".join(ial.nodes))
     if ial.required_nodes == 2:
       self.op.snode = ial.nodes[1]
 
@@ -3393,14 +4196,13 @@ class LUCreateInstance(LogicalUnit):
     """
     env = {
       "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
     """
     env = {
       "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
-      "INSTANCE_DISK_SIZE": self.op.disk_size,
-      "INSTANCE_SWAP_SIZE": self.op.swap_size,
+      "INSTANCE_DISK_SIZE": ",".join(str(d["size"]) for d in self.disks),
       "INSTANCE_ADD_MODE": self.op.mode,
       }
     if self.op.mode == constants.INSTANCE_IMPORT:
       env["INSTANCE_SRC_NODE"] = self.op.src_node
       env["INSTANCE_SRC_PATH"] = self.op.src_path
       "INSTANCE_ADD_MODE": self.op.mode,
       }
     if self.op.mode == constants.INSTANCE_IMPORT:
       env["INSTANCE_SRC_NODE"] = self.op.src_node
       env["INSTANCE_SRC_PATH"] = self.op.src_path
-      env["INSTANCE_SRC_IMAGE"] = self.src_image
+      env["INSTANCE_SRC_IMAGES"] = self.src_images
 
     env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
       primary_node=self.op.pnode,
@@ -3409,7 +4211,7 @@ class LUCreateInstance(LogicalUnit):
       os_type=self.op.os_type,
       memory=self.be_full[constants.BE_MEMORY],
       vcpus=self.be_full[constants.BE_VCPUS],
-      nics=[(self.inst_ip, self.op.bridge, self.op.mac)],
+      nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
     ))
 
     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
@@ -3431,11 +4233,28 @@ class LUCreateInstance(LogicalUnit):
       src_node = self.op.src_node
       src_path = self.op.src_path
 
-      export_info = self.rpc.call_export_info(src_node, src_path)
-
-      if not export_info:
+      if src_node is None:
+        exp_list = self.rpc.call_export_list(
+          self.acquired_locks[locking.LEVEL_NODE])
+        found = False
+        for node in exp_list:
+          if not exp_list[node].failed and src_path in exp_list[node].data:
+            found = True
+            self.op.src_node = src_node = node
+            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
+                                                       src_path)
+            break
+        if not found:
+          raise errors.OpPrereqError("No export found for relative path %s" %
+                                      src_path)
+
+      _CheckNodeOnline(self, src_node)
+      result = self.rpc.call_export_info(src_node, src_path)
+      result.Raise()
+      if not result.data:
         raise errors.OpPrereqError("No export found in dir %s" % src_path)
 
         raise errors.OpPrereqError("No export found in dir %s" % src_path)
 
+      export_info = result.data
       if not export_info.has_section(constants.INISECT_EXP):
         raise errors.ProgrammerError("Corrupted export config")
 
@@ -3444,18 +4263,38 @@ class LUCreateInstance(LogicalUnit):
         raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                    (ei_version, constants.EXPORT_VERSION))
 
         raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                    (ei_version, constants.EXPORT_VERSION))
 
-      if int(export_info.get(constants.INISECT_INS, 'disk_count')) > 1:
-        raise errors.OpPrereqError("Can't import instance with more than"
-                                   " one data disk")
+      # Check that the new instance doesn't have less disks than the export
+      instance_disks = len(self.disks)
+      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
+      if instance_disks < export_disks:
+        raise errors.OpPrereqError("Not enough disks to import."
+                                   " (instance: %d, export: %d)" %
+                                   (instance_disks, export_disks))
 
-      # FIXME: are the old os-es, disk sizes, etc. useful?
       self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
-      diskimage = os.path.join(src_path, export_info.get(constants.INISECT_INS,
-                                                         'disk0_dump'))
-      self.src_image = diskimage
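+      # build the list of per-disk source images; a disk for which the
+      # export has no dump entry is recorded as False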
+      disk_images = []
+      for idx in range(export_disks):
+        option = 'disk%d_dump' % idx
+        if export_info.has_option(constants.INISECT_INS, option):
+          # FIXME: are the old os-es, disk sizes, etc. useful?
+          export_name = export_info.get(constants.INISECT_INS, option)
+          image = os.path.join(src_path, export_name)
+          disk_images.append(image)
+        else:
+          disk_images.append(False)
 
-    # ip ping checks (we use the same ip that was resolved in ExpandNames)
+      self.src_images = disk_images
+
+      old_name = export_info.get(constants.INISECT_INS, 'name')
+      # FIXME: int() here could throw a ValueError on broken exports
+      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
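+      # when re-importing under the original instance name, NICs left on
+      # "auto" re-use the MAC addresses saved in the export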
+      if self.op.instance_name == old_name:
+        for idx, nic in enumerate(self.nics):
+          if nic.mac == constants.VALUE_AUTO and idx < exp_nic_count:
+            nic_mac_ini = 'nic%d_mac' % idx
+            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
 
+    # ip ping checks (we use the same ip that was resolved in ExpandNames)
     if self.op.start and not self.op.ip_check:
       raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                  " adding an instance in start mode")
@@ -3465,13 +4304,6 @@ class LUCreateInstance(LogicalUnit):
         raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                    (self.check_ip, self.op.instance_name))
 
         raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                    (self.check_ip, self.op.instance_name))
 
-    # bridge verification
-    bridge = getattr(self.op, "bridge", None)
-    if bridge is None:
-      self.op.bridge = self.cfg.GetDefBridge()
-    else:
-      self.op.bridge = bridge
-
     #### allocator run
 
     if self.op.iallocator is not None:
@@ -3483,6 +4315,10 @@ class LUCreateInstance(LogicalUnit):
     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
     assert self.pnode is not None, \
       "Cannot retrieve locked node %s" % self.op.pnode
+    if pnode.offline:
+      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
+                                 pnode.name)
+
     self.secondaries = []
 
     # mirror node verification
@@ -3494,18 +4330,21 @@ class LUCreateInstance(LogicalUnit):
         raise errors.OpPrereqError("The secondary node cannot be"
                                    " the primary node.")
       self.secondaries.append(self.op.snode)
         raise errors.OpPrereqError("The secondary node cannot be"
                                    " the primary node.")
       self.secondaries.append(self.op.snode)
+      _CheckNodeOnline(self, self.op.snode)
 
     nodenames = [pnode.name] + self.secondaries
 
     req_size = _ComputeDiskSize(self.op.disk_template,
-                                self.op.disk_size, self.op.swap_size)
+                                self.disks)
 
     # Check lv size requirements
     if req_size is not None:
       nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.op.hypervisor)
       for node in nodenames:
-        info = nodeinfo.get(node, None)
+        info = nodeinfo[node]
+        info.Raise()
+        info = info.data
         if not info:
           raise errors.OpPrereqError("Cannot get current information"
                                      " from node '%s'" % node)
@@ -3521,16 +4360,20 @@ class LUCreateInstance(LogicalUnit):
     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
 
     # os verification
-    os_obj = self.rpc.call_os_get(pnode.name, self.op.os_type)
-    if not os_obj:
+    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
+    result.Raise()
+    if not isinstance(result.data, objects.OS):
       raise errors.OpPrereqError("OS '%s' not in supported os list for"
                                  " primary node"  % self.op.os_type)
 
     # bridge check on primary node
       raise errors.OpPrereqError("OS '%s' not in supported os list for"
                                  " primary node"  % self.op.os_type)
 
     # bridge check on primary node
-    if not self.rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]):
-      raise errors.OpPrereqError("target bridge '%s' does not exist on"
-                                 " destination node '%s'" %
-                                 (self.op.bridge, pnode.name))
+    bridges = [n.bridge for n in self.nics]
+    result = self.rpc.call_bridges_exist(self.pnode.name, bridges)
+    result.Raise()
+    if not result.data:
+      raise errors.OpPrereqError("One of the target bridges '%s' does not"
+                                 " exist on destination node '%s'" %
+                                 (",".join(bridges), pnode.name))
 
     # memory check on primary node
     if self.op.start:
@@ -3551,14 +4394,9 @@ class LUCreateInstance(LogicalUnit):
     instance = self.op.instance_name
     pnode_name = self.pnode.name
 
-    if self.op.mac == "auto":
-      mac_address = self.cfg.GenerateMAC()
-    else:
-      mac_address = self.op.mac
-
-    nic = objects.NIC(bridge=self.op.bridge, mac=mac_address)
-    if self.inst_ip is not None:
-      nic.ip = self.inst_ip
+    for nic in self.nics:
+      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+        nic.mac = self.cfg.GenerateMAC()
 
     ht_kind = self.op.hypervisor
     if ht_kind in constants.HTS_REQ_PORT:
@@ -3584,14 +4422,15 @@ class LUCreateInstance(LogicalUnit):
     disks = _GenerateDiskTemplate(self,
                                   self.op.disk_template,
                                   instance, pnode_name,
-                                  self.secondaries, self.op.disk_size,
-                                  self.op.swap_size,
+                                  self.secondaries,
+                                  self.disks,
                                   file_storage_dir,
-                                  self.op.file_driver)
+                                  self.op.file_driver,
+                                  0)
 
     iobj = objects.Instance(name=instance, os=self.op.os_type,
                             primary_node=pnode_name,
-                            nics=[nic], disks=disks,
+                            nics=self.nics, disks=disks,
                             disk_template=self.op.disk_template,
                             status=self.instance_status,
                             network_port=network_port,
@@ -3601,10 +4440,15 @@ class LUCreateInstance(LogicalUnit):
                             )
 
     feedback_fn("* creating instance disks...")
-    if not _CreateDisks(self, iobj):
-      _RemoveDisks(self, iobj)
-      self.cfg.ReleaseDRBDMinors(instance)
-      raise errors.OpExecError("Device creation failed, reverting...")
+    try:
+      _CreateDisks(self, iobj)
+    except errors.OpExecError:
+      self.LogWarning("Device creation failed, reverting...")
+      try:
+        _RemoveDisks(self, iobj)
+      finally:
+        self.cfg.ReleaseDRBDMinors(instance)
+        raise
 
     feedback_fn("adding instance %s to cluster config" % instance)
 
@@ -3614,6 +4458,16 @@ class LUCreateInstance(LogicalUnit):
     del self.remove_locks[locking.LEVEL_INSTANCE]
     # Remove the temp. assignements for the instance's drbds
     self.cfg.ReleaseDRBDMinors(instance)
+    # Unlock all the nodes
+    if self.op.mode == constants.INSTANCE_IMPORT:
+      nodes_keep = [self.op.src_node]
+      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
+                       if node != self.op.src_node]
+      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
+      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
+    else:
+      self.context.glm.release(locking.LEVEL_NODE)
+      del self.acquired_locks[locking.LEVEL_NODE]
 
     if self.op.wait_for_sync:
       disk_abort = not _WaitForSync(self, iobj)
@@ -3639,31 +4493,38 @@ class LUCreateInstance(LogicalUnit):
     if iobj.disk_template != constants.DT_DISKLESS:
       if self.op.mode == constants.INSTANCE_CREATE:
         feedback_fn("* running the instance OS create scripts...")
-        if not self.rpc.call_instance_os_add(pnode_name, iobj, "sda", "sdb"):
-          raise errors.OpExecError("could not add os for instance %s"
+        result = self.rpc.call_instance_os_add(pnode_name, iobj)
+        result.Raise()
+        if not result.data:
+          raise errors.OpExecError("Could not add os for instance %s"
                                    " on node %s" %
                                    (instance, pnode_name))
 
       elif self.op.mode == constants.INSTANCE_IMPORT:
         feedback_fn("* running the instance OS import scripts...")
         src_node = self.op.src_node
                                    " on node %s" %
                                    (instance, pnode_name))
 
       elif self.op.mode == constants.INSTANCE_IMPORT:
         feedback_fn("* running the instance OS import scripts...")
         src_node = self.op.src_node
-        src_image = self.src_image
+        src_images = self.src_images
         cluster_name = self.cfg.GetClusterName()
-        if not self.rpc.call_instance_os_import(pnode_name, iobj, "sda", "sdb",
-                                                src_node, src_image,
-                                                cluster_name):
-          raise errors.OpExecError("Could not import os for instance"
-                                   " %s on node %s" %
-                                   (instance, pnode_name))
+        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
+                                                         src_node, src_images,
+                                                         cluster_name)
+        import_result.Raise()
+        for idx, result in enumerate(import_result.data):
+          if not result:
+            self.LogWarning("Could not import the image %s for instance"
+                            " %s, disk %d, on node %s" %
+                            (src_images[idx], instance, idx, pnode_name))
       else:
         # also checked in the prereq part
         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                      % self.op.mode)
 
     if self.op.start:
-      logger.Info("starting instance %s on node %s" % (instance, pnode_name))
+      logging.info("Starting instance %s on node %s", instance, pnode_name)
       feedback_fn("* starting instance...")
       feedback_fn("* starting instance...")
-      if not self.rpc.call_instance_start(pnode_name, iobj, None):
+      result = self.rpc.call_instance_start(pnode_name, iobj, None)
+      result.Raise()
+      if not result.data:
         raise errors.OpExecError("Could not start instance")
 
 
         raise errors.OpExecError("Could not start instance")
 
 
@@ -3690,6 +4551,7 @@ class LUConnectConsole(NoHooksLU):
     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert self.instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
+    _CheckNodeOnline(self, self.instance.primary_node)
 
   def Exec(self, feedback_fn):
     """Connect to the console of an instance
@@ -3700,13 +4562,12 @@ class LUConnectConsole(NoHooksLU):
 
     node_insts = self.rpc.call_instance_list([node],
                                              [instance.hypervisor])[node]
-    if node_insts is False:
-      raise errors.OpExecError("Can't connect to node %s." % node)
+    node_insts.Raise()
 
-    if instance.name not in node_insts:
+    if instance.name not in node_insts.data:
       raise errors.OpExecError("Instance %s is not running." % instance.name)
 
       raise errors.OpExecError("Instance %s is not running." % instance.name)
 
-    logger.Debug("connecting to console of %s on %s" % (instance.name, node))
+    logging.debug("Connecting to console of %s on %s", instance.name, node)
 
     hyper = hypervisor.GetHypervisor(instance.hypervisor)
     console_cmd = hyper.GetShellCommandForConsole(instance)
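The console path delegates the actual command to the hypervisor abstraction. A rough sketch of that lookup, assuming a Xen-style hypervisor whose console command is along the lines of "xm console <name>" (the registry and class here are simplified stand-ins, and the real method takes an instance object rather than a name):

class FakeXenHypervisor(object):
  """Simplified stand-in for a hypervisor class."""

  @staticmethod
  def GetShellCommandForConsole(instance_name):
    # for Xen PVM this is typically an "xm console" invocation
    return "xm console %s" % instance_name

_HYPERVISORS = {"xen-pvm": FakeXenHypervisor}   # toy registry

def get_hypervisor(name):
  return _HYPERVISORS[name]

print(get_hypervisor("xen-pvm").GetShellCommandForConsole("instance1"))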
@@ -3724,17 +4585,32 @@ class LUReplaceDisks(LogicalUnit):
   _OP_REQP = ["instance_name", "mode", "disks"]
   REQ_BGL = False
 
-  def ExpandNames(self):
-    self._ExpandAndLockInstance()
-
+  def CheckArguments(self):
     if not hasattr(self.op, "remote_node"):
       self.op.remote_node = None
     if not hasattr(self.op, "remote_node"):
       self.op.remote_node = None
-
-    ia_name = getattr(self.op, "iallocator", None)
-    if ia_name is not None:
-      if self.op.remote_node is not None:
+    if not hasattr(self.op, "iallocator"):
+      self.op.iallocator = None
+
+    # check for valid parameter combination
+    cnt = [self.op.remote_node, self.op.iallocator].count(None)
+    if self.op.mode == constants.REPLACE_DISK_CHG:
+      if cnt == 2:
+        raise errors.OpPrereqError("When changing the secondary either an"
+                                   " iallocator script must be used or the"
+                                   " new node given")
+      elif cnt == 0:
         raise errors.OpPrereqError("Give either the iallocator or the new"
                                    " secondary, not both")
         raise errors.OpPrereqError("Give either the iallocator or the new"
                                    " secondary, not both")
+    else: # not replacing the secondary
+      if cnt != 2:
+        raise errors.OpPrereqError("The iallocator and new node options can"
+                                   " be used only when changing the"
+                                   " secondary node")
+
+  def ExpandNames(self):
+    self._ExpandAndLockInstance()
+
+    if self.op.iallocator is not None:
       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
     elif self.op.remote_node is not None:
       remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
@@ -3775,8 +4651,8 @@ class LUReplaceDisks(LogicalUnit):
                                  " of nodes (%s), required %s" %
                                  (len(ial.nodes), ial.required_nodes))
     self.op.remote_node = ial.nodes[0]
                                  " of nodes (%s), required %s" %
                                  (len(ial.nodes), ial.required_nodes))
     self.op.remote_node = ial.nodes[0]
-    logger.ToStdout("Selected new secondary for the instance: %s" %
-                    self.op.remote_node)
+    self.LogInfo("Selected new secondary for the instance: %s",
+                 self.op.remote_node)
 
   def BuildHooksEnv(self):
     """Build hooks env.
 
   def BuildHooksEnv(self):
     """Build hooks env.
@@ -3809,9 +4685,9 @@ class LUReplaceDisks(LogicalUnit):
       "Cannot retrieve locked instance %s" % self.op.instance_name
     self.instance = instance
 
       "Cannot retrieve locked instance %s" % self.op.instance_name
     self.instance = instance
 
-    if instance.disk_template not in constants.DTS_NET_MIRROR:
-      raise errors.OpPrereqError("Instance's disk layout is not"
-                                 " network mirrored.")
+    if instance.disk_template != constants.DT_DRBD8:
+      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
+                                 " instances")
 
     if len(instance.secondary_nodes) != 1:
       raise errors.OpPrereqError("The instance has a strange layout,"
 
     if len(instance.secondary_nodes) != 1:
       raise errors.OpPrereqError("The instance has a strange layout,"
@@ -3820,8 +4696,7 @@ class LUReplaceDisks(LogicalUnit):
 
     self.sec_node = instance.secondary_nodes[0]
 
-    ia_name = getattr(self.op, "iallocator", None)
-    if ia_name is not None:
+    if self.op.iallocator is not None:
       self._RunAllocator()
 
     remote_node = self.op.remote_node
@@ -3835,54 +4710,49 @@ class LUReplaceDisks(LogicalUnit):
       raise errors.OpPrereqError("The specified node is the primary node of"
                                  " the instance.")
     elif remote_node == self.sec_node:
       raise errors.OpPrereqError("The specified node is the primary node of"
                                  " the instance.")
     elif remote_node == self.sec_node:
-      if self.op.mode == constants.REPLACE_DISK_SEC:
-        # this is for DRBD8, where we can't execute the same mode of
-        # replacement as for drbd7 (no different port allocated)
-        raise errors.OpPrereqError("Same secondary given, cannot execute"
-                                   " replacement")
-    if instance.disk_template == constants.DT_DRBD8:
-      if (self.op.mode == constants.REPLACE_DISK_ALL and
-          remote_node is not None):
-        # switch to replace secondary mode
-        self.op.mode = constants.REPLACE_DISK_SEC
-
-      if self.op.mode == constants.REPLACE_DISK_ALL:
-        raise errors.OpPrereqError("Template 'drbd' only allows primary or"
-                                   " secondary disk replacement, not"
-                                   " both at once")
-      elif self.op.mode == constants.REPLACE_DISK_PRI:
-        if remote_node is not None:
-          raise errors.OpPrereqError("Template 'drbd' does not allow changing"
-                                     " the secondary while doing a primary"
-                                     " node disk replacement")
-        self.tgt_node = instance.primary_node
-        self.oth_node = instance.secondary_nodes[0]
-      elif self.op.mode == constants.REPLACE_DISK_SEC:
-        self.new_node = remote_node # this can be None, in which case
-                                    # we don't change the secondary
-        self.tgt_node = instance.secondary_nodes[0]
-        self.oth_node = instance.primary_node
-      else:
-        raise errors.ProgrammerError("Unhandled disk replace mode")
+      raise errors.OpPrereqError("The specified node is already the"
+                                 " secondary node of the instance.")
+
+    if self.op.mode == constants.REPLACE_DISK_PRI:
+      n1 = self.tgt_node = instance.primary_node
+      n2 = self.oth_node = self.sec_node
+    elif self.op.mode == constants.REPLACE_DISK_SEC:
+      n1 = self.tgt_node = self.sec_node
+      n2 = self.oth_node = instance.primary_node
+    elif self.op.mode == constants.REPLACE_DISK_CHG:
+      n1 = self.new_node = remote_node
+      n2 = self.oth_node = instance.primary_node
+      self.tgt_node = self.sec_node
+    else:
+      raise errors.ProgrammerError("Unhandled disk replace mode")
 
-    for name in self.op.disks:
-      if instance.FindDisk(name) is None:
-        raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
-                                   (name, instance.name))
+    _CheckNodeOnline(self, n1)
+    _CheckNodeOnline(self, n2)
+
+    if not self.op.disks:
+      self.op.disks = range(len(instance.disks))
+
+    for disk_idx in self.op.disks:
+      instance.FindDisk(disk_idx)
 
   def _ExecD8DiskOnly(self, feedback_fn):
     """Replace a disk on the primary or secondary for dbrd8.
 
     The algorithm for replace is quite complicated:
-      - for each disk to be replaced:
-        - create new LVs on the target node with unique names
-        - detach old LVs from the drbd device
-        - rename old LVs to name_replaced.<time_t>
-        - rename new LVs to old LVs
-        - attach the new LVs (with the old names now) to the drbd device
-      - wait for sync across all devices
-      - for each modified disk:
-        - remove old LVs (which have the name name_replaces.<time_t>)
+
+      1. for each disk to be replaced:
+
+        1. create new LVs on the target node with unique names
+        1. detach old LVs from the drbd device
+        1. rename old LVs to name_replaced.<time_t>
+        1. rename new LVs to old LVs
+        1. attach the new LVs (with the old names now) to the drbd device
+
+      1. wait for sync across all devices
+
+      1. for each modified disk:
+
+        1. remove old LVs (which have the name name_replaces.<time_t>)
 
     Failures are not very well handled.
 
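A compact, illustrative-only walkthrough of the rename dance from the docstring above, using plain strings instead of Disk objects (LV names and the volume group are made up):

import time

def plan_lv_swap(old_lvs, new_lvs):
  """Return the two rename batches for one disk's data/meta LV pair."""
  temp_suffix = "_replaced.%d" % int(time.time())
  # first move the old LVs out of the way under a temporary name
  rename_old = [(lv, lv + temp_suffix) for lv in old_lvs]
  # then give the new LVs the old, stable names so the drbd device
  # re-attaches to volumes with unchanged names
  rename_new = list(zip(new_lvs, old_lvs))
  return rename_old, rename_new

old = ["xenvg/disk0_data", "xenvg/disk0_meta"]
new = ["xenvg/.disk0_data.new", "xenvg/.disk0_meta.new"]
print(plan_lv_swap(old, new))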
@@ -3905,26 +4775,26 @@ class LUReplaceDisks(LogicalUnit):
     if not results:
       raise errors.OpExecError("Can't list volume groups on the nodes")
     for node in oth_node, tgt_node:
-      res = results.get(node, False)
-      if not res or my_vg not in res:
+      res = results[node]
+      if res.failed or not res.data or my_vg not in res.data:
         raise errors.OpExecError("Volume group '%s' not found on %s" %
                                  (my_vg, node))
         raise errors.OpExecError("Volume group '%s' not found on %s" %
                                  (my_vg, node))
-    for dev in instance.disks:
-      if not dev.iv_name in self.op.disks:
+    for idx, dev in enumerate(instance.disks):
+      if idx not in self.op.disks:
         continue
       for node in tgt_node, oth_node:
-        info("checking %s on %s" % (dev.iv_name, node))
+        info("checking disk/%d on %s" % (idx, node))
         cfg.SetDiskID(dev, node)
         if not self.rpc.call_blockdev_find(node, dev):
-          raise errors.OpExecError("Can't find device %s on node %s" %
-                                   (dev.iv_name, node))
+          raise errors.OpExecError("Can't find disk/%d on node %s" %
+                                   (idx, node))
 
     # Step: check other node consistency
     self.proc.LogStep(2, steps_total, "check peer consistency")
-    for dev in instance.disks:
-      if not dev.iv_name in self.op.disks:
+    for idx, dev in enumerate(instance.disks):
+      if idx not in self.op.disks:
         continue
-      info("checking %s consistency on %s" % (dev.iv_name, oth_node))
+      info("checking disk/%d consistency on %s" % (idx, oth_node))
       if not _CheckDiskConsistency(self, dev, oth_node,
                                    oth_node==instance.primary_node):
         raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
@@ -3933,12 +4803,13 @@ class LUReplaceDisks(LogicalUnit):
 
     # Step: create new storage
     self.proc.LogStep(3, steps_total, "allocate new storage")
-    for dev in instance.disks:
-      if not dev.iv_name in self.op.disks:
+    for idx, dev in enumerate(instance.disks):
+      if idx not in self.op.disks:
         continue
       size = dev.size
       cfg.SetDiskID(dev, tgt_node)
-      lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
+      lv_names = [".disk%d_%s" % (idx, suf)
+                  for suf in ["data", "meta"]]
       names = _GenerateUniqueNames(self, lv_names)
       lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                              logical_id=(vgname, names[0]))
@@ -3953,17 +4824,16 @@ class LUReplaceDisks(LogicalUnit):
       # _Create...OnPrimary (which forces the creation), even if we
       # are talking about the secondary node
       for new_lv in new_lvs:
-        if not _CreateBlockDevOnPrimary(self, tgt_node, instance, new_lv,
-                                        _GetInstanceInfoText(instance)):
-          raise errors.OpExecError("Failed to create new LV named '%s' on"
-                                   " node '%s'" %
-                                   (new_lv.logical_id[1], tgt_node))
+        _CreateBlockDevOnPrimary(self, tgt_node, instance, new_lv,
+                                 _GetInstanceInfoText(instance))
 
     # Step: for each lv, detach+rename*2+attach
     self.proc.LogStep(4, steps_total, "change drbd configuration")
     for dev, old_lvs, new_lvs in iv_names.itervalues():
       info("detaching %s drbd from local storage" % dev.iv_name)
 
     # Step: for each lv, detach+rename*2+attach
     self.proc.LogStep(4, steps_total, "change drbd configuration")
     for dev, old_lvs, new_lvs in iv_names.itervalues():
       info("detaching %s drbd from local storage" % dev.iv_name)
-      if not self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs):
+      result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
+      result.Raise()
+      if not result.data:
         raise errors.OpExecError("Can't detach drbd from local storage on node"
                                  " %s for device %s" % (tgt_node, dev.iv_name))
       #dev.children = []
         raise errors.OpExecError("Can't detach drbd from local storage on node"
                                  " %s for device %s" % (tgt_node, dev.iv_name))
       #dev.children = []
@@ -3983,16 +4853,20 @@ class LUReplaceDisks(LogicalUnit):
       rlist = []
       for to_ren in old_lvs:
         find_res = self.rpc.call_blockdev_find(tgt_node, to_ren)
-        if find_res is not None: # device exists
+        if not find_res.failed and find_res.data is not None: # device exists
           rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
 
       info("renaming the old LVs on the target node")
-      if not self.rpc.call_blockdev_rename(tgt_node, rlist):
+      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
+      result.Raise()
+      if not result.data:
         raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
       # now we rename the new LVs to the old LVs
       info("renaming the new LVs on the target node")
       rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
         raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
       # now we rename the new LVs to the old LVs
       info("renaming the new LVs on the target node")
       rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
-      if not self.rpc.call_blockdev_rename(tgt_node, rlist):
+      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
+      result.Raise()
+      if not result.data:
         raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
 
       for old, new in zip(old_lvs, new_lvs):
         raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
 
       for old, new in zip(old_lvs, new_lvs):
@@ -4005,9 +4879,11 @@ class LUReplaceDisks(LogicalUnit):
 
       # now that the new lvs have the old name, we can add them to the device
       info("adding new mirror component on %s" % tgt_node)
 
       # now that the new lvs have the old name, we can add them to the device
       info("adding new mirror component on %s" % tgt_node)
-      if not self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
+      result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
+      if result.failed or not result.data:
         for new_lv in new_lvs:
-          if not self.rpc.call_blockdev_remove(tgt_node, new_lv):
+          result = self.rpc.call_blockdev_remove(tgt_node, new_lv)
+          if result.failed or not result.data:
             warning("Can't rollback device %s", hint="manually cleanup unused"
                     " logical volumes")
         raise errors.OpExecError("Can't add local storage to drbd")
             warning("Can't rollback device %s", hint="manually cleanup unused"
                     " logical volumes")
         raise errors.OpExecError("Can't add local storage to drbd")
@@ -4026,8 +4902,8 @@ class LUReplaceDisks(LogicalUnit):
     # so check manually all the devices
     for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
       cfg.SetDiskID(dev, instance.primary_node)
-      is_degr = self.rpc.call_blockdev_find(instance.primary_node, dev)[5]
-      if is_degr:
+      result = self.rpc.call_blockdev_find(instance.primary_node, dev)
+      if result.failed or result.data[5]:
         raise errors.OpExecError("DRBD device %s is degraded!" % name)
 
     # Step: remove old storage
         raise errors.OpExecError("DRBD device %s is degraded!" % name)
 
     # Step: remove old storage
@@ -4036,7 +4912,8 @@ class LUReplaceDisks(LogicalUnit):
       info("remove logical volumes for %s" % name)
       for lv in old_lvs:
         cfg.SetDiskID(lv, tgt_node)
       info("remove logical volumes for %s" % name)
       for lv in old_lvs:
         cfg.SetDiskID(lv, tgt_node)
-        if not self.rpc.call_blockdev_remove(tgt_node, lv):
+        result = self.rpc.call_blockdev_remove(tgt_node, lv)
+        if result.failed or not result.data:
           warning("Can't remove old LV", hint="manually remove unused LVs")
           continue
 
           warning("Can't remove old LV", hint="manually remove unused LVs")
           continue
 
@@ -4063,40 +4940,44 @@ class LUReplaceDisks(LogicalUnit):
     warning, info = (self.proc.LogWarning, self.proc.LogInfo)
     instance = self.instance
     iv_names = {}
-    vgname = self.cfg.GetVGName()
     # start of work
     cfg = self.cfg
     old_node = self.tgt_node
     new_node = self.new_node
     pri_node = instance.primary_node
+    nodes_ip = {
+      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
+      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
+      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
+      }
 
     # Step: check device activation
     self.proc.LogStep(1, steps_total, "check device existence")
     info("checking volume groups")
     my_vg = cfg.GetVGName()
     results = self.rpc.call_vg_list([pri_node, new_node])
-    if not results:
-      raise errors.OpExecError("Can't list volume groups on the nodes")
     for node in pri_node, new_node:
-      res = results.get(node, False)
-      if not res or my_vg not in res:
+      res = results[node]
+      if res.failed or not res.data or my_vg not in res.data:
         raise errors.OpExecError("Volume group '%s' not found on %s" %
                                  (my_vg, node))
         raise errors.OpExecError("Volume group '%s' not found on %s" %
                                  (my_vg, node))
-    for dev in instance.disks:
-      if not dev.iv_name in self.op.disks:
+    for idx, dev in enumerate(instance.disks):
+      if idx not in self.op.disks:
         continue
-      info("checking %s on %s" % (dev.iv_name, pri_node))
+      info("checking disk/%d on %s" % (idx, pri_node))
       cfg.SetDiskID(dev, pri_node)
-      if not self.rpc.call_blockdev_find(pri_node, dev):
-        raise errors.OpExecError("Can't find device %s on node %s" %
-                                 (dev.iv_name, pri_node))
+      result = self.rpc.call_blockdev_find(pri_node, dev)
+      result.Raise()
+      if not result.data:
+        raise errors.OpExecError("Can't find disk/%d on node %s" %
+                                 (idx, pri_node))
 
     # Step: check other node consistency
     self.proc.LogStep(2, steps_total, "check peer consistency")
-    for dev in instance.disks:
-      if not dev.iv_name in self.op.disks:
+    for idx, dev in enumerate(instance.disks):
+      if idx not in self.op.disks:
         continue
-      info("checking %s consistency on %s" % (dev.iv_name, pri_node))
+      info("checking disk/%d consistency on %s" % (idx, pri_node))
       if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
         raise errors.OpExecError("Primary node (%s) has degraded storage,"
                                  " unsafe to replace the secondary" %
@@ -4104,19 +4985,15 @@ class LUReplaceDisks(LogicalUnit):
 
     # Step: create new storage
     self.proc.LogStep(3, steps_total, "allocate new storage")
-    for dev in instance.disks:
-      size = dev.size
-      info("adding new local storage on %s for %s" % (new_node, dev.iv_name))
+    for idx, dev in enumerate(instance.disks):
+      info("adding new local storage on %s for disk/%d" %
+           (new_node, idx))
       # since we *always* want to create this LV, we use the
       # _Create...OnPrimary (which forces the creation), even if we
       # are talking about the secondary node
       for new_lv in dev.children:
-        if not _CreateBlockDevOnPrimary(self, new_node, instance, new_lv,
-                                        _GetInstanceInfoText(instance)):
-          raise errors.OpExecError("Failed to create new LV named '%s' on"
-                                   " node '%s'" %
-                                   (new_lv.logical_id[1], new_node))
-
+        _CreateBlockDevOnPrimary(self, new_node, instance, new_lv,
+                                 _GetInstanceInfoText(instance))
 
     # Step 4: drbd minors and drbd setups changes
     # after this, we must manually remove the drbd minors on both the
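The comment above concerns minor bookkeeping: minors reserved in the configuration must be handed back on a failure path or they leak. A hedged sketch of that reserve-then-release discipline (allocate, release and create are hypothetical stand-ins for the cfg and block-device calls used in the next hunk):

def create_with_minor_rollback(allocate, release, create):
  minors = allocate()           # reserve minors in the configuration
  try:
    create(minors)              # build the new DRBD devices
  except Exception:
    release()                   # hand the reservation back before re-raising
    raise
  return minors

# trivial demonstration with no-op callables
minors = create_with_minor_rollback(lambda: [0, 1], lambda: None,
                                    lambda m: None)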
@@ -4125,58 +5002,54 @@ class LUReplaceDisks(LogicalUnit):
                                    instance.name)
     logging.debug("Allocated minors %s" % (minors,))
     self.proc.LogStep(4, steps_total, "changing drbd configuration")
-    for dev, new_minor in zip(instance.disks, minors):
+    for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
       size = dev.size
-      info("activating a new drbd on %s for %s" % (new_node, dev.iv_name))
-      # create new devices on new_node
-      if pri_node == dev.logical_id[0]:
-        new_logical_id = (pri_node, new_node,
-                          dev.logical_id[2], dev.logical_id[3], new_minor,
-                          dev.logical_id[5])
+      info("activating a new drbd on %s for disk/%d" % (new_node, idx))
+      # create new devices on new_node; note that we create two IDs:
+      # one without port, so the drbd will be activated without
+      # networking information on the new node at this stage, and one
+      # with network, for the latter activation in step 4
+      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
+      if pri_node == o_node1:
+        p_minor = o_minor1
       else:
-        new_logical_id = (new_node, pri_node,
-                          dev.logical_id[2], new_minor, dev.logical_id[4],
-                          dev.logical_id[5])
-      iv_names[dev.iv_name] = (dev, dev.children, new_logical_id)
+        p_minor = o_minor2
+
+      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
+      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)
+
+      iv_names[idx] = (dev, dev.children, new_net_id)
       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
-                    new_logical_id)
+                    new_net_id)
       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
-                              logical_id=new_logical_id,
+                              logical_id=new_alone_id,
                               children=dev.children)
-      if not _CreateBlockDevOnSecondary(self, new_node, instance,
-                                        new_drbd, False,
-                                        _GetInstanceInfoText(instance)):
+      try:
+        _CreateBlockDevOnSecondary(self, new_node, instance, new_drbd, False,
+                                   _GetInstanceInfoText(instance))
+      except errors.BlockDeviceError:
         self.cfg.ReleaseDRBDMinors(instance.name)
-        raise errors.OpExecError("Failed to create new DRBD on"
-                                 " node '%s'" % new_node)
+        raise
 
-    for dev in instance.disks:
+    for idx, dev in enumerate(instance.disks):
       # we have new devices, shutdown the drbd on the old secondary
-      info("shutting down drbd for %s on old node" % dev.iv_name)
+      info("shutting down drbd for disk/%d on old node" % idx)
       cfg.SetDiskID(dev, old_node)
-      if not self.rpc.call_blockdev_shutdown(old_node, dev):
-        warning("Failed to shutdown drbd for %s on old node" % dev.iv_name,
+      result = self.rpc.call_blockdev_shutdown(old_node, dev)
+      if result.failed or not result.data:
+        warning("Failed to shutdown drbd for disk/%d on old node" % idx,
                 hint="Please cleanup this device manually as soon as possible")
 
     info("detaching primary drbds from the network (=> standalone)")
                 hint="Please cleanup this device manually as soon as possible")
 
     info("detaching primary drbds from the network (=> standalone)")
-    done = 0
-    for dev in instance.disks:
-      cfg.SetDiskID(dev, pri_node)
-      # set the network part of the physical (unique in bdev terms) id
-      # to None, meaning detach from network
-      dev.physical_id = (None, None, None, None) + dev.physical_id[4:]
-      # and 'find' the device, which will 'fix' it to match the
-      # standalone state
-      if self.rpc.call_blockdev_find(pri_node, dev):
-        done += 1
-      else:
-        warning("Failed to detach drbd %s from network, unusual case" %
-                dev.iv_name)
+    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
+                                               instance.disks)[pri_node]
 
-    if not done:
-      # no detaches succeeded (very unlikely)
+    msg = result.RemoteFailMsg()
+    if msg:
+      # detaches didn't succeed (unlikely)
       self.cfg.ReleaseDRBDMinors(instance.name)
-      raise errors.OpExecError("Can't detach at least one DRBD from old node")
+      raise errors.OpExecError("Can't detach the disks from the network on"
+                               " old node: %s" % (msg,))
 
     # if we managed to detach at least one, we update all the disks of
     # the instance to point to the new secondary
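For reference, hypothetical values showing the two logical_id tuples built in this hunk: the "alone" id carries no port, so the new device starts standalone, while the "net" id carries the port for the later reconnect (all concrete values below are invented):

pri_node, new_node = "node1.example.com", "node3.example.com"
o_port, p_minor, new_minor, o_secret = 11000, 0, 2, "s3cr3t"

# no port => activated without networking, standalone on the new node
new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
# with port => used when reconnecting primary and new secondary
new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)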
@@ -4191,17 +5064,15 @@ class LUReplaceDisks(LogicalUnit):
 
     # and now perform the drbd attach
     info("attaching primary drbds to new secondary (standalone => connected)")
 
     # and now perform the drbd attach
     info("attaching primary drbds to new secondary (standalone => connected)")
-    failures = []
-    for dev in instance.disks:
-      info("attaching primary drbd for %s to new secondary node" % dev.iv_name)
-      # since the attach is smart, it's enough to 'find' the device,
-      # it will automatically activate the network, if the physical_id
-      # is correct
-      cfg.SetDiskID(dev, pri_node)
-      logging.debug("Disk to attach: %s", dev)
-      if not self.rpc.call_blockdev_find(pri_node, dev):
-        warning("can't attach drbd %s to new secondary!" % dev.iv_name,
-                "please do a gnt-instance info to see the status of disks")
+    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
+                                           instance.disks, instance.name,
+                                           False)
+    for to_node, to_result in result.items():
+      msg = to_result.RemoteFailMsg()
+      if msg:
+        warning("can't attach drbd disks on node %s: %s", to_node, msg,
+                hint="please do a gnt-instance info to see the"
+                " status of disks")
 
     # this can fail as the old devices are degraded and _WaitForSync
     # does a combined result over all disks, so we don't check its
@@ -4210,18 +5081,20 @@ class LUReplaceDisks(LogicalUnit):
     _WaitForSync(self, instance, unlock=True)
 
     # so check manually all the devices
-    for name, (dev, old_lvs, _) in iv_names.iteritems():
+    for idx, (dev, old_lvs, _) in iv_names.iteritems():
       cfg.SetDiskID(dev, pri_node)
-      is_degr = self.rpc.call_blockdev_find(pri_node, dev)[5]
-      if is_degr:
-        raise errors.OpExecError("DRBD device %s is degraded!" % name)
+      result = self.rpc.call_blockdev_find(pri_node, dev)
+      result.Raise()
+      if result.data[5]:
+        raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
 
     self.proc.LogStep(6, steps_total, "removing old storage")
 
     self.proc.LogStep(6, steps_total, "removing old storage")
-    for name, (dev, old_lvs, _) in iv_names.iteritems():
-      info("remove logical volumes for %s" % name)
+    for idx, (dev, old_lvs, _) in iv_names.iteritems():
+      info("remove logical volumes for disk/%d" % idx)
       for lv in old_lvs:
         cfg.SetDiskID(lv, old_node)
-        if not self.rpc.call_blockdev_remove(old_node, lv):
+        result = self.rpc.call_blockdev_remove(old_node, lv)
+        if result.failed or not result.data:
           warning("Can't remove LV on old secondary",
                   hint="Cleanup stale volumes by hand")
 
           warning("Can't remove LV on old secondary",
                   hint="Cleanup stale volumes by hand")
 
@@ -4237,13 +5110,10 @@ class LUReplaceDisks(LogicalUnit):
     if instance.status == "down":
       _StartInstanceDisks(self, instance, True)
 
     if instance.status == "down":
       _StartInstanceDisks(self, instance, True)
 
-    if instance.disk_template == constants.DT_DRBD8:
-      if self.op.remote_node is None:
-        fn = self._ExecD8DiskOnly
-      else:
-        fn = self._ExecD8Secondary
+    if self.op.mode == constants.REPLACE_DISK_CHG:
+      fn = self._ExecD8Secondary
     else:
-      raise errors.ProgrammerError("Unhandled disk replacement case")
+      fn = self._ExecD8DiskOnly
 
     ret = fn(feedback_fn)
 
@@ -4298,6 +5168,10 @@ class LUGrowDisk(LogicalUnit):
     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
     assert instance is not None, \
       "Cannot retrieve locked instance %s" % self.op.instance_name
+    _CheckNodeOnline(self, instance.primary_node)
+    for node in instance.secondary_nodes:
+      _CheckNodeOnline(self, node)
+
 
     self.instance = instance
 
@@ -4305,49 +5179,48 @@ class LUGrowDisk(LogicalUnit):
       raise errors.OpPrereqError("Instance's disk layout does not support"
                                  " growing.")
 
       raise errors.OpPrereqError("Instance's disk layout does not support"
                                  " growing.")
 
-    if instance.FindDisk(self.op.disk) is None:
-      raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
-                                 (self.op.disk, instance.name))
+    self.disk = instance.FindDisk(self.op.disk)
 
     nodenames = [instance.primary_node] + list(instance.secondary_nodes)
     nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                        instance.hypervisor)
     for node in nodenames:
 
     nodenames = [instance.primary_node] + list(instance.secondary_nodes)
     nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                        instance.hypervisor)
     for node in nodenames:
-      info = nodeinfo.get(node, None)
-      if not info:
+      info = nodeinfo[node]
+      if info.failed or not info.data:
         raise errors.OpPrereqError("Cannot get current information"
                                    " from node '%s'" % node)
         raise errors.OpPrereqError("Cannot get current information"
                                    " from node '%s'" % node)
-      vg_free = info.get('vg_free', None)
+      vg_free = info.data.get('vg_free', None)
       if not isinstance(vg_free, int):
         raise errors.OpPrereqError("Can't compute free disk space on"
                                    " node %s" % node)
       if not isinstance(vg_free, int):
         raise errors.OpPrereqError("Can't compute free disk space on"
                                    " node %s" % node)
-      if self.op.amount > info['vg_free']:
+      if self.op.amount > vg_free:
         raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                    " %d MiB available, %d MiB required" %
         raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                    " %d MiB available, %d MiB required" %
-                                   (node, info['vg_free'], self.op.amount))
+                                   (node, vg_free, self.op.amount))
 
   def Exec(self, feedback_fn):
     """Execute disk grow.
 
     """
     instance = self.instance
 
   def Exec(self, feedback_fn):
     """Execute disk grow.
 
     """
     instance = self.instance
-    disk = instance.FindDisk(self.op.disk)
+    disk = self.disk
     for node in (instance.secondary_nodes + (instance.primary_node,)):
       self.cfg.SetDiskID(disk, node)
       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
     for node in (instance.secondary_nodes + (instance.primary_node,)):
       self.cfg.SetDiskID(disk, node)
       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
-      if (not result or not isinstance(result, (list, tuple)) or
-          len(result) != 2):
-        raise errors.OpExecError("grow request failed to node %s" % node)
-      elif not result[0]:
-        raise errors.OpExecError("grow request failed to node %s: %s" %
-                                 (node, result[1]))
+      result.Raise()
+      if (not result.data or not isinstance(result.data, (list, tuple)) or
+          len(result.data) != 2):
+        raise errors.OpExecError("Grow request failed to node %s" % node)
+      elif not result.data[0]:
+        raise errors.OpExecError("Grow request failed to node %s: %s" %
+                                 (node, result.data[1]))
     disk.RecordGrow(self.op.amount)
     self.cfg.Update(instance)
     if self.op.wait_for_sync:
-      disk_abort = not _WaitForSync(self.cfg, instance, self.proc)
+      disk_abort = not _WaitForSync(self, instance)
       if disk_abort:
-        logger.Error("Warning: disk sync-ing has not returned a good status.\n"
-                     " Please check the instance.")
+        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
+                             " status.\nPlease check the instance.")
 
 
 class LUQueryInstanceData(NoHooksLU):
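The LUGrowDisk.Exec hunk above expects result.data to be a (status, message) pair from the backend. A small sketch of that validation in isolation (node name and payload are invented):

def check_grow_result(data, node):
  if not data or not isinstance(data, (list, tuple)) or len(data) != 2:
    raise RuntimeError("Grow request failed to node %s" % node)
  status, msg = data
  if not status:
    raise RuntimeError("Grow request failed to node %s: %s" % (node, msg))

check_grow_result((True, "grown by 1024 MiB"), "node1.example.com")  # passes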
@@ -4369,8 +5242,7 @@ class LUQueryInstanceData(NoHooksLU):
       for name in self.op.instances:
         full_name = self.cfg.ExpandInstanceName(name)
         if full_name is None:
-          raise errors.OpPrereqError("Instance '%s' not known" %
-                                     self.op.instance_name)
+          raise errors.OpPrereqError("Instance '%s' not known" % name)
         self.wanted_names.append(full_name)
       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
     else:
@@ -4405,6 +5277,8 @@ class LUQueryInstanceData(NoHooksLU):
     if not static:
       self.cfg.SetDiskID(dev, instance.primary_node)
       dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
+      dev_pstatus.Raise()
+      dev_pstatus = dev_pstatus.data
     else:
       dev_pstatus = None
 
@@ -4418,6 +5292,8 @@ class LUQueryInstanceData(NoHooksLU):
     if snode and not static:
       self.cfg.SetDiskID(dev, snode)
       dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
+      dev_sstatus.Raise()
+      dev_sstatus = dev_sstatus.data
     else:
       dev_sstatus = None
 
@@ -4435,6 +5311,7 @@ class LUQueryInstanceData(NoHooksLU):
       "pstatus": dev_pstatus,
       "sstatus": dev_sstatus,
       "children": dev_children,
       "pstatus": dev_pstatus,
       "sstatus": dev_sstatus,
       "children": dev_children,
+      "mode": dev.mode,
       }
 
     return data
@@ -4450,6 +5327,8 @@ class LUQueryInstanceData(NoHooksLU):
         remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                   instance.name,
                                                   instance.hypervisor)
+        remote_info.Raise()
+        remote_info = remote_info.data
         if remote_info and "state" in remote_info:
           remote_state = "up"
         else:
         if remote_info and "state" in remote_info:
           remote_state = "up"
         else:
@@ -4492,15 +5371,101 @@ class LUSetInstanceParams(LogicalUnit):
   """
   HPATH = "instance-modify"
   HTYPE = constants.HTYPE_INSTANCE
   """
   HPATH = "instance-modify"
   HTYPE = constants.HTYPE_INSTANCE
-  _OP_REQP = ["instance_name", "hvparams"]
+  _OP_REQP = ["instance_name"]
   REQ_BGL = False
 
+  def CheckArguments(self):
+    if not hasattr(self.op, 'nics'):
+      self.op.nics = []
+    if not hasattr(self.op, 'disks'):
+      self.op.disks = []
+    if not hasattr(self.op, 'beparams'):
+      self.op.beparams = {}
+    if not hasattr(self.op, 'hvparams'):
+      self.op.hvparams = {}
+    self.op.force = getattr(self.op, "force", False)
+    if not (self.op.nics or self.op.disks or
+            self.op.hvparams or self.op.beparams):
+      raise errors.OpPrereqError("No changes submitted")
+
+    utils.CheckBEParams(self.op.beparams)
+
+    # Disk validation
+    disk_addremove = 0
+    for disk_op, disk_dict in self.op.disks:
+      if disk_op == constants.DDM_REMOVE:
+        disk_addremove += 1
+        continue
+      elif disk_op == constants.DDM_ADD:
+        disk_addremove += 1
+      else:
+        if not isinstance(disk_op, int):
+          raise errors.OpPrereqError("Invalid disk index")
+      if disk_op == constants.DDM_ADD:
+        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
+        if mode not in (constants.DISK_RDONLY, constants.DISK_RDWR):
+          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
+        size = disk_dict.get('size', None)
+        if size is None:
+          raise errors.OpPrereqError("Required disk parameter size missing")
+        try:
+          size = int(size)
+        except ValueError, err:
+          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
+                                     str(err))
+        disk_dict['size'] = size
+      else:
+        # modification of disk
+        if 'size' in disk_dict:
+          raise errors.OpPrereqError("Disk size change not possible, use"
+                                     " grow-disk")
+
+    if disk_addremove > 1:
+      raise errors.OpPrereqError("Only one disk add or remove operation"
+                                 " supported at a time")
+
+    # NIC validation
+    nic_addremove = 0
+    for nic_op, nic_dict in self.op.nics:
+      if nic_op == constants.DDM_REMOVE:
+        nic_addremove += 1
+        continue
+      elif nic_op == constants.DDM_ADD:
+        nic_addremove += 1
+      else:
+        if not isinstance(nic_op, int):
+          raise errors.OpPrereqError("Invalid nic index")
+
+      # nic_dict should be a dict
+      nic_ip = nic_dict.get('ip', None)
+      if nic_ip is not None:
+        if nic_ip.lower() == "none":
+          nic_dict['ip'] = None
+        else:
+          if not utils.IsValidIP(nic_ip):
+            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
+      # we can only check None bridges and assign the default one
+      nic_bridge = nic_dict.get('bridge', None)
+      if nic_bridge is None:
+        nic_dict['bridge'] = self.cfg.GetDefBridge()
+      # but we can validate MACs
+      nic_mac = nic_dict.get('mac', None)
+      if nic_mac is not None:
+        if self.cfg.IsMacInUse(nic_mac):
+          raise errors.OpPrereqError("MAC address %s already in use"
+                                     " in cluster" % nic_mac)
+        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+          if not utils.IsValidMac(nic_mac):
+            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
+    if nic_addremove > 1:
+      raise errors.OpPrereqError("Only one NIC add or remove operation"
+                                 " supported at a time")
+
   def ExpandNames(self):
     self._ExpandAndLockInstance()
     self.needed_locks[locking.LEVEL_NODE] = []
     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
 
-
   def DeclareLocks(self, level):
     if level == locking.LEVEL_NODE:
       self._LockInstancesNodes()
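The validation in the CheckArguments hunk above implies a list-of-pairs format for self.op.disks and self.op.nics: each entry is (op, params), where op is DDM_ADD, DDM_REMOVE, or the integer index of an existing device. A hedged example of such parameters (the constant values are placeholders, not the real constants module):

DDM_ADD, DDM_REMOVE = "add", "remove"        # placeholder constant values

disks = [
  (DDM_ADD, {"size": 1024, "mode": "rw"}),   # add a 1 GiB read-write disk
  (DDM_REMOVE, {}),                          # remove the last disk
  (0, {"mode": "ro"}),                       # modify existing disk 0
]
nics = [
  (0, {"ip": "none", "bridge": "xen-br1"}),  # clear the IP, change bridge
]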
@@ -4515,21 +5480,8 @@ class LUSetInstanceParams(LogicalUnit):
     if constants.BE_MEMORY in self.be_new:
       args['memory'] = self.be_new[constants.BE_MEMORY]
     if constants.BE_VCPUS in self.be_new:
-      args['vcpus'] = self.be_bnew[constants.BE_VCPUS]
-    if self.do_ip or self.do_bridge or self.mac:
-      if self.do_ip:
-        ip = self.ip
-      else:
-        ip = self.instance.nics[0].ip
-      if self.bridge:
-        bridge = self.bridge
-      else:
-        bridge = self.instance.nics[0].bridge
-      if self.mac:
-        mac = self.mac
-      else:
-        mac = self.instance.nics[0].mac
-      args['nics'] = [(ip, bridge, mac)]
+      args['vcpus'] = self.be_new[constants.BE_VCPUS]
+    # FIXME: readd disk/nic changes
     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
     nl = [self.cfg.GetMasterNode(),
           self.instance.primary_node] + list(self.instance.secondary_nodes)
@@ -4541,44 +5493,7 @@ class LUSetInstanceParams(LogicalUnit):
     This only checks the instance list against the existing names.
 
     """
-    # FIXME: all the parameters could be checked before, in ExpandNames, or in
-    # a separate CheckArguments function, if we implement one, so the operation
-    # can be aborted without waiting for any lock, should it have an error...
-    self.ip = getattr(self.op, "ip", None)
-    self.mac = getattr(self.op, "mac", None)
-    self.bridge = getattr(self.op, "bridge", None)
-    self.kernel_path = getattr(self.op, "kernel_path", None)
-    self.initrd_path = getattr(self.op, "initrd_path", None)
-    self.force = getattr(self.op, "force", None)
-    all_parms = [self.ip, self.bridge, self.mac]
-    if (all_parms.count(None) == len(all_parms) and
-        not self.op.hvparams and
-        not self.op.beparams):
-      raise errors.OpPrereqError("No changes submitted")
-    for item in (constants.BE_MEMORY, constants.BE_VCPUS):
-      val = self.op.beparams.get(item, None)
-      if val is not None:
-        try:
-          val = int(val)
-        except ValueError, err:
-          raise errors.OpPrereqError("Invalid %s size: %s" % (item, str(err)))
-        self.op.beparams[item] = val
-    if self.ip is not None:
-      self.do_ip = True
-      if self.ip.lower() == "none":
-        self.ip = None
-      else:
-        if not utils.IsValidIP(self.ip):
-          raise errors.OpPrereqError("Invalid IP address '%s'." % self.ip)
-    else:
-      self.do_ip = False
-    self.do_bridge = (self.bridge is not None)
-    if self.mac is not None:
-      if self.cfg.IsMacInUse(self.mac):
-        raise errors.OpPrereqError('MAC address %s already in use in cluster' %
-                                   self.mac)
-      if not utils.IsValidMac(self.mac):
-        raise errors.OpPrereqError('Invalid MAC address %s' % self.mac)
+    force = self.force = self.op.force
 
     # checking the new params on the primary/secondary nodes
 
@@ -4593,11 +5508,13 @@ class LUSetInstanceParams(LogicalUnit):
     if self.op.hvparams:
       i_hvdict = copy.deepcopy(instance.hvparams)
       for key, val in self.op.hvparams.iteritems():
-        if val is None:
+        if val == constants.VALUE_DEFAULT:
           try:
             del i_hvdict[key]
           except KeyError:
             pass
+        elif val == constants.VALUE_NONE:
+          i_hvdict[key] = None
         else:
           i_hvdict[key] = val
       cluster = self.cfg.GetClusterInfo()
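A minimal sketch of the merge semantics introduced here: the string "default" drops the per-instance override (falling back to cluster defaults), "none" stores an explicit None, and anything else is a literal value (the constant values below are assumed):

VALUE_DEFAULT, VALUE_NONE = "default", "none"   # assumed constant values

def apply_param_changes(current, changes):
  merged = dict(current)
  for key, val in changes.items():
    if val == VALUE_DEFAULT:
      merged.pop(key, None)        # back to the cluster-level default
    elif val == VALUE_NONE:
      merged[key] = None           # explicit None override
    else:
      merged[key] = val            # literal override
  return merged

print(apply_param_changes({"kernel_path": "/boot/vmlinuz-2.6-xenU"},
                          {"kernel_path": VALUE_DEFAULT,
                           "serial_console": VALUE_NONE}))
# -> {'serial_console': None}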
@@ -4616,7 +5533,7 @@ class LUSetInstanceParams(LogicalUnit):
     if self.op.beparams:
       i_bedict = copy.deepcopy(instance.beparams)
       for key, val in self.op.beparams.iteritems():
-        if val is None:
+        if val == constants.VALUE_DEFAULT:
           try:
             del i_bedict[key]
           except KeyError:
@@ -4629,7 +5546,7 @@ class LUSetInstanceParams(LogicalUnit):
       self.be_new = be_new # the new actual values
       self.be_inst = i_bedict # the new dict (without defaults)
     else:
-      self.hv_new = self.hv_inst = {}
+      self.be_new = self.be_inst = {}
 
     self.warn = []
 
@@ -4642,39 +5559,91 @@ class LUSetInstanceParams(LogicalUnit):
                                                   instance.hypervisor)
       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                          instance.hypervisor)
-
-      if pnode not in nodeinfo or not isinstance(nodeinfo[pnode], dict):
+      if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
         # Assume the primary node is unreachable and go ahead
         self.warn.append("Can't get info from primary node %s" % pnode)
       else:
-        if instance_info:
-          current_mem = instance_info['memory']
+        if not instance_info.failed and instance_info.data:
+          current_mem = instance_info.data['memory']
         else:
           # Assume instance not running
           # (there is a slight race condition here, but it's not very probable,
           # and we have no other way to check)
           current_mem = 0
         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
-                    nodeinfo[pnode]['memory_free'])
+                    nodeinfo[pnode].data['memory_free'])
         if miss_mem > 0:
           raise errors.OpPrereqError("This change will prevent the instance"
                                      " from starting, due to %d MB of memory"
                                      " missing on its primary node" % miss_mem)
 
       if be_new[constants.BE_AUTO_BALANCE]:
-        for node in instance.secondary_nodes:
-          if node not in nodeinfo or not isinstance(nodeinfo[node], dict):
+        for node, nres in nodeinfo.iteritems():
+          if node not in instance.secondary_nodes:
+            continue
+          if nres.failed or not isinstance(nres.data, dict):
             self.warn.append("Can't get info from secondary node %s" % node)
             self.warn.append("Can't get info from secondary node %s" % node)
-          elif be_new[constants.BE_MEMORY] > nodeinfo[node]['memory_free']:
+          elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
             self.warn.append("Not enough memory to failover instance to"
                              " secondary node %s" % node)
 
             self.warn.append("Not enough memory to failover instance to"
                              " secondary node %s" % node)
 
+    # NIC processing
+    for nic_op, nic_dict in self.op.nics:
+      if nic_op == constants.DDM_REMOVE:
+        if not instance.nics:
+          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
+        continue
+      if nic_op != constants.DDM_ADD:
+        # an existing nic
+        if nic_op < 0 or nic_op >= len(instance.nics):
+          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
+                                     " are 0 to %d" %
+                                     (nic_op, len(instance.nics)))
+      nic_bridge = nic_dict.get('bridge', None)
+      if nic_bridge is not None:
+        if not self.rpc.call_bridges_exist(pnode, [nic_bridge]):
+          msg = ("Bridge '%s' doesn't exist on one of"
+                 " the instance nodes" % nic_bridge)
+          if self.force:
+            self.warn.append(msg)
+          else:
+            raise errors.OpPrereqError(msg)
+
+    # DISK processing
+    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
+      raise errors.OpPrereqError("Disk operations not supported for"
+                                 " diskless instances")
+    for disk_op, disk_dict in self.op.disks:
+      if disk_op == constants.DDM_REMOVE:
+        if len(instance.disks) == 1:
+          raise errors.OpPrereqError("Cannot remove the last disk of"
+                                     " an instance")
+        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
+        ins_l = ins_l[pnode]
+        if ins_l.failed or not isinstance(ins_l.data, list):
+          raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
+        if instance.name in ins_l.data:
+          raise errors.OpPrereqError("Instance is running, can't remove"
+                                     " disks.")
+
+      if (disk_op == constants.DDM_ADD and
+          len(instance.disks) >= constants.MAX_DISKS):
+        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
+                                   " add more" % constants.MAX_DISKS)
+      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
+        # an existing disk
+        if disk_op < 0 or disk_op >= len(instance.disks):
+          raise errors.OpPrereqError("Invalid disk index %s, valid values"
+                                     " are 0 to %d" %
+                                     (disk_op, len(instance.disks)))
+
     return
 
   def Exec(self, feedback_fn):
     """Modifies an instance.
 
     All parameters take effect only at the next restart of the instance.
+
     """
     # Process here the warnings from CheckPrereq, as we don't have a
     # feedback_fn there.
     """
     # Process here the warnings from CheckPrereq, as we don't have a
     # feedback_fn there.
@@ -4683,19 +5652,98 @@ class LUSetInstanceParams(LogicalUnit):
 
     result = []
     instance = self.instance
-    if self.do_ip:
-      instance.nics[0].ip = self.ip
-      result.append(("ip", self.ip))
-    if self.bridge:
-      instance.nics[0].bridge = self.bridge
-      result.append(("bridge", self.bridge))
-    if self.mac:
-      instance.nics[0].mac = self.mac
-      result.append(("mac", self.mac))
+    # disk changes
+    for disk_op, disk_dict in self.op.disks:
+      if disk_op == constants.DDM_REMOVE:
+        # remove the last disk
+        device = instance.disks.pop()
+        device_idx = len(instance.disks)
+        for node, disk in device.ComputeNodeTree(instance.primary_node):
+          self.cfg.SetDiskID(disk, node)
+          rpc_result = self.rpc.call_blockdev_remove(node, disk)
+          if rpc_result.failed or not rpc_result.data:
+            self.proc.LogWarning("Could not remove disk/%d on node %s,"
+                                 " continuing anyway", device_idx, node)
+        result.append(("disk/%d" % device_idx, "remove"))
+      elif disk_op == constants.DDM_ADD:
+        # add a new disk
+        if instance.disk_template == constants.DT_FILE:
+          file_driver, file_path = instance.disks[0].logical_id
+          file_path = os.path.dirname(file_path)
+        else:
+          file_driver = file_path = None
+        disk_idx_base = len(instance.disks)
+        new_disk = _GenerateDiskTemplate(self,
+                                         instance.disk_template,
+                                         instance, instance.primary_node,
+                                         instance.secondary_nodes,
+                                         [disk_dict],
+                                         file_path,
+                                         file_driver,
+                                         disk_idx_base)[0]
+        new_disk.mode = disk_dict['mode']
+        instance.disks.append(new_disk)
+        info = _GetInstanceInfoText(instance)
+
+        logging.info("Creating volume %s for instance %s",
+                     new_disk.iv_name, instance.name)
+        # Note: this needs to be kept in sync with _CreateDisks
+        #HARDCODE
+        for secondary_node in instance.secondary_nodes:
+          try:
+            _CreateBlockDevOnSecondary(self, secondary_node, instance,
+                                       new_disk, False, info)
+          except errors.OpExecError, err:
+            self.LogWarning("Failed to create volume %s (%s) on"
+                            " secondary node %s: %s",
+                            new_disk.iv_name, new_disk, secondary_node, err)
+        #HARDCODE
+        try:
+          _CreateBlockDevOnPrimary(self, instance.primary_node,
+                                   instance, new_disk, info)
+        except errors.OpExecError, err:
+          self.LogWarning("Failed to create volume %s on primary: %s",
+                          new_disk.iv_name, err)
+        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
+                       (new_disk.size, new_disk.mode)))
+      else:
+        # change a given disk
+        instance.disks[disk_op].mode = disk_dict['mode']
+        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
+    # NIC changes
+    for nic_op, nic_dict in self.op.nics:
+      if nic_op == constants.DDM_REMOVE:
+        # remove the last nic
+        del instance.nics[-1]
+        result.append(("nic.%d" % len(instance.nics), "remove"))
+      elif nic_op == constants.DDM_ADD:
+        # add a new nic
+        if 'mac' not in nic_dict:
+          mac = constants.VALUE_GENERATE
+        else:
+          mac = nic_dict['mac']
+        if mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+          mac = self.cfg.GenerateMAC()
+        new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
+                              bridge=nic_dict.get('bridge', None))
+        instance.nics.append(new_nic)
+        result.append(("nic.%d" % (len(instance.nics) - 1),
+                       "add:mac=%s,ip=%s,bridge=%s" %
+                       (new_nic.mac, new_nic.ip, new_nic.bridge)))
+      else:
+        # change a given nic
+        for key in 'mac', 'ip', 'bridge':
+          if key in nic_dict:
+            setattr(instance.nics[nic_op], key, nic_dict[key])
+            result.append(("nic.%s/%d" % (key, nic_op), nic_dict[key]))
+
+    # hvparams changes
     if self.op.hvparams:
       instance.hvparams = self.hv_new
       for key, val in self.op.hvparams.iteritems():
         result.append(("hv/%s" % key, val))
+
+    # beparams changes
     if self.op.beparams:
       instance.beparams = self.be_inst
       for key, val in self.op.beparams.iteritems():
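The result list built in Exec is the modification summary returned to the caller: (parameter, new value) pairs keyed like "disk/1", "nic.mac/0" or "hv/<name>". An illustration with made-up values:

    # Illustration only; names and values below are hypothetical examples
    # of the pairs accumulated in `result` above.
    changes = [
      ("disk/1", "add:size=1024,mode=rw"),
      ("nic.mac/0", "aa:00:00:11:22:33"),
      ("hv/kernel_path", "/boot/vmlinuz"),
    ]
    for name, value in changes:
      print " - %s -> %s" % (name, value)   # Python 2, as in the code base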
@@ -4731,13 +5779,21 @@ class LUQueryExports(NoHooksLU):
   def Exec(self, feedback_fn):
     """Compute the list of all the exported system images.
 
-    Returns:
-      a dictionary with the structure node->(export-list)
-      where export-list is a list of the instances exported on
-      that node.
+    @rtype: dict
+    @return: a dictionary with the structure node->(export-list)
+        where export-list is a list of the instances exported on
+        that node.
 
     """
 
     """
-    return self.rpc.call_export_list(self.nodes)
+    rpcresult = self.rpc.call_export_list(self.nodes)
+    result = {}
+    for node in rpcresult:
+      if rpcresult[node].failed:
+        result[node] = False
+      else:
+        result[node] = rpcresult[node].data
+
+    return result
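This rewrite shows the per-node result convention used throughout the patch: RPC calls now return wrapper objects exposing .failed and .data (plus a .Raise() helper) instead of bare values. A self-contained sketch of the three handling idioms, with RpcResult as a stand-in for the real wrapper in ganeti.rpc:

    # Stand-in class; the real wrapper lives in ganeti.rpc and raises a
    # ganeti error rather than RuntimeError.
    class RpcResult(object):
      def __init__(self, data, failed=False):
        self.data = data
        self.failed = failed

      def Raise(self):
        if self.failed:
          raise RuntimeError("remote call failed")

    res = RpcResult(["instance1.example.com"])
    res.Raise()                      # idiom 1: hard failure aborts the LU
    if res.failed or not res.data:   # idiom 2: soft failure, warn and go on
      print "warning: could not query node"
    else:
      print "exports:", res.data     # idiom 3: unwrap the payload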
 
 
 class LUExportInstance(LogicalUnit):
@@ -4790,12 +5846,15 @@ class LUExportInstance(LogicalUnit):
     self.instance = self.cfg.GetInstanceInfo(instance_name)
     assert self.instance is not None, \
           "Cannot retrieve locked instance %s" % self.op.instance_name
+    _CheckNodeOnline(self, self.instance.primary_node)
 
     self.dst_node = self.cfg.GetNodeInfo(
       self.cfg.ExpandNodeName(self.op.target_node))
 
-    assert self.dst_node is not None, \
-          "Cannot retrieve locked node %s" % self.op.target_node
+    if self.dst_node is None:
+      # This means the node name is wrong, not that the node is non-locked
+      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
+    _CheckNodeOnline(self, self.dst_node.name)
 
     # instance disk type verification
     for disk in self.instance.disks:
@@ -4812,7 +5871,9 @@ class LUExportInstance(LogicalUnit):
     src_node = instance.primary_node
     if self.op.shutdown:
       # shutdown the instance, but not the disks
-      if not self.rpc.call_instance_shutdown(src_node, instance):
+      result = self.rpc.call_instance_shutdown(src_node, instance)
+      result.Raise()
+      if not result.data:
         raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                  (instance.name, src_node))
 
         raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                  (instance.name, src_node))
 
@@ -4820,43 +5881,53 @@ class LUExportInstance(LogicalUnit):
 
     snap_disks = []
 
+    # set the disks ID correctly since call_instance_start needs the
+    # correct drbd minor to create the symlinks
+    for disk in instance.disks:
+      self.cfg.SetDiskID(disk, src_node)
+
     try:
       for disk in instance.disks:
-        if disk.iv_name == "sda":
-          # new_dev_name will be a snapshot of an lvm leaf of the one we passed
-          new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
-
-          if not new_dev_name:
-            logger.Error("could not snapshot block device %s on node %s" %
-                         (disk.logical_id[1], src_node))
-          else:
-            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
-                                      logical_id=(vgname, new_dev_name),
-                                      physical_id=(vgname, new_dev_name),
-                                      iv_name=disk.iv_name)
-            snap_disks.append(new_dev)
+        # new_dev_name will be a snapshot of an lvm leaf of the one we passed
+        new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
+        if new_dev_name.failed or not new_dev_name.data:
+          self.LogWarning("Could not snapshot block device %s on node %s",
+                          disk.logical_id[1], src_node)
+          snap_disks.append(False)
+        else:
+          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
+                                 logical_id=(vgname, new_dev_name.data),
+                                 physical_id=(vgname, new_dev_name.data),
+                                 iv_name=disk.iv_name)
+          snap_disks.append(new_dev)
 
     finally:
       if self.op.shutdown and instance.status == "up":
-        if not self.rpc.call_instance_start(src_node, instance, None):
+        result = self.rpc.call_instance_start(src_node, instance, None)
+        if result.failed or not result.data:
           _ShutdownInstanceDisks(self, instance)
           raise errors.OpExecError("Could not start instance")
 
     # TODO: check for size
 
     cluster_name = self.cfg.GetClusterName()
-    for dev in snap_disks:
-      if not self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
-                                      instance, cluster_name):
-        logger.Error("could not export block device %s from node %s to node %s"
-                     % (dev.logical_id[1], src_node, dst_node.name))
-      if not self.rpc.call_blockdev_remove(src_node, dev):
-        logger.Error("could not remove snapshot block device %s from node %s" %
-                     (dev.logical_id[1], src_node))
-
-    if not self.rpc.call_finalize_export(dst_node.name, instance, snap_disks):
-      logger.Error("could not finalize export for instance %s on node %s" %
-                   (instance.name, dst_node.name))
+    for idx, dev in enumerate(snap_disks):
+      if dev:
+        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
+                                               instance, cluster_name, idx)
+        if result.failed or not result.data:
+          self.LogWarning("Could not export block device %s from node %s to"
+                          " node %s", dev.logical_id[1], src_node,
+                          dst_node.name)
+        result = self.rpc.call_blockdev_remove(src_node, dev)
+        if result.failed or not result.data:
+          self.LogWarning("Could not remove snapshot block device %s from node"
+                          " %s", dev.logical_id[1], src_node)
+
+    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
+    if result.failed or not result.data:
+      self.LogWarning("Could not finalize export for instance %s on node %s",
+                      instance.name, dst_node.name)
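Note the False placeholders: a failed snapshot still occupies its slot in snap_disks, so disk indices stay aligned for the export loop and for finalize_export. A small sketch of the pattern (disk names are made up):

    snap_disks = []
    for disk in ["disk0", "disk1", "disk2"]:
      if disk == "disk1":            # pretend this snapshot failed
        snap_disks.append(False)     # keep the slot so indices stay aligned
      else:
        snap_disks.append(disk + ".snap")

    for idx, dev in enumerate(snap_disks):
      if dev:                        # export only the successful snapshots
        print "exporting disk %d as %s" % (idx, dev)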
 
     nodelist = self.cfg.GetNodeList()
     nodelist.remove(dst_node.name)
@@ -4867,10 +5938,12 @@ class LUExportInstance(LogicalUnit):
     if nodelist:
       exportlist = self.rpc.call_export_list(nodelist)
       for node in exportlist:
-        if instance.name in exportlist[node]:
+        if exportlist[node].failed:
+          continue
+        if instance.name in exportlist[node].data:
           if not self.rpc.call_export_remove(node, instance.name):
-            logger.Error("could not remove older export for instance %s"
-                         " on node %s" % (instance.name, node))
+            self.LogWarning("Could not remove older export for instance %s"
+                            " on node %s", instance.name, node)
 
 
 class LURemoveExport(NoHooksLU):
@@ -4908,11 +5981,15 @@ class LURemoveExport(NoHooksLU):
       locking.LEVEL_NODE])
     found = False
     for node in exportlist:
-      if instance_name in exportlist[node]:
+      if exportlist[node].failed:
+        self.LogWarning("Failed to query node %s, continuing" % node)
+        continue
+      if instance_name in exportlist[node].data:
         found = True
-        if not self.rpc.call_export_remove(node, instance_name):
-          logger.Error("could not remove export for instance %s"
-                       " on node %s" % (instance_name, node))
+        result = self.rpc.call_export_remove(node, instance_name)
+        if result.failed or not result.data:
+          logging.error("Could not remove export for instance %s"
+                        " on node %s", instance_name, node)
 
     if fqdn_warn and not found:
       feedback_fn("Export not found. If trying to remove an export belonging"
 
     if fqdn_warn and not found:
       feedback_fn("Export not found. If trying to remove an export belonging"
@@ -5127,9 +6204,10 @@ class LUTestDelay(NoHooksLU):
       if not result:
         raise errors.OpExecError("Complete failure from rpc call")
       for node, node_result in result.items():
-        if not node_result:
+        node_result.Raise()
+        if not node_result.data:
           raise errors.OpExecError("Failure during rpc call to node %s,"
           raise errors.OpExecError("Failure during rpc call to node %s,"
-                                   " result: %s" % (node, node_result))
+                                   " result: %s" % (node, node_result.data))
 
 
 class IAllocator(object):
@@ -5147,7 +6225,7 @@ class IAllocator(object):
   """
   _ALLO_KEYS = [
     "mem_size", "disks", "disk_template",
   """
   _ALLO_KEYS = [
     "mem_size", "disks", "disk_template",
-    "os", "tags", "nics", "vcpus",
+    "os", "tags", "nics", "vcpus", "hypervisor",
     ]
   _RELO_KEYS = [
     "relocate_from",
@@ -5162,6 +6240,7 @@ class IAllocator(object):
     self.name = name
     self.mem_size = self.disks = self.disk_template = None
     self.os = self.tags = self.nics = self.vcpus = None
+    self.hypervisor = None
     self.relocate_from = None
     # computed fields
     self.required_nodes = None
@@ -5201,25 +6280,28 @@ class IAllocator(object):
       "enable_hypervisors": list(cluster_info.enabled_hypervisors),
       # we don't have job IDs
       }
       "enable_hypervisors": list(cluster_info.enabled_hypervisors),
       # we don't have job IDs
       }
-
-    i_list = []
-    cluster = self.cfg.GetClusterInfo()
-    for iname in cfg.GetInstanceList():
-      i_obj = cfg.GetInstanceInfo(iname)
-      i_list.append((i_obj, cluster.FillBE(i_obj)))
+    iinfo = cfg.GetAllInstancesInfo().values()
+    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
 
     # node data
     node_results = {}
     node_list = cfg.GetNodeList()
-    # FIXME: here we have only one hypervisor information, but
-    # instance can belong to different hypervisors
+
+    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
+      hypervisor_name = self.hypervisor
+    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
+      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
+
     node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
-                                           cfg.GetHypervisorType())
+                                           hypervisor_name)
+    node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
+                       cluster_info.enabled_hypervisors)
     for nname in node_list:
       ninfo = cfg.GetNodeInfo(nname)
-      if nname not in node_data or not isinstance(node_data[nname], dict):
+      node_data[nname].Raise()
+      if not isinstance(node_data[nname].data, dict):
         raise errors.OpExecError("Can't get data for node %s" % nname)
         raise errors.OpExecError("Can't get data for node %s" % nname)
-      remote_info = node_data[nname]
+      remote_info = node_data[nname].data
       for attr in ['memory_total', 'memory_free', 'memory_dom0',
                    'vg_size', 'vg_free', 'cpu_total']:
         if attr not in remote_info:
@@ -5235,6 +6317,13 @@ class IAllocator(object):
       for iinfo, beinfo in i_list:
         if iinfo.primary_node == nname:
           i_p_mem += beinfo[constants.BE_MEMORY]
+          if iinfo.name not in node_iinfo[nname]:
+            i_used_mem = 0
+          else:
+            i_used_mem = int(node_iinfo[nname][iinfo.name]['memory'])
+          i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
+          remote_info['memory_free'] -= max(0, i_mem_diff)
+
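The new block reserves headroom for instances that currently use less memory than they are configured for: the node's reported free memory is reduced by the gap, since a ballooned-down instance may grow back to its BE_MEMORY. A worked example with made-up numbers (all in MB):

    be_memory = 512      # configured memory for the instance
    i_used_mem = 128     # memory the hypervisor reports as in use
    memory_free = 2048   # free memory the node reported

    i_mem_diff = be_memory - i_used_mem   # 384 MB the instance may reclaim
    memory_free -= max(0, i_mem_diff)     # the allocator now sees 1664 MB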
           if iinfo.status == "up":
             i_p_up_mem += beinfo[constants.BE_MEMORY]
 
           if iinfo.status == "up":
             i_p_up_mem += beinfo[constants.BE_MEMORY]
 
@@ -5251,6 +6340,7 @@ class IAllocator(object):
         "primary_ip": ninfo.primary_ip,
         "secondary_ip": ninfo.secondary_ip,
         "total_cpus": remote_info['cpu_total'],
         "primary_ip": ninfo.primary_ip,
         "secondary_ip": ninfo.secondary_ip,
         "total_cpus": remote_info['cpu_total'],
+        "offline": ninfo.offline,
         }
       node_results[nname] = pnr
     data["nodes"] = node_results
@@ -5292,8 +6382,7 @@ class IAllocator(object):
     if len(self.disks) != 2:
       raise errors.OpExecError("Only two-disk configurations supported")
 
-    disk_space = _ComputeDiskSize(self.disk_template,
-                                  self.disks[0]["size"], self.disks[1]["size"])
+    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
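_ComputeDiskSize now takes the full list of {'size': ...} dicts instead of exactly two sizes, so it works for any disk count. A hedged sketch of what such a helper could look like; the per-disk DRBD metadata overhead used here (128 MB) is an assumption, not taken from this patch:

    # Hypothetical re-implementation, for illustration only.
    def compute_disk_size(disk_template, disks):
      sizes = [d['size'] for d in disks]
      if disk_template == 'diskless':
        return 0
      if disk_template == 'drbd':
        # assume DRBD wants some metadata space per disk (128 MB here)
        return sum(sizes) + 128 * len(sizes)
      return sum(sizes)              # plain/file: just the sum

    # compute_disk_size('drbd', [{'size': 1024}, {'size': 2048}]) -> 3328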
 
     if self.disk_template in constants.DTS_NET_MIRROR:
       self.required_nodes = 2
@@ -5336,10 +6425,8 @@ class IAllocator(object):
       raise errors.OpPrereqError("Instance has not exactly one secondary node")
 
     self.required_nodes = 1
       raise errors.OpPrereqError("Instance has not exactly one secondary node")
 
     self.required_nodes = 1
-
-    disk_space = _ComputeDiskSize(instance.disk_template,
-                                  instance.disks[0].size,
-                                  instance.disks[1].size)
+    disk_sizes = [{'size': disk.size} for disk in instance.disks]
+    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
 
     request = {
       "type": "relocate",
 
     request = {
       "type": "relocate",
@@ -5372,11 +6459,12 @@ class IAllocator(object):
     data = self.in_text
 
     result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
+    result.Raise()
 
-    if not isinstance(result, (list, tuple)) or len(result) != 4:
+    if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
       raise errors.OpExecError("Invalid result from master iallocator runner")
 
       raise errors.OpExecError("Invalid result from master iallocator runner")
 
-    rcode, stdout, stderr, fail = result
+    rcode, stdout, stderr, fail = result.data
 
     if rcode == constants.IARUN_NOTFOUND:
       raise errors.OpExecError("Can't find allocator '%s'" % name)
 
     if rcode == constants.IARUN_NOTFOUND:
       raise errors.OpExecError("Can't find allocator '%s'" % name)
@@ -5459,6 +6547,8 @@ class LUTestAllocator(NoHooksLU):
             row["mode"] not in ['r', 'w']):
           raise errors.OpPrereqError("Invalid contents of the"
                                      " 'disks' parameter")
             row["mode"] not in ['r', 'w']):
           raise errors.OpPrereqError("Invalid contents of the"
                                      " 'disks' parameter")
+      if self.op.hypervisor is None:
+        self.op.hypervisor = self.cfg.GetHypervisorType()
     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
       if not hasattr(self.op, "name"):
         raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
@@ -5494,6 +6584,7 @@ class LUTestAllocator(NoHooksLU):
                        tags=self.op.tags,
                        nics=self.op.nics,
                        vcpus=self.op.vcpus,
+                       hypervisor=self.op.hypervisor,
                        )
     else:
       ial = IAllocator(self,