X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/2b301fd28ded1a9aef6196de40321de85d4624b6..e82ac01cfd4fa14a8e5e03a0c77045c21537f4e0:/lib/cmdlib.py

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 23590d8..caae061 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -718,13 +718,24 @@ class LUVerifyCluster(LogicalUnit):
 
     if 'nodelist' not in node_result:
       bad = True
-      feedback_fn("  - ERROR: node hasn't returned node connectivity data")
+      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
     else:
       if node_result['nodelist']:
         bad = True
         for node in node_result['nodelist']:
-          feedback_fn("  - ERROR: communication with node '%s': %s" %
+          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                           (node, node_result['nodelist'][node]))
+    if 'node-net-test' not in node_result:
+      bad = True
+      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
+    else:
+      if node_result['node-net-test']:
+        bad = True
+        nlist = utils.NiceSort(node_result['node-net-test'].keys())
+        for node in nlist:
+          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
+                          (node, node_result['node-net-test'][node]))
+
     hyp_result = node_result.get('hypervisor', None)
     if hyp_result is not None:
       feedback_fn("  - ERROR: hypervisor verify failure: '%s'" % hyp_result)
@@ -821,7 +832,8 @@ class LUVerifyCluster(LogicalUnit):
       for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
         needed_mem = 0
         for instance in instances:
-          needed_mem += instance_cfg[instance].memory
+          if instance_cfg[instance].auto_balance:
+            needed_mem += instance_cfg[instance].memory
         if nodeinfo['mfree'] < needed_mem:
           feedback_fn("  - ERROR: not enough memory on node %s to accomodate"
                       " failovers should node %s fail" % (node, prinode))
@@ -847,8 +859,11 @@ class LUVerifyCluster(LogicalUnit):
 
     """
     all_nodes = self.cfg.GetNodeList()
+    tags = self.cfg.GetClusterInfo().GetTags()
     # TODO: populate the environment with useful information for verify hooks
-    env = {}
+    env = {
+      "CLUSTER_TAGS": " ".join(tags),
+      }
     return env, [], all_nodes
 
   def Exec(self, feedback_fn):
@@ -862,8 +877,10 @@ class LUVerifyCluster(LogicalUnit):
 
     vg_name = self.cfg.GetVGName()
     nodelist = utils.NiceSort(self.cfg.GetNodeList())
+    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
     i_non_redundant = [] # Non redundant instances
+    i_non_a_balanced = [] # Non auto-balanced instances
     node_volume = {}
     node_instance = {}
     node_info = {}
@@ -884,11 +901,15 @@ class LUVerifyCluster(LogicalUnit):
       'filelist': file_names,
       'nodelist': nodelist,
       'hypervisor': None,
+      'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
+                        for node in nodeinfo]
       }
     all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
    all_rversion = rpc.call_version(nodelist)
     all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
 
+    incomplete_nodeinfo = False
+
     for node in nodelist:
       feedback_fn("* Verifying node %s" % node)
       result = self._VerifyNode(node, file_names, local_checksums,
@@ -907,6 +928,7 @@ class LUVerifyCluster(LogicalUnit):
       elif not isinstance(volumeinfo, dict):
         feedback_fn("  - ERROR: connection to %s failed" % (node,))
         bad = True
+        incomplete_nodeinfo = True
         continue
       else:
         node_volume[node] = volumeinfo
@@ -916,6 +938,7 @@ class LUVerifyCluster(LogicalUnit):
       if type(nodeinstance) != list:
         feedback_fn("  - ERROR: connection to %s failed" % (node,))
         bad = True
+        incomplete_nodeinfo = True
         continue
       node_instance[node] = nodeinstance
 
@@ -925,6 +948,7 @@ class LUVerifyCluster(LogicalUnit):
       if not isinstance(nodeinfo, dict):
         feedback_fn("  - ERROR: connection to %s failed" % (node,))
         bad = True
+        incomplete_nodeinfo = True
         continue
 
       try:
@@ -941,9 +965,10 @@ class LUVerifyCluster(LogicalUnit):
           # secondary.
           "sinst-by-pnode": {},
         }
-      except ValueError:
+      except (ValueError, TypeError):
        feedback_fn("  - ERROR: invalid value returned from node %s" % (node,))
         bad = True
+        incomplete_nodeinfo = True
         continue
 
     node_vol_should = {}
@@ -978,6 +1003,9 @@ class LUVerifyCluster(LogicalUnit):
         feedback_fn("  - WARNING: multiple secondaries for instance %s"
                     % instance)
 
+      if not inst_config.auto_balance:
+        i_non_a_balanced.append(instance)
+
       for snode in inst_config.secondary_nodes:
         if snode in node_info:
           node_info[snode]['sinst'].append(instance)
@@ -998,7 +1026,8 @@ class LUVerifyCluster(LogicalUnit):
                                        feedback_fn)
     bad = bad or result
 
-    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
+    if (constants.VERIFY_NPLUSONE_MEM not in self.skip_set and
+        not incomplete_nodeinfo):
       feedback_fn("* Verifying N+1 Memory redundancy")
       result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
       bad = bad or result
@@ -1008,6 +1037,10 @@ class LUVerifyCluster(LogicalUnit):
       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                   % len(i_non_redundant))
 
+    if i_non_a_balanced:
+      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
+                  % len(i_non_a_balanced))
+
     return int(bad)
 
   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
@@ -1157,8 +1190,7 @@ class LURenameCluster(LogicalUnit):
       raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                  " cluster has changed")
     if new_ip != old_ip:
-      result = utils.RunCmd(["fping", "-q", new_ip])
-      if not result.failed:
+      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)
@@ -1467,12 +1499,12 @@ class LUQueryNodes(NoHooksLU):
       "dtotal", "dfree",
       "mtotal", "mnode", "mfree",
       "bootid",
-      "ctotal",
+      "ctotal", "cnodes", "csockets",
       ])
 
     _CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt",
                                "pinst_list", "sinst_list",
-                               "pip", "sip"],
+                               "pip", "sip", "tags"],
                        dynamic=self.dynamic_fields,
                        selected=self.op.output_fields)
 
@@ -1493,14 +1525,17 @@ class LUQueryNodes(NoHooksLU):
       for name in nodenames:
         nodeinfo = node_data.get(name, None)
         if nodeinfo:
+          fn = utils.TryConvert
           live_data[name] = {
-            "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
-            "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
-            "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
-            "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
-            "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
-            "ctotal": utils.TryConvert(int, nodeinfo['cpu_total']),
-            "bootid": nodeinfo['bootid'],
+            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
+            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
+            "mfree": fn(int, nodeinfo.get('memory_free', None)),
+            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
+            "dfree": fn(int, nodeinfo.get('vg_free', None)),
+            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
+            "bootid": nodeinfo.get('bootid', None),
+            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
+            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
             }
         else:
           live_data[name] = {}
@@ -1543,6 +1578,8 @@ class LUQueryNodes(NoHooksLU):
           val = node.primary_ip
         elif field == "sip":
           val = node.secondary_ip
+        elif field == "tags":
+          val = list(node.GetTags())
         elif field in self.dynamic_fields:
           val = live_data[node.name].get(field, None)
         else:
@@ -2235,7 +2272,8 @@ def _CheckNodeFreeMemory(cfg, node, reason, requested):
 
   """
   nodeinfo = rpc.call_node_info([node], cfg.GetVGName())
-  if not nodeinfo or not isinstance(nodeinfo, dict):
+  if not (nodeinfo and isinstance(nodeinfo, dict) and
+          node in nodeinfo and isinstance(nodeinfo[node], dict)):
     raise errors.OpPrereqError("Could not contact node %s for resource"
                              " information" % (node,))
 
@@ -2563,9 +2601,7 @@ class LURenameInstance(LogicalUnit):
                                new_name)
 
     if not getattr(self.op, "ignore_ip", False):
-      command = ["fping", "-q", name_info.ip]
-      result = utils.RunCmd(command)
-      if not result.failed:
+      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))
 
@@ -2586,8 +2622,8 @@ class LURenameInstance(LogicalUnit):
     try:
       if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name,
                                           "sda", "sdb"):
-        msg = ("Could run OS rename script for instance %s on node %s (but the"
-               " instance has been renamed in Ganeti)" %
+        msg = ("Could not run OS rename script for instance %s on node %s"
+               " (but the instance has been renamed in Ganeti)" %
               (inst.name, inst.primary_node))
        logger.Error(msg)
     finally:
@@ -2600,7 +2636,7 @@ class LURemoveInstance(LogicalUnit):
 
   """
   HPATH = "instance-remove"
   HTYPE = constants.HTYPE_INSTANCE
-  _OP_REQP = ["instance_name"]
+  _OP_REQP = ["instance_name", "ignore_failures"]
 
   def BuildHooksEnv(self):
    """Build hooks env.
@@ -2669,7 +2705,12 @@ class LUQueryInstances(NoHooksLU):
     _CheckOutputFields(static=["name", "os", "pnode", "snodes",
                                "admin_state", "admin_ram",
                                "disk_template", "ip", "mac", "bridge",
-                               "sda_size", "sdb_size", "vcpus"],
+                               "sda_size", "sdb_size", "vcpus", "tags",
+                               "auto_balance",
+                               "network_port", "kernel_path", "initrd_path",
+                               "hvm_boot_order", "hvm_acpi", "hvm_pae",
+                               "hvm_cdrom_image_path", "hvm_nic_type",
+                               "hvm_disk_type", "vnc_bind_address"],
                        dynamic=self.dynamic_fields,
                        selected=self.op.output_fields)
 
@@ -2762,6 +2803,21 @@ class LUQueryInstances(NoHooksLU):
             val = disk.size
         elif field == "vcpus":
           val = instance.vcpus
+        elif field == "tags":
+          val = list(instance.GetTags())
+        elif field == "auto_balance":
+          val = instance.auto_balance
+        elif field in ("network_port", "kernel_path", "initrd_path",
+                       "hvm_boot_order", "hvm_acpi", "hvm_pae",
+                       "hvm_cdrom_image_path", "hvm_nic_type",
+                       "hvm_disk_type", "vnc_bind_address"):
+          val = getattr(instance, field, None)
+          if val is None:
+            if field in ("hvm_nic_type", "hvm_disk_type",
+                         "kernel_path", "initrd_path"):
+              val = "default"
+            else:
+              val = "-"
         else:
           raise errors.ParameterError(field)
         iout.append(val)
@@ -2865,7 +2921,7 @@ class LUFailoverInstance(LogicalUnit):
 
     instance.primary_node = target_node
     # distribute new instance config to the other nodes
-    self.cfg.AddInstance(instance)
+    self.cfg.Update(instance)
 
     # Only start the instance if it's marked as up
     if instance.status == "up":
@@ -2886,6 +2942,305 @@ class LUFailoverInstance(LogicalUnit):
                                  (instance.name, target_node))
 
 
+class LUMigrateInstance(LogicalUnit):
+  """Migrate an instance.
+
+  This is migration without shutting down, compared to the failover,
+  which is done with shutdown.
+
+  """
+  HPATH = "instance-migrate"
+  HTYPE = constants.HTYPE_INSTANCE
+  _OP_REQP = ["instance_name", "live", "cleanup"]
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    This runs on master, primary and secondary nodes of the instance.
+
+    """
+    env = _BuildInstanceHookEnvByObject(self.instance)
+    nl = [self.sstore.GetMasterNode()] + list(self.instance.secondary_nodes)
+    return env, nl, nl
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This checks that the instance is in the cluster.
+
+    """
+    instance = self.cfg.GetInstanceInfo(
+      self.cfg.ExpandInstanceName(self.op.instance_name))
+    if instance is None:
+      raise errors.OpPrereqError("Instance '%s' not known" %
+                                 self.op.instance_name)
+
+    if instance.disk_template != constants.DT_DRBD8:
+      raise errors.OpPrereqError("Instance's disk layout is not"
+                                 " drbd8, cannot migrate.")
+
+    secondary_nodes = instance.secondary_nodes
+    if not secondary_nodes:
+      raise errors.ProgrammerError("no secondary node but using "
+                                   "drbd8 disk template")
+
+    target_node = secondary_nodes[0]
+    # check memory requirements on the secondary node
+    _CheckNodeFreeMemory(self.cfg, target_node, "migrating instance %s" %
+                         instance.name, instance.memory)
+
+    # check bridge existance
+    brlist = [nic.bridge for nic in instance.nics]
+    if not rpc.call_bridges_exist(target_node, brlist):
+      raise errors.OpPrereqError("One or more target bridges %s does not"
+                                 " exist on destination node '%s'" %
+                                 (brlist, target_node))
+
+    if not self.op.cleanup:
+      migratable = rpc.call_instance_migratable(instance.primary_node,
+                                                instance)
+      if not migratable:
+        raise errors.OpPrereqError("Can't contact node '%s'" %
+                                   instance.primary_node)
+      if not migratable[0]:
+        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
+                                   migratable[1])
+
+    self.instance = instance
+
+  def _WaitUntilSync(self):
+    """Poll with custom rpc for disk sync.
+
+    This uses our own step-based rpc call.
+
+    """
+    self.feedback_fn("* wait until resync is done")
+    all_done = False
+    while not all_done:
+      all_done = True
+      result = rpc.call_drbd_reconfig_net(self.all_nodes, self.instance.name,
+                                          self.instance.disks,
+                                          self.nodes_ip, False,
+                                          constants.DRBD_RECONF_RPC_WFSYNC)
+      min_percent = 100
+      for node in self.all_nodes:
+        if not result[node] or not result[node][0]:
+          raise errors.OpExecError("Cannot resync disks on node %s" % (node,))
+        node_done, node_percent = result[node][1]
+        all_done = all_done and node_done
+        if node_percent is not None:
+          min_percent = min(min_percent, node_percent)
+      if not all_done:
+        if min_percent < 100:
+          self.feedback_fn("   - progress: %.1f%%" % min_percent)
+        time.sleep(2)
+
+  def _EnsureSecondary(self, node):
+    """Demote a node to secondary.
+
+    """
+    self.feedback_fn("* switching node %s to secondary mode" % node)
+    result = rpc.call_drbd_reconfig_net([node], self.instance.name,
+                                        self.instance.disks,
+                                        self.nodes_ip, False,
+                                        constants.DRBD_RECONF_RPC_SECONDARY)
+    if not result[node] or not result[node][0]:
+      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
+                               " error %s" %
+                               (node, result[node][1]))
+
+  def _GoStandalone(self):
+    """Disconnect from the network.
+
+    """
+    self.feedback_fn("* changing into standalone mode")
+    result = rpc.call_drbd_reconfig_net(self.all_nodes, self.instance.name,
+                                        self.instance.disks,
+                                        self.nodes_ip, True,
+                                        constants.DRBD_RECONF_RPC_DISCONNECT)
+    for node in self.all_nodes:
+      if not result[node] or not result[node][0]:
+        raise errors.OpExecError("Cannot disconnect disks node %s,"
+                                 " error %s" % (node, result[node][1]))
+
+  def _GoReconnect(self, multimaster):
+    """Reconnect to the network.
+
+    """
+    if multimaster:
+      msg = "dual-master"
+    else:
+      msg = "single-master"
+    self.feedback_fn("* changing disks into %s mode" % msg)
+    result = rpc.call_drbd_reconfig_net(self.all_nodes, self.instance.name,
+                                        self.instance.disks,
+                                        self.nodes_ip,
+                                        multimaster,
+                                        constants.DRBD_RECONF_RPC_RECONNECT)
+    for node in self.all_nodes:
+      if not result[node] or not result[node][0]:
+        raise errors.OpExecError("Cannot change disks config on node %s,"
+                                 " error %s" % (node, result[node][1]))
+
+  def _IdentifyDisks(self):
+    """Start the migration RPC sequence.
+
+    """
+    self.feedback_fn("* identifying disks")
+    result = rpc.call_drbd_reconfig_net(self.all_nodes,
+                                        self.instance.name,
+                                        self.instance.disks,
+                                        self.nodes_ip, True,
+                                        constants.DRBD_RECONF_RPC_INIT)
+    for node in self.all_nodes:
+      if not result[node] or not result[node][0]:
+        raise errors.OpExecError("Cannot identify disks node %s,"
+                                 " error %s" % (node, result[node][1]))
+
+  def _ExecCleanup(self):
+    """Try to cleanup after a failed migration.
+
+    The cleanup is done by:
+      - check that the instance is running only on one node
+        (and update the config if needed)
+      - change disks on its secondary node to secondary
+      - wait until disks are fully synchronized
+      - disconnect from the network
+      - change disks into single-master mode
+      - wait again until disks are fully synchronized
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    source_node = self.source_node
+
+    # check running on only one node
+    self.feedback_fn("* checking where the instance actually runs"
+                     " (if this hangs, the hypervisor might be in"
+                     " a bad state)")
+    ins_l = rpc.call_instance_list(self.all_nodes)
+    for node in self.all_nodes:
+      if not type(ins_l[node]) is list:
+        raise errors.OpExecError("Can't contact node '%s'" % node)
+
+    runningon_source = instance.name in ins_l[source_node]
+    runningon_target = instance.name in ins_l[target_node]
+
+    if runningon_source and runningon_target:
+      raise errors.OpExecError("Instance seems to be running on two nodes,"
+                               " or the hypervisor is confused. You will have"
+                               " to ensure manually that it runs only on one"
+                               " and restart this operation.")
+
+    if not (runningon_source or runningon_target):
+      raise errors.OpExecError("Instance does not seem to be running at all."
+                               " In this case, it's safer to repair by"
+                               " running 'gnt-instance stop' to ensure disk"
+                               " shutdown, and then restarting it.")
+
+    if runningon_target:
+      # the migration has actually succeeded, we need to update the config
+      self.feedback_fn("* instance running on secondary node (%s),"
+                       " updating config" % target_node)
+      instance.primary_node = target_node
+      self.cfg.Update(instance)
+      demoted_node = source_node
+    else:
+      self.feedback_fn("* instance confirmed to be running on its"
+                       " primary node (%s)" % source_node)
+      demoted_node = target_node
+
+    self._IdentifyDisks()
+
+    self._EnsureSecondary(demoted_node)
+    self._WaitUntilSync()
+    self._GoStandalone()
+    self._GoReconnect(False)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* done")
+
+  def _ExecMigration(self):
+    """Migrate an instance.
+
+    The migrate is done by:
+      - change the disks into dual-master mode
+      - wait until disks are fully synchronized again
+      - migrate the instance
+      - change disks on the new secondary node (the old primary) to secondary
+      - wait until disks are fully synchronized
+      - change disks into single-master mode
+
+    """
+    instance = self.instance
+    target_node = self.target_node
+    source_node = self.source_node
+
+    self.feedback_fn("* checking disk consistency between source and target")
+    for dev in instance.disks:
+      if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
+        raise errors.OpExecError("Disk %s is degraded or not fully"
+                                 " synchronized on target node,"
+                                 " aborting migrate." % dev.iv_name)
+
+    self._IdentifyDisks()
+
+    self._EnsureSecondary(target_node)
+    self._GoStandalone()
+    self._GoReconnect(True)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* migrating instance to %s" % target_node)
+    time.sleep(10)
+    result = rpc.call_instance_migrate(source_node, instance,
+                                       self.nodes_ip[target_node],
+                                       self.op.live)
+    if not result or not result[0]:
+      logger.Error("Instance migration failed, trying to revert disk status")
+      try:
+        self._EnsureSecondary(target_node)
+        self._GoStandalone()
+        self._GoReconnect(False)
+        self._WaitUntilSync()
+      except errors.OpExecError, err:
+        logger.Error("Can't reconnect the drives: error '%s'\n"
+                     "Please look and recover the instance status" % str(err))
+
+      raise errors.OpExecError("Could not migrate instance %s: %s" %
+                               (instance.name, result[1]))
+    time.sleep(10)
+
+    instance.primary_node = target_node
+    # distribute new instance config to the other nodes
+    self.cfg.Update(instance)
+
+    self._EnsureSecondary(source_node)
+    self._WaitUntilSync()
+    self._GoStandalone()
+    self._GoReconnect(False)
+    self._WaitUntilSync()
+
+    self.feedback_fn("* done")
+
+  def Exec(self, feedback_fn):
+    """Perform the migration.
+
+    """
+    self.feedback_fn = feedback_fn
+
+    self.source_node = self.instance.primary_node
+    self.target_node = self.instance.secondary_nodes[0]
+    self.all_nodes = [self.source_node, self.target_node]
+    self.nodes_ip = {
+      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
+      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
+      }
+    if self.op.cleanup:
+      return self._ExecCleanup()
+    else:
+      return self._ExecMigration()
+
+
 def _CreateBlockDevOnPrimary(cfg, node, instance, device, info):
   """Create a tree of block devices on the primary node.
 
@@ -3157,7 +3512,7 @@ class LUCreateInstance(LogicalUnit):
   HTYPE = constants.HTYPE_INSTANCE
   _OP_REQP = ["instance_name", "mem_size", "disk_size",
               "disk_template", "swap_size", "mode", "start", "vcpus",
-              "wait_for_sync", "ip_check", "mac"]
+              "wait_for_sync", "ip_check", "mac", "auto_balance"]
 
   def _RunAllocator(self):
     """Run the allocator based on input opcode.
@@ -3188,7 +3543,8 @@ class LUCreateInstance(LogicalUnit):
     if len(ial.nodes) != ial.required_nodes:
       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                  " of nodes (%s), required %s" %
-                                 (len(ial.nodes), ial.required_nodes))
+                                 (self.op.iallocator, len(ial.nodes),
+                                  ial.required_nodes))
     self.op.pnode = ial.nodes[0]
     logger.ToStdout("Selected nodes for the instance: %s" %
                     (", ".join(ial.nodes),))
@@ -3236,7 +3592,7 @@ class LUCreateInstance(LogicalUnit):
     # set optional parameters to none if they don't exist
     for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode",
                  "iallocator", "hvm_acpi", "hvm_pae", "hvm_cdrom_image_path",
-                 "vnc_bind_address"]:
+                 "hvm_nic_type", "hvm_disk_type", "vnc_bind_address"]:
       if not hasattr(self.op, attr):
         setattr(self.op, attr, None)
 
@@ -3386,7 +3742,7 @@ class LUCreateInstance(LogicalUnit):
         info = nodeinfo.get(node, None)
         if not info:
           raise errors.OpPrereqError("Cannot get current information"
-                                     " from node '%s'" % nodeinfo)
+                                     " from node '%s'" % node)
         vg_free = info.get('vg_free', None)
         if not isinstance(vg_free, int):
           raise errors.OpPrereqError("Can't compute free disk space on"
@@ -3412,6 +3768,12 @@ class LUCreateInstance(LogicalUnit):
                                    " destination node '%s'" %
                                    (self.op.bridge, pnode.name))
 
+    # memory check on primary node
+    if self.op.start:
+      _CheckNodeFreeMemory(self.cfg, self.pnode.name,
+                           "creating instance %s" % self.op.instance_name,
+                           self.op.mem_size)
+
     # hvm_cdrom_image_path verification
     if self.op.hvm_cdrom_image_path is not None:
       if not os.path.isabs(self.op.hvm_cdrom_image_path):
@@ -3431,6 +3793,15 @@ class LUCreateInstance(LogicalUnit):
                                    " like a valid IP address" %
                                    self.op.vnc_bind_address)
 
+    # Xen HVM device type checks
+    if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
+      if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES:
+        raise errors.OpPrereqError("Invalid NIC type %s specified for Xen HVM"
+                                   " hypervisor" % self.op.hvm_nic_type)
+      if self.op.hvm_disk_type not in constants.HT_HVM_VALID_DISK_TYPES:
+        raise errors.OpPrereqError("Invalid disk type %s specified for Xen HVM"
+                                   " hypervisor" % self.op.hvm_disk_type)
+
     if self.op.start:
       self.instance_status = 'up'
     else:
@@ -3482,6 +3853,9 @@ class LUCreateInstance(LogicalUnit):
                             hvm_pae=self.op.hvm_pae,
                             hvm_cdrom_image_path=self.op.hvm_cdrom_image_path,
                             vnc_bind_address=self.op.vnc_bind_address,
+                            hvm_nic_type=self.op.hvm_nic_type,
+                            hvm_disk_type=self.op.hvm_disk_type,
+                            auto_balance=bool(self.op.auto_balance),
                             )
 
     feedback_fn("* creating instance disks...")
@@ -3876,7 +4250,7 @@ class LUReplaceDisks(LogicalUnit):
       if self.op.remote_node is not None:
         raise errors.OpPrereqError("Give either the iallocator or the new"
                                    " secondary, not both")
-      self.op.remote_node = self._RunAllocator()
+      self._RunAllocator()
 
     remote_node = self.op.remote_node
     if remote_node is not None:
@@ -4370,6 +4744,12 @@ class LUReplaceDisks(LogicalUnit):
 
     """
     instance = self.instance
+
+    # Activate the instance disks if we're replacing them on a down instance
+    if instance.status == "down":
+      op = opcodes.OpActivateInstanceDisks(instance_name=instance.name)
+      self.proc.ChainOpCode(op)
+
     if instance.disk_template == constants.DT_REMOTE_RAID1:
       fn = self._ExecRR1
     elif instance.disk_template == constants.DT_DRBD8:
@@ -4379,14 +4759,118 @@ class LUReplaceDisks(LogicalUnit):
         fn = self._ExecD8Secondary
     else:
       raise errors.ProgrammerError("Unhandled disk replacement case")
-    return fn(feedback_fn)
+
+    ret = fn(feedback_fn)
+
+    # Deactivate the instance disks if we're replacing them on a down instance
+    if instance.status == "down":
+      op = opcodes.OpDeactivateInstanceDisks(instance_name=instance.name)
+      self.proc.ChainOpCode(op)
+
+    return ret
+
+
+class LUGrowDisk(LogicalUnit):
+  """Grow a disk of an instance.
+
+  """
+  HPATH = "disk-grow"
+  HTYPE = constants.HTYPE_INSTANCE
+  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    This runs on the master, the primary and all the secondaries.
+
+    """
+    env = {
+      "DISK": self.op.disk,
+      "AMOUNT": self.op.amount,
+      }
+    env.update(_BuildInstanceHookEnvByObject(self.instance))
+    nl = [
+      self.sstore.GetMasterNode(),
+      self.instance.primary_node,
+      ]
+    return env, nl, nl
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This checks that the instance is in the cluster.
+
+    """
+    instance = self.cfg.GetInstanceInfo(
+      self.cfg.ExpandInstanceName(self.op.instance_name))
+    if instance is None:
+      raise errors.OpPrereqError("Instance '%s' not known" %
+                                 self.op.instance_name)
+
+    if self.op.amount <= 0:
+      raise errors.OpPrereqError("Invalid grow-by amount: %s" % self.op.amount)
+
+    self.instance = instance
+    self.op.instance_name = instance.name
+
+    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
+      raise errors.OpPrereqError("Instance's disk layout does not support"
+                                 " growing.")
+
+    self.disk = instance.FindDisk(self.op.disk)
+    if self.disk is None:
+      raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
+                                 (self.op.disk, instance.name))
+
+    nodenames = [instance.primary_node] + list(instance.secondary_nodes)
+    nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
+    for node in nodenames:
+      info = nodeinfo.get(node, None)
+      if not info:
+        raise errors.OpPrereqError("Cannot get current information"
+                                   " from node '%s'" % node)
+      vg_free = info.get('vg_free', None)
+      if not isinstance(vg_free, int):
+        raise errors.OpPrereqError("Can't compute free disk space on"
+                                   " node %s" % node)
+      if self.op.amount > info['vg_free']:
+        raise errors.OpPrereqError("Not enough disk space on target node %s:"
+                                   " %d MiB available, %d MiB required" %
+                                   (node, info['vg_free'], self.op.amount))
+      is_primary = (node == instance.primary_node)
+      if not _CheckDiskConsistency(self.cfg, self.disk, node, is_primary):
+        raise errors.OpPrereqError("Disk %s is degraded or not fully"
+                                   " synchronized on node %s,"
+                                   " aborting grow." % (self.op.disk, node))
+
+  def Exec(self, feedback_fn):
+    """Execute disk grow.
+
+    """
+    instance = self.instance
+    disk = self.disk
+    for node in (instance.secondary_nodes + (instance.primary_node,)):
+      self.cfg.SetDiskID(disk, node)
+      result = rpc.call_blockdev_grow(node, disk, self.op.amount)
+      if not result or not isinstance(result, tuple) or len(result) != 2:
+        raise errors.OpExecError("grow request failed to node %s" % node)
+      elif not result[0]:
+        raise errors.OpExecError("grow request failed to node %s: %s" %
+                                 (node, result[1]))
+    disk.RecordGrow(self.op.amount)
+    self.cfg.Update(instance)
+    if self.op.wait_for_sync:
+      disk_abort = not _WaitForSync(self.cfg, instance, self.proc)
+      if disk_abort:
+        logger.Error("Warning: disk sync-ing has not returned a good status.\n"
+                     " Please check the instance.")
 
 
 class LUQueryInstanceData(NoHooksLU):
   """Query runtime instance data.
 
   """
-  _OP_REQP = ["instances"]
+  _OP_REQP = ["instances", "static"]
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -4414,8 +4898,13 @@ class LUQueryInstanceData(NoHooksLU):
     """Compute block device status.
 
     """
-    self.cfg.SetDiskID(dev, instance.primary_node)
-    dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
+    static = self.op.static
+    if not static:
+      self.cfg.SetDiskID(dev, instance.primary_node)
+      dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
+    else:
+      dev_pstatus = None
+
     if dev.dev_type in constants.LDS_DRBD:
       # we change the snode then (otherwise we use the one passed in)
       if dev.logical_id[0] == instance.primary_node:
@@ -4423,7 +4912,7 @@ class LUQueryInstanceData(NoHooksLU):
       else:
         snode = dev.logical_id[0]
 
-    if snode:
+    if snode and not static:
       self.cfg.SetDiskID(dev, snode)
       dev_sstatus = rpc.call_blockdev_find(snode, dev)
    else:
@@ -4451,12 +4940,15 @@ class LUQueryInstanceData(NoHooksLU):
     """Gather and return data"""
     result = {}
     for instance in self.wanted_instances:
-      remote_info = rpc.call_instance_info(instance.primary_node,
-                                           instance.name)
-      if remote_info and "state" in remote_info:
-        remote_state = "up"
+      if not self.op.static:
+        remote_info = rpc.call_instance_info(instance.primary_node,
+                                             instance.name)
+        if remote_info and "state" in remote_info:
+          remote_state = "up"
+        else:
+          remote_state = "down"
       else:
-        remote_state = "down"
+        remote_state = None
       if instance.status == "down":
         config_state = "down"
       else:
@@ -4476,6 +4968,7 @@ class LUQueryInstanceData(NoHooksLU):
         "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
         "disks": disks,
         "vcpus": instance.vcpus,
+        "auto_balance": instance.auto_balance,
         }
 
       htkind = self.sstore.GetHypervisorType()
@@ -4488,9 +4981,28 @@ class LUQueryInstanceData(NoHooksLU):
         idict["hvm_acpi"] = instance.hvm_acpi
         idict["hvm_pae"] = instance.hvm_pae
         idict["hvm_cdrom_image_path"] = instance.hvm_cdrom_image_path
+        idict["hvm_nic_type"] = instance.hvm_nic_type
+        idict["hvm_disk_type"] = instance.hvm_disk_type
 
       if htkind in constants.HTS_REQ_PORT:
-        idict["vnc_bind_address"] = instance.vnc_bind_address
+        if instance.vnc_bind_address is None:
+          vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
+        else:
+          vnc_bind_address = instance.vnc_bind_address
+        if instance.network_port is None:
+          vnc_console_port = None
+        elif vnc_bind_address == constants.BIND_ADDRESS_GLOBAL:
+          vnc_console_port = "%s:%s" % (instance.primary_node,
+                                        instance.network_port)
+        elif vnc_bind_address == constants.LOCALHOST_IP_ADDRESS:
+          vnc_console_port = "%s:%s on node %s" % (vnc_bind_address,
+                                                   instance.network_port,
+                                                   instance.primary_node)
+        else:
+          vnc_console_port = "%s:%s" % (instance.vnc_bind_address,
+                                        instance.network_port)
+        idict["vnc_console_port"] = vnc_console_port
+        idict["vnc_bind_address"] = vnc_bind_address
         idict["network_port"] = instance.network_port
 
       result[instance.name] = idict
@@ -4552,12 +5064,19 @@ class LUSetInstanceParms(LogicalUnit):
     self.hvm_boot_order = getattr(self.op, "hvm_boot_order", None)
     self.hvm_acpi = getattr(self.op, "hvm_acpi", None)
     self.hvm_pae = getattr(self.op, "hvm_pae", None)
+    self.hvm_nic_type = getattr(self.op, "hvm_nic_type", None)
+    self.hvm_disk_type = getattr(self.op, "hvm_disk_type", None)
     self.hvm_cdrom_image_path = getattr(self.op, "hvm_cdrom_image_path", None)
     self.vnc_bind_address = getattr(self.op, "vnc_bind_address", None)
-    all_parms = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
-                 self.kernel_path, self.initrd_path, self.hvm_boot_order,
-                 self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path,
-                 self.vnc_bind_address]
+    self.force = getattr(self.op, "force", None)
+    self.auto_balance = getattr(self.op, "auto_balance", None)
+    all_parms = [
+      self.mem, self.vcpus, self.ip, self.bridge, self.mac,
+      self.kernel_path, self.initrd_path, self.hvm_boot_order,
+      self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path,
+      self.vnc_bind_address, self.hvm_nic_type, self.hvm_disk_type,
+      self.auto_balance,
+      ]
     if all_parms.count(None) == len(all_parms):
       raise errors.OpPrereqError("No changes submitted")
     if self.mem is not None:
@@ -4619,11 +5138,13 @@ class LUSetInstanceParms(LogicalUnit):
 
     # hvm_cdrom_image_path verification
     if self.op.hvm_cdrom_image_path is not None:
-      if not os.path.isabs(self.op.hvm_cdrom_image_path):
+      if not (os.path.isabs(self.op.hvm_cdrom_image_path) or
+              self.op.hvm_cdrom_image_path.lower() == "none"):
        raise errors.OpPrereqError("The path to the HVM CDROM image must"
                                   " be an absolute path or None, not %s" %
                                   self.op.hvm_cdrom_image_path)
-      if not os.path.isfile(self.op.hvm_cdrom_image_path):
+      if not (os.path.isfile(self.op.hvm_cdrom_image_path) or
+              self.op.hvm_cdrom_image_path.lower() == "none"):
        raise errors.OpPrereqError("The HVM CDROM image must either be a"
                                   " regular file or a symlink pointing to"
                                   " an existing regular file, not %s" %
@@ -4636,6 +5157,22 @@ class LUSetInstanceParms(LogicalUnit):
                                    " like a valid IP address" %
                                    self.op.vnc_bind_address)
 
+    # Xen HVM device type checks
+    if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
+      if self.op.hvm_nic_type is not None:
+        if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES:
+          raise errors.OpPrereqError("Invalid NIC type %s specified for Xen"
+                                     " HVM hypervisor" % self.op.hvm_nic_type)
+      if self.op.hvm_disk_type is not None:
+        if self.op.hvm_disk_type not in constants.HT_HVM_VALID_DISK_TYPES:
+          raise errors.OpPrereqError("Invalid disk type %s specified for Xen"
+                                     " HVM hypervisor" % self.op.hvm_disk_type)
+
+    # auto balance setting
+    if self.auto_balance is not None:
+      # convert the value to a proper bool value, if it's not
+      self.auto_balance = bool(self.auto_balance)
+
     instance = self.cfg.GetInstanceInfo(
       self.cfg.ExpandInstanceName(self.op.instance_name))
     if instance is None:
@@ -4643,6 +5180,39 @@ class LUSetInstanceParms(LogicalUnit):
                                  self.op.instance_name)
     self.op.instance_name = instance.name
     self.instance = instance
+    self.warn = []
+    if self.mem is not None and not self.force:
+      pnode = self.instance.primary_node
+      nodelist = [pnode]
+      if instance.auto_balance:
+        nodelist.extend(instance.secondary_nodes)
+      instance_info = rpc.call_instance_info(pnode, instance.name)
+      nodeinfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
+
+      if pnode not in nodeinfo or not isinstance(nodeinfo[pnode], dict):
+        # Assume the primary node is unreachable and go ahead
+        self.warn.append("Can't get info from primary node %s" % pnode)
+      else:
+        if instance_info:
+          current_mem = instance_info['memory']
+        else:
+          # Assume instance not running
+          # (there is a slight race condition here, but it's not very probable,
+          # and we have no other way to check)
+          current_mem = 0
+        miss_mem = self.mem - current_mem - nodeinfo[pnode]['memory_free']
+        if miss_mem > 0:
+          raise errors.OpPrereqError("This change will prevent the instance"
+                                     " from starting, due to %d MB of memory"
+                                     " missing on its primary node" % miss_mem)
+
+      if instance.auto_balance:
+        for node in instance.secondary_nodes:
+          if node not in nodeinfo or not isinstance(nodeinfo[node], dict):
+            self.warn.append("Can't get info from secondary node %s" % node)
+          elif self.mem > nodeinfo[node]['memory_free']:
+            self.warn.append("Not enough memory to failover instance to"
+                             " secondary node %s" % node)
     return
 
   def Exec(self, feedback_fn):
@@ -4650,6 +5220,11 @@ class LUSetInstanceParms(LogicalUnit):
     All parameters take effect only at the next restart of the instance.
 
     """
+    # Process here the warnings from CheckPrereq, as we don't have a
+    # feedback_fn there.
+    for warn in self.warn:
+      feedback_fn("WARNING: %s" % warn)
+
     result = []
     instance = self.instance
     if self.mem:
@@ -4679,18 +5254,30 @@ class LUSetInstanceParms(LogicalUnit):
       else:
         instance.hvm_boot_order = self.hvm_boot_order
         result.append(("hvm_boot_order", self.hvm_boot_order))
-    if self.hvm_acpi:
+    if self.hvm_acpi is not None:
       instance.hvm_acpi = self.hvm_acpi
       result.append(("hvm_acpi", self.hvm_acpi))
-    if self.hvm_pae:
+    if self.hvm_pae is not None:
      instance.hvm_pae = self.hvm_pae
      result.append(("hvm_pae", self.hvm_pae))
+    if self.hvm_nic_type is not None:
+      instance.hvm_nic_type = self.hvm_nic_type
+      result.append(("hvm_nic_type", self.hvm_nic_type))
+    if self.hvm_disk_type is not None:
+      instance.hvm_disk_type = self.hvm_disk_type
+      result.append(("hvm_disk_type", self.hvm_disk_type))
     if self.hvm_cdrom_image_path:
-      instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path
+      if self.hvm_cdrom_image_path == constants.VALUE_NONE:
+        instance.hvm_cdrom_image_path = None
+      else:
+        instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path
      result.append(("hvm_cdrom_image_path", self.hvm_cdrom_image_path))
     if self.vnc_bind_address:
       instance.vnc_bind_address = self.vnc_bind_address
       result.append(("vnc_bind_address", self.vnc_bind_address))
+    if self.auto_balance is not None:
+      instance.auto_balance = self.auto_balance
+      result.append(("auto_balance", self.auto_balance))
 
     self.cfg.AddInstance(instance)
 
@@ -5002,7 +5589,7 @@ class LUDelTags(TagsLU):
     """
     TagsLU.CheckPrereq(self)
     for tag in self.op.tags:
-      objects.TaggableObject.ValidateTag(tag)
+      objects.TaggableObject.ValidateTag(tag, removal=True)
     del_tags = frozenset(self.op.tags)
     cur_tags = self.target.GetTags()
     if not del_tags <= cur_tags: