Add --force-join option to gnt-node add
[ganeti-local] / lib / cmdlib.py
index f7bfc45..782768d 100644 (file)
@@ -1397,6 +1397,13 @@ class LUClusterVerify(LogicalUnit):
         _ErrorIf(test, self.ENODEHV, node,
                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
 
+    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
+    if ninfo.vm_capable and isinstance(hvp_result, list):
+      for item, hv_name, hv_result in hvp_result:
+        _ErrorIf(True, self.ENODEHV, node,
+                 "hypervisor %s parameter verify failure (source %s): %s",
+                 hv_name, item, hv_result)
+
     test = nresult.get(constants.NV_NODESETUP,
                            ["Missing NODESETUP results"])
     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
@@ -2029,6 +2036,21 @@ class LUClusterVerify(LogicalUnit):
 
     return instdisk
 
+  def _VerifyHVP(self, hvp_data):
+    """Verifies locally the syntax of the hypervisor parameters.
+
+    """
+    for item, hv_name, hv_params in hvp_data:
+      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
+             (item, hv_name))
+      try:
+        hv_class = hypervisor.GetHypervisor(hv_name)
+        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
+        hv_class.CheckParameterSyntax(hv_params)
+      except errors.GenericError, err:
+        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
+
+
   def BuildHooksEnv(self):
     """Build hooks env.
 
@@ -2094,12 +2116,32 @@ class LUClusterVerify(LogicalUnit):
 
     local_checksums = utils.FingerprintFiles(file_names)
 
+    # Compute the set of hypervisor parameters
+    hvp_data = []
+    for hv_name in hypervisors:
+      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
+    for os_name, os_hvp in cluster.os_hvp.items():
+      for hv_name, hv_params in os_hvp.items():
+        if not hv_params:
+          continue
+        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
+        hvp_data.append(("os %s" % os_name, hv_name, full_params))
+    # TODO: collapse identical parameter values in a single one
+    for instance in instanceinfo.values():
+      if not instance.hvparams:
+        continue
+      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
+                       cluster.FillHV(instance)))
+    # and verify them locally
+    self._VerifyHVP(hvp_data)
+
     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
     node_verify_param = {
       constants.NV_FILELIST: file_names,
       constants.NV_NODELIST: [node.name for node in nodeinfo
                               if not node.offline],
       constants.NV_HYPERVISOR: hypervisors,
+      constants.NV_HVPARAMS: hvp_data,
       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                   node.secondary_ip) for node in nodeinfo
                                  if not node.offline],
@@ -2405,15 +2447,13 @@ class LUClusterVerifyDisks(NoHooksLU):
     """
     result = res_nodes, res_instances, res_missing = {}, [], {}
 
-    nodes = utils.NiceSort(self.cfg.GetNodeList())
-    instances = [self.cfg.GetInstanceInfo(name)
-                 for name in self.cfg.GetInstanceList()]
+    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
+    instances = self.cfg.GetAllInstancesInfo().values()
 
     nv_dict = {}
     for inst in instances:
       inst_lvs = {}
-      if (not inst.admin_up or
-          inst.disk_template not in constants.DTS_NET_MIRROR):
+      if not inst.admin_up:
         continue
       inst.MapLVsByNode(inst_lvs)
       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
@@ -2424,13 +2464,8 @@ class LUClusterVerifyDisks(NoHooksLU):
     if not nv_dict:
       return result
 
-    vg_names = self.rpc.call_vg_list(nodes)
-    vg_names.Raise("Cannot get list of VGs")
-
-    for node in nodes:
-      # node_volume
-      node_res = self.rpc.call_lv_list([node],
-                                       vg_names[node].payload.keys())[node]
+    node_lvs = self.rpc.call_lv_list(nodes, [])
+    for node, node_res in node_lvs.items():
       if node_res.offline:
         continue
       msg = node_res.fail_msg
@@ -3191,31 +3226,33 @@ class LUOobCommand(NoHooksLU):
     Any errors are signaled by raising errors.OpPrereqError.
 
     """
-    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
-    node = self.cfg.GetNodeInfo(self.op.node_name)
-
-    if node is None:
-      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)
-
-    self.oob_program = _SupportsOob(self.cfg, node)
+    self.nodes = []
+    for node_name in self.op.node_names:
+      node = self.cfg.GetNodeInfo(node_name)
 
-    if not self.oob_program:
-      raise errors.OpPrereqError("OOB is not supported for node %s" %
-                                 self.op.node_name)
-
-    if self.op.command == constants.OOB_POWER_OFF and not node.offline:
-      raise errors.OpPrereqError(("Cannot power off node %s because it is"
-                                  " not marked offline") % self.op.node_name)
+      if node is None:
+        raise errors.OpPrereqError("Node %s not found" % node_name,
+                                   errors.ECODE_NOENT)
+      else:
+        self.nodes.append(node)
 
-    self.node = node
+      if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
+        raise errors.OpPrereqError(("Cannot power off node %s because it is"
+                                    " not marked offline") % node_name,
+                                   errors.ECODE_STATE)
 
   def ExpandNames(self):
     """Gather locks we need.
 
     """
-    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+    if self.op.node_names:
+      self.op.node_names = [_ExpandNodeName(self.cfg, name)
+                            for name in self.op.node_names]
+    else:
+      self.op.node_names = self.cfg.GetNodeList()
+
     self.needed_locks = {
-      locking.LEVEL_NODE: [node_name],
+      locking.LEVEL_NODE: self.op.node_names,
       }
 
   def Exec(self, feedback_fn):
@@ -3223,40 +3260,63 @@ class LUOobCommand(NoHooksLU):
 
     """
     master_node = self.cfg.GetMasterNode()
-    node = self.node
-
-    logging.info("Executing out-of-band command '%s' using '%s' on %s",
-                 self.op.command, self.oob_program, self.op.node_name)
-    result = self.rpc.call_run_oob(master_node, self.oob_program,
-                                   self.op.command, self.op.node_name,
-                                   self.op.timeout)
+    ret = []
 
-    result.Raise("An error occurred on execution of OOB helper")
+    for node in self.nodes:
+      node_entry = [(constants.RS_NORMAL, node.name)]
+      ret.append(node_entry)
 
-    self._CheckPayload(result)
+      oob_program = _SupportsOob(self.cfg, node)
 
-    if self.op.command == constants.OOB_HEALTH:
-      # For health we should log important events
-      for item, status in result.payload:
-        if status in [constants.OOB_STATUS_WARNING,
-                      constants.OOB_STATUS_CRITICAL]:
-          logging.warning("On node '%s' item '%s' has status '%s'",
-                          self.op.node_name, item, status)
-
-    if self.op.command == constants.OOB_POWER_ON:
-      node.powered = True
-    elif self.op.command == constants.OOB_POWER_OFF:
-      node.powered = False
-    elif self.op.command == constants.OOB_POWER_STATUS:
-      powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
-      if powered != self.node.powered:
-        logging.warning(("Recorded power state (%s) of node '%s' does not match"
-                         " actual power state (%s)"), node.powered,
-                        self.op.node_name, powered)
+      if not oob_program:
+        node_entry.append((constants.RS_UNAVAIL, None))
+        continue
 
-    self.cfg.Update(node, feedback_fn)
+      logging.info("Executing out-of-band command '%s' using '%s' on %s",
+                   self.op.command, oob_program, node.name)
+      result = self.rpc.call_run_oob(master_node, oob_program,
+                                     self.op.command, node.name,
+                                     self.op.timeout)
 
-    return result.payload
+      if result.fail_msg:
+        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
+                        node.name, result.fail_msg)
+        node_entry.append((constants.RS_NODATA, None))
+      else:
+        try:
+          self._CheckPayload(result)
+        except errors.OpExecError, err:
+          self.LogWarning("The payload returned by '%s' is not valid: %s",
+                          node.name, err)
+          node_entry.append((constants.RS_NODATA, None))
+        else:
+          if self.op.command == constants.OOB_HEALTH:
+            # For health we should log important events
+            for item, status in result.payload:
+              if status in [constants.OOB_STATUS_WARNING,
+                            constants.OOB_STATUS_CRITICAL]:
+                self.LogWarning("On node '%s' item '%s' has status '%s'",
+                                node.name, item, status)
+
+          if self.op.command == constants.OOB_POWER_ON:
+            node.powered = True
+          elif self.op.command == constants.OOB_POWER_OFF:
+            node.powered = False
+          elif self.op.command == constants.OOB_POWER_STATUS:
+            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
+            if powered != node.powered:
+              logging.warning(("Recorded power state (%s) of node '%s' does not"
+                               " match actual power state (%s)"), node.powered,
+                              node.name, powered)
+
+          # For configuration changing commands we should update the node
+          if self.op.command in (constants.OOB_POWER_ON,
+                                 constants.OOB_POWER_OFF):
+            self.cfg.Update(node, feedback_fn)
+
+          node_entry.append((constants.RS_NORMAL, result.payload))
+
+    return ret
 
   def _CheckPayload(self, result):
     """Checks if the payload is valid.
@@ -3270,10 +3330,11 @@ class LUOobCommand(NoHooksLU):
       if not isinstance(result.payload, list):
         errs.append("command 'health' is expected to return a list but got %s" %
                     type(result.payload))
-      for item, status in result.payload:
-        if status not in constants.OOB_STATUSES:
-          errs.append("health item '%s' has invalid status '%s'" %
-                      (item, status))
+      else:
+        for item, status in result.payload:
+          if status not in constants.OOB_STATUSES:
+            errs.append("health item '%s' has invalid status '%s'" %
+                        (item, status))
 
     if self.op.command == constants.OOB_POWER_STATUS:
       if not isinstance(result.payload, dict):
@@ -3368,7 +3429,9 @@ class LUOsDiagnose(NoHooksLU):
     """Compute the list of OSes.
 
     """
-    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
+    valid_nodes = [node.name
+                   for node in self.cfg.GetAllNodesInfo().values()
+                   if not node.offline and node.vm_capable]
     node_data = self.rpc.call_os_diagnose(valid_nodes)
     pol = self._DiagnoseByOS(node_data)
     output = []
@@ -3805,18 +3868,21 @@ class _InstanceQuery(_QueryBase):
     """Computes the list of instances and their attributes.
 
     """
+    cluster = lu.cfg.GetClusterInfo()
     all_info = lu.cfg.GetAllInstancesInfo()
 
     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
 
     instance_list = [all_info[name] for name in instance_names]
-    nodes = frozenset([inst.primary_node for inst in instance_list])
+    nodes = frozenset(itertools.chain(*(inst.all_nodes
+                                        for inst in instance_list)))
     hv_list = list(set([inst.hypervisor for inst in instance_list]))
     bad_nodes = []
     offline_nodes = []
+    wrongnode_inst = set()
 
     # Gather data as requested
-    if query.IQ_LIVE in self.requested_data:
+    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
       live_data = {}
       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
       for name in nodes:
@@ -3828,7 +3894,11 @@ class _InstanceQuery(_QueryBase):
         if result.fail_msg:
           bad_nodes.append(name)
         elif result.payload:
-          live_data.update(result.payload)
+          for inst in result.payload:
+            if all_info[inst].primary_node == name:
+              live_data.update(result.payload)
+            else:
+              wrongnode_inst.add(inst)
         # else no instance is alive
     else:
       live_data = {}
@@ -3842,9 +3912,21 @@ class _InstanceQuery(_QueryBase):
     else:
       disk_usage = None
 
+    if query.IQ_CONSOLE in self.requested_data:
+      consinfo = {}
+      for inst in instance_list:
+        if inst.name in live_data:
+          # Instance is running
+          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
+        else:
+          consinfo[inst.name] = None
+      assert set(consinfo.keys()) == set(instance_names)
+    else:
+      consinfo = None
+
     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                    disk_usage, offline_nodes, bad_nodes,
-                                   live_data)
+                                   live_data, wrongnode_inst, consinfo)
 
 
 class LUQuery(NoHooksLU):
@@ -4318,15 +4400,15 @@ class LUNodeSetParams(LogicalUnit):
                                    errors.ECODE_STATE)
 
     if node.master_candidate and self.might_demote and not self.lock_all:
-      assert not self.op.auto_promote, "auto-promote set but lock_all not"
+      assert not self.op.auto_promote, "auto_promote set but lock_all not"
       # check if after removing the current node, we're missing master
       # candidates
       (mc_remaining, mc_should, _) = \
           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
       if mc_remaining < mc_should:
         raise errors.OpPrereqError("Not enough master candidates, please"
-                                   " pass auto_promote to allow promotion",
-                                   errors.ECODE_STATE)
+                                   " pass auto promote option to allow"
+                                   " promotion", errors.ECODE_STATE)
 
     self.old_flags = old_flags = (node.master_candidate,
                                   node.drained, node.offline)
@@ -4577,6 +4659,8 @@ class LUClusterQuery(NoHooksLU):
       "reserved_lvs": cluster.reserved_lvs,
       "primary_ip_version": primary_ip_version,
       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
+      "hidden_os": cluster.hidden_os,
+      "blacklisted_os": cluster.blacklisted_os,
       }
 
     return result
@@ -4699,13 +4783,13 @@ def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
   # SyncSource, etc.)
 
   # 1st pass, assemble on all nodes in secondary mode
-  for inst_disk in disks:
+  for idx, inst_disk in enumerate(disks):
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
       if ignore_size:
         node_disk = node_disk.Copy()
         node_disk.UnsetSize()
       lu.cfg.SetDiskID(node_disk, node)
-      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
+      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
       msg = result.fail_msg
       if msg:
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
@@ -4717,7 +4801,7 @@ def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
   # FIXME: race condition on drbd migration to primary
 
   # 2nd pass, do only the primary node
-  for inst_disk in disks:
+  for idx, inst_disk in enumerate(disks):
     dev_path = None
 
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
@@ -4727,7 +4811,7 @@ def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
         node_disk = node_disk.Copy()
         node_disk.UnsetSize()
       lu.cfg.SetDiskID(node_disk, node)
-      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
+      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
       msg = result.fail_msg
       if msg:
         lu.proc.LogWarning("Could not prepare block device %s on node %s"
@@ -4793,7 +4877,10 @@ class LUInstanceDeactivateDisks(NoHooksLU):
 
     """
     instance = self.instance
-    _SafeShutdownInstanceDisks(self, instance)
+    if self.op.force:
+      _ShutdownInstanceDisks(self, instance)
+    else:
+      _SafeShutdownInstanceDisks(self, instance)
 
 
 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
@@ -5862,7 +5949,7 @@ class LUInstanceMove(LogicalUnit):
     for idx, disk in enumerate(instance.disks):
       self.LogInfo("Copying data for disk %d", idx)
       result = self.rpc.call_blockdev_assemble(target_node, disk,
-                                               instance.name, True)
+                                               instance.name, True, idx)
       if result.fail_msg:
         self.LogWarning("Can't assemble newly created disk %d: %s",
                         idx, result.fail_msg)
@@ -7444,12 +7531,11 @@ class LUInstanceCreate(LogicalUnit):
           raise errors.OpPrereqError("LV named %s used by another instance" %
                                      lv_name, errors.ECODE_NOTUNIQUE)
 
-      vg_names = self.rpc.call_vg_list([pnode.name])
+      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
 
       node_lvs = self.rpc.call_lv_list([pnode.name],
-                                       vg_names[pnode.name].payload.keys()
-                                      )[pnode.name]
+                                       vg_names.payload.keys())[pnode.name]
       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
       node_lvs = node_lvs.payload
 
@@ -7731,18 +7817,28 @@ class LUInstanceConsole(NoHooksLU):
 
     logging.debug("Connecting to console of %s on %s", instance.name, node)
 
-    hyper = hypervisor.GetHypervisor(instance.hypervisor)
-    cluster = self.cfg.GetClusterInfo()
-    # beparams and hvparams are passed separately, to avoid editing the
-    # instance and then saving the defaults in the instance itself.
-    hvparams = cluster.FillHV(instance)
-    beparams = cluster.FillBE(instance)
-    console = hyper.GetInstanceConsole(instance, hvparams, beparams)
+    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
+
+
+def _GetInstanceConsole(cluster, instance):
+  """Returns console information for an instance.
+
+  @type cluster: L{objects.Cluster}
+  @type instance: L{objects.Instance}
+  @rtype: dict
+
+  """
+  hyper = hypervisor.GetHypervisor(instance.hypervisor)
+  # beparams and hvparams are passed separately, to avoid editing the
+  # instance and then saving the defaults in the instance itself.
+  hvparams = cluster.FillHV(instance)
+  beparams = cluster.FillBE(instance)
+  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
 
-    assert console.instance == instance.name
-    assert console.Validate()
+  assert console.instance == instance.name
+  assert console.Validate()
 
-    return console.ToDict()
+  return console.ToDict()
 
 
 class LUInstanceReplaceDisks(LogicalUnit):
@@ -10286,9 +10382,9 @@ class LUGroupRemove(LogicalUnit):
 
     # Verify the cluster would not be left group-less.
     if len(self.cfg.GetNodeGroupList()) == 1:
-      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
-                                 " which cannot be left without at least one"
-                                 " group" % self.op.group_name,
+      raise errors.OpPrereqError("Group '%s' is the only group,"
+                                 " cannot be removed" %
+                                 self.op.group_name,
                                  errors.ECODE_STATE)
 
   def BuildHooksEnv(self):