Do not repeatedly call GetClusterInfo() in inner loop
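
The cluster configuration object does not change while the N+1 memory check
runs, so it can be fetched once before the loop and reused for every instance
instead of being re-read on each iteration. A minimal before/after sketch of
the pattern (simplified from the hunk below; instance_cfg and FillBE are the
names used in cmdlib.py):

    # Before: GetClusterInfo() is called for every instance in the inner loop.
    for instance in instances:
      bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])

    # After: read the cluster info once and reuse it inside the loop.
    cluster_info = self.cfg.GetClusterInfo()
    for instance in instances:
      bep = cluster_info.FillBE(instance_cfg[instance])
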
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index d5c4252..49f149d 100644
@@ -1254,8 +1254,8 @@ class LUClusterVerify(LogicalUnit):
     @ivar instances: a list of running instances (runtime)
     @ivar pinst: list of configured primary instances (config)
     @ivar sinst: list of configured secondary instances (config)
-    @ivar sbp: diction of {secondary-node: list of instances} of all peers
-        of this node (config)
+    @ivar sbp: dictionary of {primary-node: list of instances} for all
+        instances for which this node is secondary (config)
     @ivar mfree: free memory, as reported by hypervisor (runtime)
     @ivar dfree: free disk, as reported by the node (runtime)
     @ivar offline: the offline status (config)
@@ -1397,6 +1397,13 @@ class LUClusterVerify(LogicalUnit):
         _ErrorIf(test, self.ENODEHV, node,
                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
 
+    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
+    if ninfo.vm_capable and isinstance(hvp_result, list):
+      for item, hv_name, hv_result in hvp_result:
+        _ErrorIf(True, self.ENODEHV, node,
+                 "hypervisor %s parameter verify failure (source %s): %s",
+                 hv_name, item, hv_result)
+
     test = nresult.get(constants.NV_NODESETUP,
                            ["Missing NODESETUP results"])
     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
@@ -1606,6 +1613,7 @@ class LUClusterVerify(LogicalUnit):
     instances it was primary for.
 
     """
+    cluster_info = self.cfg.GetClusterInfo()
     for node, n_img in node_image.items():
       # This code checks that every node which is now listed as
       # secondary has enough memory to host all instances it is
@@ -1618,7 +1626,7 @@ class LUClusterVerify(LogicalUnit):
       for prinode, instances in n_img.sbp.items():
         needed_mem = 0
         for instance in instances:
-          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
+          bep = cluster_info.FillBE(instance_cfg[instance])
           if bep[constants.BE_AUTO_BALANCE]:
             needed_mem += bep[constants.BE_MEMORY]
         test = n_img.mfree < needed_mem
@@ -2029,6 +2037,21 @@ class LUClusterVerify(LogicalUnit):
 
     return instdisk
 
+  def _VerifyHVP(self, hvp_data):
+    """Verifies locally the syntax of the hypervisor parameters.
+
+    """
+    for item, hv_name, hv_params in hvp_data:
+      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
+             (hv_name, item))
+      try:
+        hv_class = hypervisor.GetHypervisor(hv_name)
+        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
+        hv_class.CheckParameterSyntax(hv_params)
+      except errors.GenericError, err:
+        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
+
+
   def BuildHooksEnv(self):
     """Build hooks env.
 
@@ -2094,12 +2117,32 @@ class LUClusterVerify(LogicalUnit):
 
     local_checksums = utils.FingerprintFiles(file_names)
 
+    # Compute the set of hypervisor parameters
+    hvp_data = []
+    for hv_name in hypervisors:
+      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
+    for os_name, os_hvp in cluster.os_hvp.items():
+      for hv_name, hv_params in os_hvp.items():
+        if not hv_params:
+          continue
+        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
+        hvp_data.append(("os %s" % os_name, hv_name, full_params))
+    # TODO: collapse identical parameter values into a single entry
+    for instance in instanceinfo.values():
+      if not instance.hvparams:
+        continue
+      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
+                       cluster.FillHV(instance)))
+    # and verify them locally
+    self._VerifyHVP(hvp_data)
+
     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
     node_verify_param = {
       constants.NV_FILELIST: file_names,
       constants.NV_NODELIST: [node.name for node in nodeinfo
                               if not node.offline],
       constants.NV_HYPERVISOR: hypervisors,
+      constants.NV_HVPARAMS: hvp_data,
       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                   node.secondary_ip) for node in nodeinfo
                                  if not node.offline],
@@ -2405,15 +2448,13 @@ class LUClusterVerifyDisks(NoHooksLU):
     """
     result = res_nodes, res_instances, res_missing = {}, [], {}
 
-    nodes = utils.NiceSort(self.cfg.GetNodeList())
-    instances = [self.cfg.GetInstanceInfo(name)
-                 for name in self.cfg.GetInstanceList()]
+    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
+    instances = self.cfg.GetAllInstancesInfo().values()
 
     nv_dict = {}
     for inst in instances:
       inst_lvs = {}
-      if (not inst.admin_up or
-          inst.disk_template not in constants.DTS_NET_MIRROR):
+      if not inst.admin_up:
         continue
       inst.MapLVsByNode(inst_lvs)
       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
@@ -2424,13 +2465,8 @@ class LUClusterVerifyDisks(NoHooksLU):
     if not nv_dict:
       return result
 
-    vg_names = self.rpc.call_vg_list(nodes)
-    vg_names.Raise("Cannot get list of VGs")
-
-    for node in nodes:
-      # node_volume
-      node_res = self.rpc.call_lv_list([node],
-                                       vg_names[node].payload.keys())[node]
+    node_lvs = self.rpc.call_lv_list(nodes, [])
+    for node, node_res in node_lvs.items():
       if node_res.offline:
         continue
       msg = node_res.fail_msg
@@ -3191,31 +3227,56 @@ class LUOobCommand(NoHooksLU):
     Any errors are signaled by raising errors.OpPrereqError.
 
     """
-    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
-    node = self.cfg.GetNodeInfo(self.op.node_name)
-
-    if node is None:
-      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)
+    self.nodes = []
+    master_node = self.cfg.GetMasterNode()
+    for node_name in self.op.node_names:
+      node = self.cfg.GetNodeInfo(node_name)
 
-    self.oob_program = _SupportsOob(self.cfg, node)
+      if node is None:
+        raise errors.OpPrereqError("Node %s not found" % node_name,
+                                   errors.ECODE_NOENT)
+      else:
+        self.nodes.append(node)
 
-    if not self.oob_program:
-      raise errors.OpPrereqError("OOB is not supported for node %s" %
-                                 self.op.node_name)
+      if (not self.op.ignore_status and
+          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
+        raise errors.OpPrereqError(("Cannot power off node %s because it is"
+                                    " not marked offline") % node_name,
+                                   errors.ECODE_STATE)
 
-    if self.op.command == constants.OOB_POWER_OFF and not node.offline:
-      raise errors.OpPrereqError(("Cannot power off node %s because it is"
-                                  " not marked offline") % self.op.node_name)
+    if self.op.command in (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE):
+      # This does two things: it checks whether the master is in the list and,
+      # if so and force_master is set, moves it to the end so it is done last
+      try:
+        self.op.node_names.remove(master_node)
+      except ValueError:
+        pass
+      else:
+        if self.op.force_master:
+          self.op.node_names.append(master_node)
+        else:
+          self.LogWarning("Master %s was skipped, use the force master"
+                          " option to operate on the master too",
+                          master_node)
+          if not self.op.node_names:
+            raise errors.OpPrereqError("No nodes left to operate on, aborting",
+                                       errors.ECODE_INVAL)
 
-    self.node = node
+      assert (master_node not in self.op.node_names or
+              self.op.node_names[-1] == master_node)
 
   def ExpandNames(self):
     """Gather locks we need.
 
     """
-    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+    if self.op.node_names:
+      self.op.node_names = [_ExpandNodeName(self.cfg, name)
+                            for name in self.op.node_names]
+    else:
+      self.op.node_names = self.cfg.GetNodeList()
+
     self.needed_locks = {
-      locking.LEVEL_NODE: [node_name],
+      locking.LEVEL_NODE: self.op.node_names,
       }
 
   def Exec(self, feedback_fn):
@@ -3223,40 +3284,63 @@ class LUOobCommand(NoHooksLU):
 
     """
     master_node = self.cfg.GetMasterNode()
-    node = self.node
-
-    logging.info("Executing out-of-band command '%s' using '%s' on %s",
-                 self.op.command, self.oob_program, self.op.node_name)
-    result = self.rpc.call_run_oob(master_node, self.oob_program,
-                                   self.op.command, self.op.node_name,
-                                   self.op.timeout)
+    ret = []
 
-    result.Raise("An error occurred on execution of OOB helper")
+    for node in self.nodes:
+      node_entry = [(constants.RS_NORMAL, node.name)]
+      ret.append(node_entry)
 
-    self._CheckPayload(result)
+      oob_program = _SupportsOob(self.cfg, node)
 
-    if self.op.command == constants.OOB_HEALTH:
-      # For health we should log important events
-      for item, status in result.payload:
-        if status in [constants.OOB_STATUS_WARNING,
-                      constants.OOB_STATUS_CRITICAL]:
-          logging.warning("On node '%s' item '%s' has status '%s'",
-                          self.op.node_name, item, status)
-
-    if self.op.command == constants.OOB_POWER_ON:
-      node.powered = True
-    elif self.op.command == constants.OOB_POWER_OFF:
-      node.powered = False
-    elif self.op.command == constants.OOB_POWER_STATUS:
-      powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
-      if powered != self.node.powered:
-        logging.warning(("Recorded power state (%s) of node '%s' does not match"
-                         " actual power state (%s)"), node.powered,
-                        self.op.node_name, powered)
+      if not oob_program:
+        node_entry.append((constants.RS_UNAVAIL, None))
+        continue
 
-    self.cfg.Update(node, feedback_fn)
+      logging.info("Executing out-of-band command '%s' using '%s' on %s",
+                   self.op.command, oob_program, node.name)
+      result = self.rpc.call_run_oob(master_node, oob_program,
+                                     self.op.command, node.name,
+                                     self.op.timeout)
 
-    return result.payload
+      if result.fail_msg:
+        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
+                        node.name, result.fail_msg)
+        node_entry.append((constants.RS_NODATA, None))
+      else:
+        try:
+          self._CheckPayload(result)
+        except errors.OpExecError, err:
+          self.LogWarning("The payload returned by '%s' is not valid: %s",
+                          node.name, err)
+          node_entry.append((constants.RS_NODATA, None))
+        else:
+          if self.op.command == constants.OOB_HEALTH:
+            # For health we should log important events
+            for item, status in result.payload:
+              if status in [constants.OOB_STATUS_WARNING,
+                            constants.OOB_STATUS_CRITICAL]:
+                self.LogWarning("On node '%s' item '%s' has status '%s'",
+                                node.name, item, status)
+
+          if self.op.command == constants.OOB_POWER_ON:
+            node.powered = True
+          elif self.op.command == constants.OOB_POWER_OFF:
+            node.powered = False
+          elif self.op.command == constants.OOB_POWER_STATUS:
+            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
+            if powered != node.powered:
+              logging.warning(("Recorded power state (%s) of node '%s' does not"
+                               " match actual power state (%s)"), node.powered,
+                              node.name, powered)
+
+          # For configuration changing commands we should update the node
+          if self.op.command in (constants.OOB_POWER_ON,
+                                 constants.OOB_POWER_OFF):
+            self.cfg.Update(node, feedback_fn)
+
+          node_entry.append((constants.RS_NORMAL, result.payload))
+
+    return ret
 
   def _CheckPayload(self, result):
     """Checks if the payload is valid.
@@ -3270,10 +3354,11 @@ class LUOobCommand(NoHooksLU):
       if not isinstance(result.payload, list):
         errs.append("command 'health' is expected to return a list but got %s" %
                     type(result.payload))
-      for item, status in result.payload:
-        if status not in constants.OOB_STATUSES:
-          errs.append("health item '%s' has invalid status '%s'" %
-                      (item, status))
+      else:
+        for item, status in result.payload:
+          if status not in constants.OOB_STATUSES:
+            errs.append("health item '%s' has invalid status '%s'" %
+                        (item, status))
 
     if self.op.command == constants.OOB_POWER_STATUS:
       if not isinstance(result.payload, dict):
@@ -3295,7 +3380,7 @@ class LUOobCommand(NoHooksLU):
 
 
 
-class LUDiagnoseOS(NoHooksLU):
+class LUOsDiagnose(NoHooksLU):
   """Logical unit for OS diagnose/query.
 
   """
@@ -3368,7 +3453,9 @@ class LUDiagnoseOS(NoHooksLU):
     """Compute the list of OSes.
 
     """
-    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
+    valid_nodes = [node.name
+                   for node in self.cfg.GetAllNodesInfo().values()
+                   if not node.offline and node.vm_capable]
     node_data = self.rpc.call_os_diagnose(valid_nodes)
     pol = self._DiagnoseByOS(node_data)
     output = []
@@ -3429,7 +3516,7 @@ class LUDiagnoseOS(NoHooksLU):
     return output
 
 
-class LURemoveNode(LogicalUnit):
+class LUNodeRemove(LogicalUnit):
   """Logical unit for removing a node.
 
   """
@@ -3598,7 +3685,7 @@ class _NodeQuery(_QueryBase):
                                oob_support, lu.cfg.GetClusterInfo())
 
 
-class LUQueryNodes(NoHooksLU):
+class LUNodeQuery(NoHooksLU):
   """Logical unit for querying nodes.
 
   """
@@ -3616,7 +3703,7 @@ class LUQueryNodes(NoHooksLU):
     return self.nq.OldStyleQuery(self)
 
 
-class LUQueryNodeVolumes(NoHooksLU):
+class LUNodeQueryvols(NoHooksLU):
   """Logical unit for getting volumes on node(s).
 
   """
@@ -3694,7 +3781,7 @@ class LUQueryNodeVolumes(NoHooksLU):
     return output
 
 
-class LUQueryNodeStorage(NoHooksLU):
+class LUNodeQueryStorage(NoHooksLU):
   """Logical unit for getting information on storage units on node(s).
 
   """
@@ -4191,7 +4278,7 @@ class LUNodeAdd(LogicalUnit):
       self.context.AddNode(new_node, self.proc.GetECId())
 
 
-class LUSetNodeParams(LogicalUnit):
+class LUNodeSetParams(LogicalUnit):
   """Modifies the parameters of a node.
 
   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
@@ -4318,15 +4405,15 @@ class LUSetNodeParams(LogicalUnit):
                                    errors.ECODE_STATE)
 
     if node.master_candidate and self.might_demote and not self.lock_all:
-      assert not self.op.auto_promote, "auto-promote set but lock_all not"
+      assert not self.op.auto_promote, "auto_promote set but lock_all not"
       # check if after removing the current node, we're missing master
       # candidates
       (mc_remaining, mc_should, _) = \
           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
       if mc_remaining < mc_should:
         raise errors.OpPrereqError("Not enough master candidates, please"
-                                   " pass auto_promote to allow promotion",
-                                   errors.ECODE_STATE)
+                                   " pass auto promote option to allow"
+                                   " promotion", errors.ECODE_STATE)
 
     self.old_flags = old_flags = (node.master_candidate,
                                   node.drained, node.offline)
@@ -4484,7 +4571,7 @@ class LUSetNodeParams(LogicalUnit):
     return result
 
 
-class LUPowercycleNode(NoHooksLU):
+class LUNodePowercycle(NoHooksLU):
   """Powercycles a node.
 
   """
@@ -4577,6 +4664,8 @@ class LUClusterQuery(NoHooksLU):
       "reserved_lvs": cluster.reserved_lvs,
       "primary_ip_version": primary_ip_version,
       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
+      "hidden_os": cluster.hidden_os,
+      "blacklisted_os": cluster.blacklisted_os,
       }
 
     return result
@@ -4793,7 +4882,10 @@ class LUInstanceDeactivateDisks(NoHooksLU):
 
     """
     instance = self.instance
-    _SafeShutdownInstanceDisks(self, instance)
+    if self.op.force:
+      _ShutdownInstanceDisks(self, instance)
+    else:
+      _SafeShutdownInstanceDisks(self, instance)
 
 
 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
@@ -7444,12 +7536,11 @@ class LUInstanceCreate(LogicalUnit):
           raise errors.OpPrereqError("LV named %s used by another instance" %
                                      lv_name, errors.ECODE_NOTUNIQUE)
 
-      vg_names = self.rpc.call_vg_list([pnode.name])
+      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
 
       node_lvs = self.rpc.call_lv_list([pnode.name],
-                                       vg_names[pnode.name].payload.keys()
-                                      )[pnode.name]
+                                       vg_names.payload.keys())[pnode.name]
       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
       node_lvs = node_lvs.payload
 
@@ -9253,7 +9344,7 @@ class LUInstanceSetParams(LogicalUnit):
         _CheckInstanceDown(self, instance, "cannot remove disks")
 
       if (disk_op == constants.DDM_ADD and
-          len(instance.nics) >= constants.MAX_DISKS):
+          len(instance.disks) >= constants.MAX_DISKS):
         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                    " add more" % constants.MAX_DISKS,
                                    errors.ECODE_STATE)
@@ -10414,7 +10505,7 @@ class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
                                  str(self.op.kind), errors.ECODE_INVAL)
 
 
-class LUGetTags(TagsLU):
+class LUTagsGet(TagsLU):
   """Returns the tags of a given object.
 
   """
@@ -10433,7 +10524,7 @@ class LUGetTags(TagsLU):
     return list(self.target.GetTags())
 
 
-class LUSearchTags(NoHooksLU):
+class LUTagsSearch(NoHooksLU):
   """Searches the tags for a given pattern.
 
   """
@@ -10472,7 +10563,7 @@ class LUSearchTags(NoHooksLU):
     return results
 
 
-class LUAddTags(TagsLU):
+class LUTagsSet(TagsLU):
   """Sets a tag on a given object.
 
   """
@@ -10500,7 +10591,7 @@ class LUAddTags(TagsLU):
     self.cfg.Update(self.target, feedback_fn)
 
 
-class LUDelTags(TagsLU):
+class LUTagsDel(TagsLU):
   """Delete a list of tags from a given object.
 
   """
@@ -10582,7 +10673,7 @@ class LUTestDelay(NoHooksLU):
         self._TestDelay()
 
 
-class LUTestJobqueue(NoHooksLU):
+class LUTestJqueue(NoHooksLU):
   """Utility LU to test some aspects of the job queue.
 
   """