cluster-verify checks uniformity of PV sizes
diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 74abf21..f13b1c1 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -2093,7 +2093,7 @@ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
              (item, hv_name))
       try:
-        hv_class = hypervisor.GetHypervisor(hv_name)
+        hv_class = hypervisor.GetHypervisorClass(hv_name)
         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
         hv_class.CheckParameterSyntax(hv_params)
       except errors.GenericError, err:
@@ -2213,6 +2213,10 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
     @type vm_capable: boolean
     @ivar vm_capable: whether the node can host instances
+    @type pv_min: float
+    @ivar pv_min: size in MiB of the smallest PV
+    @type pv_max: float
+    @ivar pv_max: size in MiB of the biggest PV
 
     """
     def __init__(self, offline=False, name=None, vm_capable=True):
@@ -2232,6 +2236,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       self.ghost = False
       self.os_fail = False
       self.oslist = {}
+      self.pv_min = None
+      self.pv_max = None
 
   def ExpandNames(self):
     # This raises errors.OpPrereqError on its own:
@@ -2433,13 +2439,15 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
              "Node time diverges by at least %s from master node time",
              ntime_diff)
 
-  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
-    """Check the node LVM results.
+  def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
+    """Check the node LVM results and update info for cross-node checks.
 
     @type ninfo: L{objects.Node}
     @param ninfo: the node to check
     @param nresult: the remote results for the node
     @param vg_name: the configured VG name
+    @type nimg: L{NodeImage}
+    @param nimg: node image
 
     """
     if vg_name is None:
@@ -2471,6 +2479,42 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         _ErrorIf(test, constants.CV_ENODELVM, node,
                  "Invalid character ':' in PV '%s' of VG '%s'",
                  pv.name, pv.vg_name)
+      if self._exclusive_storage:
+        (errmsgs, (pvmin, pvmax)) = utils.LvmExclusiveCheckNodePvs(pvlist)
+        for msg in errmsgs:
+          self._Error(constants.CV_ENODELVM, node, msg)
+        nimg.pv_min = pvmin
+        nimg.pv_max = pvmax
+
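For reference, the helper called above is expected to return the per-node
error messages together with the extreme PV sizes in a single pass. A
minimal standalone sketch of that contract (check_node_pvs and the 10%
margin are illustrative assumptions, not the actual
utils.LvmExclusiveCheckNodePvs implementation):

    def check_node_pvs(pv_sizes, margin=0.1):
        """Return (errmsgs, (pv_min, pv_max)) for one node's PV sizes (MiB).

        Only the extremes are computed here; the cross-node comparison
        happens later, at group level.
        """
        errmsgs = []
        if not pv_sizes:
            return (["no PVs found"], (None, None))
        pv_min, pv_max = min(pv_sizes), max(pv_sizes)
        if pv_max > pv_min * (1 + margin):
            # The node's own PVs already diverge beyond the margin
            errmsgs.append("PV sizes differ too much: %s MiB vs %s MiB" %
                           (pv_min, pv_max))
        return (errmsgs, (pv_min, pv_max))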
+  def _VerifyGroupLVM(self, node_image, vg_name):
+    """Check cross-node consistency in LVM.
+
+    @type node_image: dict
+    @param node_image: info about nodes, mapping from node names to
+      L{NodeImage} objects
+    @param vg_name: the configured VG name
+
+    """
+    if vg_name is None:
+      return
+
+    # Only exclusive storage needs this kind of check
+    if not self._exclusive_storage:
+      return
+
+    # exclusive_storage wants all PVs to have the same size (approximately);
+    # if the smallest and the biggest ones are okay, everything is fine.
+    # pv_min is None iff pv_max is None
+    vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
+    if not vals:
+      return
+    (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
+    (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
+    bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
+    self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
+                  "PV sizes differ too much in the group; smallest (%s MB) is"
+                  " on %s, biggest (%s MB) is on %s",
+                  pvmin, minnode, pvmax, maxnode)
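Cross-node, only the global extremes matter: if the smallest and the
biggest PV in the whole group are close enough, every pair in between is
too. A sketch of that reduction, with NodeData standing in for the
NodeImage objects:

    class NodeData(object):
        def __init__(self, name, pv_min, pv_max):
            self.name, self.pv_min, self.pv_max = name, pv_min, pv_max

    def group_pv_extremes(node_images):
        """Return ((pv_min, node), (pv_max, node)), or None without data."""
        # pv_min is None iff pv_max is None (LVM data unavailable)
        vals = [ni for ni in node_images.values() if ni.pv_min is not None]
        if not vals:
            return None
        return (min((ni.pv_min, ni.name) for ni in vals),
                max((ni.pv_max, ni.name) for ni in vals))

    nodes = {"n1": NodeData("n1", 10240, 10240),
             "n2": NodeData("n2", 10240, 20480)}
    # ((10240, 'n1'), (20480, 'n2')) -> would be flagged as too different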
 
   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
     """Check the node bridges.
@@ -3371,6 +3415,23 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
           nimg.sbp[pnode] = []
         nimg.sbp[pnode].append(instance)
 
+    es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
+    es_unset_nodes = []
+    # The value of exclusive_storage should be the same across the group, so if
+    # it's True for at least one node, we act as if it were set for all nodes
+    self._exclusive_storage = compat.any(es_flags.values())
+    if self._exclusive_storage:
+      es_unset_nodes = [n for (n, es) in es_flags.items()
+                        if not es]
+
+    if es_unset_nodes:
+      self._Error(constants.CV_EGROUPMIXEDESFLAG, self.group_info.name,
+                  "The exclusive_storage flag should be uniform in a group,"
+                  " but these nodes have it unset: %s",
+                  utils.CommaJoin(utils.NiceSort(es_unset_nodes)))
+      self.LogWarning("Some checks required by exclusive storage will also"
+                      " be performed on nodes with the flag unset")
+
     # At this point, we have the in-memory data structures complete,
     # except for the runtime information, which we'll gather next
 
@@ -3474,7 +3535,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
                                    node == master_node)
 
       if nimg.vm_capable:
-        self._VerifyNodeLVM(node_i, nresult, vg_name)
+        self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                              all_drbd_map)
 
@@ -3501,6 +3562,8 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                    "node is running unknown instance %s", inst)
 
+    self._VerifyGroupLVM(node_image, vg_name)
+
     for node, result in extra_lv_nvinfo.items():
       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                               node_image[node], vg_name)
@@ -4264,7 +4327,10 @@ class LUClusterSetParams(LogicalUnit):
           self.new_os_hvp[os_name] = hvs
         else:
           for hv_name, hv_dict in hvs.items():
-            if hv_name not in self.new_os_hvp[os_name]:
+            if hv_dict is None:
+              # Delete if it exists
+              self.new_os_hvp[os_name].pop(hv_name, None)
+            elif hv_name not in self.new_os_hvp[os_name]:
               self.new_os_hvp[os_name][hv_name] = hv_dict
             else:
               self.new_os_hvp[os_name][hv_name].update(hv_dict)
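The merge rule for per-OS hypervisor parameters is now three-way: a None
value deletes the entry, an unknown hypervisor name inserts it, and an
existing one is updated in place. The same logic as a standalone function
(merge_os_hvp is an illustrative name):

    def merge_os_hvp(current, changes):
        """Merge per-OS hypervisor parameter changes into current, in place."""
        for os_name, hvs in changes.items():
            if os_name not in current:
                current[os_name] = hvs
                continue
            for hv_name, hv_dict in hvs.items():
                if hv_dict is None:
                    current[os_name].pop(hv_name, None)    # delete if present
                elif hv_name not in current[os_name]:
                    current[os_name][hv_name] = hv_dict    # new hypervisor
                else:
                    current[os_name][hv_name].update(hv_dict)  # partial update
        return current

    hvp = {"debian": {"kvm": {"acpi": True}}}
    merge_os_hvp(hvp, {"debian": {"kvm": None}})
    # hvp == {'debian': {}}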
@@ -4310,7 +4376,7 @@ class LUClusterSetParams(LogicalUnit):
             (self.op.enabled_hypervisors and
              hv_name in self.op.enabled_hypervisors)):
           # either this is a new hypervisor, or its parameters have changed
-          hv_class = hypervisor.GetHypervisor(hv_name)
+          hv_class = hypervisor.GetHypervisorClass(hv_name)
           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
           hv_class.CheckParameterSyntax(hv_params)
           _CheckHVParams(self, node_list, hv_name, hv_params)
@@ -4324,7 +4390,7 @@ class LUClusterSetParams(LogicalUnit):
           # we need to fill in the new os_hvp on top of the actual hv_p
           cluster_defaults = self.new_hvparams.get(hv_name, {})
           new_osp = objects.FillDict(cluster_defaults, hv_params)
-          hv_class = hypervisor.GetHypervisor(hv_name)
+          hv_class = hypervisor.GetHypervisorClass(hv_name)
           hv_class.CheckParameterSyntax(new_osp)
           _CheckHVParams(self, node_list, hv_name, new_osp)
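objects.FillDict layers the OS-specific parameters on top of the cluster
defaults, so keys absent from the override keep their cluster-level value.
Ignoring FillDict's optional key-skipping, the effect is a copy-then-update
(the parameter values below are made up):

    def fill_dict(defaults, custom):
        """Return a copy of defaults, shallowly overridden by custom."""
        filled = defaults.copy()
        filled.update(custom)
        return filled

    cluster_defaults = {"kernel_path": "/boot/vmlinuz",
                        "root_path": "/dev/vda1"}
    new_osp = fill_dict(cluster_defaults, {"root_path": "/dev/vda2"})
    # {'kernel_path': '/boot/vmlinuz', 'root_path': '/dev/vda2'}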
 
@@ -4546,12 +4612,14 @@ def _ComputeAncillaryFiles(cluster, redist):
   files_vm = set(
     filename
     for hv_name in cluster.enabled_hypervisors
-    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
+    for filename in
+      hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
 
   files_opt |= set(
     filename
     for hv_name in cluster.enabled_hypervisors
-    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
+    for filename in
+      hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
 
   # Filenames in each category must be unique
   all_files_set = files_all | files_mc | files_vm
@@ -5158,6 +5226,159 @@ class LUOsDiagnose(NoHooksLU):
     return self.oq.OldStyleQuery(self)
 
 
+class _ExtStorageQuery(_QueryBase):
+  FIELDS = query.EXTSTORAGE_FIELDS
+
+  def ExpandNames(self, lu):
+    # Lock all nodes in shared mode
+    # Temporary removal of locks, should be reverted later
+    # TODO: reintroduce locks when they are lighter-weight
+    lu.needed_locks = {}
+    #self.share_locks[locking.LEVEL_NODE] = 1
+    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+
+    # The following variables interact with _QueryBase._GetNames
+    if self.names:
+      self.wanted = self.names
+    else:
+      self.wanted = locking.ALL_SET
+
+    self.do_locking = self.use_locking
+
+  def DeclareLocks(self, lu, level):
+    pass
+
+  @staticmethod
+  def _DiagnoseByProvider(rlist):
+    """Remaps a per-node return list into an a per-provider per-node dictionary
+
+    @param rlist: a map with node names as keys and ExtStorage objects as values
+
+    @rtype: dict
+    @return: a dictionary with extstorage providers as keys and as
+        value another map, with nodes as keys and tuples of
+        (path, status, diagnose, parameters) as values, eg::
+
+          {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
+                         "node2": [(/srv/..., False, "missing file")]
+                         "node3": [(/srv/..., True, "", [])]
+          }
+
+    """
+    all_es = {}
+    # we build here the list of nodes that didn't fail the RPC (at RPC
+    # level), so that nodes with a non-responding node daemon don't
+    # make all providers invalid
+    good_nodes = [node_name for node_name in rlist
+                  if not rlist[node_name].fail_msg]
+    for node_name, nr in rlist.items():
+      if nr.fail_msg or not nr.payload:
+        continue
+      for (name, path, status, diagnose, params) in nr.payload:
+        if name not in all_es:
+          # build a list of nodes for this provider containing empty lists
+          # for each node in node_list
+          all_es[name] = {}
+          for nname in good_nodes:
+            all_es[name][nname] = []
+        # convert params from [name, help] to (name, help)
+        params = [tuple(v) for v in params]
+        all_es[name][node_name].append((path, status, diagnose, params))
+    return all_es
+
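The remap turns a {node: [provider records]} RPC result into
{provider: {node: [records]}}, pre-seeding every responding node with an
empty list so that a provider missing from a node still shows up as such.
A toy run, with plain lists standing in for the RPC result objects:

    rlist = {
        "node1": [("prov1", "/usr/lib/prov1", True, "", [["size", "help"]])],
        "node2": [],   # responded, but no provider installed
    }
    all_es = {}
    good_nodes = list(rlist)            # every node answered in this example
    for node_name, payload in rlist.items():
        for (name, path, status, diagnose, params) in payload:
            if name not in all_es:
                all_es[name] = dict((n, []) for n in good_nodes)
            params = [tuple(v) for v in params]  # [name, help] -> (name, help)
            all_es[name][node_name].append((path, status, diagnose, params))
    # all_es["prov1"]["node2"] == [] -> visible as missing on node2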
+  def _GetQueryData(self, lu):
+    """Computes the list of nodes and their attributes.
+
+    """
+    # Locking is not used
+    assert not (compat.any(lu.glm.is_owned(level)
+                           for level in locking.LEVELS
+                           if level != locking.LEVEL_CLUSTER) or
+                self.do_locking or self.use_locking)
+
+    valid_nodes = [node.name
+                   for node in lu.cfg.GetAllNodesInfo().values()
+                   if not node.offline and node.vm_capable]
+    pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
+
+    data = {}
+
+    nodegroup_list = lu.cfg.GetNodeGroupList()
+
+    for (es_name, es_data) in pol.items():
+      # For every provider compute the nodegroup validity.
+      # To do this we need to check the validity of each node in es_data
+      # and then construct the corresponding nodegroup dict:
+      #      { nodegroup1: status
+      #        nodegroup2: status
+      #      }
+      ndgrp_data = {}
+      for nodegroup in nodegroup_list:
+        ndgrp = lu.cfg.GetNodeGroup(nodegroup)
+
+        nodegroup_nodes = ndgrp.members
+        nodegroup_name = ndgrp.name
+        node_statuses = []
+
+        for node in nodegroup_nodes:
+          if node in valid_nodes:
+            if es_data[node] != []:
+              node_status = es_data[node][0][1]
+              node_statuses.append(node_status)
+            else:
+              node_statuses.append(False)
+
+        if False in node_statuses:
+          ndgrp_data[nodegroup_name] = False
+        else:
+          ndgrp_data[nodegroup_name] = True
+
+      # Compute the provider's parameters
+      parameters = set()
+      for idx, esl in enumerate(es_data.values()):
+        valid = bool(esl and esl[0][1])
+        if not valid:
+          break
+
+        node_params = esl[0][3]
+        if idx == 0:
+          # First entry
+          parameters.update(node_params)
+        else:
+          # Filter out inconsistent values
+          parameters.intersection_update(node_params)
+
+      params = list(parameters)
+
+      # Now fill all the info for this provider
+      info = query.ExtStorageInfo(name=es_name, node_status=es_data,
+                                  nodegroup_status=ndgrp_data,
+                                  parameters=params)
+
+      data[es_name] = info
+
+    # Prepare data in requested order
+    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
+            if name in data]
+
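A provider's parameters are only reported when all valid nodes agree on
them: the first node seeds the set, every further node intersects it, and
any inconsistent entry disappears. In isolation:

    node_params = [
        {("size", "volume size"), ("access", "access mode")},   # node1
        {("size", "volume size")},                              # node2
    ]
    parameters = set()
    for idx, params in enumerate(node_params):
        if idx == 0:
            parameters.update(params)               # first entry seeds the set
        else:
            parameters.intersection_update(params)  # drop inconsistent values
    # parameters == {('size', 'volume size')}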
+
+class LUExtStorageDiagnose(NoHooksLU):
+  """Logical unit for ExtStorage diagnose/query.
+
+  """
+  REQ_BGL = False
+
+  def CheckArguments(self):
+    self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
+                               self.op.output_fields, False)
+
+  def ExpandNames(self):
+    self.eq.ExpandNames(self)
+
+  def Exec(self, feedback_fn):
+    return self.eq.OldStyleQuery(self)
+
+
 class LUNodeRemove(LogicalUnit):
   """Logical unit for removing a node.
 
@@ -7004,7 +7225,7 @@ class LUInstanceStartup(LogicalUnit):
       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
       filled_hvp = cluster.FillHV(instance)
       filled_hvp.update(self.op.hvparams)
-      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
+      hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
       hv_type.CheckParameterSyntax(filled_hvp)
       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
 
@@ -7382,7 +7603,8 @@ class LUInstanceRecreateDisks(LogicalUnit):
                                         disks=[{constants.IDISK_SIZE: d.size,
                                                 constants.IDISK_MODE: d.mode}
                                                 for d in self.instance.disks],
-                                        hypervisor=self.instance.hypervisor)
+                                        hypervisor=self.instance.hypervisor,
+                                        node_whitelist=None)
     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
 
     ial.Run(self.op.iallocator)
@@ -10342,7 +10564,7 @@ class LUInstanceCreate(LogicalUnit):
     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                       self.op.hvparams)
-    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
+    hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
     hv_type.CheckParameterSyntax(filled_hvp)
     self.hv_full = filled_hvp
     # check that we don't specify global parameters on an instance
@@ -11112,7 +11334,7 @@ def _GetInstanceConsole(cluster, instance):
   @rtype: dict
 
   """
-  hyper = hypervisor.GetHypervisor(instance.hypervisor)
+  hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
   # beparams and hvparams are passed separately, to avoid editing the
   # instance and then saving the defaults in the instance itself.
   hvparams = cluster.FillHV(instance)
@@ -13411,7 +13633,7 @@ class LUInstanceSetParams(LogicalUnit):
       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
 
       # local check
-      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
+      hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
       self.hv_proposed = self.hv_new = hv_new # the new actual values
       self.hv_inst = i_hvdict # the new dict (without defaults)
@@ -16264,23 +16486,19 @@ class _NetworkQuery(_QueryBase):
 
   def ExpandNames(self, lu):
     lu.needed_locks = {}
+    lu.share_locks = _ShareAll()
+
+    self.do_locking = self.use_locking
 
-    self._all_networks = lu.cfg.GetAllNetworksInfo()
-    name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
+    all_networks = lu.cfg.GetAllNetworksInfo()
+    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
 
-    if not self.names:
-      self.wanted = [name_to_uuid[name]
-                     for name in utils.NiceSort(name_to_uuid.keys())]
-    else:
-      # Accept names to be either names or UUIDs.
+    if self.names:
       missing = []
       self.wanted = []
-      all_uuid = frozenset(self._all_networks.keys())
 
       for name in self.names:
-        if name in all_uuid:
-          self.wanted.append(name)
-        elif name in name_to_uuid:
+        if name in name_to_uuid:
           self.wanted.append(name_to_uuid[name])
         else:
           missing.append(name)
@@ -16288,6 +16506,15 @@ class _NetworkQuery(_QueryBase):
       if missing:
         raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                    errors.ECODE_NOENT)
+    else:
+      self.wanted = locking.ALL_SET
+
+    if self.do_locking:
+      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
+      if query.NETQ_INST in self.requested_data:
+        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
+      if query.NETQ_GROUP in self.requested_data:
+        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
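Name resolution now happens at ExpandNames time, so unknown networks fail
early and the lock set can be expressed directly in UUIDs. The core of it,
with ValueError standing in for errors.OpPrereqError:

    def resolve_networks(names, name_to_uuid):
        """Map network names to UUIDs, rejecting unknown names."""
        wanted, missing = [], []
        for name in names:
            if name in name_to_uuid:
                wanted.append(name_to_uuid[name])
            else:
                missing.append(name)
        if missing:
            raise ValueError("Some networks do not exist: %s" % missing)
        return wanted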
 
   def DeclareLocks(self, lu, level):
     pass
@@ -16296,29 +16523,25 @@ class _NetworkQuery(_QueryBase):
     """Computes the list of networks and their attributes.
 
     """
+    all_networks = lu.cfg.GetAllNetworksInfo()
+
+    network_uuids = self._GetNames(lu, all_networks.keys(),
+                                   locking.LEVEL_NETWORK)
+
+    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
+
     do_instances = query.NETQ_INST in self.requested_data
-    do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
+    do_groups = query.NETQ_GROUP in self.requested_data
 
     network_to_instances = None
+    network_to_groups = None
 
     # For NETQ_GROUP, we need to map network->[groups]
     if do_groups:
       all_groups = lu.cfg.GetAllNodeGroupsInfo()
-      network_to_groups = dict((uuid, []) for uuid in self.wanted)
-
-      if do_instances:
-        all_instances = lu.cfg.GetAllInstancesInfo()
-        all_nodes = lu.cfg.GetAllNodesInfo()
-        network_to_instances = dict((uuid, []) for uuid in self.wanted)
-
-      for group in all_groups.values():
-        if do_instances:
-          group_nodes = [node.name for node in all_nodes.values() if
-                         node.group == group.uuid]
-          group_instances = [instance for instance in all_instances.values()
-                             if instance.primary_node in group_nodes]
-
-        for net_uuid in self.wanted:
+      network_to_groups = dict((uuid, []) for uuid in network_uuids)
+      for _, group in all_groups.iteritems():
+        for net_uuid in network_uuids:
           netparams = group.networks.get(net_uuid, None)
           if netparams:
             info = (group.name, netparams[constants.NIC_MODE],
@@ -16326,25 +16549,27 @@ class _NetworkQuery(_QueryBase):
 
             network_to_groups[net_uuid].append(info)
 
-          if do_instances:
-            for instance in group_instances:
-              for nic in instance.nics:
-                if nic.network == self._all_networks[net_uuid].name:
-                  network_to_instances[net_uuid].append(instance.name)
-                  break
-    else:
-      network_to_groups = None
+    if do_instances:
+      all_instances = lu.cfg.GetAllInstancesInfo()
+      network_to_instances = dict((uuid, []) for uuid in network_uuids)
+      for instance in all_instances.values():
+        for nic in instance.nics:
+          if nic.network:
+            net_uuid = name_to_uuid[nic.network]
+            if net_uuid in network_uuids:
+              network_to_instances[net_uuid].append(instance.name)
+            break
 
     if query.NETQ_STATS in self.requested_data:
       stats = \
         dict((uuid,
-              self._GetStats(network.AddressPool(self._all_networks[uuid])))
-             for uuid in self.wanted)
+              self._GetStats(network.AddressPool(all_networks[uuid])))
+             for uuid in network_uuids)
     else:
       stats = None
 
-    return query.NetworkQueryData([self._all_networks[uuid]
-                                   for uuid in self.wanted],
+    return query.NetworkQueryData([all_networks[uuid]
+                                   for uuid in network_uuids],
                                    network_to_groups,
                                    network_to_instances,
                                    stats)
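The rewritten instance mapping walks each instance's NICs directly and
buckets the instance under the queried network of its first connected NIC
(note the break above), instead of going through node groups as before.
Schematically, with instances reduced to name -> [network names]:

    def map_networks_to_instances(instances, name_to_uuid, wanted_uuids):
        net_to_inst = dict((uuid, []) for uuid in wanted_uuids)
        for inst_name, nic_networks in instances.items():
            for net_name in nic_networks:
                if net_name:              # NIC connected to a network
                    uuid = name_to_uuid[net_name]
                    if uuid in net_to_inst:
                        net_to_inst[uuid].append(inst_name)
                    break                 # only the first connected NIC counts
        return net_to_inst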
@@ -16371,7 +16596,7 @@ class LUNetworkQuery(NoHooksLU):
 
   def CheckArguments(self):
     self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
-                            self.op.output_fields, False)
+                            self.op.output_fields, self.op.use_locking)
 
   def ExpandNames(self):
     self.nq.ExpandNames(self)
@@ -16599,6 +16824,7 @@ _QUERY_IMPL = {
   constants.QR_GROUP: _GroupQuery,
   constants.QR_NETWORK: _NetworkQuery,
   constants.QR_OS: _OsQuery,
+  constants.QR_EXTSTORAGE: _ExtStorageQuery,
   constants.QR_EXPORT: _ExportQuery,
   }
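Dispatch over query resources is a plain table lookup on _QUERY_IMPL, so
registering _ExtStorageQuery under QR_EXTSTORAGE is all that is needed to
make the new resource queryable. The pattern in miniature (all names
hypothetical):

    class _ExtStorageQueryStub(object):   # stand-in for _ExtStorageQuery
        pass

    QUERY_IMPL = {"extstorage": _ExtStorageQueryStub}   # kind -> query class

    def get_query_impl(kind):
        try:
            return QUERY_IMPL[kind]
        except KeyError:
            raise ValueError("Unknown query resource: %s" % kind)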