Revision 7352d33b

b/Makefile.am
311 311
	lib/cmdlib/__init__.py \
312 312
	lib/cmdlib/common.py \
313 313
	lib/cmdlib/base.py \
314
	lib/cmdlib/cluster.py \
314 315
	lib/cmdlib/tags.py \
315 316
	lib/cmdlib/network.py \
316 317
	lib/cmdlib/test.py
b/lib/cmdlib/__init__.py
30 30

  
31 31
import os
32 32
import time
33
import re
34 33
import logging
35 34
import copy
36 35
import OpenSSL
37 36
import itertools
38 37
import operator
39 38

  
40
from ganeti import ssh
41 39
from ganeti import utils
42 40
from ganeti import errors
43 41
from ganeti import hypervisor
44 42
from ganeti import locking
45 43
from ganeti import constants
46 44
from ganeti import objects
47
from ganeti import ssconf
48
from ganeti import uidpool
49 45
from ganeti import compat
50 46
from ganeti import masterd
51 47
from ganeti import netutils
......
54 50
from ganeti import opcodes
55 51
from ganeti import ht
56 52
from ganeti import rpc
57
from ganeti import runtime
58 53
from ganeti import pathutils
59
from ganeti import vcluster
60 54
from ganeti import network
61 55
from ganeti.masterd import iallocator
62 56

  
......
64 58
  Tasklet, _QueryBase
65 59
from ganeti.cmdlib.common import _ExpandInstanceName, _ExpandItemName, \
66 60
  _ExpandNodeName, _ShareAll, _CheckNodeGroupInstances, _GetWantedNodes, \
67
  _GetWantedInstances
61
  _GetWantedInstances, _RunPostHook, _RedistributeAncillaryFiles, \
62
  _MergeAndVerifyHvState, _MergeAndVerifyDiskState, _GetUpdatedIPolicy, \
63
  _ComputeNewInstanceViolations, _GetUpdatedParams, _CheckOSParams, \
64
  _CheckHVParams, _AdjustCandidatePool, _CheckNodePVs, \
65
  _ComputeIPolicyInstanceViolation, _AnnotateDiskParams, _SupportsOob, \
66
  _ComputeIPolicySpecViolation
67

  
68
from ganeti.cmdlib.cluster import LUClusterActivateMasterIp, \
69
  LUClusterDeactivateMasterIp, LUClusterConfigQuery, LUClusterDestroy, \
70
  LUClusterPostInit, _ClusterQuery, LUClusterQuery, LUClusterRedistConf, \
71
  LUClusterRename, LUClusterRepairDiskSizes, LUClusterSetParams, \
72
  LUClusterVerify, LUClusterVerifyConfig, LUClusterVerifyGroup, \
73
  LUClusterVerifyDisks
68 74
from ganeti.cmdlib.tags import LUTagsGet, LUTagsSearch, LUTagsSet, LUTagsDel
69 75
from ganeti.cmdlib.network import LUNetworkAdd, LUNetworkRemove, \
70 76
  LUNetworkSetParams, _NetworkQuery, LUNetworkQuery, LUNetworkConnect, \
......
85 91
  ]))
86 92

  
87 93

  
88
def _AnnotateDiskParams(instance, devs, cfg):
89
  """Little helper wrapper to the rpc annotation method.
90

  
91
  @param instance: The instance object
92
  @type devs: List of L{objects.Disk}
93
  @param devs: The root devices (not any of its children!)
94
  @param cfg: The config object
95
  @returns The annotated disk copies
96
  @see L{rpc.AnnotateDiskParams}
97

  
98
  """
99
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
100
                                cfg.GetInstanceDiskParams(instance))
101

  
102

  
103 94
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
104 95
                              cur_group_uuid):
105 96
  """Checks if node groups for locked instances are still correct.
......
155 146
  return inst_groups
156 147

  
157 148

  
158
def _SupportsOob(cfg, node):
159
  """Tells if node supports OOB.
160

  
161
  @type cfg: L{config.ConfigWriter}
162
  @param cfg: The cluster configuration
163
  @type node: L{objects.Node}
164
  @param node: The node
165
  @return: The OOB script if supported or an empty string otherwise
166

  
167
  """
168
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
169

  
170

  
171 149
def _IsExclusiveStorageEnabledNode(cfg, node):
172 150
  """Whether exclusive_storage is in effect for the given node.
173 151

  
......
213 191
    return names[:]
214 192

  
215 193

  
216
def _GetUpdatedParams(old_params, update_dict,
217
                      use_default=True, use_none=False):
218
  """Return the new version of a parameter dictionary.
219

  
220
  @type old_params: dict
221
  @param old_params: old parameters
222
  @type update_dict: dict
223
  @param update_dict: dict containing new parameter values, or
224
      constants.VALUE_DEFAULT to reset the parameter to its default
225
      value
226
  @type use_default: boolean
227
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
228
      values as 'to be deleted' values
229
  @type use_none: boolean
230
  @param use_none: whether to recognise C{None} values as 'to be
231
      deleted' values
232
  @rtype: dict
233
  @return: the new parameter dictionary
234

  
235
  """
236
  params_copy = copy.deepcopy(old_params)
237
  for key, val in update_dict.iteritems():
238
    if ((use_default and val == constants.VALUE_DEFAULT) or
239
        (use_none and val is None)):
240
      try:
241
        del params_copy[key]
242
      except KeyError:
243
        pass
244
    else:
245
      params_copy[key] = val
246
  return params_copy
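(A minimal sketch of the helper's behaviour, for reviewers following the split. The import path assumes the new lib/cmdlib/common.py home this revision gives the function; the parameter dicts are invented for the example.)

from ganeti import constants
from ganeti.cmdlib.common import _GetUpdatedParams

old = {"vcpus": 2, "memory": 1024}
update = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
# "memory" is reset (dropped from the copy), "vcpus" is overridden:
assert _GetUpdatedParams(old, update) == {"vcpus": 4}
# With use_default=False the VALUE_DEFAULT marker is stored verbatim:
assert _GetUpdatedParams(old, update, use_default=False) == \
    {"vcpus": 4, "memory": constants.VALUE_DEFAULT}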
247

  
248

  
249
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
250
  """Return the new version of an instance policy.
251

  
252
  @param group_policy: whether this policy applies to a group and thus
253
    we should support removal of policy entries
254

  
255
  """
256
  ipolicy = copy.deepcopy(old_ipolicy)
257
  for key, value in new_ipolicy.items():
258
    if key not in constants.IPOLICY_ALL_KEYS:
259
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
260
                                 errors.ECODE_INVAL)
261
    if (not value or value == [constants.VALUE_DEFAULT] or
262
        value == constants.VALUE_DEFAULT):
263
      if group_policy:
264
        if key in ipolicy:
265
          del ipolicy[key]
266
      else:
267
        raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
268
                                   " on the cluster'" % key,
269
                                   errors.ECODE_INVAL)
270
    else:
271
      if key in constants.IPOLICY_PARAMETERS:
272
        # FIXME: we assume all such values are float
273
        try:
274
          ipolicy[key] = float(value)
275
        except (TypeError, ValueError), err:
276
          raise errors.OpPrereqError("Invalid value for attribute"
277
                                     " '%s': '%s', error: %s" %
278
                                     (key, value, err), errors.ECODE_INVAL)
279
      elif key == constants.ISPECS_MINMAX:
280
        for minmax in value:
281
          for k in minmax.keys():
282
            utils.ForceDictType(minmax[k], constants.ISPECS_PARAMETER_TYPES)
283
        ipolicy[key] = value
284
      elif key == constants.ISPECS_STD:
285
        if group_policy:
286
          msg = "%s cannot appear in group instance specs" % key
287
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
288
        ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
289
                                         use_none=False, use_default=False)
290
        utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
291
      else:
292
        # FIXME: we assume all others are lists; this should be redone
293
        # in a nicer way
294
        ipolicy[key] = list(value)
295
  try:
296
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
297
  except errors.ConfigurationError, err:
298
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
299
                               errors.ECODE_INVAL)
300
  return ipolicy
301

  
302

  
303
def _UpdateAndVerifySubDict(base, updates, type_check):
304
  """Updates and verifies a dict with sub dicts of the same type.
305

  
306
  @param base: The dict with the old data
307
  @param updates: The dict with the new data
308
  @param type_check: Dict suitable to ForceDictType to verify correct types
309
  @returns: A new dict with updated and verified values
310

  
311
  """
312
  def fn(old, value):
313
    new = _GetUpdatedParams(old, value)
314
    utils.ForceDictType(new, type_check)
315
    return new
316

  
317
  ret = copy.deepcopy(base)
318
  ret.update(dict((key, fn(base.get(key, {}), value))
319
                  for key, value in updates.items()))
320
  return ret
321

  
322

  
323
def _MergeAndVerifyHvState(op_input, obj_input):
324
  """Combines the hv state from an opcode with the one of the object
325

  
326
  @param op_input: The input dict from the opcode
327
  @param obj_input: The input dict from the objects
328
  @return: The verified and updated dict
329

  
330
  """
331
  if op_input:
332
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
333
    if invalid_hvs:
334
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
335
                                 " %s" % utils.CommaJoin(invalid_hvs),
336
                                 errors.ECODE_INVAL)
337
    if obj_input is None:
338
      obj_input = {}
339
    type_check = constants.HVSTS_PARAMETER_TYPES
340
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
341

  
342
  return None
343

  
344

  
345
def _MergeAndVerifyDiskState(op_input, obj_input):
346
  """Combines the disk state from an opcode with the one of the object
347

  
348
  @param op_input: The input dict from the opcode
349
  @param obj_input: The input dict from the objects
350
  @return: The verified and updated dict
351
  """
352
  if op_input:
353
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
354
    if invalid_dst:
355
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
356
                                 utils.CommaJoin(invalid_dst),
357
                                 errors.ECODE_INVAL)
358
    type_check = constants.DSS_PARAMETER_TYPES
359
    if obj_input is None:
360
      obj_input = {}
361
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
362
                                              type_check))
363
                for key, value in op_input.items())
364

  
365
  return None
366

  
367

  
368 194
def _ReleaseLocks(lu, level, names=None, keep=None):
369 195
  """Releases locks owned by an LU.
370 196

  
......
428 254
              for vol in vols)
429 255

  
430 256

  
431
def _RunPostHook(lu, node_name):
432
  """Runs the post-hook for an opcode on a single node.
433

  
434
  """
435
  hm = lu.proc.BuildHooksManager(lu)
436
  try:
437
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
438
  except Exception, err: # pylint: disable=W0703
439
    lu.LogWarning("Errors occurred running hooks on %s: %s",
440
                  node_name, err)
441

  
442

  
443 257
def _CheckOutputFields(static, dynamic, selected):
444 258
  """Checks whether all selected fields are valid.
445 259

  
......
574 388
      raise errors.OpExecError(msg)
575 389

  
576 390

  
577
def _CheckNodePVs(nresult, exclusive_storage):
578
  """Check node PVs.
579

  
580
  """
581
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
582
  if pvlist_dict is None:
583
    return (["Can't get PV list from node"], None)
584
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
585
  errlist = []
586
  # check that ':' is not present in PV names, since it's a
587
  # special character for lvcreate (denotes the range of PEs to
588
  # use on the PV)
589
  for pv in pvlist:
590
    if ":" in pv.name:
591
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
592
                     (pv.name, pv.vg_name))
593
  es_pvinfo = None
594
  if exclusive_storage:
595
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
596
    errlist.extend(errmsgs)
597
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
598
    if shared_pvs:
599
      for (pvname, lvlist) in shared_pvs:
600
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
601
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
602
                       (pvname, utils.CommaJoin(lvlist)))
603
  return (errlist, es_pvinfo)
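(Reviewer sketch, not part of the change: the ':' check above can be exercised with a hand-made nresult payload. Only the keys _CheckNodePVs reads are filled in, assuming partial PV dicts are acceptable to LvmPvInfo.FromDict; the device names are invented.)

from ganeti import constants
from ganeti.cmdlib.common import _CheckNodePVs

fake_nresult = {
  constants.NV_PVLIST: [
    {"name": "/dev/sdb1:0", "vg_name": "xenvg"},  # ':' is rejected below
    ],
  }
(errs, es_info) = _CheckNodePVs(fake_nresult, False)
# errs -> ["Invalid character ':' in PV '/dev/sdb1:0' of VG 'xenvg'"]
# es_info -> None, because exclusive_storage is off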
604

  
605

  
606 391
def _GetClusterDomainSecret():
607 392
  """Reads the cluster domain secret.
608 393

  
......
642 427
                     " is down")
643 428

  
644 429

  
645
def _ComputeMinMaxSpec(name, qualifier, ispecs, value):
646
  """Computes if value is in the desired range.
647

  
648
  @param name: name of the parameter for which we perform the check
649
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
650
      not just 'disk')
651
  @param ispecs: dictionary containing min and max values
652
  @param value: actual value that we want to use
653
  @return: None or an error string
654

  
655
  """
656
  if value in [None, constants.VALUE_AUTO]:
657
    return None
658
  max_v = ispecs[constants.ISPECS_MAX].get(name, value)
659
  min_v = ispecs[constants.ISPECS_MIN].get(name, value)
660
  if value > max_v or min_v > value:
661
    if qualifier:
662
      fqn = "%s/%s" % (name, qualifier)
663
    else:
664
      fqn = name
665
    return ("%s value %s is not in range [%s, %s]" %
666
            (fqn, value, min_v, max_v))
667
  return None
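(A quick illustration of the range check; the ispecs dict is hand-built for the example and assumes the helper is importable from its new location in this revision.)

from ganeti import constants

ispecs = {
  constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
  constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
  }
_ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ispecs, 512)    # None (in range)
_ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ispecs, 8192)   # error string
_ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ispecs, None)   # None (auto/unset)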
668

  
669

  
670
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
671
                                 nic_count, disk_sizes, spindle_use,
672
                                 disk_template,
673
                                 _compute_fn=_ComputeMinMaxSpec):
674
  """Verifies ipolicy against provided specs.
675

  
676
  @type ipolicy: dict
677
  @param ipolicy: The ipolicy
678
  @type mem_size: int
679
  @param mem_size: The memory size
680
  @type cpu_count: int
681
  @param cpu_count: Used cpu cores
682
  @type disk_count: int
683
  @param disk_count: Number of disks used
684
  @type nic_count: int
685
  @param nic_count: Number of nics used
686
  @type disk_sizes: list of ints
687
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
688
  @type spindle_use: int
689
  @param spindle_use: The number of spindles this instance uses
690
  @type disk_template: string
691
  @param disk_template: The disk template of the instance
692
  @param _compute_fn: The compute function (unittest only)
693
  @return: A list of violations, or an empty list if no violations are found
694

  
695
  """
696
  assert disk_count == len(disk_sizes)
697

  
698
  test_settings = [
699
    (constants.ISPEC_MEM_SIZE, "", mem_size),
700
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
701
    (constants.ISPEC_NIC_COUNT, "", nic_count),
702
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
703
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
704
         for idx, d in enumerate(disk_sizes)]
705
  if disk_template != constants.DT_DISKLESS:
706
    # This check doesn't make sense for diskless instances
707
    test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))
708
  ret = []
709
  allowed_dts = ipolicy[constants.IPOLICY_DTS]
710
  if disk_template not in allowed_dts:
711
    ret.append("Disk template %s is not allowed (allowed templates: %s)" %
712
               (disk_template, utils.CommaJoin(allowed_dts)))
713

  
714
  min_errs = None
715
  for minmax in ipolicy[constants.ISPECS_MINMAX]:
716
    errs = filter(None,
717
                  (_compute_fn(name, qualifier, minmax, value)
718
                   for (name, qualifier, value) in test_settings))
719
    if min_errs is None or len(errs) < len(min_errs):
720
      min_errs = errs
721
  assert min_errs is not None
722
  return ret + min_errs
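(Hedged usage sketch: a minimal, hand-built ipolicy with only memory bounds, so every other spec falls back to "in range". Real policies carry the full min/max sets; the import path assumes this revision's new layout.)

from ganeti import constants
from ganeti.cmdlib.common import _ComputeIPolicySpecViolation

ipolicy = {
  constants.IPOLICY_DTS: [constants.DT_PLAIN],
  constants.ISPECS_MINMAX: [{
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
    }],
  }
violations = _ComputeIPolicySpecViolation(ipolicy, 512, 1, 1, 1, [1024], 1,
                                          constants.DT_PLAIN)
# violations -> []; passing a template outside IPOLICY_DTS as the last
# argument would add a "Disk template ... is not allowed" entry instead.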
723

  
724

  
725
def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
726
                                     _compute_fn=_ComputeIPolicySpecViolation):
727
  """Compute if instance meets the specs of ipolicy.
728

  
729
  @type ipolicy: dict
730
  @param ipolicy: The ipolicy to verify against
731
  @type instance: L{objects.Instance}
732
  @param instance: The instance to verify
733
  @type cfg: L{config.ConfigWriter}
734
  @param cfg: Cluster configuration
735
  @param _compute_fn: The function to verify ipolicy (unittest only)
736
  @see: L{_ComputeIPolicySpecViolation}
737

  
738
  """
739
  be_full = cfg.GetClusterInfo().FillBE(instance)
740
  mem_size = be_full[constants.BE_MAXMEM]
741
  cpu_count = be_full[constants.BE_VCPUS]
742
  spindle_use = be_full[constants.BE_SPINDLE_USE]
743
  disk_count = len(instance.disks)
744
  disk_sizes = [disk.size for disk in instance.disks]
745
  nic_count = len(instance.nics)
746
  disk_template = instance.disk_template
747

  
748
  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
749
                     disk_sizes, spindle_use, disk_template)
750

  
751

  
752 430
def _ComputeIPolicyInstanceSpecViolation(
753 431
  ipolicy, instance_spec, disk_template,
754 432
  _compute_fn=_ComputeIPolicySpecViolation):
......
822 500
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
823 501

  
824 502

  
825
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
826
  """Computes a set of any instances that would violate the new ipolicy.
827

  
828
  @param old_ipolicy: The current (still in-place) ipolicy
829
  @param new_ipolicy: The new (to become) ipolicy
830
  @param instances: List of instances to verify
831
  @type cfg: L{config.ConfigWriter}
832
  @param cfg: Cluster configuration
833
  @return: A set of instances which violate the new ipolicy but
834
      did not before
835

  
836
  """
837
  return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
838
          _ComputeViolatingInstances(old_ipolicy, instances, cfg))
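(The heart of the helper is plain set arithmetic; a tiny sketch with made-up instance names.)

violates_new = frozenset(["inst1.example.com", "inst2.example.com"])
violates_old = frozenset(["inst2.example.com"])
newly_violating = violates_new - violates_old
# frozenset(["inst1.example.com"]): only instances that break the new policy
# while already complying with the old one are reported.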
839

  
840

  
841 503
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
842 504
                          minmem, maxmem, vcpus, nics, disk_template, disks,
843 505
                          bep, hvp, hypervisor_name, tags):
......
1022 684
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1023 685

  
1024 686

  
1025
def _AdjustCandidatePool(lu, exceptions):
1026
  """Adjust the candidate pool after node operations.
1027

  
1028
  """
1029
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1030
  if mod_list:
1031
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1032
               utils.CommaJoin(node.name for node in mod_list))
1033
    for name in mod_list:
1034
      lu.context.ReaddNode(name)
1035
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1036
  if mc_now > mc_max:
1037
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1038
               (mc_now, mc_max))
1039

  
1040

  
1041 687
def _DecideSelfPromotion(lu, exceptions=None):
1042 688
  """Decide whether I should promote myself as a master candidate.
1043 689

  
......
1049 695
  return mc_now < mc_should
1050 696

  
1051 697

  
1052
def _ComputeViolatingInstances(ipolicy, instances, cfg):
1053
  """Computes a set of instances who violates given ipolicy.
1054

  
1055
  @param ipolicy: The ipolicy to verify
1056
  @type instances: list of L{objects.Instance}
1057
  @param instances: List of instances to verify
1058
  @type cfg: L{config.ConfigWriter}
1059
  @param cfg: Cluster configuration
1060
  @return: A frozenset of instance names violating the ipolicy
1061

  
1062
  """
1063
  return frozenset([inst.name for inst in instances
1064
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])
1065

  
1066

  
1067 698
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1068 699
  """Check that the brigdes needed by a list of nics exist.
1069 700

  
......
1173 804
  """Check the sanity of iallocator and node arguments and use the
1174 805
  cluster-wide iallocator if appropriate.
1175 806

  
1176
  Check that at most one of (iallocator, node) is specified. If none is
1177
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1178
  then the LU's opcode's iallocator slot is filled with the cluster-wide
1179
  default iallocator.
1180

  
1181
  @type iallocator_slot: string
1182
  @param iallocator_slot: the name of the opcode iallocator slot
1183
  @type node_slot: string
1184
  @param node_slot: the name of the opcode target node slot
1185

  
1186
  """
1187
  node = getattr(lu.op, node_slot, None)
1188
  ialloc = getattr(lu.op, iallocator_slot, None)
1189
  if node == []:
1190
    node = None
1191

  
1192
  if node is not None and ialloc is not None:
1193
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1194
                               errors.ECODE_INVAL)
1195
  elif ((node is None and ialloc is None) or
1196
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1197
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1198
    if default_iallocator:
1199
      setattr(lu.op, iallocator_slot, default_iallocator)
1200
    else:
1201
      raise errors.OpPrereqError("No iallocator or node given and no"
1202
                                 " cluster-wide default iallocator found;"
1203
                                 " please specify either an iallocator or a"
1204
                                 " node, or set a cluster-wide default"
1205
                                 " iallocator", errors.ECODE_INVAL)
1206

  
1207

  
1208
def _GetDefaultIAllocator(cfg, ialloc):
1209
  """Decides on which iallocator to use.
1210

  
1211
  @type cfg: L{config.ConfigWriter}
1212
  @param cfg: Cluster configuration object
1213
  @type ialloc: string or None
1214
  @param ialloc: Iallocator specified in opcode
1215
  @rtype: string
1216
  @return: Iallocator name
1217

  
1218
  """
1219
  if not ialloc:
1220
    # Use default iallocator
1221
    ialloc = cfg.GetDefaultIAllocator()
1222

  
1223
  if not ialloc:
1224
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1225
                               " opcode nor as a cluster-wide default",
1226
                               errors.ECODE_INVAL)
1227

  
1228
  return ialloc
1229

  
1230

  
1231
def _CheckHostnameSane(lu, name):
1232
  """Ensures that a given hostname resolves to a 'sane' name.
1233

  
1234
  The given name is required to be a prefix of the resolved hostname,
1235
  to prevent accidental mismatches.
1236

  
1237
  @param lu: the logical unit on behalf of which we're checking
1238
  @param name: the name we should resolve and check
1239
  @return: the resolved hostname object
1240

  
1241
  """
1242
  hostname = netutils.GetHostname(name=name)
1243
  if hostname.name != name:
1244
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1245
  if not utils.MatchNameComponent(name, [hostname.name]):
1246
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1247
                                " same as given hostname '%s'") %
1248
                                (hostname.name, name), errors.ECODE_INVAL)
1249
  return hostname
1250

  
1251

  
1252
class LUClusterPostInit(LogicalUnit):
1253
  """Logical unit for running hooks after cluster initialization.
1254

  
1255
  """
1256
  HPATH = "cluster-init"
1257
  HTYPE = constants.HTYPE_CLUSTER
1258

  
1259
  def BuildHooksEnv(self):
1260
    """Build hooks env.
1261

  
1262
    """
1263
    return {
1264
      "OP_TARGET": self.cfg.GetClusterName(),
1265
      }
1266

  
1267
  def BuildHooksNodes(self):
1268
    """Build hooks nodes.
1269

  
1270
    """
1271
    return ([], [self.cfg.GetMasterNode()])
1272

  
1273
  def Exec(self, feedback_fn):
1274
    """Nothing to do.
1275

  
1276
    """
1277
    return True
1278

  
1279

  
1280
class LUClusterDestroy(LogicalUnit):
1281
  """Logical unit for destroying the cluster.
1282

  
1283
  """
1284
  HPATH = "cluster-destroy"
1285
  HTYPE = constants.HTYPE_CLUSTER
1286

  
1287
  def BuildHooksEnv(self):
1288
    """Build hooks env.
1289

  
1290
    """
1291
    return {
1292
      "OP_TARGET": self.cfg.GetClusterName(),
1293
      }
1294

  
1295
  def BuildHooksNodes(self):
1296
    """Build hooks nodes.
1297

  
1298
    """
1299
    return ([], [])
1300

  
1301
  def CheckPrereq(self):
1302
    """Check prerequisites.
1303

  
1304
    This checks whether the cluster is empty.
1305

  
1306
    Any errors are signaled by raising errors.OpPrereqError.
1307

  
1308
    """
1309
    master = self.cfg.GetMasterNode()
1310

  
1311
    nodelist = self.cfg.GetNodeList()
1312
    if len(nodelist) != 1 or nodelist[0] != master:
1313
      raise errors.OpPrereqError("There are still %d node(s) in"
1314
                                 " this cluster." % (len(nodelist) - 1),
1315
                                 errors.ECODE_INVAL)
1316
    instancelist = self.cfg.GetInstanceList()
1317
    if instancelist:
1318
      raise errors.OpPrereqError("There are still %d instance(s) in"
1319
                                 " this cluster." % len(instancelist),
1320
                                 errors.ECODE_INVAL)
1321

  
1322
  def Exec(self, feedback_fn):
1323
    """Destroys the cluster.
1324

  
1325
    """
1326
    master_params = self.cfg.GetMasterNetworkParameters()
1327

  
1328
    # Run post hooks on master node before it's removed
1329
    _RunPostHook(self, master_params.name)
1330

  
1331
    ems = self.cfg.GetUseExternalMipScript()
1332
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1333
                                                     master_params, ems)
1334
    if result.fail_msg:
1335
      self.LogWarning("Error disabling the master IP address: %s",
1336
                      result.fail_msg)
1337

  
1338
    return master_params.name
1339

  
1340

  
1341
def _VerifyCertificate(filename):
1342
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1343

  
1344
  @type filename: string
1345
  @param filename: Path to PEM file
1346

  
1347
  """
1348
  try:
1349
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1350
                                           utils.ReadFile(filename))
1351
  except Exception, err: # pylint: disable=W0703
1352
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1353
            "Failed to load X509 certificate %s: %s" % (filename, err))
1354

  
1355
  (errcode, msg) = \
1356
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1357
                                constants.SSL_CERT_EXPIRATION_ERROR)
1358

  
1359
  if msg:
1360
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1361
  else:
1362
    fnamemsg = None
1363

  
1364
  if errcode is None:
1365
    return (None, fnamemsg)
1366
  elif errcode == utils.CERT_WARNING:
1367
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1368
  elif errcode == utils.CERT_ERROR:
1369
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1370

  
1371
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1372

  
1373

  
1374
def _GetAllHypervisorParameters(cluster, instances):
1375
  """Compute the set of all hypervisor parameters.
1376

  
1377
  @type cluster: L{objects.Cluster}
1378
  @param cluster: the cluster object
1379
  @type instances: list of L{objects.Instance}
1380
  @param instances: additional instances from which to obtain parameters
1381
  @rtype: list of (origin, hypervisor, parameters)
1382
  @return: a list with all parameters found, indicating the hypervisor they
1383
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1384

  
1385
  """
1386
  hvp_data = []
1387

  
1388
  for hv_name in cluster.enabled_hypervisors:
1389
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1390

  
1391
  for os_name, os_hvp in cluster.os_hvp.items():
1392
    for hv_name, hv_params in os_hvp.items():
1393
      if hv_params:
1394
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1395
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1396

  
1397
  # TODO: collapse identical parameter values in a single one
1398
  for instance in instances:
1399
    if instance.hvparams:
1400
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1401
                       cluster.FillHV(instance)))
1402

  
1403
  return hvp_data
1404

  
1405

  
1406
class _VerifyErrors(object):
1407
  """Mix-in for cluster/group verify LUs.
1408

  
1409
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1410
  self.op and self._feedback_fn to be available.)
1411

  
1412
  """
1413

  
1414
  ETYPE_FIELD = "code"
1415
  ETYPE_ERROR = "ERROR"
1416
  ETYPE_WARNING = "WARNING"
1417

  
1418
  def _Error(self, ecode, item, msg, *args, **kwargs):
1419
    """Format an error message.
1420

  
1421
    Based on the opcode's error_codes parameter, either format a
1422
    parseable error code, or a simpler error string.
1423

  
1424
    This must be called only from Exec and functions called from Exec.
1425

  
1426
    """
1427
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1428
    itype, etxt, _ = ecode
1429
    # If the error code is in the list of ignored errors, demote the error to a
1430
    # warning
1431
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1432
      ltype = self.ETYPE_WARNING
1433
    # first complete the msg
1434
    if args:
1435
      msg = msg % args
1436
    # then format the whole message
1437
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1438
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1439
    else:
1440
      if item:
1441
        item = " " + item
1442
      else:
1443
        item = ""
1444
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1445
    # and finally report it via the feedback_fn
1446
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1447
    # do not mark the operation as failed for WARN cases only
1448
    if ltype == self.ETYPE_ERROR:
1449
      self.bad = True
1450

  
1451
  def _ErrorIf(self, cond, *args, **kwargs):
1452
    """Log an error message if the passed condition is True.
1453

  
1454
    """
1455
    if (bool(cond)
1456
        or self.op.debug_simulate_errors): # pylint: disable=E1101
1457
      self._Error(*args, **kwargs)
1458

  
1459

  
1460
class LUClusterVerify(NoHooksLU):
1461
  """Submits all jobs necessary to verify the cluster.
1462

  
1463
  """
1464
  REQ_BGL = False
1465

  
1466
  def ExpandNames(self):
1467
    self.needed_locks = {}
1468

  
1469
  def Exec(self, feedback_fn):
1470
    jobs = []
1471

  
1472
    if self.op.group_name:
1473
      groups = [self.op.group_name]
1474
      depends_fn = lambda: None
1475
    else:
1476
      groups = self.cfg.GetNodeGroupList()
1477

  
1478
      # Verify global configuration
1479
      jobs.append([
1480
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
1481
        ])
1482

  
1483
      # Always depend on global verification
1484
      depends_fn = lambda: [(-len(jobs), [])]
1485

  
1486
    jobs.extend(
1487
      [opcodes.OpClusterVerifyGroup(group_name=group,
1488
                                    ignore_errors=self.op.ignore_errors,
1489
                                    depends=depends_fn())]
1490
      for group in groups)
1491

  
1492
    # Fix up all parameters
1493
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1494
      op.debug_simulate_errors = self.op.debug_simulate_errors
1495
      op.verbose = self.op.verbose
1496
      op.error_codes = self.op.error_codes
1497
      try:
1498
        op.skip_checks = self.op.skip_checks
1499
      except AttributeError:
1500
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1501

  
1502
    return ResultWithJobs(jobs)
1503

  
1504

  
1505
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1506
  """Verifies the cluster config.
1507

  
1508
  """
1509
  REQ_BGL = False
1510

  
1511
  def _VerifyHVP(self, hvp_data):
1512
    """Verifies locally the syntax of the hypervisor parameters.
1513

  
1514
    """
1515
    for item, hv_name, hv_params in hvp_data:
1516
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1517
             (hv_name, item))
1518
      try:
1519
        hv_class = hypervisor.GetHypervisorClass(hv_name)
1520
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1521
        hv_class.CheckParameterSyntax(hv_params)
1522
      except errors.GenericError, err:
1523
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1524

  
1525
  def ExpandNames(self):
1526
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1527
    self.share_locks = _ShareAll()
1528

  
1529
  def CheckPrereq(self):
1530
    """Check prerequisites.
1531

  
1532
    """
1533
    # Retrieve all information
1534
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1535
    self.all_node_info = self.cfg.GetAllNodesInfo()
1536
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1537

  
1538
  def Exec(self, feedback_fn):
1539
    """Verify integrity of cluster, performing various test on nodes.
1540

  
1541
    """
1542
    self.bad = False
1543
    self._feedback_fn = feedback_fn
1544

  
1545
    feedback_fn("* Verifying cluster config")
1546

  
1547
    for msg in self.cfg.VerifyConfig():
1548
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1549

  
1550
    feedback_fn("* Verifying cluster certificate files")
1551

  
1552
    for cert_filename in pathutils.ALL_CERT_FILES:
1553
      (errcode, msg) = _VerifyCertificate(cert_filename)
1554
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1555

  
1556
    feedback_fn("* Verifying hypervisor parameters")
1557

  
1558
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1559
                                                self.all_inst_info.values()))
1560

  
1561
    feedback_fn("* Verifying all nodes belong to an existing group")
1562

  
1563
    # We do this verification here because, should this bogus circumstance
1564
    # occur, it would never be caught by VerifyGroup, which only acts on
1565
    # nodes/instances reachable from existing node groups.
1566

  
1567
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1568
                         if node.group not in self.all_group_info)
1569

  
1570
    dangling_instances = {}
1571
    no_node_instances = []
1572

  
1573
    for inst in self.all_inst_info.values():
1574
      if inst.primary_node in dangling_nodes:
1575
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1576
      elif inst.primary_node not in self.all_node_info:
1577
        no_node_instances.append(inst.name)
1578

  
1579
    pretty_dangling = [
1580
        "%s (%s)" %
1581
        (node.name,
1582
         utils.CommaJoin(dangling_instances.get(node.name,
1583
                                                ["no instances"])))
1584
        for node in dangling_nodes]
1585

  
1586
    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1587
                  None,
1588
                  "the following nodes (and their instances) belong to a non"
1589
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1590

  
1591
    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1592
                  None,
1593
                  "the following instances have a non-existing primary-node:"
1594
                  " %s", utils.CommaJoin(no_node_instances))
1595

  
1596
    return not self.bad
1597

  
1598

  
1599
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1600
  """Verifies the status of a node group.
1601

  
1602
  """
1603
  HPATH = "cluster-verify"
1604
  HTYPE = constants.HTYPE_CLUSTER
1605
  REQ_BGL = False
1606

  
1607
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1608

  
1609
  class NodeImage(object):
1610
    """A class representing the logical and physical status of a node.
1611

  
1612
    @type name: string
1613
    @ivar name: the node name to which this object refers
1614
    @ivar volumes: a structure as returned from
1615
        L{ganeti.backend.GetVolumeList} (runtime)
1616
    @ivar instances: a list of running instances (runtime)
1617
    @ivar pinst: list of configured primary instances (config)
1618
    @ivar sinst: list of configured secondary instances (config)
1619
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1620
        instances for which this node is secondary (config)
1621
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1622
    @ivar dfree: free disk, as reported by the node (runtime)
1623
    @ivar offline: the offline status (config)
1624
    @type rpc_fail: boolean
1625
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1626
        not whether the individual keys were correct) (runtime)
1627
    @type lvm_fail: boolean
1628
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1629
    @type hyp_fail: boolean
1630
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1631
    @type ghost: boolean
1632
    @ivar ghost: whether this is a known node or not (config)
1633
    @type os_fail: boolean
1634
    @ivar os_fail: whether the RPC call didn't return valid OS data
1635
    @type oslist: list
1636
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1637
    @type vm_capable: boolean
1638
    @ivar vm_capable: whether the node can host instances
1639
    @type pv_min: float
1640
    @ivar pv_min: size in MiB of the smallest PVs
1641
    @type pv_max: float
1642
    @ivar pv_max: size in MiB of the biggest PVs
1643

  
1644
    """
1645
    def __init__(self, offline=False, name=None, vm_capable=True):
1646
      self.name = name
1647
      self.volumes = {}
1648
      self.instances = []
1649
      self.pinst = []
1650
      self.sinst = []
1651
      self.sbp = {}
1652
      self.mfree = 0
1653
      self.dfree = 0
1654
      self.offline = offline
1655
      self.vm_capable = vm_capable
1656
      self.rpc_fail = False
1657
      self.lvm_fail = False
1658
      self.hyp_fail = False
1659
      self.ghost = False
1660
      self.os_fail = False
1661
      self.oslist = {}
1662
      self.pv_min = None
1663
      self.pv_max = None
1664

  
1665
  def ExpandNames(self):
1666
    # This raises errors.OpPrereqError on its own:
1667
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1668

  
1669
    # Get instances in node group; this is unsafe and needs verification later
1670
    inst_names = \
1671
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1672

  
1673
    self.needed_locks = {
1674
      locking.LEVEL_INSTANCE: inst_names,
1675
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1676
      locking.LEVEL_NODE: [],
1677

  
1678
      # This opcode is run by watcher every five minutes and acquires all nodes
1679
      # for a group. It doesn't run for a long time, so it's better to acquire
1680
      # the node allocation lock as well.
1681
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
1682
      }
1683

  
1684
    self.share_locks = _ShareAll()
1685

  
1686
  def DeclareLocks(self, level):
1687
    if level == locking.LEVEL_NODE:
1688
      # Get members of node group; this is unsafe and needs verification later
1689
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1690

  
1691
      all_inst_info = self.cfg.GetAllInstancesInfo()
1692

  
1693
      # In Exec(), we warn about mirrored instances that have primary and
1694
      # secondary living in separate node groups. To fully verify that
1695
      # volumes for these instances are healthy, we will need to do an
1696
      # extra call to their secondaries. We ensure here those nodes will
1697
      # be locked.
1698
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1699
        # Important: access only the instances whose lock is owned
1700
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1701
          nodes.update(all_inst_info[inst].secondary_nodes)
1702

  
1703
      self.needed_locks[locking.LEVEL_NODE] = nodes
1704

  
1705
  def CheckPrereq(self):
1706
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1707
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1708

  
1709
    group_nodes = set(self.group_info.members)
1710
    group_instances = \
1711
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1712

  
1713
    unlocked_nodes = \
1714
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1715

  
1716
    unlocked_instances = \
1717
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1718

  
1719
    if unlocked_nodes:
1720
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1721
                                 utils.CommaJoin(unlocked_nodes),
1722
                                 errors.ECODE_STATE)
1723

  
1724
    if unlocked_instances:
1725
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1726
                                 utils.CommaJoin(unlocked_instances),
1727
                                 errors.ECODE_STATE)
1728

  
1729
    self.all_node_info = self.cfg.GetAllNodesInfo()
1730
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1731

  
1732
    self.my_node_names = utils.NiceSort(group_nodes)
1733
    self.my_inst_names = utils.NiceSort(group_instances)
1734

  
1735
    self.my_node_info = dict((name, self.all_node_info[name])
1736
                             for name in self.my_node_names)
1737

  
1738
    self.my_inst_info = dict((name, self.all_inst_info[name])
1739
                             for name in self.my_inst_names)
1740

  
1741
    # We detect here the nodes that will need the extra RPC calls for verifying
1742
    # split LV volumes; they should be locked.
1743
    extra_lv_nodes = set()
1744

  
1745
    for inst in self.my_inst_info.values():
1746
      if inst.disk_template in constants.DTS_INT_MIRROR:
1747
        for nname in inst.all_nodes:
1748
          if self.all_node_info[nname].group != self.group_uuid:
1749
            extra_lv_nodes.add(nname)
1750

  
1751
    unlocked_lv_nodes = \
1752
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1753

  
1754
    if unlocked_lv_nodes:
1755
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1756
                                 utils.CommaJoin(unlocked_lv_nodes),
1757
                                 errors.ECODE_STATE)
1758
    self.extra_lv_nodes = list(extra_lv_nodes)
1759

  
1760
  def _VerifyNode(self, ninfo, nresult):
1761
    """Perform some basic validation on data returned from a node.
1762

  
1763
      - check the result data structure is well formed and has all the
1764
        mandatory fields
1765
      - check ganeti version
1766

  
1767
    @type ninfo: L{objects.Node}
1768
    @param ninfo: the node to check
1769
    @param nresult: the results from the node
1770
    @rtype: boolean
1771
    @return: whether overall this call was successful (and we can expect
1772
         reasonable values in the response)
1773

  
1774
    """
1775
    node = ninfo.name
1776
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1777

  
1778
    # main result, nresult should be a non-empty dict
1779
    test = not nresult or not isinstance(nresult, dict)
1780
    _ErrorIf(test, constants.CV_ENODERPC, node,
1781
                  "unable to verify node: no data returned")
1782
    if test:
1783
      return False
1784

  
1785
    # compares ganeti version
1786
    local_version = constants.PROTOCOL_VERSION
1787
    remote_version = nresult.get("version", None)
1788
    test = not (remote_version and
1789
                isinstance(remote_version, (list, tuple)) and
1790
                len(remote_version) == 2)
1791
    _ErrorIf(test, constants.CV_ENODERPC, node,
1792
             "connection to node returned invalid data")
1793
    if test:
1794
      return False
1795

  
1796
    test = local_version != remote_version[0]
1797
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1798
             "incompatible protocol versions: master %s,"
1799
             " node %s", local_version, remote_version[0])
1800
    if test:
1801
      return False
1802

  
1803
    # node seems compatible, we can actually try to look into its results
1804

  
1805
    # full package version
1806
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1807
                  constants.CV_ENODEVERSION, node,
1808
                  "software version mismatch: master %s, node %s",
1809
                  constants.RELEASE_VERSION, remote_version[1],
1810
                  code=self.ETYPE_WARNING)
1811

  
1812
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1813
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1814
      for hv_name, hv_result in hyp_result.iteritems():
1815
        test = hv_result is not None
1816
        _ErrorIf(test, constants.CV_ENODEHV, node,
1817
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1818

  
1819
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1820
    if ninfo.vm_capable and isinstance(hvp_result, list):
1821
      for item, hv_name, hv_result in hvp_result:
1822
        _ErrorIf(True, constants.CV_ENODEHV, node,
1823
                 "hypervisor %s parameter verify failure (source %s): %s",
1824
                 hv_name, item, hv_result)
1825

  
1826
    test = nresult.get(constants.NV_NODESETUP,
1827
                       ["Missing NODESETUP results"])
1828
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1829
             "; ".join(test))
1830

  
1831
    return True
1832

  
1833
  def _VerifyNodeTime(self, ninfo, nresult,
1834
                      nvinfo_starttime, nvinfo_endtime):
1835
    """Check the node time.
1836

  
1837
    @type ninfo: L{objects.Node}
1838
    @param ninfo: the node to check
1839
    @param nresult: the remote results for the node
1840
    @param nvinfo_starttime: the start time of the RPC call
1841
    @param nvinfo_endtime: the end time of the RPC call
1842

  
1843
    """
1844
    node = ninfo.name
1845
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1846

  
1847
    ntime = nresult.get(constants.NV_TIME, None)
1848
    try:
1849
      ntime_merged = utils.MergeTime(ntime)
1850
    except (ValueError, TypeError):
1851
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1852
      return
1853

  
1854
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1855
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1856
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1857
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1858
    else:
1859
      ntime_diff = None
1860

  
1861
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1862
             "Node time diverges by at least %s from master node time",
1863
             ntime_diff)
1864

  
1865
  def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
1866
    """Check the node LVM results and update info for cross-node checks.
1867

  
1868
    @type ninfo: L{objects.Node}
1869
    @param ninfo: the node to check
1870
    @param nresult: the remote results for the node
1871
    @param vg_name: the configured VG name
1872
    @type nimg: L{NodeImage}
1873
    @param nimg: node image
1874

  
1875
    """
1876
    if vg_name is None:
1877
      return
1878

  
1879
    node = ninfo.name
1880
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1881

  
1882
    # checks vg existence and size > 20G
1883
    vglist = nresult.get(constants.NV_VGLIST, None)
1884
    test = not vglist
1885
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1886
    if not test:
1887
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1888
                                            constants.MIN_VG_SIZE)
1889
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1890

  
1891
    # Check PVs
1892
    (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
1893
    for em in errmsgs:
1894
      self._Error(constants.CV_ENODELVM, node, em)
1895
    if pvminmax is not None:
1896
      (nimg.pv_min, nimg.pv_max) = pvminmax
1897

  
1898
  def _VerifyGroupLVM(self, node_image, vg_name):
1899
    """Check cross-node consistency in LVM.
1900

  
1901
    @type node_image: dict
1902
    @param node_image: info about nodes, mapping node names to
1903
      L{NodeImage} objects
1904
    @param vg_name: the configured VG name
1905

  
1906
    """
1907
    if vg_name is None:
1908
      return
1909

  
1910
    # Only exclusive storage needs this kind of check
1911
    if not self._exclusive_storage:
1912
      return
1913

  
1914
    # exclusive_storage wants all PVs to have the same size (approximately),
1915
    # if the smallest and the biggest ones are okay, everything is fine.
1916
    # pv_min is None iff pv_max is None
1917
    vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
1918
    if not vals:
1919
      return
1920
    (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
1921
    (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
1922
    bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
1923
    self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
1924
                  "PV sizes differ too much in the group; smallest (%s MB) is"
1925
                  " on %s, biggest (%s MB) is on %s",
1926
                  pvmin, minnode, pvmax, maxnode)
1927

  
1928
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1929
    """Check the node bridges.
1930

  
1931
    @type ninfo: L{objects.Node}
1932
    @param ninfo: the node to check
1933
    @param nresult: the remote results for the node
1934
    @param bridges: the expected list of bridges
1935

  
1936
    """
1937
    if not bridges:
1938
      return
1939

  
1940
    node = ninfo.name
1941
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1942

  
1943
    missing = nresult.get(constants.NV_BRIDGES, None)
1944
    test = not isinstance(missing, list)
1945
    _ErrorIf(test, constants.CV_ENODENET, node,
1946
             "did not return valid bridge information")
1947
    if not test:
1948
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1949
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1950

  
1951
  def _VerifyNodeUserScripts(self, ninfo, nresult):
1952
    """Check the results of user scripts presence and executability on the node
1953

  
1954
    @type ninfo: L{objects.Node}
1955
    @param ninfo: the node to check
1956
    @param nresult: the remote results for the node
1957

  
1958
    """
1959
    node = ninfo.name
1960

  
1961
    test = not constants.NV_USERSCRIPTS in nresult
1962
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1963
                  "did not return user scripts information")
1964

  
1965
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1966
    if not test:
1967
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1968
                    "user scripts not present or not executable: %s" %
1969
                    utils.CommaJoin(sorted(broken_scripts)))
1970

  
1971
  def _VerifyNodeNetwork(self, ninfo, nresult):
1972
    """Check the node network connectivity results.
1973

  
1974
    @type ninfo: L{objects.Node}
1975
    @param ninfo: the node to check
1976
    @param nresult: the remote results for the node
1977

  
1978
    """
1979
    node = ninfo.name
1980
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1981

  
1982
    test = constants.NV_NODELIST not in nresult
1983
    _ErrorIf(test, constants.CV_ENODESSH, node,
1984
             "node hasn't returned node ssh connectivity data")
1985
    if not test:
1986
      if nresult[constants.NV_NODELIST]:
1987
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1988
          _ErrorIf(True, constants.CV_ENODESSH, node,
1989
                   "ssh communication with node '%s': %s", a_node, a_msg)
1990

  
1991
    test = constants.NV_NODENETTEST not in nresult
1992
    _ErrorIf(test, constants.CV_ENODENET, node,
1993
             "node hasn't returned node tcp connectivity data")
1994
    if not test:
1995
      if nresult[constants.NV_NODENETTEST]:
1996
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1997
        for anode in nlist:
1998
          _ErrorIf(True, constants.CV_ENODENET, node,
1999
                   "tcp communication with node '%s': %s",
2000
                   anode, nresult[constants.NV_NODENETTEST][anode])
2001

  
2002
    test = constants.NV_MASTERIP not in nresult
2003
    _ErrorIf(test, constants.CV_ENODENET, node,
2004
             "node hasn't returned node master IP reachability data")
2005
    if not test:
2006
      if not nresult[constants.NV_MASTERIP]:
2007
        if node == self.master_node:
2008
          msg = "the master node cannot reach the master IP (not configured?)"
2009
        else:
2010
          msg = "cannot reach the master IP"
2011
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2012

  
2013
  def _VerifyInstance(self, instance, inst_config, node_image,
2014
                      diskstatus):
2015
    """Verify an instance.
2016

  
2017
    This function checks to see if the required block devices are
2018
    available on the instance's node, and that the nodes are in the correct
2019
    state.
2020

  
2021
    """
2022
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2023
    pnode = inst_config.primary_node
2024
    pnode_img = node_image[pnode]
2025
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2026

  
2027
    node_vol_should = {}
2028
    inst_config.MapLVsByNode(node_vol_should)
2029

  
2030
    cluster = self.cfg.GetClusterInfo()
2031
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2032
                                                            self.group_info)
2033
    err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
2034
    _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2035
             code=self.ETYPE_WARNING)
2036

  
2037
    for node in node_vol_should:
2038
      n_img = node_image[node]
2039
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2040
        # ignore missing volumes on offline or broken nodes
2041
        continue
2042
      for volume in node_vol_should[node]:
2043
        test = volume not in n_img.volumes
2044
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2045
                 "volume %s missing on node %s", volume, node)
2046

  
2047
    if inst_config.admin_state == constants.ADMINST_UP:
2048
      test = instance not in pnode_img.instances and not pnode_img.offline
2049
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2050
               "instance not running on its primary node %s",
2051
               pnode)
2052
      _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2053
               "instance is marked as running and lives on offline node %s",
2054
               pnode)
2055

  
2056
    diskdata = [(nname, success, status, idx)
2057
                for (nname, disks) in diskstatus.items()
2058
                for idx, (success, status) in enumerate(disks)]
2059

  
2060
    for nname, success, bdev_status, idx in diskdata:
2061
      # the 'ghost node' construction in Exec() ensures that we have a
2062
      # node here
2063
      snode = node_image[nname]
2064
      bad_snode = snode.ghost or snode.offline
2065
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2066
               not success and not bad_snode,
2067
               constants.CV_EINSTANCEFAULTYDISK, instance,
2068
               "couldn't retrieve status for disk/%s on %s: %s",
2069
               idx, nname, bdev_status)
2070
      _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2071
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2072
               constants.CV_EINSTANCEFAULTYDISK, instance,
2073
               "disk/%s on %s is faulty", idx, nname)
2074

  
2075
    _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2076
             constants.CV_ENODERPC, pnode, "instance %s, connection to"
2077
             " primary node failed", instance)
2078

  
2079
    _ErrorIf(len(inst_config.secondary_nodes) > 1,
2080
             constants.CV_EINSTANCELAYOUT,
2081
             instance, "instance has multiple secondary nodes: %s",
2082
             utils.CommaJoin(inst_config.secondary_nodes),
2083
             code=self.ETYPE_WARNING)
2084

  
2085
    if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2086
      # Disk template not compatible with exclusive_storage: no instance
2087
      # node should have the flag set
2088
      es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2089
                                                     inst_config.all_nodes)
2090
      es_nodes = [n for (n, es) in es_flags.items()
2091
                  if es]
2092
      _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2093
               "instance has template %s, which is not supported on nodes"
2094
               " that have exclusive storage set: %s",
2095
               inst_config.disk_template, utils.CommaJoin(es_nodes))
2096

  
2097
    if inst_config.disk_template in constants.DTS_INT_MIRROR:
2098
      instance_nodes = utils.NiceSort(inst_config.all_nodes)
2099
      instance_groups = {}
2100

  
2101
      for node in instance_nodes:
2102
        instance_groups.setdefault(self.all_node_info[node].group,
2103
                                   []).append(node)
2104

  
2105
      pretty_list = [
2106
        "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2107
        # Sort so that we always list the primary node first.
2108
        for group, nodes in sorted(instance_groups.items(),
2109
                                   key=lambda (_, nodes): pnode in nodes,
2110
                                   reverse=True)]
2111

  
2112
      self._ErrorIf(len(instance_groups) > 1,
2113
                    constants.CV_EINSTANCESPLITGROUPS,
2114
                    instance, "instance has primary and secondary nodes in"
2115
                    " different groups: %s", utils.CommaJoin(pretty_list),
2116
                    code=self.ETYPE_WARNING)
2117

  
2118
    inst_nodes_offline = []
2119
    for snode in inst_config.secondary_nodes:
2120
      s_img = node_image[snode]
2121
      _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2122
               snode, "instance %s, connection to secondary node failed",
2123
               instance)
2124

  
2125
      if s_img.offline:
2126
        inst_nodes_offline.append(snode)
2127

  
2128
    # warn that the instance lives on offline nodes
2129
    _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2130
             "instance has offline secondary node(s) %s",
2131
             utils.CommaJoin(inst_nodes_offline))
2132
    # ... or ghost/non-vm_capable nodes
2133
    for node in inst_config.all_nodes:
2134
      _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2135
               instance, "instance lives on ghost node %s", node)
2136
      _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2137
               instance, "instance lives on non-vm_capable node %s", node)
2138

  
2139
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2140
    """Verify if there are any unknown volumes in the cluster.
2141

  
2142
    The .os, .swap and backup volumes are ignored. All other volumes are
2143
    reported as unknown.
2144

  
2145
    @type reserved: L{ganeti.utils.FieldSet}
2146
    @param reserved: a FieldSet of reserved volume names
2147

  
2148
    """
2149
    for node, n_img in node_image.items():
2150
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2151
          self.all_node_info[node].group != self.group_uuid):
... This diff was truncated because it exceeds the maximum size that can be displayed.
