return _BuildInstanceHookEnv(**args)
-def _AdjustCandidatePool(lu):
+def _AdjustCandidatePool(lu, exceptions):
"""Adjust the candidate pool after node operations.
"""
- mod_list = lu.cfg.MaintainCandidatePool()
+ mod_list = lu.cfg.MaintainCandidatePool(exceptions)
if mod_list:
lu.LogInfo("Promoted nodes to master candidate role: %s",
", ".join(node.name for node in mod_list))
for name in mod_list:
lu.context.ReaddNode(name)
- mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
+ mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
if mc_now > mc_max:
lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
(mc_now, mc_max))
+def _DecideSelfPromotion(lu, exceptions=None):
+ """Decide whether I should promote myself as a master candidate.
+
+ """
+ cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
+ mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
+ # the new node will increase mc_max with one, so:
+ mc_should = min(mc_should + 1, cp_size)
+ return mc_now < mc_should
+
+
def _CheckNicsBridgesExist(lu, target_nics, target_node,
profile=constants.PP_DEFAULT):
"""Check that the brigdes needed by a list of nics exist.
_CheckNicsBridgesExist(lu, instance.nics, node)
-def _GetNodePrimaryInstances(cfg, node_name):
- """Returns primary instances on a node.
+def _CheckOSVariant(os, name):
+ """Check whether an OS name conforms to the os variants specification.
+
+ @type os: L{objects.OS}
+ @param os: OS object to check
+ @type name: string
+ @param name: OS name passed by the user, to check for validity
"""
- instances = []
+ if not os.supported_variants:
+ return
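+  # variants are requested by suffixing the OS name: "<os name>+<variant>"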
+ try:
+ variant = name.split("+", 1)[1]
+ except IndexError:
+ raise errors.OpPrereqError("OS name must include a variant")
- for (_, inst) in cfg.GetAllInstancesInfo().iteritems():
- if node_name == inst.primary_node:
- instances.append(inst)
+ if variant not in os.supported_variants:
+ raise errors.OpPrereqError("Unsupported OS variant")
- return instances
+def _GetNodeInstancesInner(cfg, fn):
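+  """Return the configured instances for which fn(instance) is true.
+
+  """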
+ return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
-def _GetNodeSecondaryInstances(cfg, node_name):
- """Returns secondary instances on a node.
+
+def _GetNodeInstances(cfg, node_name):
+ """Returns a list of all primary and secondary instances on a node.
+
+ """
+
+ return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
+
+
+def _GetNodePrimaryInstances(cfg, node_name):
+ """Returns primary instances on a node.
"""
- instances = []
+ return _GetNodeInstancesInner(cfg,
+ lambda inst: node_name == inst.primary_node)
- for (_, inst) in cfg.GetAllInstancesInfo().iteritems():
- if node_name in inst.secondary_nodes:
- instances.append(inst)
- return instances
+def _GetNodeSecondaryInstances(cfg, node_name):
+ """Returns secondary instances on a node.
+
+ """
+ return _GetNodeInstancesInner(cfg,
+ lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
return []
-class LUDestroyCluster(NoHooksLU):
+def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
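+  """Return the indices of the instance's disks found faulty on the given node.
+
+  """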
+ faulty = []
+
+ for dev in instance.disks:
+ cfg.SetDiskID(dev, node_name)
+
+ result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
+ result.Raise("Failed to get disk status from node %s" % node_name,
+ prereq=prereq)
+
+ for idx, bdev_status in enumerate(result.payload):
+ if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
+ faulty.append(idx)
+
+ return faulty
+
+
+class LUPostInitCluster(LogicalUnit):
+ """Logical unit for running hooks after cluster initialization.
+
+ """
+ HPATH = "cluster-init"
+ HTYPE = constants.HTYPE_CLUSTER
+ _OP_REQP = []
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ """
+ env = {"OP_TARGET": self.cfg.GetClusterName()}
+ mn = self.cfg.GetMasterNode()
+ return env, [], [mn]
+
+ def CheckPrereq(self):
+ """No prerequisites to check.
+
+ """
+ return True
+
+ def Exec(self, feedback_fn):
+ """Nothing to do.
+
+ """
+ return True
+
+
+class LUDestroyCluster(LogicalUnit):
"""Logical unit for destroying the cluster.
"""
+ HPATH = "cluster-destroy"
+ HTYPE = constants.HTYPE_CLUSTER
_OP_REQP = []
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ """
+ env = {"OP_TARGET": self.cfg.GetClusterName()}
+ return env, [], []
+
def CheckPrereq(self):
"""Check prerequisites.
"""
master = self.cfg.GetMasterNode()
+
+ # Run post hooks on master node before it's removed
+ hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
+ try:
+ hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
+ except:
+ self.LogWarning("Errors occurred running hooks on %s" % master)
+
result = self.rpc.call_node_stop_master(master, False)
result.Raise("Could not disable the master role")
priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
"""
HPATH = "cluster-verify"
HTYPE = constants.HTYPE_CLUSTER
- _OP_REQP = ["skip_checks"]
+ _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
REQ_BGL = False
+ TCLUSTER = "cluster"
+ TNODE = "node"
+ TINSTANCE = "instance"
+
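+  # each error code is a tuple of (checked item type, error code string)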
+ ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
+ EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
+ EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
+ EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
+  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
+ EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
+ ENODEDRBD = (TNODE, "ENODEDRBD")
+ ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
+ ENODEHOOKS = (TNODE, "ENODEHOOKS")
+ ENODEHV = (TNODE, "ENODEHV")
+ ENODELVM = (TNODE, "ENODELVM")
+ ENODEN1 = (TNODE, "ENODEN1")
+ ENODENET = (TNODE, "ENODENET")
+ ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
+ ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
+ ENODERPC = (TNODE, "ENODERPC")
+ ENODESSH = (TNODE, "ENODESSH")
+ ENODEVERSION = (TNODE, "ENODEVERSION")
+
+ ETYPE_FIELD = "code"
+ ETYPE_ERROR = "ERROR"
+ ETYPE_WARNING = "WARNING"
+
def ExpandNames(self):
self.needed_locks = {
locking.LEVEL_NODE: locking.ALL_SET,
}
self.share_locks = dict.fromkeys(locking.LEVELS, 1)
+ def _Error(self, ecode, item, msg, *args, **kwargs):
+ """Format an error message.
+
+ Based on the opcode's error_codes parameter, either format a
+ parseable error code, or a simpler error string.
+
+ This must be called only from Exec and functions called from Exec.
+
+ """
+ ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
+ itype, etxt = ecode
+ # first complete the msg
+ if args:
+ msg = msg % args
+ # then format the whole message
+ if self.op.error_codes:
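+      # parseable form: "<type>:<code>:<item type>:<item name>:<message>"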
+ msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
+ else:
+ if item:
+ item = " " + item
+ else:
+ item = ""
+ msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
+ # and finally report it via the feedback_fn
+ self._feedback_fn(" - %s" % msg)
+
+ def _ErrorIf(self, cond, *args, **kwargs):
+ """Log an error message if the passed condition is True.
+
+ """
+ cond = bool(cond) or self.op.debug_simulate_errors
+ if cond:
+ self._Error(*args, **kwargs)
+ # do not mark the operation as failed for WARN cases only
+ if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
+ self.bad = self.bad or cond
+
def _VerifyNode(self, nodeinfo, file_list, local_cksum,
- node_result, feedback_fn, master_files,
- drbd_map, vg_name):
+ node_result, master_files, drbd_map, vg_name):
"""Run multiple tests against a node.
Test list:
@param file_list: required list of files
@param local_cksum: dictionary of local files and their checksums
@param node_result: the results from the node
- @param feedback_fn: function used to accumulate results
@param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in the
        form of minor: (instance, must_exist), which correspond to instances
"""
node = nodeinfo.name
+ _ErrorIf = self._ErrorIf
# main result, node_result should be a non-empty dict
- if not node_result or not isinstance(node_result, dict):
- feedback_fn(" - ERROR: unable to verify node %s." % (node,))
- return True
+ test = not node_result or not isinstance(node_result, dict)
+ _ErrorIf(test, self.ENODERPC, node,
+ "unable to verify node: no data returned")
+ if test:
+ return
# compares ganeti version
local_version = constants.PROTOCOL_VERSION
remote_version = node_result.get('version', None)
- if not (remote_version and isinstance(remote_version, (list, tuple)) and
- len(remote_version) == 2):
- feedback_fn(" - ERROR: connection to %s failed" % (node))
- return True
-
- if local_version != remote_version[0]:
- feedback_fn(" - ERROR: incompatible protocol versions: master %s,"
- " node %s %s" % (local_version, node, remote_version[0]))
- return True
+ test = not (remote_version and
+ isinstance(remote_version, (list, tuple)) and
+ len(remote_version) == 2)
+ _ErrorIf(test, self.ENODERPC, node,
+ "connection to node returned invalid data")
+ if test:
+ return
+
+ test = local_version != remote_version[0]
+ _ErrorIf(test, self.ENODEVERSION, node,
+ "incompatible protocol versions: master %s,"
+ " node %s", local_version, remote_version[0])
+ if test:
+ return
# node seems compatible, we can actually try to look into its results
- bad = False
-
# full package version
- if constants.RELEASE_VERSION != remote_version[1]:
- feedback_fn(" - WARNING: software version mismatch: master %s,"
- " node %s %s" %
- (constants.RELEASE_VERSION, node, remote_version[1]))
+ self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
+ self.ENODEVERSION, node,
+ "software version mismatch: master %s, node %s",
+ constants.RELEASE_VERSION, remote_version[1],
+ code=self.ETYPE_WARNING)
# checks vg existence and size > 20G
if vg_name is not None:
vglist = node_result.get(constants.NV_VGLIST, None)
- if not vglist:
- feedback_fn(" - ERROR: unable to check volume groups on node %s." %
- (node,))
- bad = True
- else:
+ test = not vglist
+ _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
+ if not test:
vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
constants.MIN_VG_SIZE)
- if vgstatus:
- feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
- bad = True
+ _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
# checks config file checksum
remote_cksum = node_result.get(constants.NV_FILELIST, None)
- if not isinstance(remote_cksum, dict):
- bad = True
- feedback_fn(" - ERROR: node hasn't returned file checksum data")
- else:
+ test = not isinstance(remote_cksum, dict)
+ _ErrorIf(test, self.ENODEFILECHECK, node,
+ "node hasn't returned file checksum data")
+ if not test:
for file_name in file_list:
node_is_mc = nodeinfo.master_candidate
- must_have_file = file_name not in master_files
- if file_name not in remote_cksum:
- if node_is_mc or must_have_file:
- bad = True
- feedback_fn(" - ERROR: file '%s' missing" % file_name)
- elif remote_cksum[file_name] != local_cksum[file_name]:
- if node_is_mc or must_have_file:
- bad = True
- feedback_fn(" - ERROR: file '%s' has wrong checksum" % file_name)
- else:
- # not candidate and this is not a must-have file
- bad = True
- feedback_fn(" - ERROR: file '%s' should not exist on non master"
- " candidates (and the file is outdated)" % file_name)
- else:
- # all good, except non-master/non-must have combination
- if not node_is_mc and not must_have_file:
- feedback_fn(" - ERROR: file '%s' should not exist on non master"
- " candidates" % file_name)
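+        # a file is required unless it is master-only and the node is not
+        # a master candidate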
+ must_have = (file_name not in master_files) or node_is_mc
+ # missing
+ test1 = file_name not in remote_cksum
+ # invalid checksum
+ test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
+ # existing and good
+ test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
+ _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
+ "file '%s' missing", file_name)
+ _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
+ "file '%s' has wrong checksum", file_name)
+ # not candidate and this is not a must-have file
+ _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
+ "file '%s' should not exist on non master"
+ " candidates (and the file is outdated)", file_name)
+ # all good, except non-master/non-must have combination
+ _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
+ "file '%s' should not exist"
+ " on non master candidates", file_name)
# checks ssh to any
- if constants.NV_NODELIST not in node_result:
- bad = True
- feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data")
- else:
+ test = constants.NV_NODELIST not in node_result
+ _ErrorIf(test, self.ENODESSH, node,
+ "node hasn't returned node ssh connectivity data")
+ if not test:
if node_result[constants.NV_NODELIST]:
- bad = True
- for node in node_result[constants.NV_NODELIST]:
- feedback_fn(" - ERROR: ssh communication with node '%s': %s" %
- (node, node_result[constants.NV_NODELIST][node]))
-
- if constants.NV_NODENETTEST not in node_result:
- bad = True
- feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data")
- else:
+ for a_node, a_msg in node_result[constants.NV_NODELIST].items():
+ _ErrorIf(True, self.ENODESSH, node,
+ "ssh communication with node '%s': %s", a_node, a_msg)
+
+ test = constants.NV_NODENETTEST not in node_result
+ _ErrorIf(test, self.ENODENET, node,
+ "node hasn't returned node tcp connectivity data")
+ if not test:
if node_result[constants.NV_NODENETTEST]:
- bad = True
nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
- for node in nlist:
- feedback_fn(" - ERROR: tcp communication with node '%s': %s" %
- (node, node_result[constants.NV_NODENETTEST][node]))
+ for anode in nlist:
+ _ErrorIf(True, self.ENODENET, node,
+ "tcp communication with node '%s': %s",
+ anode, node_result[constants.NV_NODENETTEST][anode])
hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
if isinstance(hyp_result, dict):
for hv_name, hv_result in hyp_result.iteritems():
- if hv_result is not None:
- feedback_fn(" - ERROR: hypervisor %s verify failure: '%s'" %
- (hv_name, hv_result))
+ test = hv_result is not None
+ _ErrorIf(test, self.ENODEHV, node,
+ "hypervisor %s verify failure: '%s'", hv_name, hv_result)
# check used drbd list
if vg_name is not None:
used_minors = node_result.get(constants.NV_DRBDLIST, [])
- if not isinstance(used_minors, (tuple, list)):
- feedback_fn(" - ERROR: cannot parse drbd status file: %s" %
- str(used_minors))
- else:
+ test = not isinstance(used_minors, (tuple, list))
+ _ErrorIf(test, self.ENODEDRBD, node,
+ "cannot parse drbd status file: %s", str(used_minors))
+ if not test:
for minor, (iname, must_exist) in drbd_map.items():
- if minor not in used_minors and must_exist:
- feedback_fn(" - ERROR: drbd minor %d of instance %s is"
- " not active" % (minor, iname))
- bad = True
+ test = minor not in used_minors and must_exist
+ _ErrorIf(test, self.ENODEDRBD, node,
+ "drbd minor %d of instance %s is not active",
+ minor, iname)
for minor in used_minors:
- if minor not in drbd_map:
- feedback_fn(" - ERROR: unallocated drbd minor %d is in use" %
- minor)
- bad = True
-
- return bad
+ test = minor not in drbd_map
+ _ErrorIf(test, self.ENODEDRBD, node,
+ "unallocated drbd minor %d is in use", minor)
def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
- node_instance, feedback_fn, n_offline):
+ node_instance, n_offline):
"""Verify an instance.
This function checks to see if the required block devices are
available on the instance's node.
"""
- bad = False
-
+ _ErrorIf = self._ErrorIf
node_current = instanceconfig.primary_node
node_vol_should = {}
# ignore missing volumes on offline nodes
continue
for volume in node_vol_should[node]:
- if node not in node_vol_is or volume not in node_vol_is[node]:
- feedback_fn(" - ERROR: volume %s missing on node %s" %
- (volume, node))
- bad = True
+ test = node not in node_vol_is or volume not in node_vol_is[node]
+ _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
+ "volume %s missing on node %s", volume, node)
if instanceconfig.admin_up:
- if ((node_current not in node_instance or
- not instance in node_instance[node_current]) and
- node_current not in n_offline):
- feedback_fn(" - ERROR: instance %s not running on node %s" %
- (instance, node_current))
- bad = True
+ test = ((node_current not in node_instance or
+ not instance in node_instance[node_current]) and
+ node_current not in n_offline)
+ _ErrorIf(test, self.EINSTANCEDOWN, instance,
+ "instance not running on its primary node %s",
+ node_current)
for node in node_instance:
if (not node == node_current):
- if instance in node_instance[node]:
- feedback_fn(" - ERROR: instance %s should not run on node %s" %
- (instance, node))
- bad = True
+ test = instance in node_instance[node]
+ _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
+ "instance should not run on node %s", node)
- return bad
-
- def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
+ def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
"""Verify if there are any unknown volumes in the cluster.
The .os, .swap and backup volumes are ignored. All other volumes are
reported as unknown.
"""
- bad = False
-
for node in node_vol_is:
for volume in node_vol_is[node]:
- if node not in node_vol_should or volume not in node_vol_should[node]:
- feedback_fn(" - ERROR: volume %s on node %s should not exist" %
- (volume, node))
- bad = True
- return bad
+ test = (node not in node_vol_should or
+ volume not in node_vol_should[node])
+ self._ErrorIf(test, self.ENODEORPHANLV, node,
+ "volume %s is unknown", volume)
- def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
+ def _VerifyOrphanInstances(self, instancelist, node_instance):
"""Verify the list of running instances.
This checks what instances are running but unknown to the cluster.
"""
- bad = False
for node in node_instance:
- for runninginstance in node_instance[node]:
- if runninginstance not in instancelist:
- feedback_fn(" - ERROR: instance %s on node %s should not exist" %
- (runninginstance, node))
- bad = True
- return bad
-
- def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
+ for o_inst in node_instance[node]:
+ test = o_inst not in instancelist
+ self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
+ "instance %s on node %s should not exist", o_inst, node)
+
+ def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
"""Verify N+1 Memory Resilience.
Check that if one single node dies we can still start all the instances it
was primary for.
"""
- bad = False
-
for node, nodeinfo in node_info.iteritems():
# This code checks that every node which is now listed as secondary has
# enough memory to host all instances it is supposed to should a single
bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
if bep[constants.BE_AUTO_BALANCE]:
needed_mem += bep[constants.BE_MEMORY]
- if nodeinfo['mfree'] < needed_mem:
- feedback_fn(" - ERROR: not enough memory on node %s to accommodate"
- " failovers should node %s fail" % (node, prinode))
- bad = True
- return bad
+ test = nodeinfo['mfree'] < needed_mem
+ self._ErrorIf(test, self.ENODEN1, node,
+                  "not enough memory to accommodate"
+ " failovers should peer node %s fail", prinode)
def CheckPrereq(self):
"""Check prerequisites.
"""Verify integrity of cluster, performing various test on nodes.
"""
- bad = False
+ self.bad = False
+ _ErrorIf = self._ErrorIf
+ verbose = self.op.verbose
+ self._feedback_fn = feedback_fn
feedback_fn("* Verifying global settings")
for msg in self.cfg.VerifyConfig():
- feedback_fn(" - ERROR: %s" % msg)
+ _ErrorIf(True, self.ECLUSTERCFG, None, msg)
vg_name = self.cfg.GetVGName()
hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
master_node = self.cfg.GetMasterNode()
all_drbd_map = self.cfg.ComputeDRBDMap()
+ feedback_fn("* Verifying node status")
for node_i in nodeinfo:
node = node_i.name
if node_i.offline:
- feedback_fn("* Skipping offline node %s" % (node,))
+ if verbose:
+ feedback_fn("* Skipping offline node %s" % (node,))
n_offline.append(node)
continue
n_drained.append(node)
else:
ntype = "regular"
- feedback_fn("* Verifying node %s (%s)" % (node, ntype))
+ if verbose:
+ feedback_fn("* Verifying node %s (%s)" % (node, ntype))
msg = all_nvinfo[node].fail_msg
+ _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
if msg:
- feedback_fn(" - ERROR: while contacting node %s: %s" % (node, msg))
- bad = True
continue
nresult = all_nvinfo[node].payload
node_drbd = {}
for minor, instance in all_drbd_map[node].items():
- if instance not in instanceinfo:
- feedback_fn(" - ERROR: ghost instance '%s' in temporary DRBD map" %
- instance)
+ test = instance not in instanceinfo
+ _ErrorIf(test, self.ECLUSTERCFG, None,
+ "ghost instance '%s' in temporary DRBD map", instance)
# ghost instance should not be running, but otherwise we
# don't give double warnings (both ghost instance and
# unallocated minor in use)
+ if test:
node_drbd[minor] = (instance, False)
else:
instance = instanceinfo[instance]
node_drbd[minor] = (instance.name, instance.admin_up)
- result = self._VerifyNode(node_i, file_names, local_checksums,
- nresult, feedback_fn, master_files,
- node_drbd, vg_name)
- bad = bad or result
+ self._VerifyNode(node_i, file_names, local_checksums,
+ nresult, master_files, node_drbd, vg_name)
lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
if vg_name is None:
node_volume[node] = {}
elif isinstance(lvdata, basestring):
- feedback_fn(" - ERROR: LVM problem on node %s: %s" %
- (node, utils.SafeEncode(lvdata)))
- bad = True
+ _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
+ utils.SafeEncode(lvdata))
node_volume[node] = {}
elif not isinstance(lvdata, dict):
- feedback_fn(" - ERROR: connection to %s failed (lvlist)" % (node,))
- bad = True
+ _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
continue
else:
node_volume[node] = lvdata
# node_instance
idata = nresult.get(constants.NV_INSTANCELIST, None)
- if not isinstance(idata, list):
- feedback_fn(" - ERROR: connection to %s failed (instancelist)" %
- (node,))
- bad = True
+ test = not isinstance(idata, list)
+ _ErrorIf(test, self.ENODEHV, node,
+ "rpc call to node failed (instancelist)")
+ if test:
continue
node_instance[node] = idata
# node_info
nodeinfo = nresult.get(constants.NV_HVINFO, None)
- if not isinstance(nodeinfo, dict):
- feedback_fn(" - ERROR: connection to %s failed (hvinfo)" % (node,))
- bad = True
+ test = not isinstance(nodeinfo, dict)
+ _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
+ if test:
continue
try:
}
# FIXME: devise a free space model for file based instances as well
if vg_name is not None:
- if (constants.NV_VGLIST not in nresult or
- vg_name not in nresult[constants.NV_VGLIST]):
- feedback_fn(" - ERROR: node %s didn't return data for the"
- " volume group '%s' - it is either missing or broken" %
- (node, vg_name))
- bad = True
+ test = (constants.NV_VGLIST not in nresult or
+ vg_name not in nresult[constants.NV_VGLIST])
+ _ErrorIf(test, self.ENODELVM, node,
+ "node didn't return data for the volume group '%s'"
+ " - it is either missing or broken", vg_name)
+ if test:
continue
node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
except (ValueError, KeyError):
- feedback_fn(" - ERROR: invalid nodeinfo value returned"
- " from node %s" % (node,))
- bad = True
+ _ErrorIf(True, self.ENODERPC, node,
+ "node returned invalid nodeinfo, check lvm/hypervisor")
continue
node_vol_should = {}
+ feedback_fn("* Verifying instance status")
for instance in instancelist:
- feedback_fn("* Verifying instance %s" % instance)
+ if verbose:
+ feedback_fn("* Verifying instance %s" % instance)
inst_config = instanceinfo[instance]
- result = self._VerifyInstance(instance, inst_config, node_volume,
- node_instance, feedback_fn, n_offline)
- bad = bad or result
+ self._VerifyInstance(instance, inst_config, node_volume,
+ node_instance, n_offline)
inst_nodes_offline = []
inst_config.MapLVsByNode(node_vol_should)
instance_cfg[instance] = inst_config
pnode = inst_config.primary_node
+ _ErrorIf(pnode not in node_info and pnode not in n_offline,
+ self.ENODERPC, pnode, "instance %s, connection to"
+ " primary node failed", instance)
if pnode in node_info:
node_info[pnode]['pinst'].append(instance)
- elif pnode not in n_offline:
- feedback_fn(" - ERROR: instance %s, connection to primary node"
- " %s failed" % (instance, pnode))
- bad = True
if pnode in n_offline:
inst_nodes_offline.append(pnode)
# FIXME: does not support file-backed instances
if len(inst_config.secondary_nodes) == 0:
i_non_redundant.append(instance)
- elif len(inst_config.secondary_nodes) > 1:
- feedback_fn(" - WARNING: multiple secondaries for instance %s"
- % instance)
+ _ErrorIf(len(inst_config.secondary_nodes) > 1,
+ self.EINSTANCELAYOUT, instance,
+ "instance has multiple secondary nodes", code="WARNING")
if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
i_non_a_balanced.append(instance)
for snode in inst_config.secondary_nodes:
+ _ErrorIf(snode not in node_info and snode not in n_offline,
+ self.ENODERPC, snode,
+ "instance %s, connection to secondary node"
+               " failed", instance)
+
if snode in node_info:
node_info[snode]['sinst'].append(instance)
if pnode not in node_info[snode]['sinst-by-pnode']:
node_info[snode]['sinst-by-pnode'][pnode] = []
node_info[snode]['sinst-by-pnode'][pnode].append(instance)
- elif snode not in n_offline:
- feedback_fn(" - ERROR: instance %s, connection to secondary node"
- " %s failed" % (instance, snode))
- bad = True
+
if snode in n_offline:
inst_nodes_offline.append(snode)
- if inst_nodes_offline:
- # warn that the instance lives on offline nodes, and set bad=True
- feedback_fn(" - ERROR: instance lives on offline node(s) %s" %
- ", ".join(inst_nodes_offline))
- bad = True
+ # warn that the instance lives on offline nodes
+ _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
+ "instance lives on offline node(s) %s",
+ ", ".join(inst_nodes_offline))
feedback_fn("* Verifying orphan volumes")
- result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
- feedback_fn)
- bad = bad or result
+ self._VerifyOrphanVolumes(node_vol_should, node_volume)
feedback_fn("* Verifying remaining instances")
- result = self._VerifyOrphanInstances(instancelist, node_instance,
- feedback_fn)
- bad = bad or result
+ self._VerifyOrphanInstances(instancelist, node_instance)
if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
feedback_fn("* Verifying N+1 Memory redundancy")
- result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
- bad = bad or result
+ self._VerifyNPlusOneMemory(node_info, instance_cfg)
feedback_fn("* Other Notes")
if i_non_redundant:
if n_drained:
feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))
- return not bad
+ return not self.bad
def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
"""Analyze the post-hooks' result
# Used to change hooks' output to proper indentation
indent_re = re.compile('^', re.M)
feedback_fn("* Hooks Results")
- if not hooks_results:
- feedback_fn(" - ERROR: general communication failure")
- lu_result = 1
- else:
- for node_name in hooks_results:
- show_node_header = True
- res = hooks_results[node_name]
- msg = res.fail_msg
- if msg:
- if res.offline:
- # no need to warn or set fail return value
- continue
- feedback_fn(" Communication failure in hooks execution: %s" %
- msg)
+ assert hooks_results, "invalid result from hooks"
+
+ for node_name in hooks_results:
+ show_node_header = True
+ res = hooks_results[node_name]
+ msg = res.fail_msg
+ test = msg and not res.offline
+ self._ErrorIf(test, self.ENODEHOOKS, node_name,
+ "Communication failure in hooks execution: %s", msg)
+ if test:
+ # override manually lu_result here as _ErrorIf only
+ # overrides self.bad
+ lu_result = 1
+ continue
+ for script, hkr, output in res.payload:
+ test = hkr == constants.HKR_FAIL
+ self._ErrorIf(test, self.ENODEHOOKS, node_name,
+ "Script %s failed, output:", script)
+ if test:
+ output = indent_re.sub(' ', output)
+ feedback_fn("%s" % output)
lu_result = 1
- continue
- for script, hkr, output in res.payload:
- if hkr == constants.HKR_FAIL:
- # The node header is only shown once, if there are
- # failing hooks on that node
- if show_node_header:
- feedback_fn(" Node %s:" % node_name)
- show_node_header = False
- feedback_fn(" ERROR: Script %s failed, output:" % script)
- output = indent_re.sub(' ', output)
- feedback_fn("%s" % output)
- lu_result = 1
return lu_result
return result
+class LURepairDiskSizes(NoHooksLU):
+ """Verifies the cluster disks sizes.
+
+ """
+ _OP_REQP = ["instances"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ if not isinstance(self.op.instances, list):
+ raise errors.OpPrereqError("Invalid argument type 'instances'")
+
+ if self.op.instances:
+ self.wanted_names = []
+ for name in self.op.instances:
+ full_name = self.cfg.ExpandInstanceName(name)
+ if full_name is None:
+ raise errors.OpPrereqError("Instance '%s' not known" % name)
+ self.wanted_names.append(full_name)
+ self.needed_locks = {
+ locking.LEVEL_NODE: [],
+ locking.LEVEL_INSTANCE: self.wanted_names,
+ }
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+ else:
+ self.wanted_names = None
+ self.needed_locks = {
+ locking.LEVEL_NODE: locking.ALL_SET,
+ locking.LEVEL_INSTANCE: locking.ALL_SET,
+ }
+ self.share_locks = dict(((i, 1) for i in locking.LEVELS))
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE and self.wanted_names is not None:
+ self._LockInstancesNodes(primary_only=True)
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This only checks the optional instance list against the existing names.
+
+ """
+ if self.wanted_names is None:
+ self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
+
+ self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
+ in self.wanted_names]
+
+ def _EnsureChildSizes(self, disk):
+ """Ensure children of the disk have the needed disk size.
+
+ This is valid mainly for DRBD8 and fixes an issue where the
+ children have smaller disk size.
+
+ @param disk: an L{ganeti.objects.Disk} object
+
+ """
+ if disk.dev_type == constants.LD_DRBD8:
+ assert disk.children, "Empty children for DRBD8?"
+ fchild = disk.children[0]
+ mismatch = fchild.size < disk.size
+ if mismatch:
+ self.LogInfo("Child disk has size %d, parent %d, fixing",
+ fchild.size, disk.size)
+ fchild.size = disk.size
+
+ # and we recurse on this child only, not on the metadev
+ return self._EnsureChildSizes(fchild) or mismatch
+ else:
+ return False
+
+ def Exec(self, feedback_fn):
+ """Verify the size of cluster disks.
+
+ """
+ # TODO: check child disks too
+ # TODO: check differences in size between primary/secondary nodes
+ per_node_disks = {}
+ for instance in self.wanted_instances:
+ pnode = instance.primary_node
+ if pnode not in per_node_disks:
+ per_node_disks[pnode] = []
+ for idx, disk in enumerate(instance.disks):
+ per_node_disks[pnode].append((instance, idx, disk))
+
+ changed = []
+ for node, dskl in per_node_disks.items():
+ newl = [v[2].Copy() for v in dskl]
+ for dsk in newl:
+ self.cfg.SetDiskID(dsk, node)
+ result = self.rpc.call_blockdev_getsizes(node, newl)
+ if result.fail_msg:
+ self.LogWarning("Failure in blockdev_getsizes call to node"
+ " %s, ignoring", node)
+ continue
+ if len(result.data) != len(dskl):
+ self.LogWarning("Invalid result from node %s, ignoring node results",
+ node)
+ continue
+ for ((instance, idx, disk), size) in zip(dskl, result.data):
+ if size is None:
+ self.LogWarning("Disk %d of instance %s did not return size"
+ " information, ignoring", idx, instance.name)
+ continue
+ if not isinstance(size, (int, long)):
+ self.LogWarning("Disk %d of instance %s did not return valid"
+ " size information, ignoring", idx, instance.name)
+ continue
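+        # the reported size is in bytes; configured disk sizes are in MiB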
+ size = size >> 20
+ if size != disk.size:
+ self.LogInfo("Disk %d of instance %s has mismatched size,"
+ " correcting: recorded %d, actual %d", idx,
+ instance.name, disk.size, size)
+ disk.size = size
+ self.cfg.Update(instance)
+ changed.append((instance.name, idx, size))
+ if self._EnsureChildSizes(disk):
+ self.cfg.Update(instance)
+ changed.append((instance.name, idx, disk.size))
+ return changed
+
+
class LURenameCluster(LogicalUnit):
"""Rename the cluster.
invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
if invalid_hvs:
raise errors.OpPrereqError("Enabled hypervisors contains invalid"
- " entries: %s" % invalid_hvs)
+                                 " entries: %s" % ", ".join(invalid_hvs))
else:
self.hv_list = cluster.enabled_hypervisors
if self.op.candidate_pool_size is not None:
self.cluster.candidate_pool_size = self.op.candidate_pool_size
# we need to update the pool size here, otherwise the save will fail
- _AdjustCandidatePool(self)
+ _AdjustCandidatePool(self, [])
self.cfg.Update(self.cluster)
else:
rem_time = "no time estimate"
lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
- (instance.disks[i].iv_name, mstat.sync_percent, rem_time))
+ (instance.disks[i].iv_name, mstat.sync_percent,
+ rem_time))
# if we're done but degraded, let's do a few small retries, to
# make sure we see a stable and not transient situation; therefore
result = False
else:
if ldisk:
- result = result and not rstats.payload.ldisk_degraded
+ result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
else:
result = result and not rstats.payload.is_degraded
_OP_REQP = ["output_fields", "names"]
REQ_BGL = False
_FIELDS_STATIC = utils.FieldSet()
- _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
+ _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
+ # Fields that need calculation of global os validity
+ _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
def ExpandNames(self):
if self.op.names:
for node_name, nr in rlist.items():
if nr.fail_msg or not nr.payload:
continue
- for name, path, status, diagnose in nr.payload:
+ for name, path, status, diagnose, variants in nr.payload:
if name not in all_os:
# build a list of nodes for this os containing empty lists
# for each node in node_list
all_os[name] = {}
for nname in good_nodes:
all_os[name][nname] = []
- all_os[name][node_name].append((path, status, diagnose))
+ all_os[name][node_name].append((path, status, diagnose, variants))
return all_os
def Exec(self, feedback_fn):
node_data = self.rpc.call_os_diagnose(valid_nodes)
pol = self._DiagnoseByOS(valid_nodes, node_data)
output = []
+ calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
+ calc_variants = "variants" in self.op.output_fields
+
for os_name, os_data in pol.items():
row = []
+ if calc_valid:
+ valid = True
+ variants = None
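+        # an OS is valid only if it is valid on every node; the variants
+        # reported are the intersection of the per-node variant lists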
+ for osl in os_data.values():
+ valid = valid and osl and osl[0][1]
+ if not valid:
+ variants = None
+ break
+ if calc_variants:
+ node_variants = osl[0][3]
+ if variants is None:
+ variants = node_variants
+ else:
+ variants = [v for v in variants if v in node_variants]
+
for field in self.op.output_fields:
if field == "name":
val = os_name
elif field == "valid":
- val = utils.all([osl and osl[0][1] for osl in os_data.values()])
+ val = valid
elif field == "node_status":
# this is just a copy of the dict
val = {}
for node_name, nos_list in os_data.items():
val[node_name] = nos_list
+ elif field == "variants":
+ val = variants
else:
raise errors.ParameterError(field)
row.append(val)
"NODE_NAME": self.op.node_name,
}
all_nodes = self.cfg.GetNodeList()
- all_nodes.remove(self.op.node_name)
+ if self.op.node_name in all_nodes:
+ all_nodes.remove(self.op.node_name)
return env, all_nodes, all_nodes
def CheckPrereq(self):
logging.info("Stopping the node daemon and removing configs from node %s",
node.name)
+ # Promote nodes to master candidate as needed
+ _AdjustCandidatePool(self, exceptions=[node.name])
self.context.RemoveNode(node.name)
+ # Run post hooks on the node before it's removed
+ hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
+ try:
+ h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
+ except:
+ self.LogWarning("Errors occurred running hooks on %s" % node.name)
+
result = self.rpc.call_node_leave_cluster(node.name)
msg = result.fail_msg
if msg:
self.LogWarning("Errors encountered on the remote node while leaving"
" the cluster: %s", msg)
- # Promote nodes to master candidate as needed
- _AdjustCandidatePool(self)
-
class LUQueryNodes(NoHooksLU):
"""Logical unit for querying nodes.
"""
_OP_REQP = ["output_fields", "names", "use_locking"]
REQ_BGL = False
+
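+  # fields copied verbatim from the node objects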
+ _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
+ "master_candidate", "offline", "drained"]
+
_FIELDS_DYNAMIC = utils.FieldSet(
"dtotal", "dfree",
"mtotal", "mnode", "mfree",
"ctotal", "cnodes", "csockets",
)
- _FIELDS_STATIC = utils.FieldSet(
- "name", "pinst_cnt", "sinst_cnt",
+ _FIELDS_STATIC = utils.FieldSet(*[
+ "pinst_cnt", "sinst_cnt",
"pinst_list", "sinst_list",
"pip", "sip", "tags",
- "serial_no",
- "master_candidate",
"master",
- "offline",
- "drained",
- "role",
+ "role"] + _SIMPLE_FIELDS
)
def ExpandNames(self):
for node in nodelist:
node_output = []
for field in self.op.output_fields:
- if field == "name":
- val = node.name
+ if field in self._SIMPLE_FIELDS:
+ val = getattr(node, field)
elif field == "pinst_list":
val = list(node_to_primary[node.name])
elif field == "sinst_list":
val = node.secondary_ip
elif field == "tags":
val = list(node.GetTags())
- elif field == "serial_no":
- val = node.serial_no
- elif field == "master_candidate":
- val = node.master_candidate
elif field == "master":
val = node.name == master_node
- elif field == "offline":
- val = node.offline
- elif field == "drained":
- val = node.drained
elif self._FIELDS_DYNAMIC.Matches(field):
val = live_data[node.name].get(field, None)
elif field == "role":
raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
" based ping to noded port")
- cp_size = self.cfg.GetClusterInfo().candidate_pool_size
if self.op.readd:
exceptions = [node]
else:
exceptions = []
- mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
- # the new node will increase mc_max with one, so:
- mc_max = min(mc_max + 1, cp_size)
- self.master_candidate = mc_now < mc_max
+
+ self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
if self.op.readd:
self.new_node = self.cfg.GetNodeInfo(node)
priv_key, pub_key]
for i in keyfiles:
- f = open(i, 'r')
- try:
- keyarray.append(f.read())
- finally:
- f.close()
+ keyarray.append(utils.ReadFile(i))
result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
keyarray[2],
node_verify_list = [self.cfg.GetMasterNode()]
node_verify_param = {
- 'nodelist': [node],
+ constants.NV_NODELIST: [node],
# TODO: do a node-net-test as well?
}
self.cfg.GetClusterName())
for verifier in node_verify_list:
result[verifier].Raise("Cannot communicate with node %s" % verifier)
- nl_payload = result[verifier].payload['nodelist']
+ nl_payload = result[verifier].payload[constants.NV_NODELIST]
if nl_payload:
for failed in nl_payload:
- feedback_fn("ssh/hostname verification failed %s -> %s" %
+ feedback_fn("ssh/hostname verification failed"
+ " (checking from %s): %s" %
(verifier, nl_payload[failed]))
raise errors.OpExecError("ssh/hostname verification failed.")
# and make sure the new node will not have old files around
if not new_node.master_candidate:
result = self.rpc.call_node_demote_from_mc(new_node.name)
- msg = result.RemoteFailMsg()
+ msg = result.fail_msg
if msg:
self.LogWarning("Node failed to demote itself from master"
" candidate status: %s" % msg)
"""
node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
- if ((self.op.master_candidate == False or self.op.offline == True or
- self.op.drained == True) and node.master_candidate):
- # we will demote the node from master_candidate
+ if (self.op.master_candidate is not None or
+ self.op.drained is not None or
+ self.op.offline is not None):
+ # we can't change the master's node flags
if self.op.node_name == self.cfg.GetMasterNode():
- raise errors.OpPrereqError("The master node has to be a"
- " master candidate, online and not drained")
+ raise errors.OpPrereqError("The master role can be changed"
+ " only via masterfailover")
+
+ # Boolean value that tells us whether we're offlining or draining the node
+ offline_or_drain = self.op.offline == True or self.op.drained == True
+ deoffline_or_drain = self.op.offline == False or self.op.drained == False
+
+ if (node.master_candidate and
+ (self.op.master_candidate == False or offline_or_drain)):
cp_size = self.cfg.GetClusterInfo().candidate_pool_size
- num_candidates, _ = self.cfg.GetMasterCandidateStats()
- if num_candidates <= cp_size:
+ mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
+ if mc_now <= cp_size:
msg = ("Not enough master candidates (desired"
- " %d, new value will be %d)" % (cp_size, num_candidates-1))
- if self.op.force:
+ " %d, new value will be %d)" % (cp_size, mc_now-1))
+ # Only allow forcing the operation if it's an offline/drain operation,
+ # and we could not possibly promote more nodes.
+ # FIXME: this can still lead to issues if in any way another node which
+ # could be promoted appears in the meantime.
+ if self.op.force and offline_or_drain and mc_should == mc_max:
self.LogWarning(msg)
else:
raise errors.OpPrereqError(msg)
raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
" to master_candidate" % node.name)
+ # If we're being deofflined/drained, we'll MC ourself if needed
+ if (deoffline_or_drain and not offline_or_drain and not
+ self.op.master_candidate == True):
+ self.op.master_candidate = _DecideSelfPromotion(self)
+ if self.op.master_candidate:
+ self.LogInfo("Autopromoting node to master candidate")
+
return
def Exec(self, feedback_fn):
changed_mc = True
result.append(("master_candidate", "auto-demotion due to drain"))
rrc = self.rpc.call_node_demote_from_mc(node.name)
- msg = rrc.RemoteFailMsg()
+ msg = rrc.fail_msg
if msg:
self.LogWarning("Node failed to demote itself: %s" % msg)
if node.offline:
"master_netdev": cluster.master_netdev,
"volume_group_name": cluster.volume_group_name,
"file_storage_dir": cluster.file_storage_dir,
+ "ctime": cluster.ctime,
+ "mtime": cluster.mtime,
+ "uuid": cluster.uuid,
+ "tags": list(cluster.GetTags()),
}
return result
_OP_REQP = []
REQ_BGL = False
_FIELDS_DYNAMIC = utils.FieldSet()
- _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
+ _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
+ "watcher_pause")
def ExpandNames(self):
self.needed_locks = {}
entry = self.cfg.GetMasterNode()
elif field == "drain_flag":
entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
+ elif field == "watcher_pause":
+        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
else:
raise errors.ParameterError(field)
values.append(entry)
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
_CheckNodeOnline(self, self.instance.primary_node)
+ if not hasattr(self.op, "ignore_size"):
+ self.op.ignore_size = False
def Exec(self, feedback_fn):
"""Activate the disks.
"""
- disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
+ disks_ok, disks_info = \
+ _AssembleInstanceDisks(self, self.instance,
+ ignore_size=self.op.ignore_size)
if not disks_ok:
raise errors.OpExecError("Cannot activate block devices")
return disks_info
-def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
+def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
+ ignore_size=False):
"""Prepare the block devices for an instance.
This sets up the block devices on all nodes.
@type ignore_secondaries: boolean
@param ignore_secondaries: if true, errors on secondary nodes
won't result in an error return from the function
+ @type ignore_size: boolean
+ @param ignore_size: if true, the current known size of the disk
+ will not be used during the disk activation, useful for cases
+ when the size is wrong
@return: False if the operation failed, otherwise a list of
(host, instance_visible_name, node_visible_name)
with the mapping from node devices to instance devices
# 1st pass, assemble on all nodes in secondary mode
for inst_disk in instance.disks:
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
+ if ignore_size:
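+        # work on a copy so that clearing the size does not modify the
+        # configuration's disk object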
+ node_disk = node_disk.Copy()
+ node_disk.UnsetSize()
lu.cfg.SetDiskID(node_disk, node)
result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
msg = result.fail_msg
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
if node != instance.primary_node:
continue
+ if ignore_size:
+ node_disk = node_disk.Copy()
+ node_disk.UnsetSize()
lu.cfg.SetDiskID(node_disk, node)
result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
msg = result.fail_msg
instance.primary_node))
self.op.os_type = getattr(self.op, "os_type", None)
+ self.op.force_variant = getattr(self.op, "force_variant", False)
if self.op.os_type is not None:
# OS verification
pnode = self.cfg.GetNodeInfo(
result = self.rpc.call_os_get(pnode.name, self.op.os_type)
result.Raise("OS '%s' not in supported OS list for primary node %s" %
(self.op.os_type, pnode.name), prereq=True)
+ if not self.op.force_variant:
+ _CheckOSVariant(result.payload, self.op.os_type)
self.instance = instance
_ShutdownInstanceDisks(self, inst)
+class LURecreateInstanceDisks(LogicalUnit):
+ """Recreate an instance's missing disks.
+
+ """
+ HPATH = "instance-recreate-disks"
+ HTYPE = constants.HTYPE_INSTANCE
+ _OP_REQP = ["instance_name", "disks"]
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ """Check the arguments.
+
+ """
+ if not isinstance(self.op.disks, list):
+ raise errors.OpPrereqError("Invalid disks parameter")
+ for item in self.op.disks:
+ if (not isinstance(item, int) or
+ item < 0):
+ raise errors.OpPrereqError("Invalid disk specification '%s'" %
+ str(item))
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on master, primary and secondary nodes of the instance.
+
+ """
+ env = _BuildInstanceHookEnvByObject(self, self.instance)
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
+ return env, nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster and is not running.
+
+ """
+ instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+ _CheckNodeOnline(self, instance.primary_node)
+
+ if instance.disk_template == constants.DT_DISKLESS:
+ raise errors.OpPrereqError("Instance '%s' has no disks" %
+ self.op.instance_name)
+ if instance.admin_up:
+ raise errors.OpPrereqError("Instance '%s' is marked to be up" %
+ self.op.instance_name)
+ remote_info = self.rpc.call_instance_info(instance.primary_node,
+ instance.name,
+ instance.hypervisor)
+ remote_info.Raise("Error checking node %s" % instance.primary_node,
+ prereq=True)
+ if remote_info.payload:
+ raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
+ (self.op.instance_name,
+ instance.primary_node))
+
+ if not self.op.disks:
+ self.op.disks = range(len(instance.disks))
+ else:
+ for idx in self.op.disks:
+ if idx >= len(instance.disks):
+ raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
+
+ self.instance = instance
+
+ def Exec(self, feedback_fn):
+ """Recreate the disks.
+
+ """
+ to_skip = []
+ for idx, disk in enumerate(self.instance.disks):
+ if idx not in self.op.disks: # disk idx has not been passed in
+ to_skip.append(idx)
+ continue
+
+ _CreateDisks(self, self.instance, to_skip=to_skip)
+
+
class LURenameInstance(LogicalUnit):
"""Rename an instance.
"""
_OP_REQP = ["output_fields", "names", "use_locking"]
REQ_BGL = False
+ _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
+ "serial_no", "ctime", "mtime", "uuid"]
_FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
"admin_state",
"disk_template", "ip", "mac", "bridge",
r"(nic)\.(bridge)/([0-9]+)",
r"(nic)\.(macs|ips|modes|links|bridges)",
r"(disk|nic)\.(count)",
- "serial_no", "hypervisor", "hvparams",] +
+ "hvparams",
+ ] + _SIMPLE_FIELDS +
["hv/%s" % name
for name in constants.HVS_PARAMETERS] +
["be/%s" % name
if result.offline:
# offline nodes will be in both lists
off_nodes.append(name)
- if result.failed or result.fail_msg:
+ if result.fail_msg:
bad_nodes.append(name)
else:
if result.payload:
nic.nicparams) for nic in instance.nics]
for field in self.op.output_fields:
st_match = self._FIELDS_STATIC.Matches(field)
- if field == "name":
- val = instance.name
- elif field == "os":
- val = instance.os
+ if field in self._SIMPLE_FIELDS:
+ val = getattr(instance, field)
elif field == "pnode":
val = instance.primary_node
elif field == "snodes":
val = _ComputeDiskSize(instance.disk_template, disk_sizes)
elif field == "tags":
val = list(instance.GetTags())
- elif field == "serial_no":
- val = instance.serial_no
- elif field == "network_port":
- val = instance.network_port
- elif field == "hypervisor":
- val = instance.hypervisor
elif field == "hvparams":
val = i_hv
elif (field.startswith(HVPREFIX) and
return env, nl, nl
+class LUMoveInstance(LogicalUnit):
+ """Move an instance by data-copying.
+
+ """
+ HPATH = "instance-move"
+ HTYPE = constants.HTYPE_INSTANCE
+ _OP_REQP = ["instance_name", "target_node"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ target_node = self.cfg.ExpandNodeName(self.op.target_node)
+ if target_node is None:
+ raise errors.OpPrereqError("Node '%s' not known" %
+ self.op.target_node)
+ self.op.target_node = target_node
+ self.needed_locks[locking.LEVEL_NODE] = [target_node]
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes(primary_only=True)
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on master, primary and secondary nodes of the instance.
+
+ """
+ env = {
+ "TARGET_NODE": self.op.target_node,
+ }
+ env.update(_BuildInstanceHookEnvByObject(self, self.instance))
+ nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
+ self.op.target_node]
+ return env, nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+
+ node = self.cfg.GetNodeInfo(self.op.target_node)
+ assert node is not None, \
+ "Cannot retrieve locked node %s" % self.op.target_node
+
+ self.target_node = target_node = node.name
+
+ if target_node == instance.primary_node:
+ raise errors.OpPrereqError("Instance %s is already on the node %s" %
+ (instance.name, target_node))
+
+ bep = self.cfg.GetClusterInfo().FillBE(instance)
+
+ for idx, dsk in enumerate(instance.disks):
+ if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
+ raise errors.OpPrereqError("Instance disk %d has a complex layout,"
+                                   " cannot copy" % idx)
+
+ _CheckNodeOnline(self, target_node)
+ _CheckNodeNotDrained(self, target_node)
+
+ if instance.admin_up:
+ # check memory requirements on the secondary node
+ _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
+ instance.name, bep[constants.BE_MEMORY],
+ instance.hypervisor)
+ else:
+ self.LogInfo("Not checking memory on the secondary node as"
+ " instance will not be started")
+
+ # check bridge existance
+ _CheckInstanceBridgesExist(self, instance, node=target_node)
+
+ def Exec(self, feedback_fn):
+ """Move an instance.
+
+ The move is done by shutting it down on its present node, copying
+ the data over (slow) and starting it on the new node.
+
+ """
+ instance = self.instance
+
+ source_node = instance.primary_node
+ target_node = self.target_node
+
+ self.LogInfo("Shutting down instance %s on source node %s",
+ instance.name, source_node)
+
+ result = self.rpc.call_instance_shutdown(source_node, instance)
+ msg = result.fail_msg
+ if msg:
+ if self.op.ignore_consistency:
+ self.proc.LogWarning("Could not shutdown instance %s on node %s."
+ " Proceeding anyway. Please make sure node"
+ " %s is down. Error details: %s",
+ instance.name, source_node, source_node, msg)
+ else:
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, source_node, msg))
+
+ # create the target disks
+ try:
+ _CreateDisks(self, instance, target_node=target_node)
+ except errors.OpExecError:
+ self.LogWarning("Device creation failed, reverting...")
+ try:
+ _RemoveDisks(self, instance, target_node=target_node)
+ finally:
+ self.cfg.ReleaseDRBDMinors(instance.name)
+ raise
+
+ cluster_name = self.cfg.GetClusterInfo().cluster_name
+
+ errs = []
+ # activate, get path, copy the data over
+ for idx, disk in enumerate(instance.disks):
+ self.LogInfo("Copying data for disk %d", idx)
+ result = self.rpc.call_blockdev_assemble(target_node, disk,
+ instance.name, True)
+ if result.fail_msg:
+ self.LogWarning("Can't assemble newly created disk %d: %s",
+ idx, result.fail_msg)
+ errs.append(result.fail_msg)
+ break
+ dev_path = result.payload
+ result = self.rpc.call_blockdev_export(source_node, disk,
+ target_node, dev_path,
+ cluster_name)
+ if result.fail_msg:
+ self.LogWarning("Can't copy data over for disk %d: %s",
+ idx, result.fail_msg)
+ errs.append(result.fail_msg)
+ break
+
+ if errs:
+ self.LogWarning("Some disks failed to copy, aborting")
+ try:
+ _RemoveDisks(self, instance, target_node=target_node)
+ finally:
+ self.cfg.ReleaseDRBDMinors(instance.name)
+ raise errors.OpExecError("Errors during disk copy: %s" %
+ (",".join(errs),))
+
+ instance.primary_node = target_node
+ self.cfg.Update(instance)
+
+ self.LogInfo("Removing the disks on the original node")
+ _RemoveDisks(self, instance, target_node=source_node)
+
+ # Only start the instance if it's marked as up
+ if instance.admin_up:
+ self.LogInfo("Starting instance %s on node %s",
+ instance.name, target_node)
+
+ disks_ok, _ = _AssembleInstanceDisks(self, instance,
+ ignore_secondaries=True)
+ if not disks_ok:
+ _ShutdownInstanceDisks(self, instance)
+ raise errors.OpExecError("Can't activate the instance's disks")
+
+ result = self.rpc.call_instance_start(target_node, instance, None, None)
+ msg = result.fail_msg
+ if msg:
+ _ShutdownInstanceDisks(self, instance)
+ raise errors.OpExecError("Could not start instance %s on node %s: %s" %
+ (instance.name, target_node, msg))
+
+
class LUMigrateNode(LogicalUnit):
"""Migrate all instances from a node.
return "originstname+%s" % instance.name
-def _CreateDisks(lu, instance):
+def _CreateDisks(lu, instance, to_skip=None, target_node=None):
"""Create all disks for an instance.
This abstracts away some work from AddInstance.
@param lu: the logical unit on whose behalf we execute
@type instance: L{objects.Instance}
@param instance: the instance whose disks we should create
+ @type to_skip: list
+ @param to_skip: list of indices to skip
+ @type target_node: string
+ @param target_node: if passed, overrides the target node for creation
@rtype: boolean
@return: the success of the creation
"""
info = _GetInstanceInfoText(instance)
- pnode = instance.primary_node
+ if target_node is None:
+ pnode = instance.primary_node
+ all_nodes = instance.all_nodes
+ else:
+ pnode = target_node
+ all_nodes = [pnode]
if instance.disk_template == constants.DT_FILE:
file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
result.Raise("Failed to create directory '%s' on"
- " node %s: %s" % (file_storage_dir, pnode))
+ " node %s" % (file_storage_dir, pnode))
# Note: this needs to be kept in sync with adding of disks in
# LUSetInstanceParams
- for device in instance.disks:
+ for idx, device in enumerate(instance.disks):
+ if to_skip and idx in to_skip:
+ continue
logging.info("Creating volume %s for instance %s",
device.iv_name, instance.name)
#HARDCODE
- for node in instance.all_nodes:
+ for node in all_nodes:
f_create = node == pnode
_CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
-def _RemoveDisks(lu, instance):
+def _RemoveDisks(lu, instance, target_node=None):
"""Remove all disks for an instance.
This abstracts away some work from `AddInstance()` and
@param lu: the logical unit on whose behalf we execute
@type instance: L{objects.Instance}
@param instance: the instance whose disks we should remove
+ @type target_node: string
+ @param target_node: used to override the node on which to remove the disks
@rtype: boolean
@return: the success of the removal
all_result = True
for device in instance.disks:
- for node, disk in device.ComputeNodeTree(instance.primary_node):
+ if target_node:
+ edata = [(target_node, device)]
+ else:
+ edata = device.ComputeNodeTree(instance.primary_node)
+ for node, disk in edata:
lu.cfg.SetDiskID(disk, node)
msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
if msg:
if instance.disk_template == constants.DT_FILE:
file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
- result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
- file_storage_dir)
- msg = result.fail_msg
- if msg:
+ if target_node:
+ tgt = target_node
+ else:
+ tgt = instance.primary_node
+ result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
+ if result.fail_msg:
lu.LogWarning("Could not remove directory '%s' on node %s: %s",
- file_storage_dir, instance.primary_node, msg)
+ file_storage_dir, tgt, result.fail_msg)
all_result = False
return all_result
if not utils.IsValidMac(mac.lower()):
raise errors.OpPrereqError("Invalid MAC address specified: %s" %
mac)
+ else:
+ # or validate/reserve the current one
+ if self.cfg.IsMacInUse(mac):
+ raise errors.OpPrereqError("MAC address %s already in use"
+ " in cluster" % mac)
+
# bridge verification
bridge = nic.get("bridge", None)
link = nic.get("link", None)
self.op.src_path = src_path = \
os.path.join(constants.EXPORT_DIR, src_path)
+ # On import force_variant must be True: if the variant was forced at the
+ # initial install, the only chance for the import to succeed is to force
+ # it again.
+ self.op.force_variant = True
+
else: # INSTANCE_CREATE
if getattr(self.op, "os_type", None) is None:
raise errors.OpPrereqError("No guest OS specified")
+ self.op.force_variant = getattr(self.op, "force_variant", False)
def _RunAllocator(self):
"""Run the allocator based on input opcode.
result = self.rpc.call_os_get(pnode.name, self.op.os_type)
result.Raise("OS '%s' not in supported os list for primary node %s" %
(self.op.os_type, pnode.name), prereq=True)
+ if not self.op.force_variant:
+ _CheckOSVariant(result.payload, self.op.os_type)
_CheckNicsBridgesExist(self, self.nics, self.pnode.name)
"""
# check for valid parameter combination
- cnt = [remote_node, iallocator].count(None)
if mode == constants.REPLACE_DISK_CHG:
- if cnt == 2:
+ if remote_node is None and iallocator is None:
raise errors.OpPrereqError("When changing the secondary either an"
" iallocator script must be used or the"
" new node given")
- elif cnt == 0:
+
+ if remote_node is not None and iallocator is not None:
raise errors.OpPrereqError("Give either the iallocator or the new"
" secondary, not both")
- else: # not replacing the secondary
- if cnt != 2:
- raise errors.OpPrereqError("The iallocator and new node options can"
- " be used only when changing the"
- " secondary node")
+
+ elif remote_node is not None or iallocator is not None:
+ # Not replacing the secondary
+ raise errors.OpPrereqError("The iallocator and new node options can"
+ " only be used when changing the"
+ " secondary node")
@staticmethod
def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
return remote_node_name
+ def _FindFaultyDisks(self, node_name):
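+ """Wrapper over L{_FindFaultyInstanceDisks} using this tasklet's instance.
+
+ """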
+ return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
+ node_name, True)
+
def CheckPrereq(self):
"""Check prerequisites.
raise errors.OpPrereqError("The specified node is already the"
" secondary node of the instance.")
- if self.mode == constants.REPLACE_DISK_PRI:
- self.target_node = self.instance.primary_node
- self.other_node = secondary_node
- check_nodes = [self.target_node, self.other_node]
+ if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
+ constants.REPLACE_DISK_CHG):
+ raise errors.OpPrereqError("Cannot specify disks to be replaced")
+
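+ # in automatic mode, the set of faulty disks determines which node's
+ # storage gets replaced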
+ if self.mode == constants.REPLACE_DISK_AUTO:
+ faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
+ faulty_secondary = self._FindFaultyDisks(secondary_node)
+
+ if faulty_primary and faulty_secondary:
+ raise errors.OpPrereqError("Instance %s has faulty disks on more than"
+ " one node and can not be repaired"
+ " automatically" % self.instance_name)
+
+ if faulty_primary:
+ self.disks = faulty_primary
+ self.target_node = self.instance.primary_node
+ self.other_node = secondary_node
+ check_nodes = [self.target_node, self.other_node]
+ elif faulty_secondary:
+ self.disks = faulty_secondary
+ self.target_node = secondary_node
+ self.other_node = self.instance.primary_node
+ check_nodes = [self.target_node, self.other_node]
+ else:
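+ # no faulty disks were found on either node, so there is nothing to replace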
+ self.disks = []
+ check_nodes = []
- elif self.mode == constants.REPLACE_DISK_SEC:
- self.target_node = secondary_node
- self.other_node = self.instance.primary_node
- check_nodes = [self.target_node, self.other_node]
+ else:
+ # Non-automatic modes
+ if self.mode == constants.REPLACE_DISK_PRI:
+ self.target_node = self.instance.primary_node
+ self.other_node = secondary_node
+ check_nodes = [self.target_node, self.other_node]
- elif self.mode == constants.REPLACE_DISK_CHG:
- self.new_node = remote_node
- self.other_node = self.instance.primary_node
- self.target_node = secondary_node
- check_nodes = [self.new_node, self.other_node]
+ elif self.mode == constants.REPLACE_DISK_SEC:
+ self.target_node = secondary_node
+ self.other_node = self.instance.primary_node
+ check_nodes = [self.target_node, self.other_node]
- _CheckNodeNotDrained(self.lu, remote_node)
+ elif self.mode == constants.REPLACE_DISK_CHG:
+ self.new_node = remote_node
+ self.other_node = self.instance.primary_node
+ self.target_node = secondary_node
+ check_nodes = [self.new_node, self.other_node]
- else:
- raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
- self.mode)
+ _CheckNodeNotDrained(self.lu, remote_node)
+
+ else:
+ raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
+ self.mode)
+
+ # If not specified all disks should be replaced
+ if not self.disks:
+ self.disks = range(len(self.instance.disks))
for node in check_nodes:
_CheckNodeOnline(self.lu, node)
- # If not specified all disks should be replaced
- if not self.disks:
- self.disks = range(len(self.instance.disks))
-
# Check whether disks are valid
for disk_idx in self.disks:
self.instance.FindDisk(disk_idx)
This dispatches the disk replacement to the appropriate handler.
"""
- feedback_fn("Replacing disks for %s" % self.instance.name)
+ if not self.disks:
+ feedback_fn("No disks need replacement")
+ return
+
+ feedback_fn("Replacing disk(s) %s for %s" %
+ (", ".join([str(i) for i in self.disks]), self.instance.name))
activate_disks = (not self.instance.admin_up)
_StartInstanceDisks(self.lu, self.instance, True)
try:
- if self.mode == constants.REPLACE_DISK_CHG:
+ # Should we replace the secondary node?
+ if self.new_node is not None:
return self._ExecDrbd8Secondary()
else:
return self._ExecDrbd8DiskOnly()
for dev, old_lvs, new_lvs in iv_names.itervalues():
self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
- result = self.rpc.call_blockdev_removechildren(self.target_node, dev, old_lvs)
+ result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
+ old_lvs)
result.Raise("Can't detach drbd from local storage on node"
" %s for device %s" % (self.target_node, dev.iv_name))
#dev.children = []
rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
self.lu.LogInfo("Renaming the old LVs on the target node")
- result = self.rpc.call_blockdev_rename(self.target_node, rename_old_to_new)
+ result = self.rpc.call_blockdev_rename(self.target_node,
+ rename_old_to_new)
result.Raise("Can't rename old LVs on node %s" % self.target_node)
# Now we rename the new LVs to the old LVs
self.lu.LogInfo("Renaming the new LVs on the target node")
rename_new_to_old = [(new, old.physical_id)
for old, new in zip(old_lvs, new_lvs)]
- result = self.rpc.call_blockdev_rename(self.target_node, rename_new_to_old)
+ result = self.rpc.call_blockdev_rename(self.target_node,
+ rename_new_to_old)
result.Raise("Can't rename new LVs on node %s" % self.target_node)
for old, new in zip(old_lvs, new_lvs):
# Now that the new lvs have the old name, we can add them to the device
self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
- result = self.rpc.call_blockdev_addchildren(self.target_node, dev, new_lvs)
+ result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
+ new_lvs)
msg = result.fail_msg
if msg:
for new_lv in new_lvs:
- msg2 = self.rpc.call_blockdev_remove(self.target_node, new_lv).fail_msg
+ msg2 = self.rpc.call_blockdev_remove(self.target_node,
+ new_lv).fail_msg
if msg2:
self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
hint=("cleanup manually the unused logical"
# after this, we must manually remove the drbd minors on both the
# error and the success paths
self.lu.LogStep(4, steps_total, "Changing drbd configuration")
- minors = self.cfg.AllocateDRBDMinor([self.new_node for dev in self.instance.disks],
+ minors = self.cfg.AllocateDRBDMinor([self.new_node
+ for dev in self.instance.disks],
self.instance.name)
logging.debug("Allocated minors %r" % (minors,))
iv_names = {}
for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
- self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.new_node, idx))
+ self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
+ (self.new_node, idx))
# create new devices on new_node; note that we create two IDs:
# one without port, so the drbd will be activated without
# networking information on the new node at this stage, and one
else:
p_minor = o_minor2
- new_alone_id = (self.instance.primary_node, self.new_node, None, p_minor, new_minor, o_secret)
- new_net_id = (self.instance.primary_node, self.new_node, o_port, p_minor, new_minor, o_secret)
+ new_alone_id = (self.instance.primary_node, self.new_node, None,
+ p_minor, new_minor, o_secret)
+ new_net_id = (self.instance.primary_node, self.new_node, o_port,
+ p_minor, new_minor, o_secret)
iv_names[idx] = (dev, dev.children, new_net_id)
logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
" soon as possible"))
self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
- result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node], self.node_secondary_ip,
- self.instance.disks)[self.instance.primary_node]
+ result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
+ self.node_secondary_ip,
+ self.instance.disks)\
+ [self.instance.primary_node]
msg = result.fail_msg
if msg:
# and now perform the drbd attach
self.lu.LogInfo("Attaching primary drbds to new secondary"
" (standalone => connected)")
- result = self.rpc.call_drbd_attach_net([self.instance.primary_node, self.new_node], self.node_secondary_ip,
- self.instance.disks, self.instance.name,
+ result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
+ self.new_node],
+ self.node_secondary_ip,
+ self.instance.disks,
+ self.instance.name,
False)
for to_node, to_result in result.items():
msg = to_result.fail_msg
if msg:
- self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node, msg,
+ self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
+ to_node, msg,
hint=("please do a gnt-instance info to see the"
" status of disks"))
self._RemoveOldStorage(self.target_node, iv_names)
+class LURepairNodeStorage(NoHooksLU):
+ """Repairs the volume group on a node.
+
+ """
+ _OP_REQP = ["node_name"]
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ node_name = self.cfg.ExpandNodeName(self.op.node_name)
+ if node_name is None:
+ raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
+
+ self.op.node_name = node_name
+
+ def ExpandNames(self):
+ self.needed_locks = {
+ locking.LEVEL_NODE: [self.op.node_name],
+ }
+
+ def _CheckFaultyDisks(self, instance, node_name):
+ if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
+ node_name, True):
+ raise errors.OpPrereqError("Instance '%s' has faulty disks on"
+ " node '%s'" % (instance.name, node_name))
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
+ storage_type = self.op.storage_type
+
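+ # only storage types supporting the consistency-fix operation can be repaired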
+ if (constants.SO_FIX_CONSISTENCY not in
+ constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
+ raise errors.OpPrereqError("Storage units of type '%s' can not be"
+ " repaired" % storage_type)
+
+ # Check whether any instance on this node has faulty disks
+ for inst in _GetNodeInstances(self.cfg, self.op.node_name):
+ check_nodes = set(inst.all_nodes)
+ check_nodes.discard(self.op.node_name)
+ for inst_node_name in check_nodes:
+ self._CheckFaultyDisks(inst, inst_node_name)
+
+ def Exec(self, feedback_fn):
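+ """Execute the storage repair.
+
+ """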
+ feedback_fn("Repairing storage unit '%s' on %s ..." %
+ (self.op.name, self.op.node_name))
+
+ st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
+ result = self.rpc.call_storage_execute(self.op.node_name,
+ self.op.storage_type, st_args,
+ self.op.name,
+ constants.SO_FIX_CONSISTENCY)
+ result.Raise("Failed to repair storage unit '%s' on %s" %
+ (self.op.name, self.op.node_name))
+
+
class LUGrowDisk(LogicalUnit):
"""Grow a disk of an instance.
"""Returns the status of a block device
"""
- if self.op.static:
+ if self.op.static or not node:
return None
self.cfg.SetDiskID(dev, node)
return (status.dev_path, status.major, status.minor,
status.sync_percent, status.estimated_time,
- status.is_degraded, status.ldisk_degraded)
+ status.is_degraded, status.ldisk_status)
def _ComputeDiskStatus(self, instance, snode, dev):
"""Compute block device status.
"hv_actual": cluster.FillHV(instance),
"be_instance": instance.beparams,
"be_actual": cluster.FillBE(instance),
+ "serial_no": instance.serial_no,
+ "mtime": instance.mtime,
+ "ctime": instance.ctime,
+ "uuid": instance.uuid,
}
result[instance.name] = idict
instance = self.instance
dst_node = self.dst_node
src_node = instance.primary_node
+
if self.op.shutdown:
# shutdown the instance, but not the disks
+ feedback_fn("Shutting down instance %s" % instance.name)
result = self.rpc.call_instance_shutdown(src_node, instance)
result.Raise("Could not shutdown instance %s on"
" node %s" % (instance.name, src_node))
for disk in instance.disks:
self.cfg.SetDiskID(disk, src_node)
+ # per-disk results
+ dresults = []
try:
for idx, disk in enumerate(instance.disks):
+ feedback_fn("Creating a snapshot of disk/%s on node %s" %
+ (idx, src_node))
+
# result.payload will be a snapshot of an lvm leaf of the one we passed
result = self.rpc.call_blockdev_snapshot(src_node, disk)
msg = result.fail_msg
finally:
if self.op.shutdown and instance.admin_up:
+ feedback_fn("Starting instance %s" % instance.name)
result = self.rpc.call_instance_start(src_node, instance, None, None)
msg = result.fail_msg
if msg:
cluster_name = self.cfg.GetClusterName()
for idx, dev in enumerate(snap_disks):
+ feedback_fn("Exporting snapshot %s from %s to %s" %
+ (idx, src_node, dst_node.name))
if dev:
result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
instance, cluster_name, idx)
if msg:
self.LogWarning("Could not export disk/%s from node %s to"
" node %s: %s", idx, src_node, dst_node.name, msg)
+ dresults.append(False)
+ else:
+ dresults.append(True)
msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
if msg:
self.LogWarning("Could not remove snapshot for disk/%d from node"
" %s: %s", idx, src_node, msg)
+ else:
+ dresults.append(False)
+ feedback_fn("Finalizing export on %s" % dst_node.name)
result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
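+ # remember whether finalization succeeded; this is returned to the caller
+ # together with the per-disk results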
+ fin_resu = True
msg = result.fail_msg
if msg:
self.LogWarning("Could not finalize export for instance %s"
" on node %s: %s", instance.name, dst_node.name, msg)
+ fin_resu = False
nodelist = self.cfg.GetNodeList()
nodelist.remove(dst_node.name)
# substitutes an empty list with the full cluster node list.
iname = instance.name
if nodelist:
+ feedback_fn("Removing old exports for instance %s" % iname)
exportlist = self.rpc.call_export_list(nodelist)
for node in exportlist:
if exportlist[node].fail_msg:
if msg:
self.LogWarning("Could not remove older export for instance %s"
" on node %s: %s", iname, node, msg)
+ return fin_resu, dresults
class LURemoveExport(NoHooksLU):
"master_candidate": ninfo.master_candidate,
}
- if not ninfo.offline:
+ if not (ninfo.offline or ninfo.drained):
nresult.Raise("Can't get data for node %s" % nname)
node_iinfo[nname].Raise("Can't get node instance info from node %s" %
nname)
remote_info = nresult.payload
+
for attr in ['memory_total', 'memory_free', 'memory_dom0',
'vg_size', 'vg_free', 'cpu_total']:
if attr not in remote_info: