Subclasses must follow these rules:
- implement ExpandNames
- - implement CheckPrereq
- - implement Exec
+ - implement CheckPrereq (except when tasklets are used)
+ - implement Exec (except when tasklets are used)
- implement BuildHooksEnv
- redefine HPATH and HTYPE
- optionally redefine their run requirements:
# support for dry-run
self.dry_run_result = None
+ # Tasklets
+ self.tasklets = None
+
for attr_name in self._OP_REQP:
attr_val = getattr(op, attr_name, None)
if attr_val is None:
raise errors.OpPrereqError("Required parameter '%s' missing" %
attr_name)
+
self.CheckArguments()
def __GetSSH(self):
level you can modify self.share_locks, setting a true value (usually 1) for
that level. By default locks are not shared.
+ This function can also define a list of tasklets, which then will be
+ executed in order instead of the usual LU-level CheckPrereq and Exec
+ functions, if those are not defined by the LU.
+
Examples::
# Acquire all nodes and one instance
their canonical form if it hasn't been done by ExpandNames before.
"""
- raise NotImplementedError
+ if self.tasklets is not None:
+ for (idx, tl) in enumerate(self.tasklets):
+ logging.debug("Checking prerequisites for tasklet %s/%s",
+ idx + 1, len(self.tasklets))
+ tl.CheckPrereq()
+ else:
+ raise NotImplementedError
def Exec(self, feedback_fn):
"""Execute the LU.
code, or expected.
"""
- raise NotImplementedError
+ if self.tasklets is not None:
+ for (idx, tl) in enumerate(self.tasklets):
+ logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
+ tl.Exec(feedback_fn)
+ else:
+ raise NotImplementedError
def BuildHooksEnv(self):
"""Build hooks environment for this LU.
- Implement Exec
"""
+ def __init__(self, lu):
+ self.lu = lu
+
+ # Shortcuts
+ self.cfg = lu.cfg
+ self.rpc = lu.rpc
+
def CheckPrereq(self):
"""Check prerequisites for this tasklets.
return env
+
def _NICListToTuple(lu, nics):
"""Build a list of nic information tuples.
hooks_nics.append((ip, mac, mode, link))
return hooks_nics
+
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
"""Builds instance related env variables for hooks from an object.
_CheckNicsBridgesExist(lu, instance.nics, node)
+def _GetNodeInstancesInner(cfg, fn):
+ return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
+
+
+def _GetNodeInstances(cfg, node_name):
+ """Returns a list of all primary and secondary instances on a node.
+
+ """
+
+ return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
+
+
+def _GetNodePrimaryInstances(cfg, node_name):
+ """Returns primary instances on a node.
+
+ """
+ return _GetNodeInstancesInner(cfg,
+ lambda inst: node_name == inst.primary_node)
+
+
+def _GetNodeSecondaryInstances(cfg, node_name):
+ """Returns secondary instances on a node.
+
+ """
+ return _GetNodeInstancesInner(cfg,
+ lambda inst: node_name in inst.secondary_nodes)
+
+
+def _GetStorageTypeArgs(cfg, storage_type):
+ """Returns the arguments for a storage type.
+
+ """
+ # Special case for file storage
+ if storage_type == constants.ST_FILE:
+ # storage.FileStorage wants a list of storage directories
+ return [[cfg.GetFileStorageDir()]]
+
+ return []
+
+
+def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
+ faulty = []
+
+ for dev in instance.disks:
+ cfg.SetDiskID(dev, node_name)
+
+ result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
+ result.Raise("Failed to get disk status from node %s" % node_name,
+ prereq=prereq)
+
+ for idx, bdev_status in enumerate(result.payload):
+ if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
+ faulty.append(idx)
+
+ return faulty
+
+
+class LUPostInitCluster(LogicalUnit):
+ """Logical unit for running hooks after cluster initialization.
+
+ """
+ HPATH = "cluster-init"
+ HTYPE = constants.HTYPE_CLUSTER
+ _OP_REQP = []
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ """
+ env = {"OP_TARGET": self.cfg.GetClusterName()}
+ mn = self.cfg.GetMasterNode()
+ return env, [], [mn]
+
+ def CheckPrereq(self):
+ """No prerequisites to check.
+
+ """
+ return True
+
+ def Exec(self, feedback_fn):
+ """Nothing to do.
+
+ """
+ return True
+
+
class LUDestroyCluster(NoHooksLU):
"""Logical unit for destroying the cluster.
return result
+class LURepairDiskSizes(NoHooksLU):
+ """Verifies the cluster disks sizes.
+
+ """
+ _OP_REQP = ["instances"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+
+ if not isinstance(self.op.instances, list):
+ raise errors.OpPrereqError("Invalid argument type 'instances'")
+
+ if self.op.instances:
+ self.wanted_names = []
+ for name in self.op.instances:
+ full_name = self.cfg.ExpandInstanceName(name)
+ if full_name is None:
+ raise errors.OpPrereqError("Instance '%s' not known" % name)
+ self.wanted_names.append(full_name)
+ self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
+ self.needed_locks = {
+ locking.LEVEL_NODE: [],
+ locking.LEVEL_INSTANCE: self.wanted_names,
+ }
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+ else:
+ self.wanted_names = None
+ self.needed_locks = {
+ locking.LEVEL_NODE: locking.ALL_SET,
+ locking.LEVEL_INSTANCE: locking.ALL_SET,
+ }
+ self.share_locks = dict(((i, 1) for i in locking.LEVELS))
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE and self.wanted_names is not None:
+ self._LockInstancesNodes(primary_only=True)
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This only checks the optional instance list against the existing names.
+
+ """
+ if self.wanted_names is None:
+ self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
+
+ self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
+ in self.wanted_names]
+
+ def Exec(self, feedback_fn):
+ """Verify the size of cluster disks.
+
+ """
+ # TODO: check child disks too
+ # TODO: check differences in size between primary/secondary nodes
+ per_node_disks = {}
+ for instance in self.wanted_instances:
+ pnode = instance.primary_node
+ if pnode not in per_node_disks:
+ per_node_disks[pnode] = []
+ for idx, disk in enumerate(instance.disks):
+ per_node_disks[pnode].append((instance, idx, disk))
+
+ changed = []
+ for node, dskl in per_node_disks.items():
+ result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
+ if result.failed:
+ self.LogWarning("Failure in blockdev_getsizes call to node"
+ " %s, ignoring", node)
+ continue
+ if len(result.data) != len(dskl):
+ self.LogWarning("Invalid result from node %s, ignoring node results",
+ node)
+ continue
+ for ((instance, idx, disk), size) in zip(dskl, result.data):
+ if size is None:
+ self.LogWarning("Disk %d of instance %s did not return size"
+ " information, ignoring", idx, instance.name)
+ continue
+ if not isinstance(size, (int, long)):
+ self.LogWarning("Disk %d of instance %s did not return valid"
+ " size information, ignoring", idx, instance.name)
+ continue
+ size = size >> 20
+ if size != disk.size:
+ self.LogInfo("Disk %d of instance %s has mismatched size,"
+ " correcting: recorded %d, actual %d", idx,
+ instance.name, disk.size, size)
+ disk.size = size
+ self.cfg.Update(instance)
+ changed.append((instance.name, idx, size))
+ return changed
+
+
class LURenameCluster(LogicalUnit):
"""Rename the cluster.
lu.LogWarning("Can't compute data for node %s/%s",
node, instance.disks[i].iv_name)
continue
- # we ignore the ldisk parameter
- perc_done, est_time, is_degraded, _ = mstat
- cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
- if perc_done is not None:
+
+ cumul_degraded = (cumul_degraded or
+ (mstat.is_degraded and mstat.sync_percent is None))
+ if mstat.sync_percent is not None:
done = False
- if est_time is not None:
- rem_time = "%d estimated seconds remaining" % est_time
- max_time = est_time
+ if mstat.estimated_time is not None:
+ rem_time = "%d estimated seconds remaining" % mstat.estimated_time
+ max_time = mstat.estimated_time
else:
rem_time = "no time estimate"
lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
- (instance.disks[i].iv_name, perc_done, rem_time))
+ (instance.disks[i].iv_name, mstat.sync_percent, rem_time))
# if we're done but degraded, let's do a few small retries, to
# make sure we see a stable and not transient situation; therefore
"""
lu.cfg.SetDiskID(dev, node)
- if ldisk:
- idx = 6
- else:
- idx = 5
result = True
+
if on_primary or dev.AssembleOnSecondary():
rstats = lu.rpc.call_blockdev_find(node, dev)
msg = rstats.fail_msg
lu.LogWarning("Can't find disk on node %s", node)
result = False
else:
- result = result and (not rstats.payload[idx])
+ if ldisk:
+ result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
+ else:
+ result = result and not rstats.payload.is_degraded
+
if dev.children:
for child in dev.children:
result = result and _CheckDiskConsistency(lu, child, node, on_primary)
"name", "pinst_cnt", "sinst_cnt",
"pinst_list", "sinst_list",
"pip", "sip", "tags",
- "serial_no",
+ "serial_no", "ctime", "mtime",
"master_candidate",
"master",
"offline",
val = list(node.GetTags())
elif field == "serial_no":
val = node.serial_no
+ elif field == "ctime":
+ val = node.ctime
+ elif field == "mtime":
+ val = node.mtime
elif field == "master_candidate":
val = node.master_candidate
elif field == "master":
return output
+class LUQueryNodeStorage(NoHooksLU):
+ """Logical unit for getting information on storage units on node(s).
+
+ """
+ _OP_REQP = ["nodes", "storage_type", "output_fields"]
+ REQ_BGL = False
+ _FIELDS_STATIC = utils.FieldSet("node")
+
+ def ExpandNames(self):
+ storage_type = self.op.storage_type
+
+ if storage_type not in constants.VALID_STORAGE_FIELDS:
+ raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
+
+ dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]
+
+ _CheckOutputFields(static=self._FIELDS_STATIC,
+ dynamic=utils.FieldSet(*dynamic_fields),
+ selected=self.op.output_fields)
+
+ self.needed_locks = {}
+ self.share_locks[locking.LEVEL_NODE] = 1
+
+ if self.op.nodes:
+ self.needed_locks[locking.LEVEL_NODE] = \
+ _GetWantedNodes(self, self.op.nodes)
+ else:
+ self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the fields required are valid output fields.
+
+ """
+ self.op.name = getattr(self.op, "name", None)
+
+ self.nodes = self.acquired_locks[locking.LEVEL_NODE]
+
+ def Exec(self, feedback_fn):
+ """Computes the list of nodes and their attributes.
+
+ """
+ # Always get name to sort by
+ if constants.SF_NAME in self.op.output_fields:
+ fields = self.op.output_fields[:]
+ else:
+ fields = [constants.SF_NAME] + self.op.output_fields
+
+ # Never ask for node as it's only known to the LU
+ while "node" in fields:
+ fields.remove("node")
+
+ field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
+ name_idx = field_idx[constants.SF_NAME]
+
+ st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
+ data = self.rpc.call_storage_list(self.nodes,
+ self.op.storage_type, st_args,
+ self.op.name, fields)
+
+ result = []
+
+ for node in utils.NiceSort(self.nodes):
+ nresult = data[node]
+ if nresult.offline:
+ continue
+
+ msg = nresult.fail_msg
+ if msg:
+ self.LogWarning("Can't get storage data from node %s: %s", node, msg)
+ continue
+
+ rows = dict([(row[name_idx], row) for row in nresult.payload])
+
+ for name in utils.NiceSort(rows.keys()):
+ row = rows[name]
+
+ out = []
+
+ for field in self.op.output_fields:
+ if field == "node":
+ val = node
+ elif field in field_idx:
+ val = row[field_idx[field]]
+ else:
+ raise errors.ParameterError(field)
+
+ out.append(val)
+
+ result.append(out)
+
+ return result
+
+
+class LUModifyNodeStorage(NoHooksLU):
+ """Logical unit for modifying a storage volume on a node.
+
+ """
+ _OP_REQP = ["node_name", "storage_type", "name", "changes"]
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ node_name = self.cfg.ExpandNodeName(self.op.node_name)
+ if node_name is None:
+ raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
+
+ self.op.node_name = node_name
+
+ storage_type = self.op.storage_type
+ if storage_type not in constants.VALID_STORAGE_FIELDS:
+ raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
+
+ def ExpandNames(self):
+ self.needed_locks = {
+ locking.LEVEL_NODE: self.op.node_name,
+ }
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
+ storage_type = self.op.storage_type
+
+ try:
+ modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
+ except KeyError:
+ raise errors.OpPrereqError("Storage units of type '%s' can not be"
+ " modified" % storage_type)
+
+ diff = set(self.op.changes.keys()) - modifiable
+ if diff:
+ raise errors.OpPrereqError("The following fields can not be modified for"
+ " storage units of type '%s': %r" %
+ (storage_type, list(diff)))
+
+ def Exec(self, feedback_fn):
+ """Computes the list of nodes and their attributes.
+
+ """
+ st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
+ result = self.rpc.call_storage_modify(self.op.node_name,
+ self.op.storage_type, st_args,
+ self.op.name, self.op.changes)
+ result.Raise("Failed to modify storage unit '%s' on %s" %
+ (self.op.name, self.op.node_name))
+
+
class LUAddNode(LogicalUnit):
"""Logical unit for adding node to the cluster.
def ExpandNames(self):
"""Locking for PowercycleNode.
- This is a last-resource option and shouldn't block on other
+ This is a last-resort option and shouldn't block on other
jobs. Therefore, we grab no locks.
"""
"master_netdev": cluster.master_netdev,
"volume_group_name": cluster.volume_group_name,
"file_storage_dir": cluster.file_storage_dir,
+ "ctime": cluster.ctime,
+ "mtime": cluster.mtime,
}
return result
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
_CheckNodeOnline(self, self.instance.primary_node)
+ if not hasattr(self.op, "ignore_size"):
+ self.op.ignore_size = False
def Exec(self, feedback_fn):
"""Activate the disks.
"""
- disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
+ disks_ok, disks_info = \
+ _AssembleInstanceDisks(self, self.instance,
+ ignore_size=self.op.ignore_size)
if not disks_ok:
raise errors.OpExecError("Cannot activate block devices")
return disks_info
-def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
+def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
+ ignore_size=False):
"""Prepare the block devices for an instance.
This sets up the block devices on all nodes.
@type ignore_secondaries: boolean
@param ignore_secondaries: if true, errors on secondary nodes
won't result in an error return from the function
+ @type ignore_size: boolean
+ @param ignore_size: if true, the current known size of the disk
+ will not be used during the disk activation, useful for cases
+ when the size is wrong
@return: False if the operation failed, otherwise a list of
(host, instance_visible_name, node_visible_name)
with the mapping from node devices to instance devices
# 1st pass, assemble on all nodes in secondary mode
for inst_disk in instance.disks:
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
+ if ignore_size:
+ node_disk = node_disk.Copy()
+ node_disk.UnsetSize()
lu.cfg.SetDiskID(node_disk, node)
result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
msg = result.fail_msg
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
if node != instance.primary_node:
continue
+ if ignore_size:
+ node_disk = node_disk.Copy()
+ node_disk.UnsetSize()
lu.cfg.SetDiskID(node_disk, node)
result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
msg = result.fail_msg
_ShutdownInstanceDisks(self, inst)
+class LURecreateInstanceDisks(LogicalUnit):
+ """Recreate an instance's missing disks.
+
+ """
+ HPATH = "instance-recreate-disks"
+ HTYPE = constants.HTYPE_INSTANCE
+ _OP_REQP = ["instance_name", "disks"]
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ """Check the arguments.
+
+ """
+ if not isinstance(self.op.disks, list):
+ raise errors.OpPrereqError("Invalid disks parameter")
+ for item in self.op.disks:
+ if (not isinstance(item, int) or
+ item < 0):
+ raise errors.OpPrereqError("Invalid disk specification '%s'" %
+ str(item))
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on master, primary and secondary nodes of the instance.
+
+ """
+ env = _BuildInstanceHookEnvByObject(self, self.instance)
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
+ return env, nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster and is not running.
+
+ """
+ instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+ _CheckNodeOnline(self, instance.primary_node)
+
+ if instance.disk_template == constants.DT_DISKLESS:
+ raise errors.OpPrereqError("Instance '%s' has no disks" %
+ self.op.instance_name)
+ if instance.admin_up:
+ raise errors.OpPrereqError("Instance '%s' is marked to be up" %
+ self.op.instance_name)
+ remote_info = self.rpc.call_instance_info(instance.primary_node,
+ instance.name,
+ instance.hypervisor)
+ remote_info.Raise("Error checking node %s" % instance.primary_node,
+ prereq=True)
+ if remote_info.payload:
+ raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
+ (self.op.instance_name,
+ instance.primary_node))
+
+ if not self.op.disks:
+ self.op.disks = range(len(instance.disks))
+ else:
+ for idx in self.op.disks:
+ if idx >= len(instance.disks):
+ raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
+
+ self.instance = instance
+
+ def Exec(self, feedback_fn):
+ """Recreate the disks.
+
+ """
+ to_skip = []
+ for idx, disk in enumerate(self.instance.disks):
+ if idx not in self.op.disks: # disk idx has not been passed in
+ to_skip.append(idx)
+ continue
+
+ _CreateDisks(self, self.instance, to_skip=to_skip)
+
+
class LURenameInstance(LogicalUnit):
"""Rename an instance.
r"(nic)\.(bridge)/([0-9]+)",
r"(nic)\.(macs|ips|modes|links|bridges)",
r"(disk|nic)\.(count)",
- "serial_no", "hypervisor", "hvparams",] +
+ "serial_no", "hypervisor", "hvparams",
+ "ctime", "mtime",
+ ] +
["hv/%s" % name
for name in constants.HVS_PARAMETERS] +
["be/%s" % name
val = list(instance.GetTags())
elif field == "serial_no":
val = instance.serial_no
+ elif field == "ctime":
+ val = instance.ctime
+ elif field == "mtime":
+ val = instance.mtime
elif field == "network_port":
val = instance.network_port
elif field == "hypervisor":
def ExpandNames(self):
self._ExpandAndLockInstance()
+
self.needed_locks[locking.LEVEL_NODE] = []
self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+ self._migrater = TLMigrateInstance(self, self.op.instance_name,
+ self.op.live, self.op.cleanup)
+ self.tasklets = [self._migrater]
+
def DeclareLocks(self, level):
if level == locking.LEVEL_NODE:
self._LockInstancesNodes()
This runs on master, primary and secondary nodes of the instance.
"""
- env = _BuildInstanceHookEnvByObject(self, self.instance)
+ instance = self._migrater.instance
+ env = _BuildInstanceHookEnvByObject(self, instance)
env["MIGRATE_LIVE"] = self.op.live
env["MIGRATE_CLEANUP"] = self.op.cleanup
- nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
+ nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
return env, nl, nl
+
+class LUMigrateNode(LogicalUnit):
+ """Migrate all instances from a node.
+
+ """
+ HPATH = "node-migrate"
+ HTYPE = constants.HTYPE_NODE
+ _OP_REQP = ["node_name", "live"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
+ if self.op.node_name is None:
+ raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
+
+ self.needed_locks = {
+ locking.LEVEL_NODE: [self.op.node_name],
+ }
+
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+
+ # Create tasklets for migrating instances for all instances on this node
+ names = []
+ tasklets = []
+
+ for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
+ logging.debug("Migrating instance %s", inst.name)
+ names.append(inst.name)
+
+ tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
+
+ self.tasklets = tasklets
+
+ # Declare instance locks
+ self.needed_locks[locking.LEVEL_INSTANCE] = names
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on the master, the primary and all the secondaries.
+
+ """
+ env = {
+ "NODE_NAME": self.op.node_name,
+ }
+
+ nl = [self.cfg.GetMasterNode()]
+
+ return (env, nl, nl)
+
+
+class TLMigrateInstance(Tasklet):
+ def __init__(self, lu, instance_name, live, cleanup):
+ """Initializes this class.
+
+ """
+ Tasklet.__init__(self, lu)
+
+ # Parameters
+ self.instance_name = instance_name
+ self.live = live
+ self.cleanup = cleanup
+
def CheckPrereq(self):
"""Check prerequisites.
"""
instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
+ self.cfg.ExpandInstanceName(self.instance_name))
if instance is None:
raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
+ self.instance_name)
if instance.disk_template != constants.DT_DRBD8:
raise errors.OpPrereqError("Instance's disk layout is not"
# check bridge existance
_CheckInstanceBridgesExist(self, instance, node=target_node)
- if not self.op.cleanup:
+ if not self.cleanup:
_CheckNodeNotDrained(self, target_node)
result = self.rpc.call_instance_migratable(instance.primary_node,
instance)
self._GoReconnect(False)
self._WaitUntilSync()
except errors.OpExecError, err:
- self.LogWarning("Migration failed and I can't reconnect the"
- " drives: error '%s'\n"
- "Please look and recover the instance status" %
- str(err))
+ self.lu.LogWarning("Migration failed and I can't reconnect the"
+ " drives: error '%s'\n"
+ "Please look and recover the instance status" %
+ str(err))
def _AbortMigration(self):
"""Call the hypervisor code to abort a started migration.
time.sleep(10)
result = self.rpc.call_instance_migrate(source_node, instance,
self.nodes_ip[target_node],
- self.op.live)
+ self.live)
msg = result.fail_msg
if msg:
logging.error("Instance migration failed, trying to revert"
"""Perform the migration.
"""
+ feedback_fn("Migrating instance %s" % self.instance.name)
+
self.feedback_fn = feedback_fn
self.source_node = self.instance.primary_node
self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
}
- if self.op.cleanup:
+
+ if self.cleanup:
return self._ExecCleanup()
else:
return self._ExecMigration()
return "originstname+%s" % instance.name
-def _CreateDisks(lu, instance):
+def _CreateDisks(lu, instance, to_skip=None):
"""Create all disks for an instance.
This abstracts away some work from AddInstance.
@param lu: the logical unit on whose behalf we execute
@type instance: L{objects.Instance}
@param instance: the instance whose disks we should create
+ @type to_skip: list
+ @param to_skip: list of indices to skip
@rtype: boolean
@return: the success of the creation
# Note: this needs to be kept in sync with adding of disks in
# LUSetInstanceParams
- for device in instance.disks:
+ for idx, device in enumerate(instance.disks):
+ if to_skip and idx in to_skip:
+ continue
logging.info("Creating volume %s for instance %s",
device.iv_name, instance.name)
#HARDCODE
if not hasattr(self.op, "iallocator"):
self.op.iallocator = None
- _DiskReplacer.CheckArguments(self.op.mode, self.op.remote_node,
- self.op.iallocator)
+ TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
+ self.op.iallocator)
def ExpandNames(self):
self._ExpandAndLockInstance()
self.needed_locks[locking.LEVEL_NODE] = []
self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
- self.replacer = _DiskReplacer(self, self.op.instance_name, self.op.mode,
- self.op.iallocator, self.op.remote_node,
- self.op.disks)
+ self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
+ self.op.iallocator, self.op.remote_node,
+ self.op.disks)
+
+ self.tasklets = [self.replacer]
def DeclareLocks(self, level):
# If we're not already locking all nodes in the set we have to declare the
nl.append(self.op.remote_node)
return env, nl, nl
- def CheckPrereq(self):
- """Check prerequisites.
- This checks that the instance is in the cluster.
+class LUEvacuateNode(LogicalUnit):
+ """Relocate the secondary instances from a node.
- """
- self.replacer.CheckPrereq()
+ """
+ HPATH = "node-evacuate"
+ HTYPE = constants.HTYPE_NODE
+ _OP_REQP = ["node_name"]
+ REQ_BGL = False
- def Exec(self, feedback_fn):
- """Execute disk replacement.
+ def CheckArguments(self):
+ if not hasattr(self.op, "remote_node"):
+ self.op.remote_node = None
+ if not hasattr(self.op, "iallocator"):
+ self.op.iallocator = None
- This dispatches the disk replacement to the appropriate handler.
+ TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
+ self.op.remote_node,
+ self.op.iallocator)
+
+ def ExpandNames(self):
+ self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
+ if self.op.node_name is None:
+ raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
+
+ self.needed_locks = {}
+
+ # Declare node locks
+ if self.op.iallocator is not None:
+ self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+
+ elif self.op.remote_node is not None:
+ remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
+ if remote_node is None:
+ raise errors.OpPrereqError("Node '%s' not known" %
+ self.op.remote_node)
+
+ self.op.remote_node = remote_node
+
+ # Warning: do not remove the locking of the new secondary here
+ # unless DRBD8.AddChildren is changed to work in parallel;
+ # currently it doesn't since parallel invocations of
+ # FindUnusedMinor will conflict
+ self.needed_locks[locking.LEVEL_NODE] = [remote_node]
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+
+ else:
+ raise errors.OpPrereqError("Invalid parameters")
+
+ # Create tasklets for replacing disks for all secondary instances on this
+ # node
+ names = []
+ tasklets = []
+
+ for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
+ logging.debug("Replacing disks for instance %s", inst.name)
+ names.append(inst.name)
+
+ replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
+ self.op.iallocator, self.op.remote_node, [])
+ tasklets.append(replacer)
+
+ self.tasklets = tasklets
+ self.instance_names = names
+
+ # Declare instance locks
+ self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
+
+ def DeclareLocks(self, level):
+ # If we're not already locking all nodes in the set we have to declare the
+ # instance's primary/secondary nodes.
+ if (level == locking.LEVEL_NODE and
+ self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
+ self._LockInstancesNodes()
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on the master, the primary and all the secondaries.
"""
- self.replacer.Exec()
+ env = {
+ "NODE_NAME": self.op.node_name,
+ }
+ nl = [self.cfg.GetMasterNode()]
-class _DiskReplacer:
+ if self.op.remote_node is not None:
+ env["NEW_SECONDARY"] = self.op.remote_node
+ nl.append(self.op.remote_node)
+
+ return (env, nl, nl)
+
+
+class TLReplaceDisks(Tasklet):
"""Replaces disks for an instance.
Note: Locking is not within the scope of this class.
"""Initializes this class.
"""
+ Tasklet.__init__(self, lu)
+
# Parameters
- self.lu = lu
self.instance_name = instance_name
self.mode = mode
self.iallocator_name = iallocator_name
self.remote_node = remote_node
self.disks = disks
- # Shortcuts
- self.cfg = lu.cfg
- self.rpc = lu.rpc
-
# Runtime data
self.instance = None
self.new_node = None
@staticmethod
def CheckArguments(mode, remote_node, iallocator):
+ """Helper function for users of this class.
+
+ """
# check for valid parameter combination
- cnt = [remote_node, iallocator].count(None)
if mode == constants.REPLACE_DISK_CHG:
- if cnt == 2:
+ if remote_node is None and iallocator is None:
raise errors.OpPrereqError("When changing the secondary either an"
" iallocator script must be used or the"
" new node given")
- elif cnt == 0:
+
+ if remote_node is not None and iallocator is not None:
raise errors.OpPrereqError("Give either the iallocator or the new"
" secondary, not both")
- else: # not replacing the secondary
- if cnt != 2:
- raise errors.OpPrereqError("The iallocator and new node options can"
- " be used only when changing the"
- " secondary node")
+
+ elif remote_node is not None or iallocator is not None:
+ # Not replacing the secondary
+ raise errors.OpPrereqError("The iallocator and new node options can"
+ " only be used when changing the"
+ " secondary node")
@staticmethod
def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
return remote_node_name
+ def _FindFaultyDisks(self, node_name):
+ return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
+ node_name, True)
+
def CheckPrereq(self):
"""Check prerequisites.
raise errors.OpPrereqError("The specified node is already the"
" secondary node of the instance.")
- if self.mode == constants.REPLACE_DISK_PRI:
- self.target_node = self.instance.primary_node
- self.other_node = secondary_node
- check_nodes = [self.target_node, self.other_node]
+ if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
+ constants.REPLACE_DISK_CHG):
+ raise errors.OpPrereqError("Cannot specify disks to be replaced")
+
+ if self.mode == constants.REPLACE_DISK_AUTO:
+ faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
+ faulty_secondary = self._FindFaultyDisks(secondary_node)
+
+ if faulty_primary and faulty_secondary:
+ raise errors.OpPrereqError("Instance %s has faulty disks on more than"
+ " one node and can not be repaired"
+ " automatically" % self.instance_name)
+
+ if faulty_primary:
+ self.disks = faulty_primary
+ self.target_node = self.instance.primary_node
+ self.other_node = secondary_node
+ check_nodes = [self.target_node, self.other_node]
+ elif faulty_secondary:
+ self.disks = faulty_secondary
+ self.target_node = secondary_node
+ self.other_node = self.instance.primary_node
+ check_nodes = [self.target_node, self.other_node]
+ else:
+ self.disks = []
+ check_nodes = []
- elif self.mode == constants.REPLACE_DISK_SEC:
- self.target_node = secondary_node
- self.other_node = self.instance.primary_node
- check_nodes = [self.target_node, self.other_node]
+ else:
+ # Non-automatic modes
+ if self.mode == constants.REPLACE_DISK_PRI:
+ self.target_node = self.instance.primary_node
+ self.other_node = secondary_node
+ check_nodes = [self.target_node, self.other_node]
- elif self.mode == constants.REPLACE_DISK_CHG:
- self.new_node = remote_node
- self.other_node = self.instance.primary_node
- self.target_node = secondary_node
- check_nodes = [self.new_node, self.other_node]
+ elif self.mode == constants.REPLACE_DISK_SEC:
+ self.target_node = secondary_node
+ self.other_node = self.instance.primary_node
+ check_nodes = [self.target_node, self.other_node]
- _CheckNodeNotDrained(self.lu, remote_node)
+ elif self.mode == constants.REPLACE_DISK_CHG:
+ self.new_node = remote_node
+ self.other_node = self.instance.primary_node
+ self.target_node = secondary_node
+ check_nodes = [self.new_node, self.other_node]
- else:
- raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
- self.mode)
+ _CheckNodeNotDrained(self.lu, remote_node)
+
+ else:
+ raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
+ self.mode)
+
+ # If not specified all disks should be replaced
+ if not self.disks:
+ self.disks = range(len(self.instance.disks))
for node in check_nodes:
_CheckNodeOnline(self.lu, node)
- # If not specified all disks should be replaced
- if not self.disks:
- self.disks = range(len(self.instance.disks))
-
# Check whether disks are valid
for disk_idx in self.disks:
self.instance.FindDisk(disk_idx)
self.node_secondary_ip = node_2nd_ip
- def Exec(self):
+ def Exec(self, feedback_fn):
"""Execute disk replacement.
This dispatches the disk replacement to the appropriate handler.
"""
+ if not self.disks:
+ feedback_fn("No disks need replacement")
+ return
+
+ feedback_fn("Replacing disk(s) %s for %s" %
+ (", ".join([str(i) for i in self.disks]), self.instance.name))
+
activate_disks = (not self.instance.admin_up)
# Activate the instance disks if we're replacing them on a down instance
_StartInstanceDisks(self.lu, self.instance, True)
try:
- if self.mode == constants.REPLACE_DISK_CHG:
+ # Should we replace the secondary node?
+ if self.new_node is not None:
return self._ExecDrbd8Secondary()
else:
return self._ExecDrbd8DiskOnly()
raise errors.OpExecError("Can't find DRBD device %s: %s" %
(name, msg))
- if result.payload[5]:
+ if result.payload.is_degraded:
raise errors.OpExecError("DRBD device %s is degraded!" % name)
def _RemoveOldStorage(self, node_name, iv_names):
self._RemoveOldStorage(self.target_node, iv_names)
+class LURepairNodeStorage(NoHooksLU):
+ """Repairs the volume group on a node.
+
+ """
+ _OP_REQP = ["node_name"]
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ node_name = self.cfg.ExpandNodeName(self.op.node_name)
+ if node_name is None:
+ raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
+
+ self.op.node_name = node_name
+
+ def ExpandNames(self):
+ self.needed_locks = {
+ locking.LEVEL_NODE: [self.op.node_name],
+ }
+
+ def _CheckFaultyDisks(self, instance, node_name):
+ if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
+ node_name, True):
+ raise errors.OpPrereqError("Instance '%s' has faulty disks on"
+ " node '%s'" % (inst.name, node_name))
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
+ storage_type = self.op.storage_type
+
+ if (constants.SO_FIX_CONSISTENCY not in
+ constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
+ raise errors.OpPrereqError("Storage units of type '%s' can not be"
+ " repaired" % storage_type)
+
+ # Check whether any instance on this node has faulty disks
+ for inst in _GetNodeInstances(self.cfg, self.op.node_name):
+ check_nodes = set(inst.all_nodes)
+ check_nodes.discard(self.op.node_name)
+ for inst_node_name in check_nodes:
+ self._CheckFaultyDisks(inst, inst_node_name)
+
+ def Exec(self, feedback_fn):
+ feedback_fn("Repairing storage unit '%s' on %s ..." %
+ (self.op.name, self.op.node_name))
+
+ st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
+ result = self.rpc.call_storage_execute(self.op.node_name,
+ self.op.storage_type, st_args,
+ self.op.name,
+ constants.SO_FIX_CONSISTENCY)
+ result.Raise("Failed to repair storage unit '%s' on %s" %
+ (self.op.name, self.op.node_name))
+
+
class LUGrowDisk(LogicalUnit):
"""Grow a disk of an instance.
in self.wanted_names]
return
+ def _ComputeBlockdevStatus(self, node, instance_name, dev):
+ """Returns the status of a block device
+
+ """
+ if self.op.static or not node:
+ return None
+
+ self.cfg.SetDiskID(dev, node)
+
+ result = self.rpc.call_blockdev_find(node, dev)
+ if result.offline:
+ return None
+
+ result.Raise("Can't compute disk status for %s" % instance_name)
+
+ status = result.payload
+ if status is None:
+ return None
+
+ return (status.dev_path, status.major, status.minor,
+ status.sync_percent, status.estimated_time,
+ status.is_degraded, status.ldisk_status)
+
def _ComputeDiskStatus(self, instance, snode, dev):
"""Compute block device status.
"""
- static = self.op.static
- if not static:
- self.cfg.SetDiskID(dev, instance.primary_node)
- dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
- if dev_pstatus.offline:
- dev_pstatus = None
- else:
- dev_pstatus.Raise("Can't compute disk status for %s" % instance.name)
- dev_pstatus = dev_pstatus.payload
- else:
- dev_pstatus = None
-
if dev.dev_type in constants.LDS_DRBD:
# we change the snode then (otherwise we use the one passed in)
if dev.logical_id[0] == instance.primary_node:
else:
snode = dev.logical_id[0]
- if snode and not static:
- self.cfg.SetDiskID(dev, snode)
- dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
- if dev_sstatus.offline:
- dev_sstatus = None
- else:
- dev_sstatus.Raise("Can't compute disk status for %s" % instance.name)
- dev_sstatus = dev_sstatus.payload
- else:
- dev_sstatus = None
+ dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
+ instance.name, dev)
+ dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
if dev.children:
dev_children = [self._ComputeDiskStatus(instance, snode, child)
"hv_actual": cluster.FillHV(instance),
"be_instance": instance.beparams,
"be_actual": cluster.FillBE(instance),
+ "serial_no": instance.serial_no,
+ "mtime": instance.mtime,
+ "ctime": instance.ctime,
}
result[instance.name] = idict
for disk in instance.disks:
self.cfg.SetDiskID(disk, src_node)
+ # per-disk results
+ dresults = []
try:
for idx, disk in enumerate(instance.disks):
# result.payload will be a snapshot of an lvm leaf of the one we passed
if msg:
self.LogWarning("Could not export disk/%s from node %s to"
" node %s: %s", idx, src_node, dst_node.name, msg)
+ dresults.append(False)
+ else:
+ dresults.append(True)
msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
if msg:
self.LogWarning("Could not remove snapshot for disk/%d from node"
" %s: %s", idx, src_node, msg)
+ else:
+ dresults.append(False)
result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
+ fin_resu = True
msg = result.fail_msg
if msg:
self.LogWarning("Could not finalize export for instance %s"
" on node %s: %s", instance.name, dst_node.name, msg)
+ fin_resu = False
nodelist = self.cfg.GetNodeList()
nodelist.remove(dst_node.name)
if msg:
self.LogWarning("Could not remove older export for instance %s"
" on node %s: %s", iname, node, msg)
+ return fin_resu, dresults
class LURemoveExport(NoHooksLU):
"master_candidate": ninfo.master_candidate,
}
- if not ninfo.offline:
+ if not (ninfo.offline or ninfo.drained):
nresult.Raise("Can't get data for node %s" % nname)
node_iinfo[nname].Raise("Can't get node instance info from node %s" %
nname)
remote_info = nresult.payload
+
for attr in ['memory_total', 'memory_free', 'memory_dom0',
'vg_size', 'vg_free', 'cpu_total']:
if attr not in remote_info: