X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/c68174b616ff6465465e183c7966cc4e51626cbe..4dce1a83e287961a0d84ee5d27b283a0f5748c20:/lib/cmdlib.py diff --git a/lib/cmdlib.py b/lib/cmdlib.py index 7d91a22..089cd36 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -94,7 +94,7 @@ class LogicalUnit(object): self.dry_run_result = None # Tasklets - self.tasklets = [] + self.tasklets = None for attr_name in self._OP_REQP: attr_val = getattr(op, attr_name, None) @@ -213,8 +213,10 @@ class LogicalUnit(object): their canonical form if it hasn't been done by ExpandNames before. """ - if self.tasklets: - for tl in self.tasklets: + if self.tasklets is not None: + for (idx, tl) in enumerate(self.tasklets): + logging.debug("Checking prerequisites for tasklet %s/%s", + idx + 1, len(self.tasklets)) tl.CheckPrereq() else: raise NotImplementedError @@ -227,8 +229,9 @@ class LogicalUnit(object): code, or expected. """ - if self.tasklets: - for tl in self.tasklets: + if self.tasklets is not None: + for (idx, tl) in enumerate(self.tasklets): + logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets)) tl.Exec(feedback_fn) else: raise NotImplementedError @@ -367,6 +370,13 @@ class Tasklet: - Implement Exec """ + def __init__(self, lu): + self.lu = lu + + # Shortcuts + self.cfg = lu.cfg + self.rpc = lu.rpc + def CheckPrereq(self): """Check prerequisites for this tasklets. @@ -597,6 +607,7 @@ def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, return env + def _NICListToTuple(lu, nics): """Build a list of nic information tuples. @@ -620,6 +631,7 @@ def _NICListToTuple(lu, nics): hooks_nics.append((ip, mac, mode, link)) return hooks_nics + def _BuildInstanceHookEnvByObject(lu, instance, override=None): """Builds instance related env variables for hooks from an object. @@ -699,17 +711,90 @@ def _CheckInstanceBridgesExist(lu, instance, node=None): _CheckNicsBridgesExist(lu, instance.nics, node) +def _GetNodeInstancesInner(cfg, fn): + return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)] + + +def _GetNodeInstances(cfg, node_name): + """Returns a list of all primary and secondary instances on a node. + + """ + + return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes) + + +def _GetNodePrimaryInstances(cfg, node_name): + """Returns primary instances on a node. + + """ + return _GetNodeInstancesInner(cfg, + lambda inst: node_name == inst.primary_node) + + def _GetNodeSecondaryInstances(cfg, node_name): """Returns secondary instances on a node. """ - instances = [] + return _GetNodeInstancesInner(cfg, + lambda inst: node_name in inst.secondary_nodes) + + +def _GetStorageTypeArgs(cfg, storage_type): + """Returns the arguments for a storage type. + + """ + # Special case for file storage + if storage_type == constants.ST_FILE: + # storage.FileStorage wants a list of storage directories + return [[cfg.GetFileStorageDir()]] + + return [] + + +def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq): + faulty = [] + + for dev in instance.disks: + cfg.SetDiskID(dev, node_name) + + result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks) + result.Raise("Failed to get disk status from node %s" % node_name, + prereq=prereq) + + for idx, bdev_status in enumerate(result.payload): + if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY: + faulty.append(idx) + + return faulty + + +class LUPostInitCluster(LogicalUnit): + """Logical unit for running hooks after cluster initialization. 
+ + """ + HPATH = "cluster-init" + HTYPE = constants.HTYPE_CLUSTER + _OP_REQP = [] + + def BuildHooksEnv(self): + """Build hooks env. + + """ + env = {"OP_TARGET": self.cfg.GetClusterName()} + mn = self.cfg.GetMasterNode() + return env, [], [mn] + + def CheckPrereq(self): + """No prerequisites to check. - for (_, inst) in cfg.GetAllInstancesInfo().iteritems(): - if node_name in inst.secondary_nodes: - instances.append(inst) + """ + return True + + def Exec(self, feedback_fn): + """Nothing to do. - return instances + """ + return True class LUDestroyCluster(NoHooksLU): @@ -1434,6 +1519,100 @@ class LUVerifyDisks(NoHooksLU): return result +class LURepairDiskSizes(NoHooksLU): + """Verifies the cluster disks sizes. + + """ + _OP_REQP = ["instances"] + REQ_BGL = False + + def ExpandNames(self): + + if not isinstance(self.op.instances, list): + raise errors.OpPrereqError("Invalid argument type 'instances'") + + if self.op.instances: + self.wanted_names = [] + for name in self.op.instances: + full_name = self.cfg.ExpandInstanceName(name) + if full_name is None: + raise errors.OpPrereqError("Instance '%s' not known" % name) + self.wanted_names.append(full_name) + self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names + self.needed_locks = { + locking.LEVEL_NODE: [], + locking.LEVEL_INSTANCE: self.wanted_names, + } + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + else: + self.wanted_names = None + self.needed_locks = { + locking.LEVEL_NODE: locking.ALL_SET, + locking.LEVEL_INSTANCE: locking.ALL_SET, + } + self.share_locks = dict(((i, 1) for i in locking.LEVELS)) + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE and self.wanted_names is not None: + self._LockInstancesNodes(primary_only=True) + + def CheckPrereq(self): + """Check prerequisites. + + This only checks the optional instance list against the existing names. + + """ + if self.wanted_names is None: + self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE] + + self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name + in self.wanted_names] + + def Exec(self, feedback_fn): + """Verify the size of cluster disks. 
+ + """ + # TODO: check child disks too + # TODO: check differences in size between primary/secondary nodes + per_node_disks = {} + for instance in self.wanted_instances: + pnode = instance.primary_node + if pnode not in per_node_disks: + per_node_disks[pnode] = [] + for idx, disk in enumerate(instance.disks): + per_node_disks[pnode].append((instance, idx, disk)) + + changed = [] + for node, dskl in per_node_disks.items(): + result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl]) + if result.failed: + self.LogWarning("Failure in blockdev_getsizes call to node" + " %s, ignoring", node) + continue + if len(result.data) != len(dskl): + self.LogWarning("Invalid result from node %s, ignoring node results", + node) + continue + for ((instance, idx, disk), size) in zip(dskl, result.data): + if size is None: + self.LogWarning("Disk %d of instance %s did not return size" + " information, ignoring", idx, instance.name) + continue + if not isinstance(size, (int, long)): + self.LogWarning("Disk %d of instance %s did not return valid" + " size information, ignoring", idx, instance.name) + continue + size = size >> 20 + if size != disk.size: + self.LogInfo("Disk %d of instance %s has mismatched size," + " correcting: recorded %d, actual %d", idx, + instance.name, disk.size, size) + disk.size = size + self.cfg.Update(instance) + changed.append((instance.name, idx, size)) + return changed + + class LURenameCluster(LogicalUnit): """Rename the cluster. @@ -1796,18 +1975,18 @@ def _WaitForSync(lu, instance, oneshot=False, unlock=False): lu.LogWarning("Can't compute data for node %s/%s", node, instance.disks[i].iv_name) continue - # we ignore the ldisk parameter - perc_done, est_time, is_degraded, _ = mstat - cumul_degraded = cumul_degraded or (is_degraded and perc_done is None) - if perc_done is not None: + + cumul_degraded = (cumul_degraded or + (mstat.is_degraded and mstat.sync_percent is None)) + if mstat.sync_percent is not None: done = False - if est_time is not None: - rem_time = "%d estimated seconds remaining" % est_time - max_time = est_time + if mstat.estimated_time is not None: + rem_time = "%d estimated seconds remaining" % mstat.estimated_time + max_time = mstat.estimated_time else: rem_time = "no time estimate" lu.proc.LogInfo("- device %s: %5.2f%% done, %s" % - (instance.disks[i].iv_name, perc_done, rem_time)) + (instance.disks[i].iv_name, mstat.sync_percent, rem_time)) # if we're done but degraded, let's do a few small retries, to # make sure we see a stable and not transient situation; therefore @@ -1837,12 +2016,9 @@ def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False): """ lu.cfg.SetDiskID(dev, node) - if ldisk: - idx = 6 - else: - idx = 5 result = True + if on_primary or dev.AssembleOnSecondary(): rstats = lu.rpc.call_blockdev_find(node, dev) msg = rstats.fail_msg @@ -1853,7 +2029,11 @@ def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False): lu.LogWarning("Can't find disk on node %s", node) result = False else: - result = result and (not rstats.payload[idx]) + if ldisk: + result = result and rstats.payload.ldisk_status == constants.LDS_OKAY + else: + result = result and not rstats.payload.is_degraded + if dev.children: for child in dev.children: result = result and _CheckDiskConsistency(lu, child, node, on_primary) @@ -2044,7 +2224,7 @@ class LUQueryNodes(NoHooksLU): "name", "pinst_cnt", "sinst_cnt", "pinst_list", "sinst_list", "pip", "sip", "tags", - "serial_no", + "serial_no", "ctime", "mtime", "master_candidate", "master", "offline", @@ -2168,6 
+2348,10 @@ class LUQueryNodes(NoHooksLU): val = list(node.GetTags()) elif field == "serial_no": val = node.serial_no + elif field == "ctime": + val = node.ctime + elif field == "mtime": + val = node.mtime elif field == "master_candidate": val = node.master_candidate elif field == "master": @@ -2283,6 +2467,154 @@ class LUQueryNodeVolumes(NoHooksLU): return output +class LUQueryNodeStorage(NoHooksLU): + """Logical unit for getting information on storage units on node(s). + + """ + _OP_REQP = ["nodes", "storage_type", "output_fields"] + REQ_BGL = False + _FIELDS_STATIC = utils.FieldSet("node") + + def ExpandNames(self): + storage_type = self.op.storage_type + + if storage_type not in constants.VALID_STORAGE_FIELDS: + raise errors.OpPrereqError("Unknown storage type: %s" % storage_type) + + dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type] + + _CheckOutputFields(static=self._FIELDS_STATIC, + dynamic=utils.FieldSet(*dynamic_fields), + selected=self.op.output_fields) + + self.needed_locks = {} + self.share_locks[locking.LEVEL_NODE] = 1 + + if self.op.nodes: + self.needed_locks[locking.LEVEL_NODE] = \ + _GetWantedNodes(self, self.op.nodes) + else: + self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET + + def CheckPrereq(self): + """Check prerequisites. + + This checks that the fields required are valid output fields. + + """ + self.op.name = getattr(self.op, "name", None) + + self.nodes = self.acquired_locks[locking.LEVEL_NODE] + + def Exec(self, feedback_fn): + """Computes the list of nodes and their attributes. + + """ + # Always get name to sort by + if constants.SF_NAME in self.op.output_fields: + fields = self.op.output_fields[:] + else: + fields = [constants.SF_NAME] + self.op.output_fields + + # Never ask for node as it's only known to the LU + while "node" in fields: + fields.remove("node") + + field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)]) + name_idx = field_idx[constants.SF_NAME] + + st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) + data = self.rpc.call_storage_list(self.nodes, + self.op.storage_type, st_args, + self.op.name, fields) + + result = [] + + for node in utils.NiceSort(self.nodes): + nresult = data[node] + if nresult.offline: + continue + + msg = nresult.fail_msg + if msg: + self.LogWarning("Can't get storage data from node %s: %s", node, msg) + continue + + rows = dict([(row[name_idx], row) for row in nresult.payload]) + + for name in utils.NiceSort(rows.keys()): + row = rows[name] + + out = [] + + for field in self.op.output_fields: + if field == "node": + val = node + elif field in field_idx: + val = row[field_idx[field]] + else: + raise errors.ParameterError(field) + + out.append(val) + + result.append(out) + + return result + + +class LUModifyNodeStorage(NoHooksLU): + """Logical unit for modifying a storage volume on a node. + + """ + _OP_REQP = ["node_name", "storage_type", "name", "changes"] + REQ_BGL = False + + def CheckArguments(self): + node_name = self.cfg.ExpandNodeName(self.op.node_name) + if node_name is None: + raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name) + + self.op.node_name = node_name + + storage_type = self.op.storage_type + if storage_type not in constants.VALID_STORAGE_FIELDS: + raise errors.OpPrereqError("Unknown storage type: %s" % storage_type) + + def ExpandNames(self): + self.needed_locks = { + locking.LEVEL_NODE: self.op.node_name, + } + + def CheckPrereq(self): + """Check prerequisites. 
+ + """ + storage_type = self.op.storage_type + + try: + modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type] + except KeyError: + raise errors.OpPrereqError("Storage units of type '%s' can not be" + " modified" % storage_type) + + diff = set(self.op.changes.keys()) - modifiable + if diff: + raise errors.OpPrereqError("The following fields can not be modified for" + " storage units of type '%s': %r" % + (storage_type, list(diff))) + + def Exec(self, feedback_fn): + """Computes the list of nodes and their attributes. + + """ + st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) + result = self.rpc.call_storage_modify(self.op.node_name, + self.op.storage_type, st_args, + self.op.name, self.op.changes) + result.Raise("Failed to modify storage unit '%s' on %s" % + (self.op.name, self.op.node_name)) + + class LUAddNode(LogicalUnit): """Logical unit for adding node to the cluster. @@ -2650,7 +2982,7 @@ class LUPowercycleNode(NoHooksLU): def ExpandNames(self): """Locking for PowercycleNode. - This is a last-resource option and shouldn't block on other + This is a last-resort option and shouldn't block on other jobs. Therefore, we grab no locks. """ @@ -2714,6 +3046,8 @@ class LUQueryClusterInfo(NoHooksLU): "master_netdev": cluster.master_netdev, "volume_group_name": cluster.volume_group_name, "file_storage_dir": cluster.file_storage_dir, + "ctime": cluster.ctime, + "mtime": cluster.mtime, } return result @@ -2785,19 +3119,24 @@ class LUActivateInstanceDisks(NoHooksLU): assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name _CheckNodeOnline(self, self.instance.primary_node) + if not hasattr(self.op, "ignore_size"): + self.op.ignore_size = False def Exec(self, feedback_fn): """Activate the disks. """ - disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance) + disks_ok, disks_info = \ + _AssembleInstanceDisks(self, self.instance, + ignore_size=self.op.ignore_size) if not disks_ok: raise errors.OpExecError("Cannot activate block devices") return disks_info -def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False): +def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False, + ignore_size=False): """Prepare the block devices for an instance. This sets up the block devices on all nodes. 
@@ -2809,6 +3148,10 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False): @type ignore_secondaries: boolean @param ignore_secondaries: if true, errors on secondary nodes won't result in an error return from the function + @type ignore_size: boolean + @param ignore_size: if true, the current known size of the disk + will not be used during the disk activation, useful for cases + when the size is wrong @return: False if the operation failed, otherwise a list of (host, instance_visible_name, node_visible_name) with the mapping from node devices to instance devices @@ -2829,6 +3172,9 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False): # 1st pass, assemble on all nodes in secondary mode for inst_disk in instance.disks: for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): + if ignore_size: + node_disk = node_disk.Copy() + node_disk.UnsetSize() lu.cfg.SetDiskID(node_disk, node) result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False) msg = result.fail_msg @@ -2846,6 +3192,9 @@ def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False): for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): if node != instance.primary_node: continue + if ignore_size: + node_disk = node_disk.Copy() + node_disk.UnsetSize() lu.cfg.SetDiskID(node_disk, node) result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True) msg = result.fail_msg @@ -3302,6 +3651,89 @@ class LUReinstallInstance(LogicalUnit): _ShutdownInstanceDisks(self, inst) +class LURecreateInstanceDisks(LogicalUnit): + """Recreate an instance's missing disks. + + """ + HPATH = "instance-recreate-disks" + HTYPE = constants.HTYPE_INSTANCE + _OP_REQP = ["instance_name", "disks"] + REQ_BGL = False + + def CheckArguments(self): + """Check the arguments. + + """ + if not isinstance(self.op.disks, list): + raise errors.OpPrereqError("Invalid disks parameter") + for item in self.op.disks: + if (not isinstance(item, int) or + item < 0): + raise errors.OpPrereqError("Invalid disk specification '%s'" % + str(item)) + + def ExpandNames(self): + self._ExpandAndLockInstance() + + def BuildHooksEnv(self): + """Build hooks env. + + This runs on master, primary and secondary nodes of the instance. + + """ + env = _BuildInstanceHookEnvByObject(self, self.instance) + nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) + return env, nl, nl + + def CheckPrereq(self): + """Check prerequisites. + + This checks that the instance is in the cluster and is not running. 
+ + """ + instance = self.cfg.GetInstanceInfo(self.op.instance_name) + assert instance is not None, \ + "Cannot retrieve locked instance %s" % self.op.instance_name + _CheckNodeOnline(self, instance.primary_node) + + if instance.disk_template == constants.DT_DISKLESS: + raise errors.OpPrereqError("Instance '%s' has no disks" % + self.op.instance_name) + if instance.admin_up: + raise errors.OpPrereqError("Instance '%s' is marked to be up" % + self.op.instance_name) + remote_info = self.rpc.call_instance_info(instance.primary_node, + instance.name, + instance.hypervisor) + remote_info.Raise("Error checking node %s" % instance.primary_node, + prereq=True) + if remote_info.payload: + raise errors.OpPrereqError("Instance '%s' is running on the node %s" % + (self.op.instance_name, + instance.primary_node)) + + if not self.op.disks: + self.op.disks = range(len(instance.disks)) + else: + for idx in self.op.disks: + if idx >= len(instance.disks): + raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx) + + self.instance = instance + + def Exec(self, feedback_fn): + """Recreate the disks. + + """ + to_skip = [] + for idx, disk in enumerate(self.instance.disks): + if idx not in self.op.disks: # disk idx has not been passed in + to_skip.append(idx) + continue + + _CreateDisks(self, self.instance, to_skip=to_skip) + + class LURenameInstance(LogicalUnit): """Rename an instance. @@ -3493,7 +3925,9 @@ class LUQueryInstances(NoHooksLU): r"(nic)\.(bridge)/([0-9]+)", r"(nic)\.(macs|ips|modes|links|bridges)", r"(disk|nic)\.(count)", - "serial_no", "hypervisor", "hvparams",] + + "serial_no", "hypervisor", "hvparams", + "ctime", "mtime", + ] + ["hv/%s" % name for name in constants.HVS_PARAMETERS] + ["be/%s" % name @@ -3678,6 +4112,10 @@ class LUQueryInstances(NoHooksLU): val = list(instance.GetTags()) elif field == "serial_no": val = instance.serial_no + elif field == "ctime": + val = instance.ctime + elif field == "mtime": + val = instance.mtime elif field == "network_port": val = instance.network_port elif field == "hypervisor": @@ -3905,9 +4343,14 @@ class LUMigrateInstance(LogicalUnit): def ExpandNames(self): self._ExpandAndLockInstance() + self.needed_locks[locking.LEVEL_NODE] = [] self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE + self._migrater = TLMigrateInstance(self, self.op.instance_name, + self.op.live, self.op.cleanup) + self.tasklets = [self._migrater] + def DeclareLocks(self, level): if level == locking.LEVEL_NODE: self._LockInstancesNodes() @@ -3918,12 +4361,80 @@ class LUMigrateInstance(LogicalUnit): This runs on master, primary and secondary nodes of the instance. """ - env = _BuildInstanceHookEnvByObject(self, self.instance) + instance = self._migrater.instance + env = _BuildInstanceHookEnvByObject(self, instance) env["MIGRATE_LIVE"] = self.op.live env["MIGRATE_CLEANUP"] = self.op.cleanup - nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes) + nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) return env, nl, nl + +class LUMigrateNode(LogicalUnit): + """Migrate all instances from a node. 
+ + """ + HPATH = "node-migrate" + HTYPE = constants.HTYPE_NODE + _OP_REQP = ["node_name", "live"] + REQ_BGL = False + + def ExpandNames(self): + self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name) + if self.op.node_name is None: + raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name) + + self.needed_locks = { + locking.LEVEL_NODE: [self.op.node_name], + } + + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND + + # Create tasklets for migrating instances for all instances on this node + names = [] + tasklets = [] + + for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name): + logging.debug("Migrating instance %s", inst.name) + names.append(inst.name) + + tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False)) + + self.tasklets = tasklets + + # Declare instance locks + self.needed_locks[locking.LEVEL_INSTANCE] = names + + def DeclareLocks(self, level): + if level == locking.LEVEL_NODE: + self._LockInstancesNodes() + + def BuildHooksEnv(self): + """Build hooks env. + + This runs on the master, the primary and all the secondaries. + + """ + env = { + "NODE_NAME": self.op.node_name, + } + + nl = [self.cfg.GetMasterNode()] + + return (env, nl, nl) + + +class TLMigrateInstance(Tasklet): + def __init__(self, lu, instance_name, live, cleanup): + """Initializes this class. + + """ + Tasklet.__init__(self, lu) + + # Parameters + self.instance_name = instance_name + self.live = live + self.cleanup = cleanup + def CheckPrereq(self): """Check prerequisites. @@ -3931,10 +4442,10 @@ class LUMigrateInstance(LogicalUnit): """ instance = self.cfg.GetInstanceInfo( - self.cfg.ExpandInstanceName(self.op.instance_name)) + self.cfg.ExpandInstanceName(self.instance_name)) if instance is None: raise errors.OpPrereqError("Instance '%s' not known" % - self.op.instance_name) + self.instance_name) if instance.disk_template != constants.DT_DRBD8: raise errors.OpPrereqError("Instance's disk layout is not" @@ -3956,7 +4467,7 @@ class LUMigrateInstance(LogicalUnit): # check bridge existance _CheckInstanceBridgesExist(self, instance, node=target_node) - if not self.op.cleanup: + if not self.cleanup: _CheckNodeNotDrained(self, target_node) result = self.rpc.call_instance_migratable(instance.primary_node, instance) @@ -4103,10 +4614,10 @@ class LUMigrateInstance(LogicalUnit): self._GoReconnect(False) self._WaitUntilSync() except errors.OpExecError, err: - self.LogWarning("Migration failed and I can't reconnect the" - " drives: error '%s'\n" - "Please look and recover the instance status" % - str(err)) + self.lu.LogWarning("Migration failed and I can't reconnect the" + " drives: error '%s'\n" + "Please look and recover the instance status" % + str(err)) def _AbortMigration(self): """Call the hypervisor code to abort a started migration. @@ -4186,7 +4697,7 @@ class LUMigrateInstance(LogicalUnit): time.sleep(10) result = self.rpc.call_instance_migrate(source_node, instance, self.nodes_ip[target_node], - self.op.live) + self.live) msg = result.fail_msg if msg: logging.error("Instance migration failed, trying to revert" @@ -4224,6 +4735,8 @@ class LUMigrateInstance(LogicalUnit): """Perform the migration. 
""" + feedback_fn("Migrating instance %s" % self.instance.name) + self.feedback_fn = feedback_fn self.source_node = self.instance.primary_node @@ -4233,7 +4746,8 @@ class LUMigrateInstance(LogicalUnit): self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip, self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip, } - if self.op.cleanup: + + if self.cleanup: return self._ExecCleanup() else: return self._ExecMigration() @@ -4418,7 +4932,7 @@ def _GetInstanceInfoText(instance): return "originstname+%s" % instance.name -def _CreateDisks(lu, instance): +def _CreateDisks(lu, instance, to_skip=None): """Create all disks for an instance. This abstracts away some work from AddInstance. @@ -4427,6 +4941,8 @@ def _CreateDisks(lu, instance): @param lu: the logical unit on whose behalf we execute @type instance: L{objects.Instance} @param instance: the instance whose disks we should create + @type to_skip: list + @param to_skip: list of indices to skip @rtype: boolean @return: the success of the creation @@ -4443,7 +4959,9 @@ def _CreateDisks(lu, instance): # Note: this needs to be kept in sync with adding of disks in # LUSetInstanceParams - for device in instance.disks: + for idx, device in enumerate(instance.disks): + if to_skip and idx in to_skip: + continue logging.info("Creating volume %s for instance %s", device.iv_name, instance.name) #HARDCODE @@ -5222,7 +5740,7 @@ class LUReplaceDisks(LogicalUnit): self.op.iallocator, self.op.remote_node, self.op.disks) - self.tasklets.append(self.replacer) + self.tasklets = [self.replacer] def DeclareLocks(self, level): # If we're not already locking all nodes in the set we have to declare the @@ -5253,6 +5771,99 @@ class LUReplaceDisks(LogicalUnit): return env, nl, nl +class LUEvacuateNode(LogicalUnit): + """Relocate the secondary instances from a node. 
+ + """ + HPATH = "node-evacuate" + HTYPE = constants.HTYPE_NODE + _OP_REQP = ["node_name"] + REQ_BGL = False + + def CheckArguments(self): + if not hasattr(self.op, "remote_node"): + self.op.remote_node = None + if not hasattr(self.op, "iallocator"): + self.op.iallocator = None + + TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, + self.op.remote_node, + self.op.iallocator) + + def ExpandNames(self): + self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name) + if self.op.node_name is None: + raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name) + + self.needed_locks = {} + + # Declare node locks + if self.op.iallocator is not None: + self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET + + elif self.op.remote_node is not None: + remote_node = self.cfg.ExpandNodeName(self.op.remote_node) + if remote_node is None: + raise errors.OpPrereqError("Node '%s' not known" % + self.op.remote_node) + + self.op.remote_node = remote_node + + # Warning: do not remove the locking of the new secondary here + # unless DRBD8.AddChildren is changed to work in parallel; + # currently it doesn't since parallel invocations of + # FindUnusedMinor will conflict + self.needed_locks[locking.LEVEL_NODE] = [remote_node] + self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND + + else: + raise errors.OpPrereqError("Invalid parameters") + + # Create tasklets for replacing disks for all secondary instances on this + # node + names = [] + tasklets = [] + + for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name): + logging.debug("Replacing disks for instance %s", inst.name) + names.append(inst.name) + + replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG, + self.op.iallocator, self.op.remote_node, []) + tasklets.append(replacer) + + self.tasklets = tasklets + self.instance_names = names + + # Declare instance locks + self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names + + def DeclareLocks(self, level): + # If we're not already locking all nodes in the set we have to declare the + # instance's primary/secondary nodes. + if (level == locking.LEVEL_NODE and + self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET): + self._LockInstancesNodes() + + def BuildHooksEnv(self): + """Build hooks env. + + This runs on the master, the primary and all the secondaries. + + """ + env = { + "NODE_NAME": self.op.node_name, + } + + nl = [self.cfg.GetMasterNode()] + + if self.op.remote_node is not None: + env["NEW_SECONDARY"] = self.op.remote_node + nl.append(self.op.remote_node) + + return (env, nl, nl) + + class TLReplaceDisks(Tasklet): """Replaces disks for an instance. @@ -5264,18 +5875,15 @@ class TLReplaceDisks(Tasklet): """Initializes this class. 
""" + Tasklet.__init__(self, lu) + # Parameters - self.lu = lu self.instance_name = instance_name self.mode = mode self.iallocator_name = iallocator_name self.remote_node = remote_node self.disks = disks - # Shortcuts - self.cfg = lu.cfg - self.rpc = lu.rpc - # Runtime data self.instance = None self.new_node = None @@ -5290,20 +5898,21 @@ class TLReplaceDisks(Tasklet): """ # check for valid parameter combination - cnt = [remote_node, iallocator].count(None) if mode == constants.REPLACE_DISK_CHG: - if cnt == 2: + if remote_node is None and iallocator is None: raise errors.OpPrereqError("When changing the secondary either an" " iallocator script must be used or the" " new node given") - elif cnt == 0: + + if remote_node is not None and iallocator is not None: raise errors.OpPrereqError("Give either the iallocator or the new" " secondary, not both") - else: # not replacing the secondary - if cnt != 2: - raise errors.OpPrereqError("The iallocator and new node options can" - " be used only when changing the" - " secondary node") + + elif remote_node is not None or iallocator is not None: + # Not replacing the secondary + raise errors.OpPrereqError("The iallocator and new node options can" + " only be used when changing the" + " secondary node") @staticmethod def _RunAllocator(lu, iallocator_name, instance_name, relocate_from): @@ -5333,6 +5942,10 @@ class TLReplaceDisks(Tasklet): return remote_node_name + def _FindFaultyDisks(self, node_name): + return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance, + node_name, True) + def CheckPrereq(self): """Check prerequisites. @@ -5375,35 +5988,64 @@ class TLReplaceDisks(Tasklet): raise errors.OpPrereqError("The specified node is already the" " secondary node of the instance.") - if self.mode == constants.REPLACE_DISK_PRI: - self.target_node = self.instance.primary_node - self.other_node = secondary_node - check_nodes = [self.target_node, self.other_node] + if self.disks and self.mode in (constants.REPLACE_DISK_AUTO, + constants.REPLACE_DISK_CHG): + raise errors.OpPrereqError("Cannot specify disks to be replaced") + + if self.mode == constants.REPLACE_DISK_AUTO: + faulty_primary = self._FindFaultyDisks(self.instance.primary_node) + faulty_secondary = self._FindFaultyDisks(secondary_node) + + if faulty_primary and faulty_secondary: + raise errors.OpPrereqError("Instance %s has faulty disks on more than" + " one node and can not be repaired" + " automatically" % self.instance_name) + + if faulty_primary: + self.disks = faulty_primary + self.target_node = self.instance.primary_node + self.other_node = secondary_node + check_nodes = [self.target_node, self.other_node] + elif faulty_secondary: + self.disks = faulty_secondary + self.target_node = secondary_node + self.other_node = self.instance.primary_node + check_nodes = [self.target_node, self.other_node] + else: + self.disks = [] + check_nodes = [] - elif self.mode == constants.REPLACE_DISK_SEC: - self.target_node = secondary_node - self.other_node = self.instance.primary_node - check_nodes = [self.target_node, self.other_node] + else: + # Non-automatic modes + if self.mode == constants.REPLACE_DISK_PRI: + self.target_node = self.instance.primary_node + self.other_node = secondary_node + check_nodes = [self.target_node, self.other_node] - elif self.mode == constants.REPLACE_DISK_CHG: - self.new_node = remote_node - self.other_node = self.instance.primary_node - self.target_node = secondary_node - check_nodes = [self.new_node, self.other_node] + elif self.mode == constants.REPLACE_DISK_SEC: + 
self.target_node = secondary_node + self.other_node = self.instance.primary_node + check_nodes = [self.target_node, self.other_node] - _CheckNodeNotDrained(self.lu, remote_node) + elif self.mode == constants.REPLACE_DISK_CHG: + self.new_node = remote_node + self.other_node = self.instance.primary_node + self.target_node = secondary_node + check_nodes = [self.new_node, self.other_node] - else: - raise errors.ProgrammerError("Unhandled disk replace mode (%s)" % - self.mode) + _CheckNodeNotDrained(self.lu, remote_node) + + else: + raise errors.ProgrammerError("Unhandled disk replace mode (%s)" % + self.mode) + + # If not specified all disks should be replaced + if not self.disks: + self.disks = range(len(self.instance.disks)) for node in check_nodes: _CheckNodeOnline(self.lu, node) - # If not specified all disks should be replaced - if not self.disks: - self.disks = range(len(self.instance.disks)) - # Check whether disks are valid for disk_idx in self.disks: self.instance.FindDisk(disk_idx) @@ -5423,6 +6065,13 @@ class TLReplaceDisks(Tasklet): This dispatches the disk replacement to the appropriate handler. """ + if not self.disks: + feedback_fn("No disks need replacement") + return + + feedback_fn("Replacing disk(s) %s for %s" % + (", ".join([str(i) for i in self.disks]), self.instance.name)) + activate_disks = (not self.instance.admin_up) # Activate the instance disks if we're replacing them on a down instance @@ -5430,7 +6079,8 @@ class TLReplaceDisks(Tasklet): _StartInstanceDisks(self.lu, self.instance, True) try: - if self.mode == constants.REPLACE_DISK_CHG: + # Should we replace the secondary node? + if self.new_node is not None: return self._ExecDrbd8Secondary() else: return self._ExecDrbd8DiskOnly() @@ -5534,7 +6184,7 @@ class TLReplaceDisks(Tasklet): raise errors.OpExecError("Can't find DRBD device %s: %s" % (name, msg)) - if result.payload[5]: + if result.payload.is_degraded: raise errors.OpExecError("DRBD device %s is degraded!" % name) def _RemoveOldStorage(self, node_name, iv_names): @@ -5803,6 +6453,62 @@ class TLReplaceDisks(Tasklet): self._RemoveOldStorage(self.target_node, iv_names) +class LURepairNodeStorage(NoHooksLU): + """Repairs the volume group on a node. + + """ + _OP_REQP = ["node_name"] + REQ_BGL = False + + def CheckArguments(self): + node_name = self.cfg.ExpandNodeName(self.op.node_name) + if node_name is None: + raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name) + + self.op.node_name = node_name + + def ExpandNames(self): + self.needed_locks = { + locking.LEVEL_NODE: [self.op.node_name], + } + + def _CheckFaultyDisks(self, instance, node_name): + if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance, + node_name, True): + raise errors.OpPrereqError("Instance '%s' has faulty disks on" + " node '%s'" % (inst.name, node_name)) + + def CheckPrereq(self): + """Check prerequisites. + + """ + storage_type = self.op.storage_type + + if (constants.SO_FIX_CONSISTENCY not in + constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])): + raise errors.OpPrereqError("Storage units of type '%s' can not be" + " repaired" % storage_type) + + # Check whether any instance on this node has faulty disks + for inst in _GetNodeInstances(self.cfg, self.op.node_name): + check_nodes = set(inst.all_nodes) + check_nodes.discard(self.op.node_name) + for inst_node_name in check_nodes: + self._CheckFaultyDisks(inst, inst_node_name) + + def Exec(self, feedback_fn): + feedback_fn("Repairing storage unit '%s' on %s ..." 
% + (self.op.name, self.op.node_name)) + + st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) + result = self.rpc.call_storage_execute(self.op.node_name, + self.op.storage_type, st_args, + self.op.name, + constants.SO_FIX_CONSISTENCY) + result.Raise("Failed to repair storage unit '%s' on %s" % + (self.op.name, self.op.node_name)) + + class LUGrowDisk(LogicalUnit): """Grow a disk of an instance. @@ -5939,22 +6645,33 @@ class LUQueryInstanceData(NoHooksLU): in self.wanted_names] return + def _ComputeBlockdevStatus(self, node, instance_name, dev): + """Returns the status of a block device + + """ + if self.op.static or not node: + return None + + self.cfg.SetDiskID(dev, node) + + result = self.rpc.call_blockdev_find(node, dev) + if result.offline: + return None + + result.Raise("Can't compute disk status for %s" % instance_name) + + status = result.payload + if status is None: + return None + + return (status.dev_path, status.major, status.minor, + status.sync_percent, status.estimated_time, + status.is_degraded, status.ldisk_status) + def _ComputeDiskStatus(self, instance, snode, dev): """Compute block device status. """ - static = self.op.static - if not static: - self.cfg.SetDiskID(dev, instance.primary_node) - dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev) - if dev_pstatus.offline: - dev_pstatus = None - else: - dev_pstatus.Raise("Can't compute disk status for %s" % instance.name) - dev_pstatus = dev_pstatus.payload - else: - dev_pstatus = None - if dev.dev_type in constants.LDS_DRBD: # we change the snode then (otherwise we use the one passed in) if dev.logical_id[0] == instance.primary_node: @@ -5962,16 +6679,9 @@ class LUQueryInstanceData(NoHooksLU): else: snode = dev.logical_id[0] - if snode and not static: - self.cfg.SetDiskID(dev, snode) - dev_sstatus = self.rpc.call_blockdev_find(snode, dev) - if dev_sstatus.offline: - dev_sstatus = None - else: - dev_sstatus.Raise("Can't compute disk status for %s" % instance.name) - dev_sstatus = dev_sstatus.payload - else: - dev_sstatus = None + dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node, + instance.name, dev) + dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev) if dev.children: dev_children = [self._ComputeDiskStatus(instance, snode, child) @@ -6036,6 +6746,9 @@ class LUQueryInstanceData(NoHooksLU): "hv_actual": cluster.FillHV(instance), "be_instance": instance.beparams, "be_actual": cluster.FillBE(instance), + "serial_no": instance.serial_no, + "mtime": instance.mtime, + "ctime": instance.ctime, } result[instance.name] = idict @@ -6683,6 +7396,8 @@ class LUExportInstance(LogicalUnit): for disk in instance.disks: self.cfg.SetDiskID(disk, src_node) + # per-disk results + dresults = [] try: for idx, disk in enumerate(instance.disks): # result.payload will be a snapshot of an lvm leaf of the one we passed @@ -6718,16 +7433,23 @@ class LUExportInstance(LogicalUnit): if msg: self.LogWarning("Could not export disk/%s from node %s to" " node %s: %s", idx, src_node, dst_node.name, msg) + dresults.append(False) + else: + dresults.append(True) msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg if msg: self.LogWarning("Could not remove snapshot for disk/%d from node" " %s: %s", idx, src_node, msg) + else: + dresults.append(False) result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks) + fin_resu = True msg = result.fail_msg if msg: self.LogWarning("Could not finalize export for instance %s" " on node %s: %s", instance.name, dst_node.name, msg) + fin_resu 
= False nodelist = self.cfg.GetNodeList() nodelist.remove(dst_node.name) @@ -6746,6 +7468,7 @@ class LUExportInstance(LogicalUnit): if msg: self.LogWarning("Could not remove older export for instance %s" " on node %s: %s", iname, node, msg) + return fin_resu, dresults class LURemoveExport(NoHooksLU): @@ -7109,11 +7832,12 @@ class IAllocator(object): "master_candidate": ninfo.master_candidate, } - if not ninfo.offline: + if not (ninfo.offline or ninfo.drained): nresult.Raise("Can't get data for node %s" % nname) node_iinfo[nname].Raise("Can't get node instance info from node %s" % nname) remote_info = nresult.payload + for attr in ['memory_total', 'memory_free', 'memory_dom0', 'vg_size', 'vg_free', 'cpu_total']: if attr not in remote_info:
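# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch above).  The central structural
# change in this diff is the tasklet mechanism: LogicalUnit.tasklets now
# defaults to None, Tasklet gains an __init__ that stores the owning LU plus
# cfg/rpc shortcuts, and the base-class CheckPrereq()/Exec() iterate over
# self.tasklets when a LU assigns a list (as LUMigrateNode and LUEvacuateNode
# do with TLMigrateInstance and TLReplaceDisks).  The classes below,
# TLDoSomething and LUDoManyThings, are hypothetical names used only to show
# the pattern; they do not exist in Ganeti.

class TLDoSomething(Tasklet):
  """One self-contained unit of work driven by a LogicalUnit."""

  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)  # stores self.lu and the cfg/rpc shortcuts
    self.instance_name = instance_name

  def CheckPrereq(self):
    # Called once per tasklet from LogicalUnit.CheckPrereq()
    self.instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.instance_name))
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name)

  def Exec(self, feedback_fn):
    # Called once per tasklet from LogicalUnit.Exec()
    feedback_fn("Working on instance %s" % self.instance.name)


class LUDoManyThings(NoHooksLU):
  """A LU that delegates all of its work to one tasklet per instance."""

  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {locking.LEVEL_INSTANCE: self.op.instances}
    # Assigning a list here makes the LogicalUnit base class drive the
    # tasklets; leaving self.tasklets as None means this LU must implement
    # CheckPrereq()/Exec() itself (the pre-patch behaviour).
    self.tasklets = [TLDoSomething(self, name) for name in self.op.instances]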