X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/e7c6e02b036e6fc311cf886b28598c3cc18af795..6b0391b3e3c9b086213ec88bc8c28c85f199ae6b:/lib/opcodes.py diff --git a/lib/opcodes.py b/lib/opcodes.py index c629e0c..66698f5 100644 --- a/lib/opcodes.py +++ b/lib/opcodes.py @@ -24,11 +24,8 @@ This module implements the data structures which define the cluster operations - the so-called opcodes. - -This module implements the logic for doing operations in the cluster. There -are two kinds of classes defined: - - opcodes, which are small classes only holding data for the task at hand - - logical units, which know how to deal with their specific opcode only +Every operation which modifies the cluster state is expressed via +opcodes. """ @@ -37,97 +34,106 @@ are two kinds of classes defined: # pylint: disable-msg=R0903 -class BaseJO(object): +class BaseOpCode(object): """A simple serializable object. - This object serves as a parent class for both OpCode and Job since - they are serialized in the same way. + This object serves as a parent class for OpCode without any custom + field handling. """ __slots__ = [] def __init__(self, **kwargs): + """Constructor for BaseOpCode. + + The constructor takes only keyword arguments and will set + attributes on this object based on the passed arguments. As such, + it means that you should not pass arguments which are not in the + __slots__ attribute for this class. + + """ + slots = self._all_slots() for key in kwargs: - if key not in self.__slots__: + if key not in slots: raise TypeError("Object %s doesn't support the parameter '%s'" % (self.__class__.__name__, key)) setattr(self, key, kwargs[key]) def __getstate__(self): + """Generic serializer. + + This method just returns the contents of the instance as a + dictionary. + + @rtype: C{dict} + @return: the instance attributes and their values + + """ state = {} - for name in self.__slots__: + for name in self._all_slots(): if hasattr(self, name): state[name] = getattr(self, name) return state def __setstate__(self, state): + """Generic unserializer. + + This method just restores from the serialized state the attributes + of the current instance. + + @param state: the serialized opcode data + @type state: C{dict} + + """ if not isinstance(state, dict): raise ValueError("Invalid data to __setstate__: expected dict, got %s" % type(state)) - for name in self.__slots__: + for name in self._all_slots(): if name not in state: delattr(self, name) for name in state: setattr(self, name, state[name]) - -class Job(BaseJO): - """Job definition structure - - The Job definitions has two sets of parameters: - - the parameters of the job itself (all filled by server): - - job_id, - - status: pending, running, successfull, failed, aborted - - opcode parameters: - - op_list, list of opcodes, clients creates this - - op_status, status for each opcode, server fills in - - op_result, result for each opcode, server fills in - - """ - STATUS_PENDING = 1 - STATUS_RUNNING = 2 - STATUS_SUCCESS = 3 - STATUS_FAIL = 4 - STATUS_ABORT = 5 - - __slots__ = [ - "job_id", - "status", - "op_list", - "op_status", - "op_result", - ] - - def __getstate__(self): - """Specialized getstate for jobs + @classmethod + def _all_slots(cls): + """Compute the list of all declared slots for a class. """ - data = BaseJO.__getstate__(self) - if "op_list" in data: - data["op_list"] = [op.__getstate__() for op in data["op_list"]] - return data + slots = [] + for parent in cls.__mro__: + slots.extend(getattr(parent, "__slots__", [])) + return slots - def __setstate__(self, state): - """Specialized setstate for jobs - """ - BaseJO.__setstate__(self, state) - if "op_list" in state: - self.op_list = [OpCode.LoadOpCode(op) for op in state["op_list"]] +class OpCode(BaseOpCode): + """Abstract OpCode. + + This is the root of the actual OpCode hierarchy. All clases derived + from this class should override OP_ID. + @cvar OP_ID: The ID of this opcode. This should be unique amongst all + children of this class. + @ivar dry_run: Whether the LU should be run in dry-run mode, i.e. just + the check steps -class OpCode(BaseJO): - """Abstract OpCode""" + """ OP_ID = "OP_ABSTRACT" - __slots__ = [] + __slots__ = ["dry_run", "debug_level"] def __getstate__(self): """Specialized getstate for opcodes. + This method adds to the state dictionary the OP_ID of the class, + so that on unload we can identify the correct class for + instantiating the opcode. + + @rtype: C{dict} + @return: the state as a dictionary + """ - data = BaseJO.__getstate__(self) + data = BaseOpCode.__getstate__(self) data["OP_ID"] = self.OP_ID return data @@ -135,6 +141,13 @@ class OpCode(BaseJO): def LoadOpCode(cls, data): """Generic load opcode method. + The method identifies the correct opcode class from the dict-form + by looking for a OP_ID key, if this is not found, or its value is + not available in this module as a child of this class, we fail. + + @type data: C{dict} + @param data: the serialized opcode + """ if not isinstance(data, dict): raise ValueError("Invalid data to LoadOpCode (%s)" % type(data)) @@ -142,14 +155,9 @@ class OpCode(BaseJO): raise ValueError("Invalid data to LoadOpcode, missing OP_ID") op_id = data["OP_ID"] op_class = None - for item in globals().values(): - if (isinstance(item, type) and - issubclass(item, cls) and - hasattr(item, "OP_ID") and - getattr(item, "OP_ID") == op_id): - op_class = item - break - if op_class is None: + if op_id in OP_MAPPING: + op_class = OP_MAPPING[op_id] + else: raise ValueError("Invalid data to LoadOpCode: OP_ID %s unsupported" % op_id) op = op_class() @@ -158,17 +166,39 @@ class OpCode(BaseJO): op.__setstate__(new_data) return op + def Summary(self): + """Generates a summary description of this opcode. + + """ + # all OP_ID start with OP_, we remove that + txt = self.OP_ID[3:] + field_name = getattr(self, "OP_DSC_FIELD", None) + if field_name: + field_value = getattr(self, field_name, None) + txt = "%s(%s)" % (txt, field_value) + return txt -class OpInitCluster(OpCode): - """Initialise the cluster.""" - OP_ID = "OP_CLUSTER_INIT" - __slots__ = ["cluster_name", "secondary_ip", "hypervisor_type", - "vg_name", "mac_prefix", "def_bridge", "master_netdev", - "file_storage_dir"] + +# cluster opcodes + +class OpPostInitCluster(OpCode): + """Post cluster initialization. + + This opcode does not touch the cluster at all. Its purpose is to run hooks + after the cluster has been initialized. + + """ + OP_ID = "OP_CLUSTER_POST_INIT" + __slots__ = [] class OpDestroyCluster(OpCode): - """Destroy the cluster.""" + """Destroy the cluster. + + This opcode has no other parameters. All the state is irreversibly + lost after the execution of this opcode. + + """ OP_ID = "OP_CLUSTER_DESTROY" __slots__ = [] @@ -179,22 +209,19 @@ class OpQueryClusterInfo(OpCode): __slots__ = [] -class OpClusterCopyFile(OpCode): - """Copy a file to multiple nodes.""" - OP_ID = "OP_CLUSTER_COPYFILE" - __slots__ = ["nodes", "filename"] - - -class OpRunClusterCommand(OpCode): - """Run a command on multiple nodes.""" - OP_ID = "OP_CLUSTER_RUNCOMMAND" - __slots__ = ["nodes", "command"] +class OpVerifyCluster(OpCode): + """Verify the cluster state. + @type skip_checks: C{list} + @ivar skip_checks: steps to be skipped from the verify process; this + needs to be a subset of + L{constants.VERIFY_OPTIONAL_CHECKS}; currently + only L{constants.VERIFY_NPLUSONE_MEM} can be passed -class OpVerifyCluster(OpCode): - """Verify the cluster state.""" + """ OP_ID = "OP_CLUSTER_VERIFY" - __slots__ = ["skip_checks"] + __slots__ = ["skip_checks", "verbose", "error_codes", + "debug_simulate_errors"] class OpVerifyDisks(OpCode): @@ -202,9 +229,9 @@ class OpVerifyDisks(OpCode): Parameters: none - Result: two lists: + Result: a tuple of four elements: - list of node names with bad data returned (unreachable, etc.) - - dist of node names with broken volume groups (values: error msg) + - dict of node names with broken volume groups (values: error msg) - list of instances with degraded disks (that should be activated) - dict of instances with missing logical volumes (values: (node, vol) pairs with details about the missing volumes) @@ -222,48 +249,117 @@ class OpVerifyDisks(OpCode): __slots__ = [] -class OpMasterFailover(OpCode): - """Do a master failover.""" - OP_ID = "OP_CLUSTER_MASTERFAILOVER" - __slots__ = [] +class OpRepairDiskSizes(OpCode): + """Verify the disk sizes of the instances and fixes configuration + mimatches. + Parameters: optional instances list, in case we want to restrict the + checks to only a subset of the instances. -class OpDumpClusterConfig(OpCode): - """Dump the cluster configuration.""" - OP_ID = "OP_CLUSTER_DUMPCONFIG" - __slots__ = [] + Result: a list of tuples, (instance, disk, new-size) for changed + configurations. + + In normal operation, the list should be empty. + + @type instances: list + @ivar instances: the list of instances to check, or empty for all instances + + """ + OP_ID = "OP_CLUSTER_REPAIR_DISK_SIZES" + __slots__ = ["instances"] + + +class OpQueryConfigValues(OpCode): + """Query cluster configuration values.""" + OP_ID = "OP_CLUSTER_CONFIG_QUERY" + __slots__ = ["output_fields"] class OpRenameCluster(OpCode): - """Rename the cluster.""" + """Rename the cluster. + + @type name: C{str} + @ivar name: The new name of the cluster. The name and/or the master IP + address will be changed to match the new name and its IP + address. + + """ OP_ID = "OP_CLUSTER_RENAME" + OP_DSC_FIELD = "name" __slots__ = ["name"] class OpSetClusterParams(OpCode): - """Change the parameters of the cluster.""" + """Change the parameters of the cluster. + + @type vg_name: C{str} or C{None} + @ivar vg_name: The new volume group name or None to disable LVM usage. + + """ OP_ID = "OP_CLUSTER_SET_PARAMS" - __slots__ = ["vg_name"] + __slots__ = [ + "vg_name", + "enabled_hypervisors", + "hvparams", + "beparams", + "nicparams", + "candidate_pool_size", + ] + +class OpRedistributeConfig(OpCode): + """Force a full push of the cluster configuration. + + """ + OP_ID = "OP_CLUSTER_REDIST_CONF" + __slots__ = [] # node opcodes class OpRemoveNode(OpCode): - """Remove a node.""" + """Remove a node. + + @type node_name: C{str} + @ivar node_name: The name of the node to remove. If the node still has + instances on it, the operation will fail. + + """ OP_ID = "OP_NODE_REMOVE" + OP_DSC_FIELD = "node_name" __slots__ = ["node_name"] class OpAddNode(OpCode): - """Add a node.""" + """Add a node to the cluster. + + @type node_name: C{str} + @ivar node_name: The name of the node to add. This can be a short name, + but it will be expanded to the FQDN. + @type primary_ip: IP address + @ivar primary_ip: The primary IP of the node. This will be ignored when the + opcode is submitted, but will be filled during the node + add (so it will be visible in the job query). + @type secondary_ip: IP address + @ivar secondary_ip: The secondary IP of the node. This needs to be passed + if the cluster has been initialized in 'dual-network' + mode, otherwise it must not be given. + @type readd: C{bool} + @ivar readd: Whether to re-add an existing node to the cluster. If + this is not passed, then the operation will abort if the node + name is already in the cluster; use this parameter to 'repair' + a node that had its configuration broken, or was reinstalled + without removal from the cluster. + + """ OP_ID = "OP_NODE_ADD" + OP_DSC_FIELD = "node_name" __slots__ = ["node_name", "primary_ip", "secondary_ip", "readd"] class OpQueryNodes(OpCode): """Compute the list of nodes.""" OP_ID = "OP_NODE_QUERY" - __slots__ = ["output_fields", "names"] + __slots__ = ["output_fields", "names", "use_locking"] class OpQueryNodeVolumes(OpCode): @@ -272,107 +368,266 @@ class OpQueryNodeVolumes(OpCode): __slots__ = ["nodes", "output_fields"] +class OpQueryNodeStorage(OpCode): + """Get information on storage for node(s).""" + OP_ID = "OP_NODE_QUERY_STORAGE" + __slots__ = [ + "nodes", + "storage_type", + "name", + "output_fields", + ] + + +class OpModifyNodeStorage(OpCode): + """Modifies the properies of a storage unit""" + OP_ID = "OP_NODE_MODIFY_STORAGE" + __slots__ = [ + "node_name", + "storage_type", + "name", + "changes", + ] + + +class OpRepairNodeStorage(OpCode): + """Repairs the volume group on a node.""" + OP_ID = "OP_REPAIR_NODE_STORAGE" + OP_DSC_FIELD = "node_name" + __slots__ = [ + "node_name", + "storage_type", + "name", + "ignore_consistency", + ] + + +class OpSetNodeParams(OpCode): + """Change the parameters of a node.""" + OP_ID = "OP_NODE_SET_PARAMS" + OP_DSC_FIELD = "node_name" + __slots__ = [ + "node_name", + "force", + "master_candidate", + "offline", + "drained", + ] + + +class OpPowercycleNode(OpCode): + """Tries to powercycle a node.""" + OP_ID = "OP_NODE_POWERCYCLE" + OP_DSC_FIELD = "node_name" + __slots__ = [ + "node_name", + "force", + ] + + +class OpEvacuateNode(OpCode): + """Relocate secondary instances from a node.""" + OP_ID = "OP_NODE_EVACUATE" + OP_DSC_FIELD = "node_name" + __slots__ = [ + "node_name", "remote_node", "iallocator", "early_release", + ] + + +class OpMigrateNode(OpCode): + """Migrate all instances from a node.""" + OP_ID = "OP_NODE_MIGRATE" + OP_DSC_FIELD = "node_name" + __slots__ = [ + "node_name", + "live", + ] + + +class OpNodeEvacuationStrategy(OpCode): + """Compute the evacuation strategy for a list of nodes.""" + OP_ID = "OP_NODE_EVAC_STRATEGY" + OP_DSC_FIELD = "nodes" + __slots__ = ["nodes", "iallocator", "remote_node"] + + # instance opcodes class OpCreateInstance(OpCode): """Create an instance.""" OP_ID = "OP_INSTANCE_CREATE" + OP_DSC_FIELD = "instance_name" __slots__ = [ - "instance_name", "mem_size", "disk_size", "os_type", "pnode", - "disk_template", "snode", "swap_size", "mode", - "vcpus", "ip", "bridge", "src_node", "src_path", "start", - "wait_for_sync", "ip_check", "mac", - "kernel_path", "initrd_path", "hvm_boot_order", + "instance_name", "os_type", "force_variant", + "pnode", "disk_template", "snode", "mode", + "disks", "nics", + "src_node", "src_path", "start", + "wait_for_sync", "ip_check", "name_check", "file_storage_dir", "file_driver", "iallocator", + "hypervisor", "hvparams", "beparams", + "dry_run", ] class OpReinstallInstance(OpCode): """Reinstall an instance's OS.""" OP_ID = "OP_INSTANCE_REINSTALL" - __slots__ = ["instance_name", "os_type"] + OP_DSC_FIELD = "instance_name" + __slots__ = ["instance_name", "os_type", "force_variant"] class OpRemoveInstance(OpCode): """Remove an instance.""" OP_ID = "OP_INSTANCE_REMOVE" - __slots__ = ["instance_name", "ignore_failures"] + OP_DSC_FIELD = "instance_name" + __slots__ = [ + "instance_name", + "ignore_failures", + "shutdown_timeout", + ] class OpRenameInstance(OpCode): """Rename an instance.""" OP_ID = "OP_INSTANCE_RENAME" - __slots__ = ["instance_name", "ignore_ip", "new_name"] + __slots__ = [ + "instance_name", "ignore_ip", "new_name", + ] class OpStartupInstance(OpCode): """Startup an instance.""" OP_ID = "OP_INSTANCE_STARTUP" - __slots__ = ["instance_name", "force", "extra_args"] + OP_DSC_FIELD = "instance_name" + __slots__ = [ + "instance_name", "force", "hvparams", "beparams", + ] class OpShutdownInstance(OpCode): """Shutdown an instance.""" OP_ID = "OP_INSTANCE_SHUTDOWN" - __slots__ = ["instance_name"] + OP_DSC_FIELD = "instance_name" + __slots__ = ["instance_name", "timeout"] class OpRebootInstance(OpCode): """Reboot an instance.""" OP_ID = "OP_INSTANCE_REBOOT" - __slots__ = ["instance_name", "reboot_type", "extra_args", - "ignore_secondaries" ] + OP_DSC_FIELD = "instance_name" + __slots__ = [ + "instance_name", "reboot_type", "ignore_secondaries", "shutdown_timeout", + ] class OpReplaceDisks(OpCode): """Replace the disks of an instance.""" OP_ID = "OP_INSTANCE_REPLACE_DISKS" - __slots__ = ["instance_name", "remote_node", "mode", "disks"] + OP_DSC_FIELD = "instance_name" + __slots__ = [ + "instance_name", "remote_node", "mode", "disks", "iallocator", + "early_release", + ] class OpFailoverInstance(OpCode): """Failover an instance.""" OP_ID = "OP_INSTANCE_FAILOVER" - __slots__ = ["instance_name", "ignore_consistency"] + OP_DSC_FIELD = "instance_name" + __slots__ = [ + "instance_name", "ignore_consistency", "shutdown_timeout", + ] + + +class OpMigrateInstance(OpCode): + """Migrate an instance. + + This migrates (without shutting down an instance) to its secondary + node. + + @ivar instance_name: the name of the instance + + """ + OP_ID = "OP_INSTANCE_MIGRATE" + OP_DSC_FIELD = "instance_name" + __slots__ = ["instance_name", "live", "cleanup"] + + +class OpMoveInstance(OpCode): + """Move an instance. + + This move (with shutting down an instance and data copying) to an + arbitrary node. + + @ivar instance_name: the name of the instance + @ivar target_node: the destination node + + """ + OP_ID = "OP_INSTANCE_MOVE" + OP_DSC_FIELD = "instance_name" + __slots__ = [ + "instance_name", "target_node", "shutdown_timeout", + ] class OpConnectConsole(OpCode): """Connect to an instance's console.""" OP_ID = "OP_INSTANCE_CONSOLE" + OP_DSC_FIELD = "instance_name" __slots__ = ["instance_name"] class OpActivateInstanceDisks(OpCode): """Activate an instance's disks.""" OP_ID = "OP_INSTANCE_ACTIVATE_DISKS" - __slots__ = ["instance_name"] + OP_DSC_FIELD = "instance_name" + __slots__ = ["instance_name", "ignore_size"] class OpDeactivateInstanceDisks(OpCode): """Deactivate an instance's disks.""" OP_ID = "OP_INSTANCE_DEACTIVATE_DISKS" + OP_DSC_FIELD = "instance_name" __slots__ = ["instance_name"] +class OpRecreateInstanceDisks(OpCode): + """Deactivate an instance's disks.""" + OP_ID = "OP_INSTANCE_RECREATE_DISKS" + OP_DSC_FIELD = "instance_name" + __slots__ = ["instance_name", "disks"] + + class OpQueryInstances(OpCode): """Compute the list of instances.""" OP_ID = "OP_INSTANCE_QUERY" - __slots__ = ["output_fields", "names"] + __slots__ = ["output_fields", "names", "use_locking"] class OpQueryInstanceData(OpCode): """Compute the run-time status of instances.""" OP_ID = "OP_INSTANCE_QUERY_DATA" - __slots__ = ["instances"] + __slots__ = ["instances", "static"] class OpSetInstanceParams(OpCode): """Change the parameters of an instance.""" OP_ID = "OP_INSTANCE_SET_PARAMS" + OP_DSC_FIELD = "instance_name" + __slots__ = [ + "instance_name", + "hvparams", "beparams", "force", + "nics", "disks", + ] + + +class OpGrowDisk(OpCode): + """Grow a disk of an instance.""" + OP_ID = "OP_INSTANCE_GROW_DISK" + OP_DSC_FIELD = "instance_name" __slots__ = [ - "instance_name", "mem", "vcpus", "ip", "bridge", "mac", - "kernel_path", "initrd_path", "hvm_boot_order", + "instance_name", "disk", "amount", "wait_for_sync", ] @@ -387,25 +642,37 @@ class OpDiagnoseOS(OpCode): class OpQueryExports(OpCode): """Compute the list of exported images.""" OP_ID = "OP_BACKUP_QUERY" - __slots__ = ["nodes"] + __slots__ = ["nodes", "use_locking"] class OpExportInstance(OpCode): """Export an instance.""" OP_ID = "OP_BACKUP_EXPORT" - __slots__ = ["instance_name", "target_node", "shutdown"] + OP_DSC_FIELD = "instance_name" + __slots__ = [ + "instance_name", "target_node", "shutdown", "shutdown_timeout", + ] + + +class OpRemoveExport(OpCode): + """Remove an instance's export.""" + OP_ID = "OP_BACKUP_REMOVE" + OP_DSC_FIELD = "instance_name" + __slots__ = ["instance_name"] # Tags opcodes class OpGetTags(OpCode): """Returns the tags of the given object.""" OP_ID = "OP_TAGS_GET" + OP_DSC_FIELD = "name" __slots__ = ["kind", "name"] class OpSearchTags(OpCode): """Searches the tags in the cluster for a given pattern.""" OP_ID = "OP_TAGS_SEARCH" + OP_DSC_FIELD = "pattern" __slots__ = ["pattern"] @@ -444,6 +711,7 @@ class OpTestDelay(OpCode): """ OP_ID = "OP_TEST_DELAY" + OP_DSC_FIELD = "duration" __slots__ = ["duration", "on_master", "on_nodes"] @@ -459,8 +727,15 @@ class OpTestAllocator(OpCode): """ OP_ID = "OP_TEST_ALLOCATOR" + OP_DSC_FIELD = "allocator" __slots__ = [ "direction", "mode", "allocator", "name", "mem_size", "disks", "disk_template", - "os", "tags", "nics", "vcpus", + "os", "tags", "nics", "vcpus", "hypervisor", + "evac_nodes", ] + + +OP_MAPPING = dict([(v.OP_ID, v) for v in globals().values() + if (isinstance(v, type) and issubclass(v, OpCode) and + hasattr(v, "OP_ID"))])