4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 This module implements the data structures which define the cluster
25 operations - the so-called opcodes.
28 This module implements the logic for doing operations in the cluster. There
29 are two kinds of classes defined:
30 - opcodes, which are small classes only holding data for the task at hand
31 - logical units, which know how to deal with their specific opcode only
# these are practically structures, so disable the message about too
37 # pylint: disable-msg=R0903
41 """A simple serializable object.
43 This object serves as a parent class for both OpCode and Job since
44 they are serialized in the same way.
def __init__(self, **kwargs):
    """Initialize the object from keyword arguments.

    Every keyword must be one of the names listed in the ``__slots__`` of
    the object's class; each accepted keyword is stored as an attribute
    of the same name.

    Raises:
        TypeError: if a keyword is not listed in ``__slots__``.

    """
    for key, value in kwargs.items():
        # Reject unknown parameters early instead of letting setattr fail
        # with a less helpful message.
        if key not in self.__slots__:
            raise TypeError("Object %s doesn't support the parameter '%s'" %
                            (self.__class__.__name__, key))
        setattr(self, key, value)
def __getstate__(self):
    """Return the serializable state of the object.

    Returns:
        dict: maps every name in ``__slots__`` that is currently set on
        the object to its value; unset names are left out.

    """
    state = {}
    for name in self.__slots__:
        # A slot that was never assigned has no value to serialize.
        if hasattr(self, name):
            state[name] = getattr(self, name)
    return state
def __setstate__(self, state):
    """Restore the object from a previously serialized state.

    For every name in ``__slots__``: if the name is present in ``state``
    the attribute is set to that value, otherwise the attribute is cleared
    if it was set.  Clearing mirrors ``__getstate__``, which leaves unset
    names out of the serialized form.  Keys of ``state`` that are not in
    ``__slots__`` are not looked at.

    Args:
        state: dict mapping attribute names to values.

    Raises:
        ValueError: if ``state`` is not a dict.

    """
    if not isinstance(state, dict):
        raise ValueError(
            "Invalid data to __setstate__: expected dict, got %s" %
            type(state))

    for name in self.__slots__:
        if name in state:
            setattr(self, name, state[name])
        elif hasattr(self, name):
            # Absent from the serialized form means "not set".
            delattr(self, name)
77 """Job definition structure
    The Job definition has two sets of parameters:
80 - the parameters of the job itself (all filled by server):
        - status: pending, running, successful, failed, aborted
        - op_list, list of opcodes, the client creates this
85 - op_status, status for each opcode, server fills in
86 - op_result, result for each opcode, server fills in
def __getstate__(self):
    """Specialized getstate for jobs.

    Starts from the generic BaseJO state and, when an ``op_list`` entry is
    present, replaces each opcode in it with that opcode's own serialized
    state.

    Returns:
        dict: the serialized state of the job.

    """
    data = BaseJO.__getstate__(self)
    if "op_list" in data:
        data["op_list"] = [op.__getstate__() for op in data["op_list"]]
    return data
def __setstate__(self, state):
    """Specialized setstate for jobs.

    The generic attribute restore is delegated to BaseJO; afterwards, if
    the state carries an ``op_list`` entry, each element of it is turned
    back into an opcode object through ``OpCode.LoadOpCode``.

    """
    BaseJO.__setstate__(self, state)
    if "op_list" not in state:
        return
    self.op_list = [
        OpCode.LoadOpCode(serialized) for serialized in state["op_list"]
    ]
class OpCode(BaseJO):
    """Abstract OpCode.

    Concrete opcodes subclass this and set their own ``OP_ID``; that
    identifier is stored in the serialized form and used by
    ``LoadOpCode`` to pick the class to instantiate.

    """
    OP_ID = "OP_ABSTRACT"

    def __getstate__(self):
        """Specialized getstate for opcodes.

        Returns:
            dict: the generic BaseJO state plus an ``"OP_ID"`` entry
            holding the identifier of this opcode's class.

        """
        data = BaseJO.__getstate__(self)
        data["OP_ID"] = self.OP_ID
        return data

    @classmethod
    def LoadOpCode(cls, data):
        """Generic load opcode method.

        Looks through the module globals for a subclass of ``cls`` whose
        ``OP_ID`` equals the one stored in ``data``, instantiates it and
        restores its state from the remaining entries of ``data``.

        Args:
            data: dict as produced by ``__getstate__``; must contain an
                ``"OP_ID"`` key.  It is not modified.

        Returns:
            The newly created opcode object.

        Raises:
            ValueError: if ``data`` is not a dict, lacks ``"OP_ID"``, or
                names an ``OP_ID`` no known class declares.

        """
        if not isinstance(data, dict):
            raise ValueError("Invalid data to LoadOpCode (%s)" % type(data))
        if "OP_ID" not in data:
            raise ValueError("Invalid data to LoadOpcode, missing OP_ID")
        op_id = data["OP_ID"]

        for item in list(globals().values()):
            if (isinstance(item, type) and
                    issubclass(item, cls) and
                    hasattr(item, "OP_ID") and
                    getattr(item, "OP_ID") == op_id):
                op = item()
                break
        else:
            raise ValueError(
                "Invalid data to LoadOpCode: OP_ID %s unsupported" % op_id)

        # OP_ID selects the class; it is not an attribute to restore.
        new_data = data.copy()
        del new_data["OP_ID"]
        op.__setstate__(new_data)
        return op
class OpDestroyCluster(OpCode):
    """Opcode requesting the destruction of the cluster."""
    OP_ID = "OP_CLUSTER_DESTROY"
class OpQueryClusterInfo(OpCode):
    """Opcode for querying information about the cluster."""
    OP_ID = "OP_CLUSTER_QUERY"
class OpVerifyCluster(OpCode):
    """Opcode for verifying the state of the cluster."""
    OP_ID = "OP_CLUSTER_VERIFY"
    __slots__ = [
        "skip_checks",
    ]
class OpVerifyDisks(OpCode):
    """Opcode for verifying the disks of the cluster.

    The result is made of the following elements:
      - list of node names with bad data returned (unreachable, etc.)
      - dict of node names with broken volume groups (values: error msg)
      - list of instances with degraded disks (that should be activated)
      - dict of instances with missing logical volumes (values: (node, vol)
        pairs with details about the missing volumes)

    In normal operation, all lists should be empty. A non-empty instance
    list (3rd element of the result) is still ok (errors were fixed) but
    a non-empty node list means some node is down, and probably there are
    unfixable drbd errors.

    Note that only instances that are drbd-based are taken into
    consideration. This might need to be revisited in the future.

    """
    OP_ID = "OP_CLUSTER_VERIFY_DISKS"
class OpMasterFailover(OpCode):
    """Opcode for performing a master failover."""
    OP_ID = "OP_CLUSTER_MASTERFAILOVER"
class OpDumpClusterConfig(OpCode):
    """Opcode for dumping the configuration of the cluster."""
    OP_ID = "OP_CLUSTER_DUMPCONFIG"
class OpRenameCluster(OpCode):
    """Opcode for renaming the cluster."""
    OP_ID = "OP_CLUSTER_RENAME"
class OpSetClusterParams(OpCode):
    """Opcode for changing the parameters of the cluster."""
    OP_ID = "OP_CLUSTER_SET_PARAMS"
    __slots__ = [
        "vg_name",
    ]
class OpRemoveNode(OpCode):
    """Opcode for removing a node."""
    OP_ID = "OP_NODE_REMOVE"
    __slots__ = [
        "node_name",
    ]
class OpAddNode(OpCode):
    """Opcode for adding a node."""
    OP_ID = "OP_NODE_ADD"
    __slots__ = [
        "node_name",
        "primary_ip",
        "secondary_ip",
        "readd",
    ]
class OpQueryNodes(OpCode):
    """Opcode for computing the list of nodes."""
    OP_ID = "OP_NODE_QUERY"
    __slots__ = [
        "output_fields",
        "names",
    ]
class OpQueryNodeVolumes(OpCode):
    """Opcode for getting the list of volumes on nodes."""
    OP_ID = "OP_NODE_QUERYVOLS"
    __slots__ = [
        "nodes",
        "output_fields",
    ]
class OpCreateInstance(OpCode):
    """Opcode for creating an instance."""
    OP_ID = "OP_INSTANCE_CREATE"
    # The parameter names must be bound to __slots__: as bare string
    # expressions they had no effect at all.
    # NOTE(review): the end of this list was not visible when this was
    # written -- confirm that no further parameter name belongs here.
    __slots__ = [
        "instance_name", "mem_size", "disk_size", "os_type", "pnode",
        "disk_template", "snode", "swap_size", "mode",
        "vcpus", "ip", "bridge", "src_node", "src_path", "start",
        "wait_for_sync", "ip_check", "mac",
        "kernel_path", "initrd_path", "hvm_boot_order", "hvm_acpi",
        "hvm_pae", "hvm_cdrom_image_path", "vnc_bind_address",
        "file_storage_dir", "file_driver",
    ]
class OpReinstallInstance(OpCode):
    """Opcode for reinstalling the OS of an instance."""
    OP_ID = "OP_INSTANCE_REINSTALL"
    __slots__ = [
        "instance_name",
        "os_type",
    ]
class OpRemoveInstance(OpCode):
    """Opcode for removing an instance."""
    OP_ID = "OP_INSTANCE_REMOVE"
    __slots__ = [
        "instance_name",
        "ignore_failures",
    ]
class OpRenameInstance(OpCode):
    """Opcode for renaming an instance."""
    OP_ID = "OP_INSTANCE_RENAME"
    __slots__ = [
        "instance_name",
        "ignore_ip",
        "new_name",
    ]
class OpStartupInstance(OpCode):
    """Opcode for starting up an instance."""
    OP_ID = "OP_INSTANCE_STARTUP"
    __slots__ = [
        "instance_name",
        "force",
        "extra_args",
    ]
class OpShutdownInstance(OpCode):
    """Opcode for shutting down an instance."""
    OP_ID = "OP_INSTANCE_SHUTDOWN"
    __slots__ = [
        "instance_name",
    ]
class OpRebootInstance(OpCode):
    """Opcode for rebooting an instance."""
    OP_ID = "OP_INSTANCE_REBOOT"
    __slots__ = [
        "instance_name",
        "reboot_type",
        "extra_args",
        "ignore_secondaries",
    ]
class OpReplaceDisks(OpCode):
    """Opcode for replacing the disks of an instance."""
    OP_ID = "OP_INSTANCE_REPLACE_DISKS"
    __slots__ = [
        "instance_name",
        "remote_node",
        "mode",
        "disks",
        "iallocator",
    ]
class OpFailoverInstance(OpCode):
    """Opcode for failing over an instance."""
    OP_ID = "OP_INSTANCE_FAILOVER"
    __slots__ = [
        "instance_name",
        "ignore_consistency",
    ]
class OpConnectConsole(OpCode):
    """Opcode for connecting to the console of an instance."""
    OP_ID = "OP_INSTANCE_CONSOLE"
    __slots__ = [
        "instance_name",
    ]
class OpActivateInstanceDisks(OpCode):
    """Opcode for activating the disks of an instance."""
    OP_ID = "OP_INSTANCE_ACTIVATE_DISKS"
    __slots__ = [
        "instance_name",
    ]
class OpDeactivateInstanceDisks(OpCode):
    """Opcode for deactivating the disks of an instance."""
    OP_ID = "OP_INSTANCE_DEACTIVATE_DISKS"
    __slots__ = [
        "instance_name",
    ]
class OpQueryInstances(OpCode):
    """Opcode for computing the list of instances."""
    OP_ID = "OP_INSTANCE_QUERY"
    __slots__ = [
        "output_fields",
        "names",
    ]
class OpQueryInstanceData(OpCode):
    """Opcode for computing the run-time status of instances."""
    OP_ID = "OP_INSTANCE_QUERY_DATA"
    __slots__ = [
        "instances",
    ]
class OpSetInstanceParams(OpCode):
    """Opcode for changing the parameters of an instance."""
    OP_ID = "OP_INSTANCE_SET_PARAMS"
    # The parameter names must be bound to __slots__: as bare string
    # expressions they had no effect at all.
    __slots__ = [
        "instance_name", "mem", "vcpus", "ip", "bridge", "mac",
        "kernel_path", "initrd_path", "hvm_boot_order", "hvm_acpi",
        "hvm_pae", "hvm_cdrom_image_path", "vnc_bind_address",
    ]
class OpGrowDisk(OpCode):
    """Opcode for growing a disk of an instance."""
    OP_ID = "OP_INSTANCE_GROW_DISK"
    __slots__ = [
        "instance_name",
        "disk",
        "amount",
    ]
class OpDiagnoseOS(OpCode):
    """Opcode for computing the list of guest operating systems."""
    OP_ID = "OP_OS_DIAGNOSE"
    __slots__ = [
        "output_fields",
        "names",
    ]
class OpQueryExports(OpCode):
    """Opcode for computing the list of exported images."""
    OP_ID = "OP_BACKUP_QUERY"
    __slots__ = [
        "nodes",
    ]
class OpExportInstance(OpCode):
    """Opcode for exporting an instance."""
    OP_ID = "OP_BACKUP_EXPORT"
    __slots__ = [
        "instance_name",
        "target_node",
        "shutdown",
    ]
class OpRemoveExport(OpCode):
    """Opcode for removing the export of an instance."""
    OP_ID = "OP_BACKUP_REMOVE"
    __slots__ = [
        "instance_name",
    ]
class OpGetTags(OpCode):
    """Opcode returning the tags of the given object."""
    OP_ID = "OP_TAGS_GET"
    __slots__ = [
        "kind",
        "name",
    ]
class OpSearchTags(OpCode):
    """Opcode searching the tags in the cluster for a given pattern."""
    OP_ID = "OP_TAGS_SEARCH"
    __slots__ = [
        "pattern",
    ]
class OpAddTags(OpCode):
    """Opcode for adding a list of tags on a given object."""
    OP_ID = "OP_TAGS_SET"
    __slots__ = [
        "kind",
        "name",
        "tags",
    ]
class OpDelTags(OpCode):
    """Opcode for removing a list of tags from a given object."""
    OP_ID = "OP_TAGS_DEL"
    __slots__ = [
        "kind",
        "name",
        "tags",
    ]
class OpTestDelay(OpCode):
    """Opcode which sleeps for a configured amount of time.

    This is used just for debugging and testing.

    Parameters:
      - duration: the time to sleep
      - on_master: if true, sleep on the master
      - on_nodes: list of nodes in which to sleep

    If the on_master parameter is true, it will execute a sleep on the
    master (before any node sleep).

    If the on_nodes list is not empty, it will sleep on those nodes
    (after the sleep on the master, if that is enabled).

    As an additional feature, the case of duration < 0 will be reported
    as an execution error, so this opcode can be used as a failure
    generator. The case of duration == 0 will not be treated specially.

    """
    OP_ID = "OP_TEST_DELAY"
    __slots__ = [
        "duration",
        "on_master",
        "on_nodes",
    ]
442 class OpTestAllocator(OpCode):
443 """Allocator framework testing.
445 This opcode has two modes:
446 - gather and return allocator input for a given mode (allocate new
447 or replace secondary) and a given instance definition (direction
449 - run a selected allocator for a given operation (as above) and
450 return the allocator output (direction 'out')
453 OP_ID = "OP_TEST_ALLOCATOR"
455 "direction", "mode", "allocator", "name",
456 "mem_size", "disks", "disk_template",
457 "os", "tags", "nics", "vcpus",