# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
21 """Cluster related commands"""
23 # pylint: disable-msg=W0401,W0613,W0614,C0103
24 # W0401: Wildcard import ganeti.cli
25 # W0613: Unused argument, since all functions follow the same API
26 # W0614: Unused import %s from wildcard import (since we need cli)
27 # C0103: Invalid name gnt-cluster
import sys
import os.path
import time

from ganeti.cli import *
from ganeti import opcodes
from ganeti import constants
from ganeti import errors
from ganeti import utils
from ganeti import bootstrap
from ganeti import ssh
from ganeti import objects
def InitCluster(opts, args):
  """Initialize the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the desired
      cluster name
  @rtype: int
  @return: the desired exit code

  """
  # NOTE(review): bootstrap.InitCluster presumably needs the RPC layer to be
  # initialised by the caller/decorator -- confirm this entry point is wrapped.
  if not opts.lvm_storage and opts.vg_name:
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
    return 1

  # Only fall back to the default volume group when LVM storage is enabled
  # and the user did not name one explicitly
  vg_name = opts.vg_name
  if opts.lvm_storage and not opts.vg_name:
    vg_name = constants.DEFAULT_VG

  # The hypervisor list arrives as a comma-separated string; the default
  # must only be used when the option was not given at all
  hvlist = opts.enabled_hypervisors
  if hvlist is None:
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
  hvlist = hvlist.split(",")

  # prepare beparams dict
  beparams = objects.FillDict(constants.BEC_DEFAULTS, opts.beparams)
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  # prepare nicparams dict
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, opts.nicparams)
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  # prepare hvparams dict: every known hypervisor gets an entry, with the
  # user-supplied values (if any) layered over the defaults
  hvparams = dict(opts.hvparams)
  for hv in constants.HYPER_TYPES:
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv],
                                    hvparams.get(hv, {}))
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)

  if opts.candidate_pool_size is None:
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT

  if opts.mac_prefix is None:
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX

  bootstrap.InitCluster(cluster_name=args[0],
                        secondary_ip=opts.secondary_ip,
                        vg_name=vg_name,
                        mac_prefix=opts.mac_prefix,
                        master_netdev=opts.master_netdev,
                        file_storage_dir=opts.file_storage_dir,
                        enabled_hypervisors=hvlist,
                        hvparams=hvparams,
                        beparams=beparams,
                        nicparams=nicparams,
                        candidate_pool_size=opts.candidate_pool_size,
                        modify_etc_hosts=opts.modify_etc_hosts,
                        modify_ssh_setup=opts.modify_ssh_setup,
                        )
  op = opcodes.OpPostInitCluster()
  SubmitOpCode(op, opts=opts)
  return 0
def DestroyCluster(opts, args):
  """Destroy the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  # NOTE(review): bootstrap.FinalizeClusterDestroy presumably needs the RPC
  # layer initialised by the caller/decorator -- confirm.
  if not opts.yes_do_it:
    ToStderr("Destroying a cluster is irreversible. If you really want"
             " destroy this cluster, supply the --yes-do-it option.")
    # Abort here: without the explicit confirmation nothing may be destroyed
    return 1

  op = opcodes.OpDestroyCluster()
  master = SubmitOpCode(op, opts=opts)
  # if we reached this, the opcode didn't fail; we can proceed to
  # shutdown all the daemons
  bootstrap.FinalizeClusterDestroy(master)
  return 0
def RenameCluster(opts, args):
  """Rename the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code

  """
  name = args[0]

  # NOTE(review): the confirmation is presumably skipped when a "force"
  # option is given; getattr keeps this safe if the option is not registered
  # for this command -- confirm against the command table.
  if not getattr(opts, "force", False):
    usertext = ("This will rename the cluster to '%s'. If you are connected"
                " over the network to the cluster name, the operation is very"
                " dangerous as the IP address will be removed from the node"
                " and the change may not go through. Continue?") % name
    if not AskUser(usertext):
      return 1

  op = opcodes.OpRenameCluster(name=name)
  SubmitOpCode(op, opts=opts)
  return 0
def RedistributeConfig(opts, args):
  """Forces push of the cluster configuration.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpRedistributeConfig()
  # SubmitOrSend (rather than SubmitOpCode) is used here, passing the whole
  # options object along with the opcode
  SubmitOrSend(op, opts)
  return 0
def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  result = cl.QueryClusterInfo()
  ToStdout("Software version: %s", result["software_version"])
  ToStdout("Internode protocol: %s", result["protocol_version"])
  ToStdout("Configuration format: %s", result["config_version"])
  ToStdout("OS api version: %s", result["os_api_version"])
  ToStdout("Export interface: %s", result["export_version"])
  return 0
def ShowClusterMaster(opts, args):
  """Write name of master node to the standard output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  master = bootstrap.GetMaster()
  ToStdout(master)
  return 0
def _PrintGroupedParams(paramsdict):
  """Print Grouped parameters (be, nic, disk) by group.

  Each group name is written on its own line, followed by one line per
  parameter of that group.

  @type paramsdict: dict of dicts
  @param paramsdict: {group: {param: value, ...}, ...}

  """
  for group_name in paramsdict:
    ToStdout(" - %s:", group_name)
    group_params = paramsdict[group_name]
    for param_name in group_params:
      ToStdout(" %s: %s", param_name, group_params[param_name])
def ShowClusterConfig(opts, args):
  """Shows cluster information.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  result = cl.QueryClusterInfo()

  ToStdout("Cluster name: %s", result["name"])
  ToStdout("Cluster UUID: %s", result["uuid"])

  ToStdout("Creation time: %s", utils.FormatTime(result["ctime"]))
  ToStdout("Modification time: %s", utils.FormatTime(result["mtime"]))

  ToStdout("Master node: %s", result["master"])

  ToStdout("Architecture (this node): %s (%s)",
           result["architecture"][0], result["architecture"][1])

  # Show an explicit placeholder instead of an empty value when the cluster
  # carries no tags
  if result["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
  else:
    tags = "(none)"

  ToStdout("Tags: %s", tags)

  ToStdout("Default hypervisor: %s", result["default_hypervisor"])
  ToStdout("Enabled hypervisors: %s",
           utils.CommaJoin(result["enabled_hypervisors"]))

  ToStdout("Hypervisor parameters:")
  _PrintGroupedParams(result["hvparams"])

  ToStdout("Cluster parameters:")
  ToStdout(" - candidate pool size: %s", result["candidate_pool_size"])
  ToStdout(" - master netdev: %s", result["master_netdev"])
  ToStdout(" - lvm volume group: %s", result["volume_group_name"])
  ToStdout(" - file storage path: %s", result["file_storage_dir"])

  ToStdout("Default instance parameters:")
  _PrintGroupedParams(result["beparams"])

  ToStdout("Default nic parameters:")
  _PrintGroupedParams(result["nicparams"])

  return 0
def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the given file does not exist

  """
  filename = args[0]
  if not os.path.exists(filename):
    raise errors.OpPrereqError("No such filename '%s'" % filename,
                               errors.ECODE_INVAL)

  cl = GetClient()

  myname = utils.GetHostInfo().name

  cluster_name = cl.QueryConfigValues(["cluster_name"])[0]

  # The local node already has the file, so it is excluded from the targets
  results = GetOnlineNodes(nodes=opts.nodes, cl=cl)
  results = [name for name in results if name != myname]

  srun = ssh.SshRunner(cluster_name=cluster_name)
  for node in results:
    # A failed copy is reported but does not stop the remaining copies
    if not srun.CopyFileToNode(node, filename):
      ToStderr("Copy of file %s to node %s failed", filename, node)

  return 0
def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()

  # The arguments are joined into a single command string for the remote
  # shell
  command = " ".join(args)

  nodes = GetOnlineNodes(nodes=opts.nodes, cl=cl)

  cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
                                                    "master_node"])

  srun = ssh.SshRunner(cluster_name=cluster_name)

  # Make sure master node is at list end
  if master_node in nodes:
    nodes.remove(master_node)
    nodes.append(master_node)

  for name in nodes:
    result = srun.Run(name, "root", command)
    ToStdout("------------------------------------------------")
    ToStdout("node: %s", name)
    ToStdout("%s", result.output)
    ToStdout("return code = %s", result.exit_code)

  return 0
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  skip_checks = []
  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)
  op = opcodes.OpVerifyCluster(skip_checks=skip_checks,
                               verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors)
  # A true opcode result means the verification passed; map it to the
  # process exit code
  if SubmitOpCode(op, opts=opts):
    return 0
  return 1
def VerifyDisks(opts, args):
  """Verify integrity of cluster disks.

  The opcode result is a triple of: a mapping of node name to error text for
  nodes that could not be queried, the instances whose disks should be
  activated, and a mapping of instance name to (node, volume) pairs for
  missing volumes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code
  @raise errors.ProgrammerError: if the opcode result is not a triple

  """
  op = opcodes.OpVerifyDisks()
  result = SubmitOpCode(op, opts=opts)
  if not isinstance(result, (list, tuple)) or len(result) != 3:
    raise errors.ProgrammerError("Unknown result type for OpVerifyDisks")

  bad_nodes, instances, missing = result

  retcode = constants.EXIT_SUCCESS

  if bad_nodes:
    for node, text in bad_nodes.items():
      # Only the tail of the error text is shown, to keep the output short
      ToStdout("Error gathering data on node %s: %s",
               node, utils.SafeEncode(text[-400:]))
    retcode |= 1
    ToStdout("You need to fix these nodes first before fixing instances")

  for iname in instances:
    # Instances with missing volumes are reported below; activating their
    # disks is not attempted
    if iname in missing:
      continue
    op = opcodes.OpActivateInstanceDisks(instance_name=iname)
    try:
      ToStdout("Activating disks for instance '%s'", iname)
      SubmitOpCode(op, opts=opts)
    except errors.GenericError:
      # sys.exc_info() is used for the binding so that this parses on every
      # Python version
      err = sys.exc_info()[1]
      nret, msg = FormatError(err)
      retcode |= nret
      ToStderr("Error activating disks for instance %s: %s", iname, msg)

  if missing:
    for iname, ival in missing.items():
      all_missing = utils.all(ival, lambda x: x[0] in bad_nodes)
      if all_missing:
        ToStdout("Instance %s cannot be verified as it lives on"
                 " broken nodes", iname)
      else:
        ToStdout("Instance %s has missing logical volumes:", iname)
        # Sorted copy, so the report is stable and the opcode result is not
        # modified
        for node, vol in sorted(ival):
          if node in bad_nodes:
            ToStdout("\tbroken node %s /dev/xenvg/%s", node, vol)
          else:
            ToStdout("\t%s /dev/xenvg/%s", node, vol)
    ToStdout("You need to run replace_disks for all the above"
             " instances, if this message persist after fixing nodes.")
    retcode |= 1

  return retcode
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # Explicit success code, matching the documented return value
  return 0
def MasterFailover(opts, args):
  """Failover the master node.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  # NOTE(review): bootstrap.MasterFailover presumably needs the RPC layer
  # initialised by the caller/decorator -- confirm.
  if opts.no_voting:
    # Skipping the vote is the dangerous path, so ask for confirmation first
    usertext = ("This will perform the failover even if most other nodes"
                " are down, or if this node is outdated. This is dangerous"
                " as it can lead to a non-consistent cluster. Check the"
                " gnt-cluster(8) man page before proceeding. Continue?")
    if not AskUser(usertext):
      return 1

  return bootstrap.MasterFailover(no_voting=opts.no_voting)
def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpSearchTags(pattern=args[0])
  result = SubmitOpCode(op, opts=opts)
  # No match at all is reported through a non-zero exit code
  if not result:
    return 1

  for path, tag in sorted(result):
    ToStdout("%s %s", path, tag)
  return 0
def SetClusterParams(opts, args):
  """Modify the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if not (not opts.lvm_storage or opts.vg_name or
          opts.enabled_hypervisors or opts.hvparams or
          opts.beparams or opts.nicparams or
          opts.candidate_pool_size is not None):
    ToStderr("Please give at least one of the parameters.")
    return 1

  vg_name = opts.vg_name
  if not opts.lvm_storage and opts.vg_name:
    # Reported on stderr, like the same conflict in InitCluster
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
    return 1
  elif not opts.lvm_storage:
    # An empty volume group name is what requests LVM storage to be disabled
    vg_name = ''

  hvlist = opts.enabled_hypervisors
  if hvlist is not None:
    hvlist = hvlist.split(",")

  # a list of (name, dict) we can pass directly to dict() (or [])
  hvparams = dict(opts.hvparams)
  for hv_params in hvparams.values():
    utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)

  beparams = opts.beparams
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  nicparams = opts.nicparams
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  # NOTE(review): os_hvp=None is presumably expected by the opcode even
  # though this command offers no option for it -- confirm against
  # opcodes.OpSetClusterParams.
  op = opcodes.OpSetClusterParams(vg_name=vg_name,
                                  enabled_hypervisors=hvlist,
                                  hvparams=hvparams,
                                  os_hvp=None,
                                  beparams=beparams,
                                  nicparams=nicparams,
                                  candidate_pool_size=opts.candidate_pool_size)
  SubmitOpCode(op, opts=opts)
  return 0
def QueueOps(opts, args):
  """Queue operations.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not recognised

  """
  command = args[0]
  client = GetClient()
  if command in ("drain", "undrain"):
    # "drain" sets the flag, "undrain" clears it
    drain_flag = command == "drain"
    client.SetQueueDrainFlag(drain_flag)
  elif command == "info":
    result = client.QueryConfigValues(["drain_flag"])
    if result[0]:
      val = "set"
    else:
      val = "unset"
    ToStdout("The drain flag is %s" % val)
  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % command,
                               errors.ECODE_INVAL)

  return 0
def _ShowWatcherPause(until):
  """Print whether the watcher is currently paused.

  @type until: None or number
  @param until: Unix timestamp until which the watcher is paused; None or
      a timestamp in the past means the watcher is not paused

  """
  if until is None or until < time.time():
    ToStdout("The watcher is not paused.")
  else:
    ToStdout("The watcher is paused until %s.", time.ctime(until))
def WatcherOps(opts, args):
  """Watcher operations.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
      (plus the pause duration for the "pause" subcommand)
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not recognised or
      "pause" is given without a duration

  """
  command = args[0]
  client = GetClient()

  if command == "continue":
    client.SetWatcherPause(None)
    ToStdout("The watcher is no longer paused.")

  elif command == "pause":
    if len(args) < 2:
      raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)

    # The duration is relative; it is turned into an absolute end time
    result = client.SetWatcherPause(time.time() + ParseTimespec(args[1]))
    _ShowWatcherPause(result)

  elif command == "info":
    result = client.QueryConfigValues(["watcher_pause"])
    _ShowWatcherPause(result[0])

  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % command,
                               errors.ECODE_INVAL)

  return 0
# Sub-command table handed to GenericMain: each entry maps a sub-command name
# to (handler, positional argument spec, option list, usage, description).
# NOTE(review): the sub-command names, and the option list/usage of "rename",
# were reconstructed -- confirm against the command-line documentation.
commands = {
  'init': (
    InitCluster, [ArgHost(min=1, max=1)],
    [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
     HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, NIC_PARAMS_OPT,
     NOLVM_STORAGE_OPT, NOMODIFY_ETCHOSTS_OPT, NOMODIFY_SSH_SETUP_OPT,
     SECONDARY_IP_OPT, VG_NAME_OPT],
    "[opts...] <cluster_name>", "Initialises a new cluster configuration"),
  'destroy': (
    DestroyCluster, ARGS_NONE, [YES_DOIT_OPT],
    "", "Destroy cluster"),
  'rename': (
    RenameCluster, [ArgHost(min=1, max=1)],
    [FORCE_OPT],
    "<new_name>",
    "Renames the cluster"),
  'redist-conf': (
    RedistributeConfig, ARGS_NONE, [SUBMIT_OPT],
    "", "Forces a push of the configuration file and ssconf files"
    " to the nodes in the cluster"),
  'verify': (
    VerifyCluster, ARGS_NONE,
    [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT],
    "", "Does a check on the cluster configuration"),
  'verify-disks': (
    VerifyDisks, ARGS_NONE, [],
    "", "Does a check on the cluster disk status"),
  'repair-disk-sizes': (
    RepairDiskSizes, ARGS_MANY_INSTANCES, [],
    "", "Updates mismatches in recorded disk sizes"),
  'masterfailover': (
    MasterFailover, ARGS_NONE, [NOVOTING_OPT],
    "", "Makes the current node the master"),
  'version': (
    ShowClusterVersion, ARGS_NONE, [],
    "", "Shows the cluster version"),
  'getmaster': (
    ShowClusterMaster, ARGS_NONE, [],
    "", "Shows the cluster master"),
  'copyfile': (
    ClusterCopyFile, [ArgFile(min=1, max=1)],
    [NODE_LIST_OPT],
    "[-n node...] <filename>", "Copies a file to all (or only some) nodes"),
  'command': (
    RunClusterCommand, [ArgCommand(min=1)],
    [NODE_LIST_OPT],
    "[-n node...] <command>", "Runs a command on all (or only some) nodes"),
  'info': (
    ShowClusterConfig, ARGS_NONE, [],
    "", "Show cluster configuration"),
  'list-tags': (
    ListTags, ARGS_NONE, [], "", "List the tags of the cluster"),
  'add-tags': (
    AddTags, [ArgUnknown()], [TAG_SRC_OPT],
    "tag...", "Add tags to the cluster"),
  'remove-tags': (
    RemoveTags, [ArgUnknown()], [TAG_SRC_OPT],
    "tag...", "Remove tags from the cluster"),
  'search-tags': (
    SearchTags, [ArgUnknown(min=1, max=1)],
    [], "", "Searches the tags on all objects on"
    " the cluster for a given pattern (regex)"),
  'queue': (
    QueueOps,
    [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
    [], "drain|undrain|info", "Change queue properties"),
  'watcher': (
    WatcherOps,
    [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
     ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])],
    [],
    "{pause <timespec>|continue|info}", "Change watcher properties"),
  'modify': (
    SetClusterParams, ARGS_NONE,
    [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, HVLIST_OPT,
     NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, VG_NAME_OPT],
    "[opts...]",
    "Alters the parameters of the cluster"),
  }
if __name__ == '__main__':
  # The cluster tag type is passed to GenericMain as an override
  main_overrides = {"tag_type": constants.TAG_CLUSTER}
  exit_status = GenericMain(commands, override=main_overrides)
  sys.exit(exit_status)