# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
"""Cluster related commands"""

# pylint: disable-msg=W0401,W0614,C0103
# W0401: Wildcard import ganeti.cli
# W0614: Unused import %s from wildcard import (since we need cli)
# C0103: Invalid name gnt-cluster
import sys
import os.path
import time

from ganeti.cli import *
from ganeti import opcodes
from ganeti import constants
from ganeti import errors
from ganeti import utils
from ganeti import bootstrap
from ganeti import ssh
from ganeti import objects
# NOTE(review): decorator assumed to be provided by ganeti.cli (presumably
# bootstrap needs the RPC layer set up) -- confirm against the original script.
@UsesRPC
def InitCluster(opts, args):
    """Initialize the cluster.

    Checks the storage options for conflicts, merges the user supplied
    backend, NIC and hypervisor parameters with the constants' defaults,
    calls bootstrap.InitCluster and then submits OpPostInitCluster.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the desired
        cluster name
    @rtype: int
    @return: the desired exit code

    """
    if not opts.lvm_storage and opts.vg_name:
        ToStderr("Options --no-lvm-storage and --vg-name conflict.")
        return 1

    # Fall back to the default volume group only when LVM storage is wanted
    # and the user did not name one.
    vg_name = opts.vg_name
    if opts.lvm_storage and not opts.vg_name:
        vg_name = constants.DEFAULT_VG

    hvlist = opts.enabled_hypervisors
    if hvlist is None:
        hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
    hvlist = hvlist.split(",")

    hvparams = dict(opts.hvparams)
    beparams = opts.beparams
    nicparams = opts.nicparams

    # prepare beparams dict
    beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

    # prepare nicparams dict
    nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
    utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

    # prepare hvparams dict: every known hypervisor gets an entry, even when
    # the user passed no parameters for it
    for hv in constants.HYPER_TYPES:
        if hv not in hvparams:
            hvparams[hv] = {}
        hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv],
                                        hvparams[hv])
        utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)

    if opts.candidate_pool_size is None:
        opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT

    if opts.mac_prefix is None:
        opts.mac_prefix = constants.DEFAULT_MAC_PREFIX

    bootstrap.InitCluster(cluster_name=args[0],
                          secondary_ip=opts.secondary_ip,
                          vg_name=vg_name,
                          mac_prefix=opts.mac_prefix,
                          master_netdev=opts.master_netdev,
                          file_storage_dir=opts.file_storage_dir,
                          enabled_hypervisors=hvlist,
                          hvparams=hvparams,
                          beparams=beparams,
                          nicparams=nicparams,
                          candidate_pool_size=opts.candidate_pool_size,
                          modify_etc_hosts=opts.modify_etc_hosts,
                          modify_ssh_setup=opts.modify_ssh_setup,
                          )
    op = opcodes.OpPostInitCluster()
    SubmitOpCode(op)
    return 0
# NOTE(review): decorator assumed to be provided by ganeti.cli -- confirm
# against the original script.
@UsesRPC
def DestroyCluster(opts, args):
    """Destroy the cluster.

    Refuses to do anything unless the user explicitly confirmed with
    --yes-do-it; otherwise submits OpDestroyCluster and hands its result
    to bootstrap.FinalizeClusterDestroy.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    if not opts.yes_do_it:
        ToStderr("Destroying a cluster is irreversible. If you really want"
                 " destroy this cluster, supply the --yes-do-it option.")
        return 1

    op = opcodes.OpDestroyCluster()
    master = SubmitOpCode(op)
    # if we reached this, the opcode didn't fail; we can proceed to
    # shutdown all the daemons
    bootstrap.FinalizeClusterDestroy(master)
    return 0
def RenameCluster(opts, args):
    """Rename the cluster.

    Asks for confirmation before submitting OpRenameCluster, because the
    rename removes the cluster IP address from the node.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the new cluster name
    @rtype: int
    @return: the desired exit code

    """
    name = args[0]
    # NOTE(review): the confirmation is assumed to be skippable through a
    # "force" option -- confirm the option name against ganeti.cli.
    if not opts.force:
        usertext = ("This will rename the cluster to '%s'. If you are connected"
                    " over the network to the cluster name, the operation is very"
                    " dangerous as the IP address will be removed from the node"
                    " and the change may not go through. Continue?") % name
        if not AskUser(usertext):
            return 1

    op = opcodes.OpRenameCluster(name=name)
    SubmitOpCode(op)
    return 0
def RedistributeConfig(opts, args):
    """Forces push of the cluster configuration.

    Builds an OpRedistributeConfig opcode and passes it, together with the
    options, to SubmitOrSend.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: empty list
    @rtype: int
    @return: the desired exit code

    """
    op = opcodes.OpRedistributeConfig()
    SubmitOrSend(op, opts)
    return 0
def ShowClusterVersion(opts, args):
    """Write version of ganeti software to the standard output.

    Prints the software, protocol, configuration, OS API and export
    versions reported by QueryClusterInfo.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    # NOTE(review): client factory name assumed from ganeti.cli -- confirm.
    cl = GetClient()
    result = cl.QueryClusterInfo()
    ToStdout("Software version: %s", result["software_version"])
    ToStdout("Internode protocol: %s", result["protocol_version"])
    ToStdout("Configuration format: %s", result["config_version"])
    ToStdout("OS api version: %s", result["os_api_version"])
    ToStdout("Export interface: %s", result["export_version"])
    return 0
def ShowClusterMaster(opts, args):
    """Write name of master node to the standard output.

    The name is obtained from bootstrap.GetMaster.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    master = bootstrap.GetMaster()
    ToStdout(master)
    return 0
def _PrintGroupedParams(paramsdict):
    """Print Grouped parameters (be, nic, disk) by group.

    Emits one header line per group followed by one line per parameter
    of that group.

    @type paramsdict: dict of dicts
    @param paramsdict: {group: {param: value, ...}, ...}

    """
    # NOTE(review): the leading padding inside these format strings may have
    # been wider originally -- confirm the intended output layout.
    for gr_name, gr_dict in paramsdict.items():
        ToStdout(" - %s:", gr_name)
        for item, val in gr_dict.items():
            ToStdout(" %s: %s", item, val)
def ShowClusterConfig(opts, args):
    """Shows cluster information.

    Queries the cluster info once and prints identity, timestamps, master
    node, architecture, tags, hypervisor settings, cluster-wide parameters
    and the default instance and NIC parameters.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    # NOTE(review): client factory name assumed from ganeti.cli -- confirm.
    cl = GetClient()
    result = cl.QueryClusterInfo()

    ToStdout("Cluster name: %s", result["name"])
    ToStdout("Cluster UUID: %s", result["uuid"])

    ToStdout("Creation time: %s", utils.FormatTime(result["ctime"]))
    ToStdout("Modification time: %s", utils.FormatTime(result["mtime"]))

    ToStdout("Master node: %s", result["master"])

    ToStdout("Architecture (this node): %s (%s)",
             result["architecture"][0], result["architecture"][1])

    if result["tags"]:
        tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
    else:
        # NOTE(review): placeholder text for an untagged cluster is assumed
        # -- confirm the exact wording.
        tags = "(none)"

    ToStdout("Tags: %s", tags)

    ToStdout("Default hypervisor: %s", result["default_hypervisor"])
    ToStdout("Enabled hypervisors: %s",
             utils.CommaJoin(result["enabled_hypervisors"]))

    ToStdout("Hypervisor parameters:")
    _PrintGroupedParams(result["hvparams"])

    ToStdout("Cluster parameters:")
    ToStdout(" - candidate pool size: %s", result["candidate_pool_size"])
    ToStdout(" - master netdev: %s", result["master_netdev"])
    ToStdout(" - lvm volume group: %s", result["volume_group_name"])
    ToStdout(" - file storage path: %s", result["file_storage_dir"])

    ToStdout("Default instance parameters:")
    _PrintGroupedParams(result["beparams"])

    ToStdout("Default nic parameters:")
    _PrintGroupedParams(result["nicparams"])

    return 0
def ClusterCopyFile(opts, args):
    """Copy a file from master to some nodes.

    The local host is removed from the target list; a failed copy is
    reported on stderr and the remaining nodes are still attempted.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the path of
        the file to be copied
    @rtype: int
    @return: the desired exit code
    @raise errors.OpPrereqError: if the file does not exist locally

    """
    filename = args[0]
    if not os.path.exists(filename):
        raise errors.OpPrereqError("No such filename '%s'" % filename,
                                   errors.ECODE_INVAL)

    # NOTE(review): client factory name assumed from ganeti.cli -- confirm.
    cl = GetClient()

    myname = utils.GetHostInfo().name

    cluster_name = cl.QueryConfigValues(["cluster_name"])[0]

    results = GetOnlineNodes(nodes=opts.nodes, cl=cl)
    results = [name for name in results if name != myname]

    srun = ssh.SshRunner(cluster_name=cluster_name)
    for node in results:
        if not srun.CopyFileToNode(node, filename):
            ToStderr("Copy of file %s to node %s failed", filename, node)

    return 0
def RunClusterCommand(opts, args):
    """Run a command on some nodes.

    The arguments are joined with spaces into one command string, which is
    run as root on each selected node; output and exit code are printed per
    node.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain the command to be run and its arguments
    @rtype: int
    @return: the desired exit code

    """
    # NOTE(review): client factory name assumed from ganeti.cli -- confirm.
    cl = GetClient()

    command = " ".join(args)

    nodes = GetOnlineNodes(nodes=opts.nodes, cl=cl)

    cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
                                                      "master_node"])

    srun = ssh.SshRunner(cluster_name=cluster_name)

    # Make sure master node is at list end
    if master_node in nodes:
        nodes.remove(master_node)
        nodes.append(master_node)

    for name in nodes:
        result = srun.Run(name, "root", command)
        ToStdout("------------------------------------------------")
        ToStdout("node: %s", name)
        ToStdout("%s", result.output)
        ToStdout("return code = %s", result.exit_code)

    return 0
def VerifyCluster(opts, args):
    """Verify integrity of cluster, performing various test on nodes.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    skip_checks = []
    if opts.skip_nplusone_mem:
        skip_checks.append(constants.VERIFY_NPLUSONE_MEM)
    op = opcodes.OpVerifyCluster(skip_checks=skip_checks,
                                 verbose=opts.verbose,
                                 error_codes=opts.error_codes,
                                 debug_simulate_errors=opts.simulate_errors)
    # NOTE(review): a truthy opcode result is assumed to mean "cluster
    # verified OK" -- confirm the mapping to the exit code.
    if SubmitOpCode(op):
        return 0
    return 1
def VerifyDisks(opts, args):
    """Verify integrity of cluster disks.

    Submits OpVerifyDisks and then works through its three-part result:
    nodes that could not be queried are reported, disks of the listed
    instances are activated, and instances with missing logical volumes
    are listed.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code
    @raise errors.ProgrammerError: if the opcode result is not a
        three-element sequence

    """
    op = opcodes.OpVerifyDisks()
    result = SubmitOpCode(op)
    if not isinstance(result, (list, tuple)) or len(result) != 3:
        raise errors.ProgrammerError("Unknown result type for OpVerifyDisks")

    bad_nodes, instances, missing = result

    # NOTE(review): the way individual failures are folded into the exit
    # code (retcode |= ...) is assumed -- confirm.
    retcode = constants.EXIT_SUCCESS

    if bad_nodes:
        for node, text in bad_nodes.items():
            # only the tail of the node's message is shown
            ToStdout("Error gathering data on node %s: %s",
                     node, utils.SafeEncode(text[-400:]))
            retcode |= 1
        # printed once, after all broken nodes have been listed
        ToStdout("You need to fix these nodes first before fixing instances")

    if instances:
        for iname in instances:
            # instances with missing volumes are handled further down
            if iname in missing:
                continue
            op = opcodes.OpActivateInstanceDisks(instance_name=iname)
            try:
                ToStdout("Activating disks for instance '%s'", iname)
                SubmitOpCode(op)
            except errors.GenericError:
                # sys.exc_info() keeps this parseable by old and new
                # interpreters alike
                err = sys.exc_info()[1]
                nret, msg = FormatError(err)
                retcode |= nret
                ToStderr("Error activating disks for instance %s: %s",
                         iname, msg)

    if missing:
        for iname, ival in missing.items():
            all_missing = utils.all(ival, lambda x: x[0] in bad_nodes)
            if all_missing:
                ToStdout("Instance %s cannot be verified as it lives on"
                         " broken nodes", iname)
            else:
                ToStdout("Instance %s has missing logical volumes:", iname)
                ival.sort()
                for node, vol in ival:
                    if node in bad_nodes:
                        ToStdout("\tbroken node %s /dev/xenvg/%s", node, vol)
                    else:
                        ToStdout("\t%s /dev/xenvg/%s", node, vol)
        ToStdout("You need to run replace_disks for all the above"
                 " instances, if this message persist after fixing nodes.")
        retcode |= 1

    return retcode
def RepairDiskSizes(opts, args):
    """Verify sizes of cluster disks.

    Submits OpRepairDiskSizes for the given instances.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: optional list of instances to restrict check to
    @rtype: int
    @return: the desired exit code

    """
    op = opcodes.OpRepairDiskSizes(instances=args)
    SubmitOpCode(op)
# NOTE(review): decorator assumed to be provided by ganeti.cli -- confirm
# against the original script.
@UsesRPC
def MasterFailover(opts, args):
    """Failover the master node.

    This command, when run on a non-master node, will cause the current
    master to cease being master, and the non-master to become new
    master.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    # Skipping the vote is dangerous, so it needs an explicit confirmation.
    if opts.no_voting:
        usertext = ("This will perform the failover even if most other nodes"
                    " are down, or if this node is outdated. This is dangerous"
                    " as it can lead to a non-consistent cluster. Check the"
                    " gnt-cluster(8) man page before proceeding. Continue?")
        if not AskUser(usertext):
            return 1

    return bootstrap.MasterFailover(no_voting=opts.no_voting)
def SearchTags(opts, args):
    """Searches the tags on all the cluster.

    Prints one "path tag" line per match, in sorted order.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the tag pattern
    @rtype: int
    @return: the desired exit code

    """
    op = opcodes.OpSearchTags(pattern=args[0])
    result = SubmitOpCode(op)
    if not result:
        return 1
    result = list(result)
    result.sort()
    for path, tag in result:
        ToStdout("%s %s", path, tag)
def SetClusterParams(opts, args):
    """Modify the cluster.

    Requires at least one modification option, type-checks the supplied
    parameter dictionaries and submits OpSetClusterParams.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    if not (not opts.lvm_storage or opts.vg_name or
            opts.enabled_hypervisors or opts.hvparams or
            opts.beparams or opts.nicparams or
            opts.candidate_pool_size is not None):
        ToStderr("Please give at least one of the parameters.")
        return 1

    vg_name = opts.vg_name
    if not opts.lvm_storage and opts.vg_name:
        # usage errors belong on stderr, as in InitCluster
        ToStderr("Options --no-lvm-storage and --vg-name conflict.")
        return 1
    elif not opts.lvm_storage:
        # NOTE(review): an empty name is assumed to be how "disable LVM
        # storage" is passed to the opcode -- confirm.
        vg_name = ''

    hvlist = opts.enabled_hypervisors
    if hvlist is not None:
        hvlist = hvlist.split(",")

    # a list of (name, dict) we can pass directly to dict() (or [])
    hvparams = dict(opts.hvparams)
    for hv_params in hvparams.values():
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)

    beparams = opts.beparams
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

    nicparams = opts.nicparams
    utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

    op = opcodes.OpSetClusterParams(vg_name=vg_name,
                                    enabled_hypervisors=hvlist,
                                    hvparams=hvparams,
                                    beparams=beparams,
                                    nicparams=nicparams,
                                    candidate_pool_size=opts.candidate_pool_size)
    SubmitOpCode(op)
    return 0
def QueueOps(opts, args):
    """Queue operations.

    "drain" and "undrain" set or clear the queue drain flag, "info"
    prints its current state.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the subcommand
    @rtype: int
    @return: the desired exit code
    @raise errors.OpPrereqError: if the subcommand is not known

    """
    command = args[0]
    # NOTE(review): client factory name assumed from ganeti.cli -- confirm.
    client = GetClient()
    if command in ("drain", "undrain"):
        drain_flag = command == "drain"
        client.SetQueueDrainFlag(drain_flag)
    elif command == "info":
        result = client.QueryConfigValues(["drain_flag"])
        # NOTE(review): the two state words are assumed -- confirm the exact
        # wording of the message.
        if result[0]:
            val = "set"
        else:
            val = "unset"
        ToStdout("The drain flag is %s" % val)
    else:
        raise errors.OpPrereqError("Command '%s' is not valid." % command,
                                   errors.ECODE_INVAL)

    return 0
def _ShowWatcherPause(until):
    """Print whether the watcher is currently paused.

    @param until: end of the pause as a timestamp comparable with
        time.time(), or None if no pause is set

    """
    # A pause whose end lies in the past counts as "not paused".
    if until is None or until < time.time():
        ToStdout("The watcher is not paused.")
    else:
        ToStdout("The watcher is paused until %s.", time.ctime(until))
def WatcherOps(opts, args):
    """Watcher operations.

    "continue" clears the watcher pause, "pause <timespec>" sets it to
    now plus the parsed duration, "info" prints the current state.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the subcommand
    @rtype: int
    @return: the desired exit code
    @raise errors.OpPrereqError: if the subcommand is not known or the
        pause duration is missing

    """
    command = args[0]
    # NOTE(review): client factory name assumed from ganeti.cli -- confirm.
    client = GetClient()

    if command == "continue":
        client.SetWatcherPause(None)
        ToStdout("The watcher is no longer paused.")

    elif command == "pause":
        if len(args) < 2:
            raise errors.OpPrereqError("Missing pause duration",
                                       errors.ECODE_INVAL)

        result = client.SetWatcherPause(time.time() + ParseTimespec(args[1]))
        _ShowWatcherPause(result)

    elif command == "info":
        result = client.QueryConfigValues(["watcher_pause"])
        # QueryConfigValues yields one value per requested key; hand over the
        # value itself, not the enclosing list.
        _ShowWatcherPause(result[0])

    else:
        raise errors.OpPrereqError("Command '%s' is not valid." % command,
                                   errors.ECODE_INVAL)

    return 0
# Sub-command table handed to GenericMain.  Each entry appears to be
# (handler, positional-argument spec, option list, usage, description).
# NOTE(review): the sub-command names and the FORCE_OPT / NODE_LIST_OPT
# option lists are assumed -- confirm them against ganeti.cli and the
# gnt-cluster man page.
commands = {
    'init': (
        InitCluster, [ArgHost(min=1, max=1)],
        [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
         HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, NIC_PARAMS_OPT,
         NOLVM_STORAGE_OPT, NOMODIFY_ETCHOSTS_OPT, NOMODIFY_SSH_SETUP_OPT,
         SECONDARY_IP_OPT, VG_NAME_OPT],
        "[opts...] <cluster_name>", "Initialises a new cluster configuration"),
    'destroy': (
        DestroyCluster, ARGS_NONE, [YES_DOIT_OPT],
        "", "Destroy cluster"),
    'rename': (
        RenameCluster, [ArgHost(min=1, max=1)],
        [FORCE_OPT],
        "<new_name>",
        "Renames the cluster"),
    'redist-conf': (
        RedistributeConfig, ARGS_NONE, [SUBMIT_OPT],
        "", "Forces a push of the configuration file and ssconf files"
        " to the nodes in the cluster"),
    'verify': (
        VerifyCluster, ARGS_NONE,
        [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT],
        "", "Does a check on the cluster configuration"),
    'verify-disks': (
        VerifyDisks, ARGS_NONE, [],
        "", "Does a check on the cluster disk status"),
    'repair-disk-sizes': (
        RepairDiskSizes, ARGS_MANY_INSTANCES, [],
        "", "Updates mismatches in recorded disk sizes"),
    'masterfailover': (
        MasterFailover, ARGS_NONE, [NOVOTING_OPT],
        "", "Makes the current node the master"),
    'version': (
        ShowClusterVersion, ARGS_NONE, [],
        "", "Shows the cluster version"),
    'getmaster': (
        ShowClusterMaster, ARGS_NONE, [],
        "", "Shows the cluster master"),
    'copyfile': (
        ClusterCopyFile, [ArgFile(min=1, max=1)],
        [NODE_LIST_OPT],
        "[-n node...] <filename>", "Copies a file to all (or only some) nodes"),
    'command': (
        RunClusterCommand, [ArgCommand(min=1)],
        [NODE_LIST_OPT],
        "[-n node...] <command>", "Runs a command on all (or only some) nodes"),
    'info': (
        ShowClusterConfig, ARGS_NONE, [],
        "", "Show cluster configuration"),
    'list-tags': (
        ListTags, ARGS_NONE, [], "", "List the tags of the cluster"),
    'add-tags': (
        AddTags, [ArgUnknown()], [TAG_SRC_OPT],
        "tag...", "Add tags to the cluster"),
    'remove-tags': (
        RemoveTags, [ArgUnknown()], [TAG_SRC_OPT],
        "tag...", "Remove tags from the cluster"),
    'search-tags': (
        SearchTags, [ArgUnknown(min=1, max=1)],
        [], "", "Searches the tags on all objects on"
        " the cluster for a given pattern (regex)"),
    'queue': (
        QueueOps,
        [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
        [], "drain|undrain|info", "Change queue properties"),
    'watcher': (
        WatcherOps,
        [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
         ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])],
        [],
        "{pause <timespec>|continue|info}", "Change watcher properties"),
    'modify': (
        SetClusterParams, ARGS_NONE,
        [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, HVLIST_OPT,
         NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, VG_NAME_OPT],
        "[opts...]",
        "Alters the parameters of the cluster"),
    }

if __name__ == '__main__':
    sys.exit(GenericMain(commands, override={"tag_type": constants.TAG_CLUSTER}))