# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
21 """Cluster related commands"""
23 # pylint: disable-msg=W0401,W0613,W0614,C0103
24 # W0401: Wildcard import ganeti.cli
25 # W0613: Unused argument, since all functions follow the same API
26 # W0614: Unused import %s from wildcard import (since we need cli)
27 # C0103: Invalid name gnt-cluster
import os.path
import sys
import time

from ganeti.cli import *
from ganeti import opcodes
from ganeti import constants
from ganeti import errors
from ganeti import utils
from ganeti import bootstrap
from ganeti import ssh
from ganeti import objects
# NOTE(review): bootstrap.InitCluster may require RPC to be initialised by
# a decorator on this function; no decorator line is visible -- confirm.
def InitCluster(opts, args):
    """Initialize the cluster.

    Builds the backend, NIC and hypervisor parameter dictionaries from
    the command line (unset values are filled from the defaults in
    L{constants}), bootstraps the cluster and then submits the post-init
    opcode.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the desired
        cluster name
    @rtype: int
    @return: the desired exit code

    """
    if not opts.lvm_storage and opts.vg_name:
        ToStderr("Options --no-lvm-storage and --vg-name conflict.")
        # Contradictory options: stop here instead of bootstrapping anyway.
        return 1

    vg_name = opts.vg_name
    if opts.lvm_storage and not opts.vg_name:
        vg_name = constants.DEFAULT_VG

    # Fall back to the default only when the user gave no explicit list,
    # otherwise --enabled-hypervisors would be silently ignored.
    hvlist = opts.enabled_hypervisors
    if hvlist is None:
        hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
    hvlist = hvlist.split(",")

    hvparams = dict(opts.hvparams)

    # prepare beparams dict
    beparams = objects.FillDict(constants.BEC_DEFAULTS, opts.beparams)
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

    # prepare nicparams dict
    nicparams = objects.FillDict(constants.NICC_DEFAULTS, opts.nicparams)
    utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

    # prepare hvparams dict
    for hv in constants.HYPER_TYPES:
        # A hypervisor the user did not mention starts from an empty
        # override dict, i.e. it receives the plain defaults.
        hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv],
                                        hvparams.get(hv, {}))
        utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)

    if opts.candidate_pool_size is None:
        opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT

    if opts.mac_prefix is None:
        opts.mac_prefix = constants.DEFAULT_MAC_PREFIX

    # NOTE(review): the vg_name/hvparams/beparams/nicparams keyword names
    # are assumed to mirror the local names -- confirm against
    # bootstrap.InitCluster.
    bootstrap.InitCluster(cluster_name=args[0],
                          secondary_ip=opts.secondary_ip,
                          vg_name=vg_name,
                          mac_prefix=opts.mac_prefix,
                          master_netdev=opts.master_netdev,
                          file_storage_dir=opts.file_storage_dir,
                          enabled_hypervisors=hvlist,
                          hvparams=hvparams,
                          beparams=beparams,
                          nicparams=nicparams,
                          candidate_pool_size=opts.candidate_pool_size,
                          modify_etc_hosts=opts.modify_etc_hosts,
                          modify_ssh_setup=opts.modify_ssh_setup,
                          )
    # The post-init opcode only takes effect once it is submitted.
    op = opcodes.OpPostInitCluster()
    SubmitOpCode(op)
    return 0
# NOTE(review): bootstrap.FinalizeClusterDestroy may require RPC to be
# initialised by a decorator on this function; none is visible -- confirm.
def DestroyCluster(opts, args):
    """Destroy the cluster.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    if not opts.yes_do_it:
        ToStderr("Destroying a cluster is irreversible. If you really want"
                 " destroy this cluster, supply the --yes-do-it option.")
        # Without the explicit confirmation nothing may be destroyed.
        return 1

    op = opcodes.OpDestroyCluster()
    master = SubmitOpCode(op)
    # if we reached this, the opcode didn't fail; we can proceed to
    # shutdown all the daemons
    bootstrap.FinalizeClusterDestroy(master)
    return 0
def RenameCluster(opts, args):
    """Rename the cluster.

    Asks for confirmation before submitting the rename opcode.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the new cluster name
    @rtype: int
    @return: the desired exit code

    """
    name = args[0]
    # NOTE(review): a "force" option is not visible in this copy; when the
    # attribute is absent the confirmation is always requested -- confirm.
    if not getattr(opts, "force", False):
        usertext = ("This will rename the cluster to '%s'. If you are connected"
                    " over the network to the cluster name, the operation is very"
                    " dangerous as the IP address will be removed from the node"
                    " and the change may not go through. Continue?") % name
        if not AskUser(usertext):
            # The user declined: leave the cluster untouched.
            return 1

    op = opcodes.OpRenameCluster(name=name)
    SubmitOpCode(op)
    return 0
def RedistributeConfig(opts, args):
    """Forces push of the cluster configuration.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: empty list
    @rtype: int
    @return: the desired exit code

    """
    op = opcodes.OpRedistributeConfig()
    # SubmitOrSend receives the options so it can honour them when
    # dispatching the opcode.
    SubmitOrSend(op, opts)
    return 0
def ShowClusterVersion(opts, args):
    """Write version of ganeti software to the standard output.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    # NOTE(review): the client construction is not visible in this copy;
    # GetClient() from ganeti.cli is assumed -- confirm.
    cl = GetClient()
    result = cl.QueryClusterInfo()
    ToStdout("Software version: %s", result["software_version"])
    ToStdout("Internode protocol: %s", result["protocol_version"])
    ToStdout("Configuration format: %s", result["config_version"])
    ToStdout("OS api version: %s", result["os_api_version"])
    ToStdout("Export interface: %s", result["export_version"])
    return 0
def ShowClusterMaster(opts, args):
    """Write name of master node to the standard output.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    master = bootstrap.GetMaster()
    # Pass the name as an argument so it is never used as a format string.
    ToStdout("%s", master)
    return 0
def _PrintGroupedParams(paramsdict):
    """Print Grouped parameters (be, nic, disk) by group.

    Groups, and the parameters inside each group, are printed in sorted
    order so that the output is stable between runs.

    @type paramsdict: dict of dicts
    @param paramsdict: {group: {param: value, ...}, ...}

    """
    for gr_name in sorted(paramsdict):
        ToStdout(" - %s:", gr_name)
        gr_dict = paramsdict[gr_name]
        for item in sorted(gr_dict):
            ToStdout(" %s: %s", item, gr_dict[item])
def ShowClusterConfig(opts, args):
    """Shows cluster information.

    Queries the cluster information once and prints identity, timestamps,
    hypervisor settings and the default parameter groups.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    # NOTE(review): the client construction is not visible in this copy;
    # GetClient() from ganeti.cli is assumed -- confirm.
    cl = GetClient()
    result = cl.QueryClusterInfo()

    ToStdout("Cluster name: %s", result["name"])
    ToStdout("Cluster UUID: %s", result["uuid"])

    ToStdout("Creation time: %s", utils.FormatTime(result["ctime"]))
    ToStdout("Modification time: %s", utils.FormatTime(result["mtime"]))

    ToStdout("Master node: %s", result["master"])

    ToStdout("Architecture (this node): %s (%s)",
             result["architecture"][0], result["architecture"][1])

    if result["tags"]:
        tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
    else:
        # Make an untagged cluster explicit instead of printing nothing.
        tags = "(none)"

    ToStdout("Tags: %s", tags)

    ToStdout("Default hypervisor: %s", result["default_hypervisor"])
    ToStdout("Enabled hypervisors: %s",
             utils.CommaJoin(result["enabled_hypervisors"]))

    ToStdout("Hypervisor parameters:")
    _PrintGroupedParams(result["hvparams"])

    ToStdout("Cluster parameters:")
    ToStdout(" - candidate pool size: %s", result["candidate_pool_size"])
    ToStdout(" - master netdev: %s", result["master_netdev"])
    ToStdout(" - lvm volume group: %s", result["volume_group_name"])
    ToStdout(" - file storage path: %s", result["file_storage_dir"])

    ToStdout("Default instance parameters:")
    _PrintGroupedParams(result["beparams"])

    ToStdout("Default nic parameters:")
    _PrintGroupedParams(result["nicparams"])

    return 0
def ClusterCopyFile(opts, args):
    """Copy a file from master to some nodes.

    The node this command runs on is excluded from the target list.
    A failed copy is reported on stderr and the remaining nodes are
    still processed.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the path of
        the file to be copied
    @rtype: int
    @return: the desired exit code
    @raise errors.OpPrereqError: if the file does not exist

    """
    filename = args[0]
    if not os.path.exists(filename):
        raise errors.OpPrereqError("No such filename '%s'" % filename,
                                   errors.ECODE_INVAL)

    # NOTE(review): the client construction is not visible in this copy;
    # GetClient() from ganeti.cli is assumed -- confirm.
    cl = GetClient()

    myname = utils.GetHostInfo().name

    cluster_name = cl.QueryConfigValues(["cluster_name"])[0]

    # The file already lives on this host, so skip it as a target.
    targets = [name for name in GetOnlineNodes(nodes=opts.nodes, cl=cl)
               if name != myname]

    srun = ssh.SshRunner(cluster_name=cluster_name)
    for node in targets:
        if not srun.CopyFileToNode(node, filename):
            ToStderr("Copy of file %s to node %s failed", filename, node)

    return 0
def RunClusterCommand(opts, args):
    """Run a command on some nodes.

    The command is run as root over ssh on each selected online node and
    its output and return code are printed per node.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain the command to be run and its arguments
    @rtype: int
    @return: the desired exit code

    """
    # NOTE(review): the client construction is not visible in this copy;
    # GetClient() from ganeti.cli is assumed -- confirm.
    cl = GetClient()

    command = " ".join(args)

    nodes = GetOnlineNodes(nodes=opts.nodes, cl=cl)

    # NOTE(review): the second queried field is cut off in this copy;
    # "master_node" is inferred from the variable it is unpacked into.
    cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
                                                      "master_node"])

    srun = ssh.SshRunner(cluster_name=cluster_name)

    # Make sure master node is at list end
    if master_node in nodes:
        nodes.remove(master_node)
        nodes.append(master_node)

    for name in nodes:
        result = srun.Run(name, "root", command)
        ToStdout("------------------------------------------------")
        ToStdout("node: %s", name)
        ToStdout("%s", result.output)
        ToStdout("return code = %s", result.exit_code)

    return 0
def VerifyCluster(opts, args):
    """Verify integrity of cluster, performing various test on nodes.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    skip_checks = []
    if opts.skip_nplusone_mem:
        skip_checks.append(constants.VERIFY_NPLUSONE_MEM)
    op = opcodes.OpVerifyCluster(skip_checks=skip_checks,
                                 verbose=opts.verbose,
                                 error_codes=opts.error_codes,
                                 debug_simulate_errors=opts.simulate_errors)
    # NOTE(review): assumes the opcode result is true when verification
    # passed -- confirm against OpVerifyCluster.
    if SubmitOpCode(op):
        return 0
    return 1
def VerifyDisks(opts, args):
    """Verify integrity of cluster disks.

    Submits the disk verification opcode, reports the nodes that could
    not be queried, tries to activate the disks of the instances returned
    for activation and lists instances with missing logical volumes.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code
    @raise errors.ProgrammerError: if the opcode result is not a
        three-element list or tuple

    """
    op = opcodes.OpVerifyDisks()
    result = SubmitOpCode(op)
    if not isinstance(result, (list, tuple)) or len(result) != 3:
        raise errors.ProgrammerError("Unknown result type for OpVerifyDisks")

    bad_nodes, instances, missing = result

    retcode = constants.EXIT_SUCCESS

    if bad_nodes:
        for node, text in bad_nodes.items():
            # Only the tail of the node's error text is shown.
            ToStdout("Error gathering data on node %s: %s",
                     node, utils.SafeEncode(text[-400:]))
        # NOTE(review): treating unreachable nodes as a failing exit code
        # is assumed -- confirm.
        retcode |= 1
        ToStdout("You need to fix these nodes first before fixing instances")

    for iname in instances:
        # NOTE(review): instances with missing volumes are assumed to be
        # skipped here, since they are reported separately below -- confirm.
        if iname in missing:
            continue
        op = opcodes.OpActivateInstanceDisks(instance_name=iname)
        try:
            ToStdout("Activating disks for instance '%s'", iname)
            SubmitOpCode(op)
        except errors.GenericError:
            # sys.exc_info() is used instead of "except X, err" so the
            # block parses on both Python 2 and Python 3.
            err = sys.exc_info()[1]
            nret, msg = FormatError(err)
            # One failed activation must not stop the others, but it has
            # to be reflected in the final exit code.
            retcode |= nret
            ToStderr("Error activating disks for instance %s: %s", iname, msg)

    if missing:
        for iname, ival in missing.items():
            all_missing = utils.all(ival, lambda x: x[0] in bad_nodes)
            if all_missing:
                ToStdout("Instance %s cannot be verified as it lives on"
                         " broken nodes", iname)
            else:
                ToStdout("Instance %s has missing logical volumes:", iname)
                for node, vol in sorted(ival):
                    if node in bad_nodes:
                        ToStdout("\tbroken node %s /dev/xenvg/%s", node, vol)
                    else:
                        ToStdout("\t%s /dev/xenvg/%s", node, vol)
        ToStdout("You need to run replace_disks for all the above"
                 " instances, if this message persist after fixing nodes.")
        retcode |= 1

    return retcode
def RepairDiskSizes(opts, args):
    """Verify sizes of cluster disks.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: optional list of instances to restrict check to
    @rtype: int
    @return: the desired exit code

    """
    op = opcodes.OpRepairDiskSizes(instances=args)
    # Building the opcode alone does nothing; it has to be submitted.
    SubmitOpCode(op)
    return 0
# NOTE(review): bootstrap.MasterFailover may require RPC to be initialised
# by a decorator on this function; none is visible -- confirm.
def MasterFailover(opts, args):
    """Failover the master node.

    This command, when run on a non-master node, will cause the current
    master to cease being master, and the non-master to become new
    master.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    # The warning describes a failover that goes ahead even when most
    # nodes are down, so it is only shown when voting is disabled.
    if opts.no_voting:
        usertext = ("This will perform the failover even if most other nodes"
                    " are down, or if this node is outdated. This is dangerous"
                    " as it can lead to a non-consistent cluster. Check the"
                    " gnt-cluster(8) man page before proceeding. Continue?")
        if not AskUser(usertext):
            return 1

    return bootstrap.MasterFailover(no_voting=opts.no_voting)
def SearchTags(opts, args):
    """Searches the tags on all the cluster.

    Matching (path, tag) pairs are printed in sorted order.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the tag pattern
    @rtype: int
    @return: the desired exit code

    """
    op = opcodes.OpSearchTags(pattern=args[0])
    result = SubmitOpCode(op)
    if not result:
        # NOTE(review): a failing exit code for "no match" is assumed
        # -- confirm.
        return 1
    for path, tag in sorted(result):
        ToStdout("%s %s", path, tag)
    return 0
def SetClusterParams(opts, args):
    """Modify the cluster.

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should be an empty list
    @rtype: int
    @return: the desired exit code

    """
    if not (not opts.lvm_storage or opts.vg_name or
            opts.enabled_hypervisors or opts.hvparams or
            opts.beparams or opts.nicparams or
            opts.candidate_pool_size is not None):
        ToStderr("Please give at least one of the parameters.")
        return 1

    vg_name = opts.vg_name
    if not opts.lvm_storage and opts.vg_name:
        # Reported on stderr, like the same conflict in InitCluster.
        ToStderr("Options --no-lvm-storage and --vg-name conflict.")
        return 1
    elif not opts.lvm_storage:
        # NOTE(review): an empty name is assumed to be how the opcode is
        # told to disable LVM storage -- confirm against
        # OpSetClusterParams.
        vg_name = ""

    hvlist = opts.enabled_hypervisors
    if hvlist is not None:
        hvlist = hvlist.split(",")

    # a list of (name, dict) we can pass directly to dict() (or [])
    hvparams = dict(opts.hvparams)
    for hv_params in hvparams.values():
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)

    beparams = opts.beparams
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

    nicparams = opts.nicparams
    utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

    # NOTE(review): the hvparams/beparams/nicparams keyword names are
    # assumed to mirror the local names -- confirm against the opcode.
    op = opcodes.OpSetClusterParams(vg_name=vg_name,
                                    enabled_hypervisors=hvlist,
                                    hvparams=hvparams,
                                    beparams=beparams,
                                    nicparams=nicparams,
                                    candidate_pool_size=opts.candidate_pool_size)
    SubmitOpCode(op)
    return 0
def QueueOps(opts, args):
    """Queue operations.

    Sets or clears the queue drain flag ("drain"/"undrain") or prints its
    current state ("info").

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the subcommand
    @rtype: int
    @return: the desired exit code
    @raise errors.OpPrereqError: if the subcommand is not recognised

    """
    command = args[0]
    # NOTE(review): the client construction is not visible in this copy;
    # GetClient() from ganeti.cli is assumed -- confirm.
    client = GetClient()
    if command in ("drain", "undrain"):
        drain_flag = command == "drain"
        client.SetQueueDrainFlag(drain_flag)
    elif command == "info":
        result = client.QueryConfigValues(["drain_flag"])
        # NOTE(review): the wording of the two states is not visible in
        # this copy; "set"/"unset" is assumed -- confirm.
        if result[0]:
            val = "set"
        else:
            val = "unset"
        ToStdout("The drain flag is %s" % val)
    else:
        raise errors.OpPrereqError("Command '%s' is not valid." % command,
                                   errors.ECODE_INVAL)

    return 0
def _ShowWatcherPause(until):
    """Print whether the watcher is paused.

    @param until: timestamp the pause lasts until, compared against
        C{time.time()}; C{None} or a value in the past means not paused

    """
    if until is None or until < time.time():
        ToStdout("The watcher is not paused.")
    else:
        ToStdout("The watcher is paused until %s.", time.ctime(until))
def WatcherOps(opts, args):
    """Watcher operations.

    Supports "continue" (clear the pause), "pause <timespec>" (pause for
    the given duration from now) and "info" (show the current state).

    @param opts: the command line options selected by the user
    @type args: list
    @param args: should contain only one element, the subcommand
    @rtype: int
    @return: the desired exit code
    @raise errors.OpPrereqError: if the subcommand is not recognised or
        "pause" is given without a duration

    """
    command = args[0]
    # NOTE(review): the client construction is not visible in this copy;
    # GetClient() from ganeti.cli is assumed -- confirm.
    client = GetClient()

    if command == "continue":
        client.SetWatcherPause(None)
        ToStdout("The watcher is no longer paused.")

    elif command == "pause":
        if len(args) < 2:
            raise errors.OpPrereqError("Missing pause duration",
                                       errors.ECODE_INVAL)

        result = client.SetWatcherPause(time.time() + ParseTimespec(args[1]))
        _ShowWatcherPause(result)

    elif command == "info":
        result = client.QueryConfigValues(["watcher_pause"])
        # QueryConfigValues returns one value per requested field, so the
        # timestamp is the first element, not the list itself.
        _ShowWatcherPause(result[0])

    else:
        raise errors.OpPrereqError("Command '%s' is not valid." % command,
                                   errors.ECODE_INVAL)

    return 0
# Sub-command table handed to GenericMain. Each entry is
# (handler, positional argument spec, option list, usage, description).
# NOTE(review): the dictionary keys (except 'repair-disk-sizes'), the
# option lists of 'rename', 'copyfile', 'command' and 'watcher', the
# handlers of 'queue' and 'watcher' and the usage strings of 'rename' and
# 'modify' are not visible in this copy. They were restored from the
# gnt-cluster(8) sub-command list and the handlers' use of opts -- confirm
# against the original file before applying.
commands = {
    'init': (
        InitCluster, [ArgHost(min=1, max=1)],
        [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
         HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, NIC_PARAMS_OPT,
         NOLVM_STORAGE_OPT, NOMODIFY_ETCHOSTS_OPT, NOMODIFY_SSH_SETUP_OPT,
         SECONDARY_IP_OPT, VG_NAME_OPT],
        "[opts...] <cluster_name>", "Initialises a new cluster configuration"),
    'destroy': (
        DestroyCluster, ARGS_NONE, [YES_DOIT_OPT],
        "", "Destroy cluster"),
    'rename': (
        RenameCluster, [ArgHost(min=1, max=1)],
        [FORCE_OPT],
        "<new_name>",
        "Renames the cluster"),
    'redist-conf': (
        RedistributeConfig, ARGS_NONE, [SUBMIT_OPT],
        "", "Forces a push of the configuration file and ssconf files"
        " to the nodes in the cluster"),
    'verify': (
        VerifyCluster, ARGS_NONE,
        [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT],
        "", "Does a check on the cluster configuration"),
    'verify-disks': (
        VerifyDisks, ARGS_NONE, [],
        "", "Does a check on the cluster disk status"),
    'repair-disk-sizes': (
        RepairDiskSizes, ARGS_MANY_INSTANCES, [],
        "", "Updates mismatches in recorded disk sizes"),
    'masterfailover': (
        MasterFailover, ARGS_NONE, [NOVOTING_OPT],
        "", "Makes the current node the master"),
    'version': (
        ShowClusterVersion, ARGS_NONE, [],
        "", "Shows the cluster version"),
    'getmaster': (
        ShowClusterMaster, ARGS_NONE, [],
        "", "Shows the cluster master"),
    'copyfile': (
        ClusterCopyFile, [ArgFile(min=1, max=1)],
        [NODE_LIST_OPT],
        "[-n node...] <filename>", "Copies a file to all (or only some) nodes"),
    'command': (
        RunClusterCommand, [ArgCommand(min=1)],
        [NODE_LIST_OPT],
        "[-n node...] <command>", "Runs a command on all (or only some) nodes"),
    'info': (
        ShowClusterConfig, ARGS_NONE, [],
        "", "Show cluster configuration"),
    'list-tags': (
        ListTags, ARGS_NONE, [], "", "List the tags of the cluster"),
    'add-tags': (
        AddTags, [ArgUnknown()], [TAG_SRC_OPT],
        "tag...", "Add tags to the cluster"),
    'remove-tags': (
        RemoveTags, [ArgUnknown()], [TAG_SRC_OPT],
        "tag...", "Remove tags from the cluster"),
    'search-tags': (
        SearchTags, [ArgUnknown(min=1, max=1)],
        [], "", "Searches the tags on all objects on"
        " the cluster for a given pattern (regex)"),
    'queue': (
        QueueOps,
        [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
        [], "drain|undrain|info", "Change queue properties"),
    'watcher': (
        WatcherOps,
        [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
         ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])],
        [],
        "{pause <timespec>|continue|info}", "Change watcher properties"),
    'modify': (
        SetClusterParams, ARGS_NONE,
        [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, HVLIST_OPT,
         NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, VG_NAME_OPT],
        "[opts...]",
        "Alters the parameters of the cluster"),
    }
if __name__ == '__main__':
    # The override passes the cluster tag type to GenericMain; the value
    # GenericMain returns becomes the process exit status.
    sys.exit(GenericMain(commands,
                         override={"tag_type": constants.TAG_CLUSTER}))