4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 """Cluster related commands"""
23 # pylint: disable-msg=W0401,W0613,W0614,C0103
24 # W0401: Wildcard import ganeti.cli
25 # W0613: Unused argument, since all functions follow the same API
26 # W0614: Unused import %s from wildcard import (since we need cli)
27 # C0103: Invalid name gnt-cluster
34 from ganeti.cli import *
35 from ganeti import opcodes
36 from ganeti import constants
37 from ganeti import errors
38 from ganeti import utils
39 from ganeti import bootstrap
40 from ganeti import ssh
41 from ganeti import objects
42 from ganeti import uidpool
43 from ganeti import compat
def InitCluster(opts, args):
  """Initialize the cluster.

  Normalises the command line options into the parameter dictionaries
  passed to C{bootstrap.InitCluster}, bootstraps the cluster and then
  submits the post-init opcode.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should contain only one element, the desired
      cluster name
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  if not opts.lvm_storage and opts.vg_name:
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
    return 1

  # LVM storage requested without an explicit volume group: use the default
  vg_name = opts.vg_name
  if opts.lvm_storage and not opts.vg_name:
    vg_name = constants.DEFAULT_VG

  hvlist = opts.enabled_hypervisors
  if hvlist is None:
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
  hvlist = hvlist.split(",")

  hvparams = dict(opts.hvparams)
  beparams = opts.beparams
  nicparams = opts.nicparams

  # prepare beparams dict
  beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  # prepare nicparams dict
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  # prepare hvparams dict: every known hypervisor gets its defaults,
  # overridden by whatever was given on the command line
  for hv in constants.HYPER_TYPES:
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv],
                                    hvparams.get(hv, {}))
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)

  if opts.candidate_pool_size is None:
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT

  if opts.mac_prefix is None:
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX

  uid_pool = opts.uid_pool
  if uid_pool is not None:
    uid_pool = uidpool.ParseUidPool(uid_pool)

  bootstrap.InitCluster(cluster_name=args[0],
                        secondary_ip=opts.secondary_ip,
                        vg_name=vg_name,
                        mac_prefix=opts.mac_prefix,
                        master_netdev=opts.master_netdev,
                        file_storage_dir=opts.file_storage_dir,
                        enabled_hypervisors=hvlist,
                        hvparams=hvparams,
                        beparams=beparams,
                        nicparams=nicparams,
                        candidate_pool_size=opts.candidate_pool_size,
                        modify_etc_hosts=opts.modify_etc_hosts,
                        modify_ssh_setup=opts.modify_ssh_setup,
                        maintain_node_health=opts.maintain_node_health,
                        uid_pool=uid_pool,
                        )
  op = opcodes.OpPostInitCluster()
  SubmitOpCode(op, opts=opts)
  return 0
def DestroyCluster(opts, args):
  """Destroy the cluster.

  Refuses to do anything unless the user confirmed via the
  C{--yes-do-it} option.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should be an empty list
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  if not opts.yes_do_it:
    ToStderr("Destroying a cluster is irreversible. If you really want"
             " destroy this cluster, supply the --yes-do-it option.")
    return 1

  op = opcodes.OpDestroyCluster()
  master = SubmitOpCode(op, opts=opts)
  # if we reached this, the opcode didn't fail; we can proceed to
  # shutdown all the daemons
  bootstrap.FinalizeClusterDestroy(master)
  return 0
def RenameCluster(opts, args):
  """Rename the cluster.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should contain only one element, the new cluster name
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  name = args[0]
  # NOTE(review): the confirmation is assumed to be skipped when the
  # force option is given -- confirm the option name against the CLI
  # definition
  if not opts.force:
    usertext = ("This will rename the cluster to '%s'. If you are connected"
                " over the network to the cluster name, the operation is very"
                " dangerous as the IP address will be removed from the node"
                " and the change may not go through. Continue?") % name
    if not AskUser(usertext):
      return 1

  op = opcodes.OpRenameCluster(name=name)
  SubmitOpCode(op, opts=opts)
  return 0
def RedistributeConfig(opts, args):
  """Forces push of the cluster configuration.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: empty list
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpRedistributeConfig()
  SubmitOrSend(op, opts)
  return 0
def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  The values are taken from the cluster info query result.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should be an empty list
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  result = cl.QueryClusterInfo()
  ToStdout("Software version: %s", result["software_version"])
  ToStdout("Internode protocol: %s", result["protocol_version"])
  ToStdout("Configuration format: %s", result["config_version"])
  ToStdout("OS api version: %s", result["os_api_version"])
  ToStdout("Export interface: %s", result["export_version"])
  return 0
def ShowClusterMaster(opts, args):
  """Write name of master node to the standard output.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should be an empty list
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  master = bootstrap.GetMaster()
  ToStdout(master)
  return 0
def _PrintGroupedParams(paramsdict, level=1, roman=False):
  """Print Grouped parameters (be, nic, disk) by group.

  Nested dictionaries are printed recursively, one indentation level
  deeper than their parent; entries are emitted in sorted key order.

  @type paramsdict: dict of dicts
  @param paramsdict: {group: {param: value, ...}, ...}
  @type level: int
  @param level: Level of indention
  @type roman: bool
  @param roman: whether integer values are passed through
      C{compat.TryToRoman} before being printed

  """
  # NOTE(review): width of one indentation step assumed to be two
  # spaces -- confirm against the expected output layout
  indent = "  " * level
  for item, val in sorted(paramsdict.items()):
    if isinstance(val, dict):
      ToStdout("%s- %s:", indent, item)
      _PrintGroupedParams(val, level=level + 1, roman=roman)
    elif roman and isinstance(val, int):
      ToStdout("%s %s: %s", indent, item, compat.TryToRoman(val))
    else:
      ToStdout("%s %s: %s", indent, item, val)
def ShowClusterConfig(opts, args):
  """Shows cluster information.

  Queries the cluster info once and prints identity, timestamps,
  hypervisor settings, cluster-wide parameters and the default
  instance/nic parameters.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should be an empty list
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  result = cl.QueryClusterInfo()

  ToStdout("Cluster name: %s", result["name"])
  ToStdout("Cluster UUID: %s", result["uuid"])

  ToStdout("Creation time: %s", utils.FormatTime(result["ctime"]))
  ToStdout("Modification time: %s", utils.FormatTime(result["mtime"]))

  ToStdout("Master node: %s", result["master"])

  ToStdout("Architecture (this node): %s (%s)",
           result["architecture"][0], result["architecture"][1])

  if result["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
  else:
    # NOTE(review): placeholder text for an untagged cluster is assumed
    tags = "(none)"

  ToStdout("Tags: %s", tags)

  ToStdout("Default hypervisor: %s", result["default_hypervisor"])
  ToStdout("Enabled hypervisors: %s",
           utils.CommaJoin(result["enabled_hypervisors"]))

  ToStdout("Hypervisor parameters:")
  _PrintGroupedParams(result["hvparams"])

  ToStdout("OS specific hypervisor parameters:")
  _PrintGroupedParams(result["os_hvp"])

  ToStdout("Cluster parameters:")
  ToStdout(" - candidate pool size: %s",
           compat.TryToRoman(result["candidate_pool_size"],
                             convert=opts.roman_integers))
  ToStdout(" - master netdev: %s", result["master_netdev"])
  ToStdout(" - lvm volume group: %s", result["volume_group_name"])
  ToStdout(" - file storage path: %s", result["file_storage_dir"])
  ToStdout(" - maintenance of node health: %s",
           result["maintain_node_health"])
  ToStdout(" - uid pool: %s",
           uidpool.FormatUidPool(result["uid_pool"],
                                 roman=opts.roman_integers))

  ToStdout("Default instance parameters:")
  _PrintGroupedParams(result["beparams"], roman=opts.roman_integers)

  ToStdout("Default nic parameters:")
  _PrintGroupedParams(result["nicparams"], roman=opts.roman_integers)

  return 0
def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  A failed copy to an individual node is reported on stderr but does
  not abort the remaining copies.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should contain only one element, the path of
      the file to be copied
  @type args: list
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the file does not exist locally

  """
  filename = args[0]
  if not os.path.exists(filename):
    raise errors.OpPrereqError("No such filename '%s'" % filename,
                               errors.ECODE_INVAL)

  cl = GetClient()

  cluster_name = cl.QueryConfigValues(["cluster_name"])[0]

  # the master already has the file, hence filter_master
  results = GetOnlineNodes(nodes=opts.nodes, cl=cl, filter_master=True,
                           secondary_ips=opts.use_replication_network)

  srun = ssh.SshRunner(cluster_name=cluster_name)
  for node in results:
    if not srun.CopyFileToNode(node, filename):
      ToStderr("Copy of file %s to node %s failed", filename, node)

  return 0
def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  The arguments are joined with spaces into a single command line that
  is run as root on every selected online node; the master node, if
  selected, is handled last.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should contain the command to be run and its arguments
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()

  command = " ".join(args)

  nodes = GetOnlineNodes(nodes=opts.nodes, cl=cl)

  cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
                                                    "master_node"])

  srun = ssh.SshRunner(cluster_name=cluster_name)

  # Make sure master node is at list end
  if master_node in nodes:
    nodes.remove(master_node)
    nodes.append(master_node)

  for name in nodes:
    result = srun.Run(name, "root", command)
    ToStdout("------------------------------------------------")
    ToStdout("node: %s", name)
    ToStdout("%s", result.output)
    ToStdout("return code = %s", result.exit_code)

  return 0
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should be an empty list
  @type args: list
  @rtype: int
  @return: the desired exit code; zero only if the verify opcode
      returned a true value

  """
  skip_checks = []
  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)
  op = opcodes.OpVerifyCluster(skip_checks=skip_checks,
                               verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors)
  if SubmitOpCode(op, opts=opts):
    return 0
  return 1
392 def VerifyDisks(opts, args):
393 """Verify integrity of cluster disks.
395 @param opts: the command line options selected by the user
397 @param args: should be an empty list
399 @return: the desired exit code
402 op = opcodes.OpVerifyDisks()
403 result = SubmitOpCode(op, opts=opts)
404 if not isinstance(result, (list, tuple)) or len(result) != 3:
405 raise errors.ProgrammerError("Unknown result type for OpVerifyDisks")
407 bad_nodes, instances, missing = result
409 retcode = constants.EXIT_SUCCESS
412 for node, text in bad_nodes.items():
413 ToStdout("Error gathering data on node %s: %s",
414 node, utils.SafeEncode(text[-400:]))
416 ToStdout("You need to fix these nodes first before fixing instances")
419 for iname in instances:
422 op = opcodes.OpActivateInstanceDisks(instance_name=iname)
424 ToStdout("Activating disks for instance '%s'", iname)
425 SubmitOpCode(op, opts=opts)
426 except errors.GenericError, err:
427 nret, msg = FormatError(err)
429 ToStderr("Error activating disks for instance %s: %s", iname, msg)
432 for iname, ival in missing.iteritems():
433 all_missing = compat.all(ival, lambda x: x[0] in bad_nodes)
435 ToStdout("Instance %s cannot be verified as it lives on"
436 " broken nodes", iname)
438 ToStdout("Instance %s has missing logical volumes:", iname)
440 for node, vol in ival:
441 if node in bad_nodes:
442 ToStdout("\tbroken node %s /dev/xenvg/%s", node, vol)
444 ToStdout("\t%s /dev/xenvg/%s", node, vol)
445 ToStdout("You need to run replace_disks for all the above"
446 " instances, if this message persist after fixing nodes.")
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: optional list of instances to restrict check to
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # explicit success code, as promised by the docstring
  return 0
def MasterFailover(opts, args):
  """Failover the master node.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should be an empty list
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  # skipping the vote is dangerous, so ask for explicit confirmation
  if opts.no_voting:
    usertext = ("This will perform the failover even if most other nodes"
                " are down, or if this node is outdated. This is dangerous"
                " as it can lead to a non-consistent cluster. Check the"
                " gnt-cluster(8) man page before proceeding. Continue?")
    if not AskUser(usertext):
      return 1

  return bootstrap.MasterFailover(no_voting=opts.no_voting)
def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  Matches are printed as sorted C{path tag} lines.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should contain only one element, the tag pattern
  @type args: list
  @rtype: int
  @return: the desired exit code; non-zero if nothing matched

  """
  op = opcodes.OpSearchTags(pattern=args[0])
  result = SubmitOpCode(op, opts=opts)
  if not result:
    return 1

  for path, tag in sorted(result):
    ToStdout("%s %s", path, tag)

  return 0
512 def _RenewCrypto(new_cluster_cert, new_rapi_cert, rapi_cert_filename,
513 new_confd_hmac_key, force):
514 """Renews cluster certificates, keys and secrets.
516 @type new_cluster_cert: bool
517 @param new_cluster_cert: Whether to generate a new cluster certificate
518 @type new_rapi_cert: bool
519 @param new_rapi_cert: Whether to generate a new RAPI certificate
520 @type rapi_cert_filename: string
521 @param rapi_cert_filename: Path to file containing new RAPI certificate
522 @type new_confd_hmac_key: bool
523 @param new_confd_hmac_key: Whether to generate a new HMAC key
525 @param force: Whether to ask user for confirmation
528 if new_rapi_cert and rapi_cert_filename:
529 ToStderr("Only one of the --new-rapi-certficate and --rapi-certificate"
530 " options can be specified at the same time.")
533 if rapi_cert_filename:
534 # Read and verify new certificate
536 rapi_cert_pem = utils.ReadFile(rapi_cert_filename)
538 OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
540 except Exception, err: # pylint: disable-msg=W0703
541 ToStderr("Can't load new RAPI certificate from %s: %s" %
542 (rapi_cert_filename, str(err)))
546 OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, rapi_cert_pem)
547 except Exception, err: # pylint: disable-msg=W0703
548 ToStderr("Can't load new RAPI private key from %s: %s" %
549 (rapi_cert_filename, str(err)))
556 usertext = ("This requires all daemons on all nodes to be restarted and"
557 " may take some time. Continue?")
558 if not AskUser(usertext):
561 def _RenewCryptoInner(ctx):
562 ctx.feedback_fn("Updating certificates and keys")
563 bootstrap.GenerateClusterCrypto(new_cluster_cert, new_rapi_cert,
565 rapi_cert_pem=rapi_cert_pem)
570 files_to_copy.append(constants.NODED_CERT_FILE)
572 if new_rapi_cert or rapi_cert_pem:
573 files_to_copy.append(constants.RAPI_CERT_FILE)
575 if new_confd_hmac_key:
576 files_to_copy.append(constants.CONFD_HMAC_KEY)
579 for node_name in ctx.nonmaster_nodes:
580 ctx.feedback_fn("Copying %s to %s" %
581 (", ".join(files_to_copy), node_name))
582 for file_name in files_to_copy:
583 ctx.ssh.CopyFileToNode(node_name, file_name)
585 RunWhileClusterStopped(ToStdout, _RenewCryptoInner)
587 ToStdout("All requested certificates and keys have been replaced."
588 " Running \"gnt-cluster verify\" now is recommended.")
def RenewCrypto(opts, args):
  """Renews cluster certificates, keys and secrets.

  Thin command line wrapper which unpacks the options for
  L{_RenewCrypto}.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should be an empty list
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  # NOTE(review): option attribute names for the two RAPI certificate
  # options are assumed -- confirm against the option definitions
  return _RenewCrypto(opts.new_cluster_cert,
                      opts.new_rapi_cert,
                      opts.rapi_cert,
                      opts.new_confd_hmac_key,
                      opts.force)
def SetClusterParams(opts, args):
  """Modify the cluster.

  At least one modification option has to be given; the options are
  type-checked and passed on in a single opcode.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should be an empty list
  @type args: list
  @rtype: int
  @return: the desired exit code

  """
  if not (not opts.lvm_storage or opts.vg_name or
          opts.enabled_hypervisors or opts.hvparams or
          opts.beparams or opts.nicparams or
          opts.candidate_pool_size is not None or
          opts.uid_pool is not None or
          opts.maintain_node_health is not None or
          opts.add_uids is not None or
          opts.remove_uids is not None):
    ToStderr("Please give at least one of the parameters.")
    return 1

  vg_name = opts.vg_name
  if not opts.lvm_storage and opts.vg_name:
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
    return 1

  if not opts.lvm_storage:
    # NOTE(review): an empty volume group name is assumed to be the way
    # of disabling LVM storage -- confirm against the opcode semantics
    vg_name = ""

  hvlist = opts.enabled_hypervisors
  if hvlist is not None:
    hvlist = hvlist.split(",")

  # a list of (name, dict) we can pass directly to dict() (or [])
  hvparams = dict(opts.hvparams)
  for hv_params in hvparams.values():
    utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)

  beparams = opts.beparams
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  nicparams = opts.nicparams
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  mnh = opts.maintain_node_health

  uid_pool = opts.uid_pool
  if uid_pool is not None:
    uid_pool = uidpool.ParseUidPool(uid_pool)

  add_uids = opts.add_uids
  if add_uids is not None:
    add_uids = uidpool.ParseUidPool(add_uids)

  remove_uids = opts.remove_uids
  if remove_uids is not None:
    remove_uids = uidpool.ParseUidPool(remove_uids)

  # NOTE(review): os_hvp is assumed to be passed as None since this
  # command offers no option for it -- confirm against the opcode
  op = opcodes.OpSetClusterParams(vg_name=vg_name,
                                  enabled_hypervisors=hvlist,
                                  hvparams=hvparams,
                                  os_hvp=None,
                                  beparams=beparams,
                                  nicparams=nicparams,
                                  candidate_pool_size=opts.candidate_pool_size,
                                  maintain_node_health=mnh,
                                  uid_pool=uid_pool,
                                  add_uids=add_uids,
                                  remove_uids=remove_uids)
  SubmitOpCode(op, opts=opts)
  return 0
def QueueOps(opts, args):
  """Queue operations.

  Supported subcommands are C{drain} and C{undrain}, which set or
  clear the queue drain flag, and C{info}, which shows it.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should contain only one element, the subcommand
  @type args: list
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: for an unknown subcommand

  """
  command = args[0]
  client = GetClient()
  if command in ("drain", "undrain"):
    drain_flag = command == "drain"
    client.SetQueueDrainFlag(drain_flag)
  elif command == "info":
    result = client.QueryConfigValues(["drain_flag"])
    # NOTE(review): wording of the two flag states is assumed
    if result[0]:
      val = "set"
    else:
      val = "unset"
    ToStdout("The drain flag is %s" % val)
  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % command,
                               errors.ECODE_INVAL)

  return 0
def _ShowWatcherPause(until):
  """Prints the pause state of the watcher.

  @type until: number or None
  @param until: timestamp until which the watcher is paused; None or a
      time in the past means the watcher is not paused

  """
  if until is None or until < time.time():
    ToStdout("The watcher is not paused.")
  else:
    ToStdout("The watcher is paused until %s.", time.ctime(until))
def WatcherOps(opts, args):
  """Watcher operations.

  Supported subcommands are C{continue}, C{pause} (which needs a
  duration as second argument) and C{info}.

  @param opts: the command line options selected by the user
  @type opts: object
  @param args: should contain only one element, the subcommand
  @type args: list
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: for an unknown subcommand or a missing
      pause duration

  """
  command = args[0]
  client = GetClient()

  if command == "continue":
    client.SetWatcherPause(None)
    ToStdout("The watcher is no longer paused.")

  elif command == "pause":
    if len(args) < 2:
      raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)

    # the pause is stored as an absolute timestamp
    result = client.SetWatcherPause(time.time() + ParseTimespec(args[1]))
    _ShowWatcherPause(result)

  elif command == "info":
    result = client.QueryConfigValues(["watcher_pause"])
    _ShowWatcherPause(result[0])

  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % command,
                               errors.ECODE_INVAL)

  return 0
# Command table: maps each sub-command name to a tuple of
# (handler, argument definition, option list, usage string, description).
# NOTE(review): the command-name keys and a few short option/usage
# entries were not visible in the reviewed text and have been restored
# from the handler names and the options each handler reads -- confirm
# every key against the documented command names before merging.
commands = {
  'init': (
    InitCluster, [ArgHost(min=1, max=1)],
    [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
     HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, NIC_PARAMS_OPT,
     NOLVM_STORAGE_OPT, NOMODIFY_ETCHOSTS_OPT, NOMODIFY_SSH_SETUP_OPT,
     SECONDARY_IP_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT,
     UIDPOOL_OPT],
    "[opts...] <cluster_name>", "Initialises a new cluster configuration"),
  'destroy': (
    DestroyCluster, ARGS_NONE, [YES_DOIT_OPT],
    "", "Destroy cluster"),
  'rename': (
    RenameCluster, [ArgHost(min=1, max=1)],
    [FORCE_OPT],
    "<new_name>",
    "Renames the cluster"),
  'redist-conf': (
    RedistributeConfig, ARGS_NONE, [SUBMIT_OPT],
    "", "Forces a push of the configuration file and ssconf files"
    " to the nodes in the cluster"),
  'verify': (
    VerifyCluster, ARGS_NONE,
    [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT],
    "", "Does a check on the cluster configuration"),
  'verify-disks': (
    VerifyDisks, ARGS_NONE, [],
    "", "Does a check on the cluster disk status"),
  'repair-disk-sizes': (
    RepairDiskSizes, ARGS_MANY_INSTANCES, [],
    "", "Updates mismatches in recorded disk sizes"),
  'masterfailover': (
    MasterFailover, ARGS_NONE, [NOVOTING_OPT],
    "", "Makes the current node the master"),
  'version': (
    ShowClusterVersion, ARGS_NONE, [],
    "", "Shows the cluster version"),
  'getmaster': (
    ShowClusterMaster, ARGS_NONE, [],
    "", "Shows the cluster master"),
  'copyfile': (
    ClusterCopyFile, [ArgFile(min=1, max=1)],
    [NODE_LIST_OPT, USE_REPL_NET_OPT],
    "[-n node...] <filename>", "Copies a file to all (or only some) nodes"),
  'command': (
    RunClusterCommand, [ArgCommand(min=1)],
    [NODE_LIST_OPT],
    "[-n node...] <command>", "Runs a command on all (or only some) nodes"),
  'info': (
    ShowClusterConfig, ARGS_NONE, [ROMAN_OPT],
    "[--roman]", "Show cluster configuration"),
  'list-tags': (
    ListTags, ARGS_NONE, [], "", "List the tags of the cluster"),
  'add-tags': (
    AddTags, [ArgUnknown()], [TAG_SRC_OPT],
    "tag...", "Add tags to the cluster"),
  'remove-tags': (
    RemoveTags, [ArgUnknown()], [TAG_SRC_OPT],
    "tag...", "Remove tags from the cluster"),
  'search-tags': (
    SearchTags, [ArgUnknown(min=1, max=1)],
    [], "", "Searches the tags on all objects on"
    " the cluster for a given pattern (regex)"),
  'queue': (
    QueueOps,
    [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
    [], "drain|undrain|info", "Change queue properties"),
  'watcher': (
    WatcherOps,
    [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
     ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])],
    [],
    "{pause <timespec>|continue|info}", "Change watcher properties"),
  'modify': (
    SetClusterParams, ARGS_NONE,
    [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, HVLIST_OPT,
     NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT,
     UIDPOOL_OPT, ADD_UIDS_OPT, REMOVE_UIDS_OPT],
    "[opts...]",
    "Alters the parameters of the cluster"),
  'renew-crypto': (
    RenewCrypto, ARGS_NONE,
    [NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT,
     NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT],
    "[opts...]",
    "Renews cluster certificates, keys and secrets"),
  }


if __name__ == '__main__':
  # tag commands in this script operate on the cluster object
  sys.exit(GenericMain(commands, override={"tag_type": constants.TAG_CLUSTER}))