X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/098c0958281c48228fe57eb7832153dafefbd07c..5af3da74b7ecf4aa31ed8ddaad47bbf6c0953475:/scripts/gnt-cluster diff --git a/scripts/gnt-cluster b/scripts/gnt-cluster index 3fd271c..f7f0bc3 100755 --- a/scripts/gnt-cluster +++ b/scripts/gnt-cluster @@ -19,100 +19,238 @@ # 02110-1301, USA. +# pylint: disable-msg=W0401,W0614 +# W0401: Wildcard import ganeti.cli +# W0614: Unused import %s from wildcard import (since we need cli) + import sys from optparse import make_option -import pprint +import os.path from ganeti.cli import * from ganeti import opcodes +from ganeti import constants +from ganeti import errors +from ganeti import utils +from ganeti import bootstrap +from ganeti import ssh +from ganeti import objects +@UsesRPC def InitCluster(opts, args): """Initialize the cluster. - Args: - opts - class with options as members - args - list of arguments, expected to be [clustername] + @param opts: the command line options selected by the user + @type args: list + @param args: should contain only one element, the desired + cluster name + @rtype: int + @return: the desired exit code """ - op = opcodes.OpInitCluster(cluster_name=args[0], - secondary_ip=opts.secondary_ip, - hypervisor_type=opts.hypervisor_type, - vg_name=opts.vg_name, - mac_prefix=opts.mac_prefix, - def_bridge=opts.def_bridge, - master_netdev=opts.master_netdev) - SubmitOpCode(op) + if not opts.lvm_storage and opts.vg_name: + ToStderr("Options --no-lvm-storage and --vg-name conflict.") + return 1 + + vg_name = opts.vg_name + if opts.lvm_storage and not opts.vg_name: + vg_name = constants.DEFAULT_VG + + hvlist = opts.enabled_hypervisors + if hvlist is not None: + hvlist = hvlist.split(",") + else: + hvlist = [opts.default_hypervisor] + + # avoid an impossible situation + if opts.default_hypervisor not in hvlist: + ToStderr("The default hypervisor requested (%s) is not" + " within the enabled hypervisor list (%s)" % + (opts.default_hypervisor, hvlist)) + return 1 + + hvparams = dict(opts.hvparams) + beparams = opts.beparams + nicparams = opts.nicparams + + # prepare beparams dict + beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams) + utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES) + + # prepare nicparams dict + nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams) + utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES) + + # prepare hvparams dict + for hv in constants.HYPER_TYPES: + if hv not in hvparams: + hvparams[hv] = {} + hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv]) + utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES) + + for hv in hvlist: + if hv not in constants.HYPER_TYPES: + ToStderr("invalid hypervisor: %s", hv) + return 1 + + bootstrap.InitCluster(cluster_name=args[0], + secondary_ip=opts.secondary_ip, + vg_name=vg_name, + mac_prefix=opts.mac_prefix, + def_bridge=opts.def_bridge, + master_netdev=opts.master_netdev, + file_storage_dir=opts.file_storage_dir, + enabled_hypervisors=hvlist, + default_hypervisor=opts.default_hypervisor, + hvparams=hvparams, + beparams=beparams, + nicparams=nicparams, + candidate_pool_size=opts.candidate_pool_size, + modify_etc_hosts=opts.modify_etc_hosts, + ) return 0 +@UsesRPC def DestroyCluster(opts, args): """Destroy the cluster. - Args: - opts - class with options as members + @param opts: the command line options selected by the user + @type args: list + @param args: should be an empty list + @rtype: int + @return: the desired exit code """ if not opts.yes_do_it: - print ("Destroying a cluster is irreversibly. If you really want destroy" - " this cluster, supply the --yes-do-it option.") + ToStderr("Destroying a cluster is irreversible. If you really want" + " destroy this cluster, supply the --yes-do-it option.") return 1 op = opcodes.OpDestroyCluster() + master = SubmitOpCode(op) + # if we reached this, the opcode didn't fail; we can proceed to + # shutdown all the daemons + bootstrap.FinalizeClusterDestroy(master) + return 0 + + +def RenameCluster(opts, args): + """Rename the cluster. + + @param opts: the command line options selected by the user + @type args: list + @param args: should contain only one element, the new cluster name + @rtype: int + @return: the desired exit code + + """ + name = args[0] + if not opts.force: + usertext = ("This will rename the cluster to '%s'. If you are connected" + " over the network to the cluster name, the operation is very" + " dangerous as the IP address will be removed from the node" + " and the change may not go through. Continue?") % name + if not AskUser(usertext): + return 1 + + op = opcodes.OpRenameCluster(name=name) SubmitOpCode(op) return 0 +def RedistributeConfig(opts, args): + """Forces push of the cluster configuration. + + @param opts: the command line options selected by the user + @type args: list + @param args: empty list + @rtype: int + @return: the desired exit code + + """ + op = opcodes.OpRedistributeConfig() + SubmitOrSend(op, opts) + return 0 + + def ShowClusterVersion(opts, args): """Write version of ganeti software to the standard output. - Args: - opts - class with options as members + @param opts: the command line options selected by the user + @type args: list + @param args: should be an empty list + @rtype: int + @return: the desired exit code """ - op = opcodes.OpQueryClusterInfo() - result = SubmitOpCode(op) - print ("Software version: %s" % result["software_version"]) - print ("Internode protocol: %s" % result["protocol_version"]) - print ("Configuration format: %s" % result["config_version"]) - print ("OS api version: %s" % result["os_api_version"]) - print ("Export interface: %s" % result["export_version"]) + cl = GetClient() + result = cl.QueryClusterInfo() + ToStdout("Software version: %s", result["software_version"]) + ToStdout("Internode protocol: %s", result["protocol_version"]) + ToStdout("Configuration format: %s", result["config_version"]) + ToStdout("OS api version: %s", result["os_api_version"]) + ToStdout("Export interface: %s", result["export_version"]) return 0 def ShowClusterMaster(opts, args): """Write name of master node to the standard output. - Args: - opts - class with options as members + @param opts: the command line options selected by the user + @type args: list + @param args: should be an empty list + @rtype: int + @return: the desired exit code """ - op = opcodes.OpQueryClusterInfo() - result = SubmitOpCode(op) - print (result["master"]) + master = bootstrap.GetMaster() + ToStdout(master) return 0 def ShowClusterConfig(opts, args): """Shows cluster information. + @param opts: the command line options selected by the user + @type args: list + @param args: should be an empty list + @rtype: int + @return: the desired exit code + """ - op = opcodes.OpQueryClusterInfo() - result = SubmitOpCode(op) + cl = GetClient() + result = cl.QueryClusterInfo() + + ToStdout("Cluster name: %s", result["name"]) - print ("Cluster name: %s" % result["name"]) + ToStdout("Master node: %s", result["master"]) - print ("Architecture: %s (%s)" % - (result["architecture"][0], result["architecture"][1])) + ToStdout("Architecture (this node): %s (%s)", + result["architecture"][0], result["architecture"][1]) - print ("Master node: %s" % result["master"]) + ToStdout("Default hypervisor: %s", result["default_hypervisor"]) + ToStdout("Enabled hypervisors: %s", ", ".join(result["enabled_hypervisors"])) - print ("Instances:") - for name, node in result["instances"]: - print (" - %s (on %s)" % (name, node)) - print ("Nodes:") - for name in result["nodes"]: - print (" - %s" % name) + ToStdout("Hypervisor parameters:") + for hv_name, hv_dict in result["hvparams"].items(): + ToStdout(" - %s:", hv_name) + for item, val in hv_dict.iteritems(): + ToStdout(" %s: %s", item, val) + + ToStdout("Cluster parameters:") + ToStdout(" - candidate pool size: %s", result["candidate_pool_size"]) + ToStdout(" - master netdev: %s", result["master_netdev"]) + ToStdout(" - default bridge: %s", result["default_bridge"]) + ToStdout(" - lvm volume group: %s", result["volume_group_name"]) + ToStdout(" - file storage path: %s", result["file_storage_dir"]) + + ToStdout("Default instance parameters:") + for gr_name, gr_dict in result["beparams"].items(): + ToStdout(" - %s:", gr_name) + for item, val in gr_dict.iteritems(): + ToStdout(" %s: %s", item, val) return 0 @@ -120,52 +258,156 @@ def ShowClusterConfig(opts, args): def ClusterCopyFile(opts, args): """Copy a file from master to some nodes. - Args: - opts - class with options as members - args - list containing a single element, the file name - Opts used: - nodes - list containing the name of target nodes; if empty, all nodes + @param opts: the command line options selected by the user + @type args: list + @param args: should contain only one element, the path of + the file to be copied + @rtype: int + @return: the desired exit code """ - op = opcodes.OpClusterCopyFile(filename=args[0], nodes=opts.nodes) - SubmitOpCode(op) + filename = args[0] + if not os.path.exists(filename): + raise errors.OpPrereqError("No such filename '%s'" % filename) + + cl = GetClient() + + myname = utils.HostInfo().name + + cluster_name = cl.QueryConfigValues(["cluster_name"])[0] + + results = GetOnlineNodes(nodes=opts.nodes, cl=cl) + results = [name for name in results if name != myname] + + srun = ssh.SshRunner(cluster_name=cluster_name) + for node in results: + if not srun.CopyFileToNode(node, filename): + ToStderr("Copy of file %s to node %s failed", filename, node) + return 0 def RunClusterCommand(opts, args): """Run a command on some nodes. - Args: - opts - class with options as members - args - the command list as a list - Opts used: - nodes: list containing the name of target nodes; if empty, all nodes + @param opts: the command line options selected by the user + @type args: list + @param args: should contain the command to be run and its arguments + @rtype: int + @return: the desired exit code """ + cl = GetClient() + command = " ".join(args) - nodes = opts.nodes - op = opcodes.OpRunClusterCommand(command=command, nodes=nodes) - result = SubmitOpCode(op) - for node, sshcommand, output, exit_code in result: - print ("------------------------------------------------") - print ("node: %s" % node) - print ("command: %s" % sshcommand) - print ("%s" % output) - print ("return code = %s" % exit_code) + + nodes = GetOnlineNodes(nodes=opts.nodes, cl=cl) + + cluster_name, master_node = cl.QueryConfigValues(["cluster_name", + "master_node"]) + + srun = ssh.SshRunner(cluster_name=cluster_name) + + # Make sure master node is at list end + if master_node in nodes: + nodes.remove(master_node) + nodes.append(master_node) + + for name in nodes: + result = srun.Run(name, "root", command) + ToStdout("------------------------------------------------") + ToStdout("node: %s", name) + ToStdout("%s", result.output) + ToStdout("return code = %s", result.exit_code) + + return 0 def VerifyCluster(opts, args): """Verify integrity of cluster, performing various test on nodes. - Args: - opts - class with options as members + @param opts: the command line options selected by the user + @type args: list + @param args: should be an empty list + @rtype: int + @return: the desired exit code """ - op = opcodes.OpVerifyCluster() - result = SubmitOpCode(op) - return result + skip_checks = [] + if opts.skip_nplusone_mem: + skip_checks.append(constants.VERIFY_NPLUSONE_MEM) + op = opcodes.OpVerifyCluster(skip_checks=skip_checks) + if SubmitOpCode(op): + return 0 + else: + return 1 + + +def VerifyDisks(opts, args): + """Verify integrity of cluster disks. + @param opts: the command line options selected by the user + @type args: list + @param args: should be an empty list + @rtype: int + @return: the desired exit code + """ + op = opcodes.OpVerifyDisks() + result = SubmitOpCode(op) + if not isinstance(result, (list, tuple)) or len(result) != 4: + raise errors.ProgrammerError("Unknown result type for OpVerifyDisks") + + nodes, nlvm, instances, missing = result + + if nodes: + ToStdout("Nodes unreachable or with bad data:") + for name in nodes: + ToStdout("\t%s", name) + retcode = constants.EXIT_SUCCESS + + if nlvm: + for node, text in nlvm.iteritems(): + ToStdout("Error on node %s: LVM error: %s", + node, utils.SafeEncode(text[-400:])) + retcode |= 1 + ToStdout("You need to fix these nodes first before fixing instances") + + if instances: + for iname in instances: + if iname in missing: + continue + op = opcodes.OpActivateInstanceDisks(instance_name=iname) + try: + ToStdout("Activating disks for instance '%s'", iname) + SubmitOpCode(op) + except errors.GenericError, err: + nret, msg = FormatError(err) + retcode |= nret + ToStderr("Error activating disks for instance %s: %s", iname, msg) + + if missing: + for iname, ival in missing.iteritems(): + all_missing = utils.all(ival, lambda x: x[0] in nlvm) + if all_missing: + ToStdout("Instance %s cannot be verified as it lives on" + " broken nodes", iname) + else: + ToStdout("Instance %s has missing logical volumes:", iname) + ival.sort() + for node, vol in ival: + if node in nlvm: + ToStdout("\tbroken node %s /dev/xenvg/%s", node, vol) + else: + ToStdout("\t%s /dev/xenvg/%s", node, vol) + ToStdout("You need to run replace_disks for all the above" + " instances, if this message persist after fixing nodes.") + retcode |= 1 + + return retcode + + +@UsesRPC def MasterFailover(opts, args): """Failover the master node. @@ -173,17 +415,118 @@ def MasterFailover(opts, args): master to cease being master, and the non-master to become new master. + @param opts: the command line options selected by the user + @type args: list + @param args: should be an empty list + @rtype: int + @return: the desired exit code + """ - op = opcodes.OpMasterFailover() + return bootstrap.MasterFailover() + + +def SearchTags(opts, args): + """Searches the tags on all the cluster. + + @param opts: the command line options selected by the user + @type args: list + @param args: should contain only one element, the tag pattern + @rtype: int + @return: the desired exit code + + """ + op = opcodes.OpSearchTags(pattern=args[0]) + result = SubmitOpCode(op) + if not result: + return 1 + result = list(result) + result.sort() + for path, tag in result: + ToStdout("%s %s", path, tag) + + +def SetClusterParams(opts, args): + """Modify the cluster. + + @param opts: the command line options selected by the user + @type args: list + @param args: should be an empty list + @rtype: int + @return: the desired exit code + + """ + if not (not opts.lvm_storage or opts.vg_name or + opts.enabled_hypervisors or opts.hvparams or + opts.beparams or opts.nicparams or + opts.candidate_pool_size is not None): + ToStderr("Please give at least one of the parameters.") + return 1 + + vg_name = opts.vg_name + if not opts.lvm_storage and opts.vg_name: + ToStdout("Options --no-lvm-storage and --vg-name conflict.") + return 1 + elif not opts.lvm_storage: + vg_name = '' + + hvlist = opts.enabled_hypervisors + if hvlist is not None: + hvlist = hvlist.split(",") + + # a list of (name, dict) we can pass directly to dict() (or []) + hvparams = dict(opts.hvparams) + for hv, hv_params in hvparams.iteritems(): + utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) + + beparams = opts.beparams + utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES) + + nicparams = opts.nicparams + utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES) + + op = opcodes.OpSetClusterParams(vg_name=vg_name, + enabled_hypervisors=hvlist, + hvparams=hvparams, + beparams=beparams, + nicparams=nicparams, + candidate_pool_size=opts.candidate_pool_size) SubmitOpCode(op) + return 0 + + +def QueueOps(opts, args): + """Queue operations. + + @param opts: the command line options selected by the user + @type args: list + @param args: should contain only one element, the subcommand + @rtype: int + @return: the desired exit code + """ + command = args[0] + client = GetClient() + if command in ("drain", "undrain"): + drain_flag = command == "drain" + client.SetQueueDrainFlag(drain_flag) + elif command == "info": + result = client.QueryConfigValues(["drain_flag"]) + if result[0]: + val = "set" + else: + val = "unset" + ToStdout("The drain flag is %s" % val) + else: + raise errors.OpPrereqError("Command '%s' is not valid." % command) + + return 0 # this is an option common to more than one command, so we declare # it here and reuse it node_option = make_option("-n", "--node", action="append", dest="nodes", - help="Node to copy to (if not given, all nodes)" - ", can be given multiple times", metavar="", - default=[]) + help="Node to copy to (if not given, all nodes)," + " can be given multiple times", + metavar="", default=[]) commands = { 'init': (InitCluster, ARGS_ONE, @@ -193,31 +536,67 @@ commands = { " if given, the entire cluster must have secondary" " addresses", metavar="ADDRESS", default=None), - make_option("-t", "--hypervisor-type", dest="hypervisor_type", - help="Specify the hypervisor type (xen-3.0, fake)", - metavar="TYPE", choices=["xen-3.0", "fake"], - default="xen-3.0",), make_option("-m", "--mac-prefix", dest="mac_prefix", help="Specify the mac prefix for the instance IP" " addresses, in the format XX:XX:XX", metavar="PREFIX", - default="aa:00:00",), + default=constants.DEFAULT_MAC_PREFIX,), make_option("-g", "--vg-name", dest="vg_name", help="Specify the volume group name " " (cluster-wide) for disk allocation [xenvg]", metavar="VG", - default="xenvg",), + default=None,), make_option("-b", "--bridge", dest="def_bridge", help="Specify the default bridge name (cluster-wide)" - " to connect the instances to [xen-br0]", + " to connect the instances to [%s]" % + constants.DEFAULT_BRIDGE, metavar="BRIDGE", - default="xen-br0",), + default=constants.DEFAULT_BRIDGE,), make_option("--master-netdev", dest="master_netdev", help="Specify the node interface (cluster-wide)" - " on which the master IP address will be added " - " [xen-br0]", + " on which the master IP address will be added " + " [%s]" % constants.DEFAULT_BRIDGE, metavar="NETDEV", - default="xen-br0",), + default=constants.DEFAULT_BRIDGE,), + make_option("--file-storage-dir", dest="file_storage_dir", + help="Specify the default directory (cluster-wide)" + " for storing the file-based disks [%s]" % + constants.DEFAULT_FILE_STORAGE_DIR, + metavar="DIR", + default=constants.DEFAULT_FILE_STORAGE_DIR,), + make_option("--no-lvm-storage", dest="lvm_storage", + help="No support for lvm based instances" + " (cluster-wide)", + action="store_false", default=True,), + make_option("--no-etc-hosts", dest="modify_etc_hosts", + help="Don't modify /etc/hosts" + " (cluster-wide)", + action="store_false", default=True,), + make_option("--enabled-hypervisors", dest="enabled_hypervisors", + help="Comma-separated list of hypervisors", + type="string", default=None), + make_option("-t", "--default-hypervisor", + dest="default_hypervisor", + help="Default hypervisor to use for instance creation", + choices=list(constants.HYPER_TYPES), + default=constants.DEFAULT_ENABLED_HYPERVISOR), + ikv_option("-H", "--hypervisor-parameters", dest="hvparams", + help="Hypervisor and hypervisor options, in the" + " format" + " hypervisor:option=value,option=value,...", + default=[], + action="append", + type="identkeyval"), + keyval_option("-B", "--backend-parameters", dest="beparams", + type="keyval", default={}, + help="Backend parameters"), + keyval_option("-N", "--nic-parameters", dest="nicparams", + type="keyval", default={}, + help="NIC parameters"), + make_option("-C", "--candidate-pool-size", + default=constants.MASTER_POOL_SIZE_DEFAULT, + help="Set the candidate pool size", + dest="candidate_pool_size", type="int"), ], "[opts...] ", "Initialises a new cluster configuration"), @@ -228,8 +607,22 @@ commands = { action="store_true"), ], "", "Destroy cluster"), - 'verify': (VerifyCluster, ARGS_NONE, [DEBUG_OPT], + 'rename': (RenameCluster, ARGS_ONE, [DEBUG_OPT, FORCE_OPT], + "", + "Renames the cluster"), + 'redist-conf': (RedistributeConfig, ARGS_NONE, [DEBUG_OPT, SUBMIT_OPT], + "", + "Forces a push of the configuration file and ssconf files" + " to the nodes in the cluster"), + 'verify': (VerifyCluster, ARGS_NONE, [DEBUG_OPT, + make_option("--no-nplus1-mem", dest="skip_nplusone_mem", + help="Skip N+1 memory redundancy tests", + action="store_true", + default=False,), + ], "", "Does a check on the cluster configuration"), + 'verify-disks': (VerifyDisks, ARGS_NONE, [DEBUG_OPT], + "", "Does a check on the cluster disk status"), 'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT], "", "Makes the current node the master"), 'version': (ShowClusterVersion, ARGS_NONE, [DEBUG_OPT], @@ -244,8 +637,51 @@ commands = { "Runs a command on all (or only some) nodes"), 'info': (ShowClusterConfig, ARGS_NONE, [DEBUG_OPT], "", "Show cluster configuration"), + 'list-tags': (ListTags, ARGS_NONE, + [DEBUG_OPT], "", "List the tags of the cluster"), + 'add-tags': (AddTags, ARGS_ANY, [DEBUG_OPT, TAG_SRC_OPT], + "tag...", "Add tags to the cluster"), + 'remove-tags': (RemoveTags, ARGS_ANY, [DEBUG_OPT, TAG_SRC_OPT], + "tag...", "Remove tags from the cluster"), + 'search-tags': (SearchTags, ARGS_ONE, + [DEBUG_OPT], "", "Searches the tags on all objects on" + " the cluster for a given pattern (regex)"), + 'queue': (QueueOps, ARGS_ONE, [DEBUG_OPT], + "drain|undrain|info", "Change queue properties"), + 'modify': (SetClusterParams, ARGS_NONE, + [DEBUG_OPT, + make_option("-g", "--vg-name", dest="vg_name", + help="Specify the volume group name " + " (cluster-wide) for disk allocation " + "and enable lvm based storage", + metavar="VG",), + make_option("--no-lvm-storage", dest="lvm_storage", + help="Disable support for lvm based instances" + " (cluster-wide)", + action="store_false", default=True,), + make_option("--enabled-hypervisors", dest="enabled_hypervisors", + help="Comma-separated list of hypervisors", + type="string", default=None), + ikv_option("-H", "--hypervisor-parameters", dest="hvparams", + help="Hypervisor and hypervisor options, in the" + " format" + " hypervisor:option=value,option=value,...", + default=[], + action="append", + type="identkeyval"), + keyval_option("-B", "--backend-parameters", dest="beparams", + type="keyval", default={}, + help="Backend parameters"), + keyval_option("-N", "--nic-parameters", dest="nicparams", + type="keyval", default={}, + help="NIC parameters"), + make_option("-C", "--candidate-pool-size", default=None, + help="Set the candidate pool size", + dest="candidate_pool_size", type="int"), + ], + "[opts...]", + "Alters the parameters of the cluster"), } if __name__ == '__main__': - retcode = GenericMain(commands) - sys.exit(retcode) + sys.exit(GenericMain(commands, override={"tag_type": constants.TAG_CLUSTER}))