X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/8e2524c36ef76d56fe841156a3c57887c6a9ab3e..b6b45e0d6251cd870658be36004ce4116f4a63c6:/lib/bootstrap.py diff --git a/lib/bootstrap.py b/lib/bootstrap.py index 5ff75f8..b228433 100644 --- a/lib/bootstrap.py +++ b/lib/bootstrap.py @@ -28,6 +28,7 @@ import os.path import re import logging import tempfile +import time from ganeti import rpc from ganeti import ssh @@ -37,6 +38,7 @@ from ganeti import config from ganeti import constants from ganeti import objects from ganeti import ssconf +from ganeti import serializer from ganeti import hypervisor @@ -61,11 +63,7 @@ def _InitSSHSetup(): raise errors.OpExecError("Could not generate ssh keypair, error %s" % result.output) - f = open(pub_key, 'r') - try: - utils.AddAuthorizedKey(auth_keys, f.read(8192)) - finally: - f.close() + utils.AddAuthorizedKey(auth_keys, utils.ReadFile(pub_key)) def _GenerateSelfSignedSslCert(file_name, validity=(365 * 5)): @@ -79,24 +77,27 @@ def _GenerateSelfSignedSslCert(file_name, validity=(365 * 5)): """ (fd, tmp_file_name) = tempfile.mkstemp(dir=os.path.dirname(file_name)) try: - # Set permissions before writing key - os.chmod(tmp_file_name, 0600) - - result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024", - "-days", str(validity), "-nodes", "-x509", - "-keyout", tmp_file_name, "-out", tmp_file_name, - "-batch"]) - if result.failed: - raise errors.OpExecError("Could not generate SSL certificate, command" - " %s had exitcode %s and error message %s" % - (result.cmd, result.exit_code, result.output)) - - # Make read-only - os.chmod(tmp_file_name, 0400) - - os.rename(tmp_file_name, file_name) + try: + # Set permissions before writing key + os.chmod(tmp_file_name, 0600) + + result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024", + "-days", str(validity), "-nodes", "-x509", + "-keyout", tmp_file_name, "-out", tmp_file_name, + "-batch"]) + if result.failed: + raise errors.OpExecError("Could not generate SSL certificate, command" + " %s had exitcode %s and error message %s" % + (result.cmd, result.exit_code, result.output)) + + # Make read-only + os.chmod(tmp_file_name, 0400) + + os.rename(tmp_file_name, file_name) + finally: + utils.RemoveFile(tmp_file_name) finally: - utils.RemoveFile(tmp_file_name) + os.close(fd) def _InitGanetiServerSetup(): @@ -112,6 +113,11 @@ def _InitGanetiServerSetup(): if not os.path.exists(constants.RAPI_CERT_FILE): _GenerateSelfSignedSslCert(constants.RAPI_CERT_FILE) + if not os.path.exists(constants.HMAC_CLUSTER_KEY): + utils.WriteFile(constants.HMAC_CLUSTER_KEY, + data=utils.GenerateSecret(), + mode=0400) + result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"]) if result.failed: @@ -120,10 +126,11 @@ def _InitGanetiServerSetup(): (result.cmd, result.exit_code, result.output)) -def InitCluster(cluster_name, mac_prefix, def_bridge, +def InitCluster(cluster_name, mac_prefix, master_netdev, file_storage_dir, candidate_pool_size, - secondary_ip=None, vg_name=None, beparams=None, hvparams=None, - enabled_hypervisors=None, default_hypervisor=None): + secondary_ip=None, vg_name=None, beparams=None, + nicparams=None, hvparams=None, enabled_hypervisors=None, + modify_etc_hosts=True): """Initialise the cluster. @type candidate_pool_size: int @@ -134,6 +141,14 @@ def InitCluster(cluster_name, mac_prefix, def_bridge, if config.ConfigWriter.IsCluster(): raise errors.OpPrereqError("Cluster is already initialised") + if not enabled_hypervisors: + raise errors.OpPrereqError("Enabled hypervisors list must contain at" + " least one member") + invalid_hvs = set(enabled_hypervisors) - constants.HYPER_TYPES + if invalid_hvs: + raise errors.OpPrereqError("Enabled hypervisors contains invalid" + " entries: %s" % invalid_hvs) + hostname = utils.HostInfo() if hostname.ip.startswith("127."): @@ -208,6 +223,9 @@ def InitCluster(cluster_name, mac_prefix, def_bridge, utils.EnsureDirs(dirs) utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES) + utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES) + objects.NIC.CheckParameterSyntax(nicparams) + # hvparams is a mapping of hypervisor->hvparams dict for hv_name, hv_params in hvparams.iteritems(): utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) @@ -218,16 +236,16 @@ def InitCluster(cluster_name, mac_prefix, def_bridge, _InitGanetiServerSetup() # set up ssh config and /etc/hosts - f = open(constants.SSH_HOST_RSA_PUB, 'r') - try: - sshline = f.read() - finally: - f.close() + sshline = utils.ReadFile(constants.SSH_HOST_RSA_PUB) sshkey = sshline.split(" ")[1] - utils.AddHostToEtcHosts(hostname.name) + if modify_etc_hosts: + utils.AddHostToEtcHosts(hostname.name) + _InitSSHSetup() + now = time.time() + # init of cluster config file cluster_config = objects.Cluster( serial_no=1, @@ -235,7 +253,6 @@ def InitCluster(cluster_name, mac_prefix, def_bridge, highest_used_port=(constants.FIRST_DRBD_PORT - 1), mac_prefix=mac_prefix, volume_group_name=vg_name, - default_bridge=def_bridge, tcpudp_port_pool=set(), master_node=hostname.name, master_ip=clustername.ip, @@ -243,10 +260,14 @@ def InitCluster(cluster_name, mac_prefix, def_bridge, cluster_name=clustername.name, file_storage_dir=file_storage_dir, enabled_hypervisors=enabled_hypervisors, - default_hypervisor=default_hypervisor, - beparams={constants.BEGR_DEFAULT: beparams}, + beparams={constants.PP_DEFAULT: beparams}, + nicparams={constants.PP_DEFAULT: nicparams}, hvparams=hvparams, candidate_pool_size=candidate_pool_size, + modify_etc_hosts=modify_etc_hosts, + ctime=now, + mtime=now, + uuid=utils.NewUUID(), ) master_node_config = objects.Node(name=hostname.name, primary_ip=hostname.ip, @@ -255,16 +276,15 @@ def InitCluster(cluster_name, mac_prefix, def_bridge, master_candidate=True, offline=False, drained=False, ) - - sscfg = InitConfig(constants.CONFIG_VERSION, - cluster_config, master_node_config) - ssh.WriteKnownHostsFile(sscfg, constants.SSH_KNOWN_HOSTS_FILE) + InitConfig(constants.CONFIG_VERSION, cluster_config, master_node_config) cfg = config.ConfigWriter() + ssh.WriteKnownHostsFile(cfg, constants.SSH_KNOWN_HOSTS_FILE) cfg.Update(cfg.GetClusterInfo()) # start the master ip # TODO: Review rpc call from bootstrap - rpc.RpcRunner.call_node_start_master(hostname.name, True) + # TODO: Warn on failed start master + rpc.RpcRunner.call_node_start_master(hostname.name, True, False) def InitConfig(version, cluster_config, master_node_config, @@ -283,23 +303,21 @@ def InitConfig(version, cluster_config, master_node_config, @type cfg_file: string @param cfg_file: configuration file path - @rtype: L{ssconf.SimpleConfigWriter} - @return: initialized config instance - """ nodes = { master_node_config.name: master_node_config, } + now = time.time() config_data = objects.ConfigData(version=version, cluster=cluster_config, nodes=nodes, instances={}, - serial_no=1) - cfg = ssconf.SimpleConfigWriter.FromDict(config_data.ToDict(), cfg_file) - cfg.Save() - - return cfg + serial_no=1, + ctime=now, mtime=now) + utils.WriteFile(cfg_file, + data=serializer.Dump(config_data.ToDict()), + mode=0600) def FinalizeClusterDestroy(master): @@ -310,11 +328,14 @@ def FinalizeClusterDestroy(master): """ result = rpc.RpcRunner.call_node_stop_master(master, True) - if result.failed or not result.data: - logging.warning("Could not disable the master role") + msg = result.fail_msg + if msg: + logging.warning("Could not disable the master role: %s" % msg) result = rpc.RpcRunner.call_node_leave_cluster(master) - if result.failed or not result.data: - logging.warning("Could not shutdown the node daemon and cleanup the node") + msg = result.fail_msg + if msg: + logging.warning("Could not shutdown the node daemon and cleanup" + " the node: %s", msg) def SetupNodeDaemon(cluster_name, node, ssh_key_check): @@ -333,19 +354,23 @@ def SetupNodeDaemon(cluster_name, node, ssh_key_check): noded_cert = utils.ReadFile(constants.SSL_CERT_FILE) rapi_cert = utils.ReadFile(constants.RAPI_CERT_FILE) + hmac_key = utils.ReadFile(constants.HMAC_CLUSTER_KEY) # in the base64 pem encoding, neither '!' nor '.' are valid chars, # so we use this to detect an invalid certificate; as long as the # cert doesn't contain this, the here-document will be correctly - # parsed by the shell sequence below - if (re.search('^!EOF\.', noded_cert, re.MULTILINE) or - re.search('^!EOF\.', rapi_cert, re.MULTILINE)): - raise errors.OpExecError("invalid PEM encoding in the SSL certificate") + # parsed by the shell sequence below. HMAC keys are hexadecimal strings, + # so the same restrictions apply. + for content in (noded_cert, rapi_cert, hmac_key): + if re.search('^!EOF\.', content, re.MULTILINE): + raise errors.OpExecError("invalid SSL certificate or HMAC key") if not noded_cert.endswith("\n"): noded_cert += "\n" if not rapi_cert.endswith("\n"): rapi_cert += "\n" + if not hmac_key.endswith("\n"): + hmac_key += "\n" # set up inter-node password and certificate and restarts the node daemon # and then connect with ssh to set password and start ganeti-noded @@ -356,11 +381,15 @@ def SetupNodeDaemon(cluster_name, node, ssh_key_check): "%s!EOF.\n" "cat > '%s' << '!EOF.' && \n" "%s!EOF.\n" - "chmod 0400 %s %s && " + "cat > '%s' << '!EOF.' && \n" + "%s!EOF.\n" + "chmod 0400 %s %s %s && " "%s restart" % (constants.SSL_CERT_FILE, noded_cert, constants.RAPI_CERT_FILE, rapi_cert, + constants.HMAC_CLUSTER_KEY, hmac_key, constants.SSL_CERT_FILE, constants.RAPI_CERT_FILE, + constants.HMAC_CLUSTER_KEY, constants.NODE_INITD_SCRIPT)) result = sshrunner.Run(node, 'root', mycommand, batch=False, @@ -426,9 +455,10 @@ def MasterFailover(no_voting=False): logging.info("Setting master to %s, old master: %s", new_master, old_master) result = rpc.RpcRunner.call_node_stop_master(old_master, True) - if result.failed or not result.data: + msg = result.fail_msg + if msg: logging.error("Could not disable the master role on the old master" - " %s, please disable manually", old_master) + " %s, please disable manually: %s", old_master, msg) # Here we have a phase where no master should be running @@ -442,11 +472,11 @@ def MasterFailover(no_voting=False): # cluster info cfg.Update(cluster_info) - # 2.0.X: Don't start the master if no_voting is true - result = rpc.RpcRunner.call_node_start_master(new_master, not no_voting) - if result.failed or not result.data: + result = rpc.RpcRunner.call_node_start_master(new_master, True, no_voting) + msg = result.fail_msg + if msg: logging.error("Could not start the master role on the new master" - " %s, please check", new_master) + " %s, please check: %s", new_master, msg) rcode = 1 return rcode @@ -484,7 +514,7 @@ def GatherMasterVotes(node_list): @type node_list: list @param node_list: the list of nodes to query for master info; the current - node wil be removed if it is in the list + node will be removed if it is in the list @rtype: list @return: list of (node, votes) @@ -505,9 +535,16 @@ def GatherMasterVotes(node_list): votes = {} for node in results: nres = results[node] - data = nres.data - if nres.failed or not isinstance(data, (tuple, list)) or len(data) < 3: - # here the rpc layer should have already logged errors + data = nres.payload + msg = nres.fail_msg + fail = False + if msg: + logging.warning("Error contacting node %s: %s", node, msg) + fail = True + elif not isinstance(data, (tuple, list)) or len(data) < 3: + logging.warning("Invalid data received from node %s: %s", node, data) + fail = True + if fail: if None not in votes: votes[None] = 0 votes[None] += 1