diff --git a/lib/backend.py b/lib/backend.py
index 9c3f094..908ccf1 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -1,7 +1,7 @@
 #
 #

-# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -54,22 +54,28 @@ from ganeti import utils
 from ganeti import ssh
 from ganeti import hypervisor
 from ganeti import constants
-from ganeti import bdev
+from ganeti.storage import bdev
+from ganeti.storage import drbd
 from ganeti import objects
 from ganeti import ssconf
 from ganeti import serializer
 from ganeti import netutils
 from ganeti import runtime
-from ganeti import mcpu
 from ganeti import compat
+from ganeti import pathutils
+from ganeti import vcluster
+from ganeti import ht
+from ganeti.storage.base import BlockDev
+from ganeti.storage.drbd import DRBD8
+from ganeti import hooksmaster


 _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
-_ALLOWED_CLEAN_DIRS = frozenset([
-  constants.DATA_DIR,
-  constants.JOB_QUEUE_ARCHIVE_DIR,
-  constants.QUEUE_DIR,
-  constants.CRYPTO_KEYS_DIR,
+_ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([
+  pathutils.DATA_DIR,
+  pathutils.JOB_QUEUE_ARCHIVE_DIR,
+  pathutils.QUEUE_DIR,
+  pathutils.CRYPTO_KEYS_DIR,
   ])
 _MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60
 _X509_KEY_FILE = "key"
@@ -79,7 +85,24 @@ _IES_PID_FILE = "pid"
 _IES_CA_FILE = "ca"

 #: Valid LVS output line regex
-_LVSLINE_REGEX = re.compile("^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6})\|?$")
+_LVSLINE_REGEX = re.compile("^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$")
+
+# Actions for the master setup script
+_MASTER_START = "start"
+_MASTER_STOP = "stop"
+
+#: Maximum file permissions for restricted command directory and executables
+_RCMD_MAX_MODE = (stat.S_IRWXU |
+                  stat.S_IRGRP | stat.S_IXGRP |
+                  stat.S_IROTH | stat.S_IXOTH)
+
+#: Delay before returning an error for restricted commands
+_RCMD_INVALID_DELAY = 10
+
+#: How long to wait to acquire lock for restricted commands (shorter than
+#: L{_RCMD_INVALID_DELAY}) to reduce blocking of noded forks when many
+#: command requests arrive
+_RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8


 class RPCFail(Exception):
@@ -90,6 +113,34 @@ class RPCFail(Exception):

   """


+def _GetInstReasonFilename(instance_name):
+  """Path of the file containing the reason of the instance status change.
+
+  @type instance_name: string
+  @param instance_name: The name of the instance
+  @rtype: string
+  @return: The path of the file
+
+  """
+  return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)
+
+
+def _StoreInstReasonTrail(instance_name, trail):
+  """Serialize a reason trail for an instance state change to a file.
+
+  The exact location of the file depends on the name of the instance and on
+  the configuration of the Ganeti cluster defined at deploy time.
+
+  @type instance_name: string
+  @param instance_name: The name of the instance
+  @rtype: None
+
+  """
+  json = serializer.DumpJson(trail)
+  filename = _GetInstReasonFilename(instance_name)
+  utils.WriteFile(filename, data=json)
+
+
 def _Fail(msg, *args, **kwargs):
   """Log an error and then raise an RPCFail exception.
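The hunk above introduces the per-instance "reason trail": _StoreInstReasonTrail serializes the trail as JSON into a file named after the instance under pathutils.INSTANCE_REASON_DIR. A minimal standalone sketch of that write path follows (not part of the patch; the directory value and the (source, reason, timestamp) entry layout are assumptions):

    import json
    import os.path

    INSTANCE_REASON_DIR = "/var/lib/ganeti/instance-reasons"  # assumed value

    def store_reason_trail(instance_name, trail):
      # One JSON document per instance; every state change rewrites it.
      # The patch itself goes through serializer.DumpJson/utils.WriteFile.
      # Assumes INSTANCE_REASON_DIR already exists.
      filename = os.path.join(INSTANCE_REASON_DIR, instance_name)
      with open(filename, "w") as fd:
        fd.write(json.dumps(trail))

    # Example: a trail is a list of (source, reason, timestamp) entries.
    store_reason_trail("instance1.example.com",
                       [("gnt:client", "user shutdown", 1367765353)])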
@@ -193,22 +244,25 @@ def _BuildUploadFileList(): """ allowed_files = set([ - constants.CLUSTER_CONF_FILE, - constants.ETC_HOSTS, - constants.SSH_KNOWN_HOSTS_FILE, - constants.VNC_PASSWORD_FILE, - constants.RAPI_CERT_FILE, - constants.SPICE_CERT_FILE, - constants.SPICE_CACERT_FILE, - constants.RAPI_USERS_FILE, - constants.CONFD_HMAC_KEY, - constants.CLUSTER_DOMAIN_SECRET_FILE, + pathutils.CLUSTER_CONF_FILE, + pathutils.ETC_HOSTS, + pathutils.SSH_KNOWN_HOSTS_FILE, + pathutils.VNC_PASSWORD_FILE, + pathutils.RAPI_CERT_FILE, + pathutils.SPICE_CERT_FILE, + pathutils.SPICE_CACERT_FILE, + pathutils.RAPI_USERS_FILE, + pathutils.CONFD_HMAC_KEY, + pathutils.CLUSTER_DOMAIN_SECRET_FILE, ]) for hv_name in constants.HYPER_TYPES: hv_class = hypervisor.GetHypervisorClass(hv_name) allowed_files.update(hv_class.GetAncillaryFiles()[0]) + assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \ + "Allowed file storage paths should never be uploaded via RPC" + return frozenset(allowed_files) @@ -222,8 +276,8 @@ def JobQueuePurge(): @return: True, None """ - _CleanDirectory(constants.QUEUE_DIR, exclude=[constants.JOB_QUEUE_LOCK_FILE]) - _CleanDirectory(constants.JOB_QUEUE_ARCHIVE_DIR) + _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE]) + _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR) def GetMasterInfo(): @@ -248,7 +302,7 @@ def GetMasterInfo(): except errors.ConfigurationError, err: _Fail("Cluster configuration incomplete: %s", err, exc=True) return (master_netdev, master_ip, master_node, primary_ip_family, - master_netmask) + master_netmask) def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn): @@ -274,10 +328,10 @@ def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn): cfg = _GetConfig() hr = HooksRunner() - hm = mcpu.HooksMaster(hook_opcode, hooks_path, nodes, hr.RunLocalHooks, - None, env_fn, logging.warning, cfg.GetClusterName(), - cfg.GetMasterNode()) - + hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes, + hr.RunLocalHooks, None, env_fn, + logging.warning, cfg.GetClusterName(), + cfg.GetMasterNode()) hm.RunPhase(constants.HOOKS_PHASE_PRE) result = fn(*args, **kwargs) hm.RunPhase(constants.HOOKS_PHASE_POST) @@ -287,67 +341,73 @@ def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn): return decorator -def _BuildMasterIpEnv(master_params): +def _BuildMasterIpEnv(master_params, use_external_mip_script=None): """Builds environment variables for master IP hooks. @type master_params: L{objects.MasterNetworkParameters} @param master_params: network parameters of the master + @type use_external_mip_script: boolean + @param use_external_mip_script: whether to use an external master IP + address setup script (unused, but necessary per the implementation of the + _RunLocalHooks decorator) """ + # pylint: disable=W0613 ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family) env = { "MASTER_NETDEV": master_params.netdev, "MASTER_IP": master_params.ip, - "MASTER_NETMASK": master_params.netmask, + "MASTER_NETMASK": str(master_params.netmask), "CLUSTER_IP_VERSION": str(ver), } return env -@RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup", - _BuildMasterIpEnv) -def ActivateMasterIp(master_params): - """Activate the IP address of the master daemon. +def _RunMasterSetupScript(master_params, action, use_external_mip_script): + """Execute the master IP address setup script. 
@type master_params: L{objects.MasterNetworkParameters} @param master_params: network parameters of the master + @type action: string + @param action: action to pass to the script. Must be one of + L{backend._MASTER_START} or L{backend._MASTER_STOP} + @type use_external_mip_script: boolean + @param use_external_mip_script: whether to use an external master IP + address setup script + @raise backend.RPCFail: if there are errors during the execution of the + script """ - err_msg = None - if netutils.TcpPing(master_params.ip, constants.DEFAULT_NODED_PORT): - if netutils.IPAddress.Own(master_params.ip): - # we already have the ip: - logging.debug("Master IP already configured, doing nothing") - else: - err_msg = "Someone else has the master ip, not activating" - logging.error(err_msg) + env = _BuildMasterIpEnv(master_params) + + if use_external_mip_script: + setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT else: - ipcls = netutils.IPAddress.GetClassFromIpFamily(master_params.ip_family) + setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT - result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add", - "%s/%s" % (master_params.ip, master_params.netmask), - "dev", master_params.netdev, "label", - "%s:0" % master_params.netdev]) - if result.failed: - err_msg = "Can't activate master IP: %s" % result.output - logging.error(err_msg) + result = utils.RunCmd([setup_script, action], env=env, reset_env=True) - else: - # we ignore the exit code of the following cmds - if ipcls == netutils.IP4Address: - utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_params.netdev, - "-s", master_params.ip, master_params.ip]) - elif ipcls == netutils.IP6Address: - try: - utils.RunCmd(["ndisc6", "-q", "-r 3", master_params.ip, - master_params.netdev]) - except errors.OpExecError: - # TODO: Better error reporting - logging.warning("Can't execute ndisc6, please install if missing") + if result.failed: + _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" % + (action, result.exit_code, result.output), log=True) - if err_msg: - _Fail(err_msg) + +@RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup", + _BuildMasterIpEnv) +def ActivateMasterIp(master_params, use_external_mip_script): + """Activate the IP address of the master daemon. + + @type master_params: L{objects.MasterNetworkParameters} + @param master_params: network parameters of the master + @type use_external_mip_script: boolean + @param use_external_mip_script: whether to use an external master IP + address setup script + @raise RPCFail: in case of errors during the IP startup + + """ + _RunMasterSetupScript(master_params, _MASTER_START, + use_external_mip_script) def StartMasterDaemons(no_voting): @@ -371,7 +431,7 @@ def StartMasterDaemons(no_voting): "EXTRA_MASTERD_ARGS": masterd_args, } - result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"], env=env) + result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env) if result.failed: msg = "Can't start Ganeti master: %s" % result.output logging.error(msg) @@ -380,22 +440,19 @@ def StartMasterDaemons(no_voting): @RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown", _BuildMasterIpEnv) -def DeactivateMasterIp(master_params): +def DeactivateMasterIp(master_params, use_external_mip_script): """Deactivate the master IP on this node. 
@type master_params: L{objects.MasterNetworkParameters} @param master_params: network parameters of the master + @type use_external_mip_script: boolean + @param use_external_mip_script: whether to use an external master IP + address setup script + @raise RPCFail: in case of errors during the IP turndown """ - # TODO: log and report back to the caller the error failures; we - # need to decide in which case we fail the RPC for this - - result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del", - "%s/%s" % (master_params.ip, master_params.netmask), - "dev", master_params.netdev]) - if result.failed: - logging.error("Can't remove the master IP, error: %s", result.output) - # but otherwise ignore the failure + _RunMasterSetupScript(master_params, _MASTER_STOP, + use_external_mip_script) def StopMasterDaemons(): @@ -409,7 +466,7 @@ def StopMasterDaemons(): # TODO: log and report back to the caller the error failures; we # need to decide in which case we fail the RPC for this - result = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"]) + result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"]) if result.failed: logging.error("Could not stop Ganeti master, command %s had exitcode %s" " and error %s", @@ -428,19 +485,23 @@ def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev): if old_netmask == netmask: return + if not netutils.IPAddress.Own(master_ip): + _Fail("The master IP address is not up, not attempting to change its" + " netmask") + result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add", "%s/%s" % (master_ip, netmask), "dev", master_netdev, "label", "%s:0" % master_netdev]) if result.failed: - _Fail("Could not change the master IP netmask") + _Fail("Could not set the new netmask on the master IP address") result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del", "%s/%s" % (master_ip, old_netmask), "dev", master_netdev, "label", "%s:0" % master_netdev]) if result.failed: - _Fail("Could not change the master IP netmask") + _Fail("Could not bring down the master IP address with the old netmask") def EtcHostsModify(mode, host, ip): @@ -478,13 +539,13 @@ def LeaveCluster(modify_ssh_setup): @param modify_ssh_setup: boolean """ - _CleanDirectory(constants.DATA_DIR) - _CleanDirectory(constants.CRYPTO_KEYS_DIR) + _CleanDirectory(pathutils.DATA_DIR) + _CleanDirectory(pathutils.CRYPTO_KEYS_DIR) JobQueuePurge() if modify_ssh_setup: try: - priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS) + priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER) utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key)) @@ -494,15 +555,15 @@ def LeaveCluster(modify_ssh_setup): logging.exception("Error while processing ssh files") try: - utils.RemoveFile(constants.CONFD_HMAC_KEY) - utils.RemoveFile(constants.RAPI_CERT_FILE) - utils.RemoveFile(constants.SPICE_CERT_FILE) - utils.RemoveFile(constants.SPICE_CACERT_FILE) - utils.RemoveFile(constants.NODED_CERT_FILE) + utils.RemoveFile(pathutils.CONFD_HMAC_KEY) + utils.RemoveFile(pathutils.RAPI_CERT_FILE) + utils.RemoveFile(pathutils.SPICE_CERT_FILE) + utils.RemoveFile(pathutils.SPICE_CACERT_FILE) + utils.RemoveFile(pathutils.NODED_CERT_FILE) except: # pylint: disable=W0702 logging.exception("Error while removing cluster secrets") - result = utils.RunCmd([constants.DAEMON_UTIL, "stop", constants.CONFD]) + result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop", constants.CONFD]) if result.failed: logging.error("Command %s failed with exitcode %s and error %s", result.cmd, 
result.exit_code, result.output)

@@ -511,47 +572,265 @@ def LeaveCluster(modify_ssh_setup):
   raise errors.QuitGanetiException(True, "Shutdown scheduled")


-def GetNodeInfo(vgname, hypervisor_type):
+def _GetVgInfo(name, excl_stor):
+  """Retrieves information about an LVM volume group.
+
+  """
+  # TODO: GetVGInfo supports returning information for multiple VGs at once
+  vginfo = bdev.LogicalVolume.GetVGInfo([name], excl_stor)
+  if vginfo:
+    vg_free = int(round(vginfo[0][0], 0))
+    vg_size = int(round(vginfo[0][1], 0))
+  else:
+    vg_free = None
+    vg_size = None
+
+  return {
+    "name": name,
+    "vg_free": vg_free,
+    "vg_size": vg_size,
+    }
+
+
+def _GetVgSpindlesInfo(name, excl_stor):
+  """Retrieves information about spindles in an LVM volume group.
+
+  @type name: string
+  @param name: VG name
+  @type excl_stor: bool
+  @param excl_stor: exclusive storage
+  @rtype: dict
+  @return: dictionary whose keys are "name", "vg_free", "vg_size" for VG name,
+      free spindles, total spindles respectively
+
+  """
+  if excl_stor:
+    (vg_free, vg_size) = bdev.LogicalVolume.GetVgSpindlesInfo(name)
+  else:
+    vg_free = 0
+    vg_size = 0
+  return {
+    "name": name,
+    "vg_free": vg_free,
+    "vg_size": vg_size,
+    }
+
+
+def _GetHvInfo(name):
+  """Retrieves node information from a hypervisor.
+
+  The information returned depends on the hypervisor. Common items:
+
+    - vg_size is the size of the configured volume group in MiB
+    - vg_free is the free size of the volume group in MiB
+    - memory_dom0 is the memory allocated for domain0 in MiB
+    - memory_free is the currently available (free) RAM in MiB
+    - memory_total is the total amount of RAM in MiB
+    - hv_version: the hypervisor version, if available
+
+  """
+  return hypervisor.GetHypervisor(name).GetNodeInfo()
+
+
+def _GetNamedNodeInfo(names, fn):
+  """Calls C{fn} for all names in C{names} and returns a list of results.
+
+  @rtype: None or list
+
+  """
+  if names is None:
+    return None
+  else:
+    return map(fn, names)
+
+
+def GetNodeInfo(storage_units, hv_names, excl_stor):
   """Gives back a hash with different information about the node.

-  @type vgname: C{string}
-  @param vgname: the name of the volume group to ask for disk space information
-  @type hypervisor_type: C{str}
-  @param hypervisor_type: the name of the hypervisor to ask for
-      memory information
-  @rtype: C{dict}
-  @return: dictionary with the following keys:
-      - vg_size is the size of the configured volume group in MiB
-      - vg_free is the free size of the volume group in MiB
-      - memory_dom0 is the memory allocated for domain0 in MiB
-      - memory_free is the currently available (free) ram in MiB
-      - memory_total is the total number of ram in MiB
-      - hv_version: the hypervisor version, if available
+  @type storage_units: list of pairs (string, string)
+  @param storage_units: List of pairs (storage unit, identifier) to ask for disk
+                        space information. In case of lvm-vg, the identifier is
+                        the VG name.
+  @type hv_names: list of string
+  @param hv_names: Names of the hypervisors to ask for node information
+  @type excl_stor: boolean
+  @param excl_stor: Whether exclusive_storage is active
+  @rtype: tuple; (string, None/list, None/list)
+  @return: Tuple containing boot ID, volume group information and hypervisor
+    information
+
+  """
+  bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
+  storage_info = _GetNamedNodeInfo(
+    storage_units,
+    (lambda storage_unit: _ApplyStorageInfoFunction(storage_unit[0],
+                                                    storage_unit[1],
+                                                    excl_stor)))
+  hv_info = _GetNamedNodeInfo(hv_names, _GetHvInfo)
+
+  return (bootid, storage_info, hv_info)
+
+
+# FIXME: implement storage reporting for all missing storage types.
+_STORAGE_TYPE_INFO_FN = {
+  constants.ST_BLOCK: None,
+  constants.ST_DISKLESS: None,
+  constants.ST_EXT: None,
+  constants.ST_FILE: None,
+  constants.ST_LVM_PV: _GetVgSpindlesInfo,
+  constants.ST_LVM_VG: _GetVgInfo,
+  constants.ST_RADOS: None,
+}
+
+
+def _ApplyStorageInfoFunction(storage_type, storage_key, *args):
+  """Looks up and applies the correct function to calculate free and total
+  storage for the given storage type.
+
+  @type storage_type: string
+  @param storage_type: the storage type for which the storage shall be reported.
+  @type storage_key: string
+  @param storage_key: identifier of a storage unit, e.g. the volume group name
+    of an LVM storage unit
+  @type args: any
+  @param args: various parameters that can be used for storage reporting. These
+    parameters and their semantics vary from storage type to storage type and
+    are just propagated in this function.
+  @return: the results of the application of the storage space function (see
+    _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that
+    storage type
+  @raises NotImplementedError: for storage types that don't support space
+    reporting yet
+  """
+  fn = _STORAGE_TYPE_INFO_FN[storage_type]
+  if fn is not None:
+    return fn(storage_key, *args)
+  else:
+    raise NotImplementedError
+
+
+def _CheckExclusivePvs(pvi_list):
+  """Check that PVs are not shared among LVs
+
+  @type pvi_list: list of L{objects.LvmPvInfo} objects
+  @param pvi_list: information about the PVs
+
+  @rtype: list of tuples (string, list of strings)
+  @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...])
+
+  """
+  res = []
+  for pvi in pvi_list:
+    if len(pvi.lv_list) > 1:
+      res.append((pvi.name, pvi.lv_list))
+  return res
+
+
+def _VerifyHypervisors(what, vm_capable, result, all_hvparams,
+                       get_hv_fn=hypervisor.GetHypervisor):
+  """Verifies the hypervisor. Appends the results to the 'results' list.
+ + @type what: C{dict} + @param what: a dictionary of things to check + @type vm_capable: boolean + @param vm_capable: whether or not this node is vm capable + @type result: dict + @param result: dictionary of verification results; results of the + verifications in this function will be added here + @type all_hvparams: dict of dict of string + @param all_hvparams: dictionary mapping hypervisor names to hvparams + @type get_hv_fn: function + @param get_hv_fn: function to retrieve the hypervisor, to improve testability """ - outputarray = {} + if not vm_capable: + return + + if constants.NV_HYPERVISOR in what: + result[constants.NV_HYPERVISOR] = {} + for hv_name in what[constants.NV_HYPERVISOR]: + hvparams = all_hvparams[hv_name] + try: + val = get_hv_fn(hv_name).Verify(hvparams=hvparams) + except errors.HypervisorError, err: + val = "Error while checking hypervisor: %s" % str(err) + result[constants.NV_HYPERVISOR][hv_name] = val - if vgname is not None: - vginfo = bdev.LogicalVolume.GetVGInfo([vgname]) - vg_free = vg_size = None - if vginfo: - vg_free = int(round(vginfo[0][0], 0)) - vg_size = int(round(vginfo[0][1], 0)) - outputarray["vg_size"] = vg_size - outputarray["vg_free"] = vg_free - if hypervisor_type is not None: - hyper = hypervisor.GetHypervisor(hypervisor_type) - hyp_info = hyper.GetNodeInfo() - if hyp_info is not None: - outputarray.update(hyp_info) +def _VerifyHvparams(what, vm_capable, result, + get_hv_fn=hypervisor.GetHypervisor): + """Verifies the hvparams. Appends the results to the 'results' list. - outputarray["bootid"] = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n") + @type what: C{dict} + @param what: a dictionary of things to check + @type vm_capable: boolean + @param vm_capable: whether or not this node is vm capable + @type result: dict + @param result: dictionary of verification results; results of the + verifications in this function will be added here + @type get_hv_fn: function + @param get_hv_fn: function to retrieve the hypervisor, to improve testability - return outputarray + """ + if not vm_capable: + return + if constants.NV_HVPARAMS in what: + result[constants.NV_HVPARAMS] = [] + for source, hv_name, hvparms in what[constants.NV_HVPARAMS]: + try: + logging.info("Validating hv %s, %s", hv_name, hvparms) + get_hv_fn(hv_name).ValidateParameters(hvparms) + except errors.HypervisorError, err: + result[constants.NV_HVPARAMS].append((source, hv_name, str(err))) + + +def _VerifyInstanceList(what, vm_capable, result, all_hvparams): + """Verifies the instance list. + + @type what: C{dict} + @param what: a dictionary of things to check + @type vm_capable: boolean + @param vm_capable: whether or not this node is vm capable + @type result: dict + @param result: dictionary of verification results; results of the + verifications in this function will be added here + @type all_hvparams: dict of dict of string + @param all_hvparams: dictionary mapping hypervisor names to hvparams + + """ + if constants.NV_INSTANCELIST in what and vm_capable: + # GetInstanceList can fail + try: + val = GetInstanceList(what[constants.NV_INSTANCELIST], + all_hvparams=all_hvparams) + except RPCFail, err: + val = str(err) + result[constants.NV_INSTANCELIST] = val + + +def _VerifyNodeInfo(what, vm_capable, result, all_hvparams): + """Verifies the node info. 
+ + @type what: C{dict} + @param what: a dictionary of things to check + @type vm_capable: boolean + @param vm_capable: whether or not this node is vm capable + @type result: dict + @param result: dictionary of verification results; results of the + verifications in this function will be added here + @type all_hvparams: dict of dict of string + @param all_hvparams: dictionary mapping hypervisor names to hvparams -def VerifyNode(what, cluster_name): + """ + if constants.NV_HVINFO in what and vm_capable: + hvname = what[constants.NV_HVINFO] + hyper = hypervisor.GetHypervisor(hvname) + hvparams = all_hvparams[hvname] + result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams) + + +def VerifyNode(what, cluster_name, all_hvparams): """Verify the status of the local node. Based on the input L{what} parameter, various checks are done on the @@ -575,6 +854,10 @@ def VerifyNode(what, cluster_name): - node-net-test: list of nodes we should check node daemon port connectivity with - hypervisor: list with hypervisors to run the verify for + @type cluster_name: string + @param cluster_name: the cluster's name + @type all_hvparams: dict of dict of strings + @param all_hvparams: a dictionary mapping hypervisor names to hvparams @rtype: dict @return: a dictionary with the same keys as the input dict, and values representing the result of the checks @@ -585,27 +868,15 @@ def VerifyNode(what, cluster_name): port = netutils.GetDaemonPort(constants.NODED) vm_capable = my_name not in what.get(constants.NV_VMNODES, []) - if constants.NV_HYPERVISOR in what and vm_capable: - result[constants.NV_HYPERVISOR] = tmp = {} - for hv_name in what[constants.NV_HYPERVISOR]: - try: - val = hypervisor.GetHypervisor(hv_name).Verify() - except errors.HypervisorError, err: - val = "Error while checking hypervisor: %s" % str(err) - tmp[hv_name] = val - - if constants.NV_HVPARAMS in what and vm_capable: - result[constants.NV_HVPARAMS] = tmp = [] - for source, hv_name, hvparms in what[constants.NV_HVPARAMS]: - try: - logging.info("Validating hv %s, %s", hv_name, hvparms) - hypervisor.GetHypervisor(hv_name).ValidateParameters(hvparms) - except errors.HypervisorError, err: - tmp.append((source, hv_name, str(err))) + _VerifyHypervisors(what, vm_capable, result, all_hvparams) + _VerifyHvparams(what, vm_capable, result) if constants.NV_FILELIST in what: - result[constants.NV_FILELIST] = utils.FingerprintFiles( - what[constants.NV_FILELIST]) + fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath, + what[constants.NV_FILELIST])) + result[constants.NV_FILELIST] = \ + dict((vcluster.MakeVirtualPath(key), value) + for (key, value) in fingerprints.items()) if constants.NV_NODELIST in what: (nodes, bynode) = what[constants.NV_NODELIST] @@ -660,7 +931,12 @@ def VerifyNode(what, cluster_name): else: source = None result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port, - source=source) + source=source) + + if constants.NV_USERSCRIPTS in what: + result[constants.NV_USERSCRIPTS] = \ + [script for script in what[constants.NV_USERSCRIPTS] + if not utils.IsExecutable(script)] if constants.NV_OOB_PATHS in what: result[constants.NV_OOB_PATHS] = tmp = [] @@ -685,33 +961,40 @@ def VerifyNode(what, cluster_name): val = str(err) result[constants.NV_LVLIST] = val - if constants.NV_INSTANCELIST in what and vm_capable: - # GetInstanceList can fail - try: - val = GetInstanceList(what[constants.NV_INSTANCELIST]) - except RPCFail, err: - val = str(err) - result[constants.NV_INSTANCELIST] = val + _VerifyInstanceList(what, 
vm_capable, result, all_hvparams) if constants.NV_VGLIST in what and vm_capable: result[constants.NV_VGLIST] = utils.ListVolumeGroups() if constants.NV_PVLIST in what and vm_capable: - result[constants.NV_PVLIST] = \ - bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST], - filter_allocatable=False) + check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what + val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST], + filter_allocatable=False, + include_lvs=check_exclusive_pvs) + if check_exclusive_pvs: + result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val) + for pvi in val: + # Avoid sending useless data on the wire + pvi.lv_list = [] + result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val) if constants.NV_VERSION in what: result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION, constants.RELEASE_VERSION) - if constants.NV_HVINFO in what and vm_capable: - hyper = hypervisor.GetHypervisor(what[constants.NV_HVINFO]) - result[constants.NV_HVINFO] = hyper.GetNodeInfo() + _VerifyNodeInfo(what, vm_capable, result, all_hvparams) + + if constants.NV_DRBDVERSION in what and vm_capable: + try: + drbd_version = DRBD8.GetProcInfo().GetVersionString() + except errors.BlockDeviceError, err: + logging.warning("Can't get DRBD version", exc_info=True) + drbd_version = str(err) + result[constants.NV_DRBDVERSION] = drbd_version if constants.NV_DRBDLIST in what and vm_capable: try: - used_minors = bdev.DRBD8.GetUsedDevs().keys() + used_minors = drbd.DRBD8.GetUsedDevs() except errors.BlockDeviceError, err: logging.warning("Can't get used minors list", exc_info=True) used_minors = str(err) @@ -720,7 +1003,7 @@ def VerifyNode(what, cluster_name): if constants.NV_DRBDHELPER in what and vm_capable: status = True try: - payload = bdev.BaseDRBD.GetUsermodeHelper() + payload = drbd.DRBD8.GetUsermodeHelper() except errors.BlockDeviceError, err: logging.error("Can't get DRBD usermode helper: %s", str(err)) status = False @@ -749,6 +1032,11 @@ def VerifyNode(what, cluster_name): result[constants.NV_BRIDGES] = [bridge for bridge in what[constants.NV_BRIDGES] if not utils.BridgeExists(bridge)] + + if what.get(constants.NV_FILE_STORAGE_PATHS) == my_name: + result[constants.NV_FILE_STORAGE_PATHS] = \ + bdev.ComputeWrongFileStoragePaths() + return result @@ -909,11 +1197,47 @@ def BridgesExist(bridges_list): _Fail("Missing bridges %s", utils.CommaJoin(missing)) -def GetInstanceList(hypervisor_list): +def GetInstanceListForHypervisor(hname, hvparams=None, + get_hv_fn=hypervisor.GetHypervisor): + """Provides a list of instances of the given hypervisor. + + @type hname: string + @param hname: name of the hypervisor + @type hvparams: dict of strings + @param hvparams: hypervisor parameters for the given hypervisor + @type get_hv_fn: function + @param get_hv_fn: function that returns a hypervisor for the given hypervisor + name; optional parameter to increase testability + + @rtype: list + @return: a list of all running instances on the current node + - instance1.example.com + - instance2.example.com + + """ + results = [] + try: + hv = get_hv_fn(hname) + names = hv.ListInstances(hvparams=hvparams) + results.extend(names) + except errors.HypervisorError, err: + _Fail("Error enumerating instances (hypervisor %s): %s", + hname, err, exc=True) + return results + + +def GetInstanceList(hypervisor_list, all_hvparams=None, + get_hv_fn=hypervisor.GetHypervisor): """Provides a list of instances. 
   @type hypervisor_list: list
   @param hypervisor_list: the list of hypervisors to query information
+  @type all_hvparams: dict of dict of strings
+  @param all_hvparams: a dictionary mapping hypervisor types to respective
+    cluster-wide hypervisor parameters
+  @type get_hv_fn: function
+  @param get_hv_fn: function that returns a hypervisor for the given hypervisor
+    name; optional parameter to increase testability

   @rtype: list
   @return: a list of all running instances on the current node
@@ -923,13 +1247,9 @@ def GetInstanceList(hypervisor_list):
   """
   results = []
   for hname in hypervisor_list:
-    try:
-      names = hypervisor.GetHypervisor(hname).ListInstances()
-      results.extend(names)
-    except errors.HypervisorError, err:
-      _Fail("Error enumerating instances (hypervisor %s): %s",
-            hname, err, exc=True)
-
+    hvparams = all_hvparams[hname]
+    results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams,
+                                                get_hv_fn=get_hv_fn))
   return results


@@ -946,6 +1266,7 @@ def GetInstanceInfo(instance, hname):
       - memory: memory size of instance (int)
       - state: xen state of instance (string)
       - time: cpu time of instance (float)
+      - vcpus: the number of vcpus (int)

   """
   output = {}
@@ -953,6 +1274,7 @@ def GetInstanceInfo(instance, hname):
   iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance)
   if iinfo is not None:
     output["memory"] = iinfo[2]
+    output["vcpus"] = iinfo[3]
     output["state"] = iinfo[4]
     output["time"] = iinfo[5]

@@ -960,7 +1282,7 @@ def GetInstanceInfo(instance, hname):

 def GetInstanceMigratable(instance):
-  """Gives whether an instance can be migrated.
+  """Computes whether an instance can be migrated.

   @type instance: L{objects.Instance}
   @param instance: object representing the instance to be checked.
@@ -973,7 +1295,7 @@ def GetInstanceMigratable(instance):
   """
   hyper = hypervisor.GetHypervisor(instance.hypervisor)
   iname = instance.name
-  if iname not in hyper.ListInstances():
+  if iname not in hyper.ListInstances(instance.hvparams):
     _Fail("Instance %s is not running", iname)

   for idx in range(len(instance.disks)):
@@ -1051,7 +1373,7 @@ def _InstanceLogName(kind, os_name, instance, component):
     c_msg = ""
   base = ("%s-%s-%s%s-%s.log" %
           (kind, os_name, instance, c_msg, utils.TimestampForFilename()))
-  return utils.PathJoin(constants.LOG_OS_DIR, base)
+  return utils.PathJoin(pathutils.LOG_OS_DIR, base)


 def InstanceOsAdd(instance, reinstall, debug):
@@ -1119,9 +1441,16 @@ def RunRenameInstance(instance, old_name, debug):
           " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)


-def _GetBlockDevSymlinkPath(instance_name, idx):
-  return utils.PathJoin(constants.DISK_LINKS_DIR, "%s%s%d" %
-                        (instance_name, constants.DISK_SEPARATOR, idx))
+def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):
+  """Returns symlink path for block device.
+
+  """
+  if _dir is None:
+    _dir = pathutils.DISK_LINKS_DIR
+
+  return utils.PathJoin(_dir,
+                        ("%s%s%s" %
+                         (instance_name, constants.DISK_SEPARATOR, idx)))


 def _SymlinkBlockDev(instance_name, device_path, idx):
@@ -1195,17 +1524,22 @@ def _GatherAndLinkBlockDevs(instance):
   return block_devices


-def StartInstance(instance, startup_paused):
+def StartInstance(instance, startup_paused, reason, store_reason=True):
   """Start an instance.

   @type instance: L{objects.Instance}
   @param instance: the instance object
   @type startup_paused: bool
   @param startup_paused: pause instance at startup?
+ @type reason: list of reasons + @param reason: the reason trail for this startup + @type store_reason: boolean + @param store_reason: whether to store the shutdown reason trail on file @rtype: None """ - running_instances = GetInstanceList([instance.hypervisor]) + running_instances = GetInstanceListForHypervisor(instance.hypervisor, + instance.hvparams) if instance.name in running_instances: logging.info("Instance %s already running, not starting", instance.name) @@ -1215,6 +1549,8 @@ def StartInstance(instance, startup_paused): block_devices = _GatherAndLinkBlockDevs(instance) hyper = hypervisor.GetHypervisor(instance.hypervisor) hyper.StartInstance(instance, block_devices, startup_paused) + if store_reason: + _StoreInstReasonTrail(instance.name, reason) except errors.BlockDeviceError, err: _Fail("Block device error: %s", err, exc=True) except errors.HypervisorError, err: @@ -1222,7 +1558,7 @@ def StartInstance(instance, startup_paused): _Fail("Hypervisor error: %s", err, exc=True) -def InstanceShutdown(instance, timeout): +def InstanceShutdown(instance, timeout, reason, store_reason=True): """Shut an instance down. @note: this functions uses polling with a hardcoded timeout. @@ -1231,6 +1567,10 @@ def InstanceShutdown(instance, timeout): @param instance: the instance object @type timeout: integer @param timeout: maximum timeout for soft shutdown + @type reason: list of reasons + @param reason: the reason trail for this shutdown + @type store_reason: boolean + @param store_reason: whether to store the shutdown reason trail on file @rtype: None """ @@ -1238,7 +1578,7 @@ def InstanceShutdown(instance, timeout): hyper = hypervisor.GetHypervisor(hv_name) iname = instance.name - if instance.name not in hyper.ListInstances(): + if instance.name not in hyper.ListInstances(instance.hvparams): logging.info("Instance %s not running, doing nothing", iname) return @@ -1247,13 +1587,15 @@ def InstanceShutdown(instance, timeout): self.tried_once = False def __call__(self): - if iname not in hyper.ListInstances(): + if iname not in hyper.ListInstances(instance.hvparams): return try: hyper.StopInstance(instance, retry=self.tried_once) + if store_reason: + _StoreInstReasonTrail(instance.name, reason) except errors.HypervisorError, err: - if iname not in hyper.ListInstances(): + if iname not in hyper.ListInstances(instance.hvparams): # if the instance is no longer existing, consider this a # success and go to cleanup return @@ -1273,14 +1615,14 @@ def InstanceShutdown(instance, timeout): try: hyper.StopInstance(instance, force=True) except errors.HypervisorError, err: - if iname in hyper.ListInstances(): + if iname in hyper.ListInstances(instance.hvparams): # only raise an error if the instance still exists, otherwise # the error could simply be "instance ... unknown"! _Fail("Failed to force stop instance %s: %s", iname, err) time.sleep(1) - if iname in hyper.ListInstances(): + if iname in hyper.ListInstances(instance.hvparams): _Fail("Could not shutdown instance %s even by destroy", iname) try: @@ -1291,7 +1633,7 @@ def InstanceShutdown(instance, timeout): _RemoveBlockDevLinks(iname, instance.disks) -def InstanceReboot(instance, reboot_type, shutdown_timeout): +def InstanceReboot(instance, reboot_type, shutdown_timeout, reason): """Reboot an instance. 
@type instance: L{objects.Instance} @@ -1309,10 +1651,13 @@ def InstanceReboot(instance, reboot_type, shutdown_timeout): instance (instead of a call_instance_reboot RPC) @type shutdown_timeout: integer @param shutdown_timeout: maximum timeout for soft shutdown + @type reason: list of reasons + @param reason: the reason trail for this reboot @rtype: None """ - running_instances = GetInstanceList([instance.hypervisor]) + running_instances = GetInstanceListForHypervisor(instance.hypervisor, + instance.hvparams) if instance.name not in running_instances: _Fail("Cannot reboot instance %s that is not running", instance.name) @@ -1325,14 +1670,37 @@ def InstanceReboot(instance, reboot_type, shutdown_timeout): _Fail("Failed to soft reboot instance %s: %s", instance.name, err) elif reboot_type == constants.INSTANCE_REBOOT_HARD: try: - InstanceShutdown(instance, shutdown_timeout) - return StartInstance(instance, False) + InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False) + result = StartInstance(instance, False, reason, store_reason=False) + _StoreInstReasonTrail(instance.name, reason) + return result except errors.HypervisorError, err: _Fail("Failed to hard reboot instance %s: %s", instance.name, err) else: _Fail("Invalid reboot_type received: %s", reboot_type) +def InstanceBalloonMemory(instance, memory): + """Resize an instance's memory. + + @type instance: L{objects.Instance} + @param instance: the instance object + @type memory: int + @param memory: new memory amount in MB + @rtype: None + + """ + hyper = hypervisor.GetHypervisor(instance.hypervisor) + running = hyper.ListInstances(instance.hvparams) + if instance.name not in running: + logging.info("Instance %s is not running, cannot balloon", instance.name) + return + try: + hyper.BalloonInstanceMemory(instance, memory) + except errors.HypervisorError, err: + _Fail("Failed to balloon instance memory: %s", err, exc=True) + + def MigrationInfo(instance): """Gather information about an instance to be migrated. @@ -1456,7 +1824,7 @@ def GetMigrationStatus(instance): _Fail("Failed to get migration status: %s", err, exc=True) -def BlockdevCreate(disk, size, owner, on_primary, info): +def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor): """Creates a block device for an instance. @type disk: L{objects.Disk} @@ -1471,6 +1839,8 @@ def BlockdevCreate(disk, size, owner, on_primary, info): @type info: string @param info: string that will be sent to the physical device creation, used for example to set (LVM) tags on LVs + @type excl_stor: boolean + @param excl_stor: Whether exclusive_storage is active @return: the new unique_id of the device (this can sometime be computed only after creation), or None. 
On secondary nodes, @@ -1497,7 +1867,7 @@ def BlockdevCreate(disk, size, owner, on_primary, info): clist.append(crdev) try: - device = bdev.Create(disk.dev_type, disk.physical_id, clist, disk.size) + device = bdev.Create(disk, clist, excl_stor) except errors.BlockDeviceError, err: _Fail("Can't create block device: %s", err) @@ -1506,7 +1876,6 @@ def BlockdevCreate(disk, size, owner, on_primary, info): device.Assemble() except errors.BlockDeviceError, err: _Fail("Can't assemble device after creation, unusual event: %s", err) - device.SetSyncSpeed(constants.SYNC_SPEED) if on_primary or disk.OpenOnSecondary(): try: device.Open(force=True) @@ -1528,8 +1897,13 @@ def _WipeDevice(path, offset, size): @param size: The size in MiB to write """ + # Internal sizes are always in Mebibytes; if the following "dd" command + # should use a different block size the offset and size given to this + # function must be adjusted accordingly before being passed to "dd". + block_size = 1024 * 1024 + cmd = [constants.DD_CMD, "if=/dev/zero", "seek=%d" % offset, - "bs=%d" % constants.WIPE_BLOCK_SIZE, "oflag=direct", "of=%s" % path, + "bs=%s" % block_size, "oflag=direct", "of=%s" % path, "count=%d" % size] result = utils.RunCmd(cmd) @@ -1558,6 +1932,10 @@ def BlockdevWipe(disk, offset, size): _Fail("Cannot execute wipe for device %s: device not found", disk.iv_name) # Do cross verify some of the parameters + if offset < 0: + _Fail("Negative offset") + if size < 0: + _Fail("Negative size") if offset > rdev.size: _Fail("Offset is bigger than device size") if (offset + size) > rdev.size: @@ -1680,8 +2058,7 @@ def _RecursiveAssembleBD(disk, owner, as_primary): children.append(cdev) if as_primary or disk.AssembleOnSecondary(): - r_dev = bdev.Assemble(disk.dev_type, disk.physical_id, children, disk.size) - r_dev.SetSyncSpeed(constants.SYNC_SPEED) + r_dev = bdev.Assemble(disk, children) result = r_dev if as_primary or disk.OpenOnSecondary(): r_dev.Open() @@ -1705,7 +2082,7 @@ def BlockdevAssemble(disk, owner, as_primary, idx): """ try: result = _RecursiveAssembleBD(disk, owner, as_primary) - if isinstance(result, bdev.BlockDev): + if isinstance(result, BlockDev): # pylint: disable=E1103 result = result.dev_path if as_primary: @@ -1874,7 +2251,7 @@ def _RecursiveFindBD(disk): for chdisk in disk.children: children.append(_RecursiveFindBD(chdisk)) - return bdev.FindDevice(disk.dev_type, disk.physical_id, children, disk.size) + return bdev.FindDevice(disk, children) def _OpenRealBD(disk): @@ -1916,7 +2293,7 @@ def BlockdevFind(disk): return rbd.GetSyncStatus() -def BlockdevGetsize(disks): +def BlockdevGetdimensions(disks): """Computes the size of the given disks. If a disk is not found, returns None instead. 
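The hunk above starts the BlockdevGetsize to BlockdevGetdimensions rename; as the next hunk's docstring spells out, each per-disk result entry becomes a (size, spindles) pair instead of a bare size. A hedged sketch of how a caller would consume the new payload (variable names are illustrative, not from this patch):

    # node_result: one entry per queried disk, either None (device not
    # found on the node) or a (size_mib, spindles) pair.
    for dims in node_result:
      if dims is None:
        continue  # missing device; left for the caller to report
      (size_mib, spindles) = dims
      if spindles is None:
        pass  # the device type does not support spindle reporting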
@@ -1925,7 +2302,8 @@ def BlockdevGetsize(disks): @param disks: the list of disk to compute the size for @rtype: list @return: list with elements None if the disk cannot be found, - otherwise the size + otherwise the pair (size, spindles), where spindles is None if the + device doesn't support that """ result = [] @@ -1938,7 +2316,7 @@ def BlockdevGetsize(disks): if rbd is None: result.append(None) else: - result.append(rbd.GetActualSize()) + result.append(rbd.GetActualDimensions()) return result @@ -1973,7 +2351,7 @@ def BlockdevExport(disk, dest_node, dest_path, cluster_name): " oflag=dsync", dest_path) remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node, - constants.GANETI_RUNAS, + constants.SSH_LOGIN_USER, destcmd) # all commands have been checked, so we're safe to combine them @@ -2009,6 +2387,8 @@ def UploadFile(file_name, data, mode, uid, gid, atime, mtime): @rtype: None """ + file_name = vcluster.LocalizeVirtualPath(file_name) + if not os.path.isabs(file_name): _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name) @@ -2051,33 +2431,6 @@ def RunOob(oob_program, command, node, timeout): return result.stdout -def WriteSsconfFiles(values): - """Update all ssconf files. - - Wrapper around the SimpleStore.WriteFiles. - - """ - ssconf.SimpleStore().WriteFiles(values) - - -def _ErrnoOrStr(err): - """Format an EnvironmentError exception. - - If the L{err} argument has an errno attribute, it will be looked up - and converted into a textual C{E...} description. Otherwise the - string representation of the error will be returned. - - @type err: L{EnvironmentError} - @param err: the exception to format - - """ - if hasattr(err, "errno"): - detail = errno.errorcode[err.errno] - else: - detail = str(err) - return detail - - def _OSOndiskAPIVersion(os_dir): """Compute and return the API version of a given OS. 
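The _OSOndiskAPIVersion function whose docstring opens above (its body follows in the next hunks) reads constants.OS_API_FILE and accepts one integer API version per line. A small self-contained sketch of that parsing contract (the helper name is hypothetical):

    def parse_api_versions(text):
      # One integer per line, surrounding whitespace ignored; any
      # non-numeric line makes the OS definition invalid.
      try:
        return [int(line.strip()) for line in text.splitlines()]
      except (TypeError, ValueError):
        return None

    assert parse_api_versions("20\n15\n") == [20, 15]
    assert parse_api_versions("twenty\n") is None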
@@ -2097,7 +2450,7 @@ def _OSOndiskAPIVersion(os_dir): st = os.stat(api_file) except EnvironmentError, err: return False, ("Required file '%s' not found under path %s: %s" % - (constants.OS_API_FILE, os_dir, _ErrnoOrStr(err))) + (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err))) if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): return False, ("File '%s' in %s is not a regular file" % @@ -2107,7 +2460,7 @@ def _OSOndiskAPIVersion(os_dir): api_versions = utils.ReadFile(api_file).splitlines() except EnvironmentError, err: return False, ("Error while reading the API version file at %s: %s" % - (api_file, _ErrnoOrStr(err))) + (api_file, utils.ErrnoOrStr(err))) try: api_versions = [int(version.strip()) for version in api_versions] @@ -2124,7 +2477,7 @@ def DiagnoseOS(top_dirs=None): @type top_dirs: list @param top_dirs: the list of directories in which to search (if not given defaults to - L{constants.OS_SEARCH_PATH}) + L{pathutils.OS_SEARCH_PATH}) @rtype: list of L{objects.OS} @return: a list of tuples (name, path, status, diagnose, variants, parameters, api_version) for all (potential) OSes under all @@ -2139,7 +2492,7 @@ def DiagnoseOS(top_dirs=None): """ if top_dirs is None: - top_dirs = constants.OS_SEARCH_PATH + top_dirs = pathutils.OS_SEARCH_PATH result = [] for dir_name in top_dirs: @@ -2181,7 +2534,7 @@ def _TryOSFromDisk(name, base_dir=None): """ if base_dir is None: - os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir) + os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir) else: os_dir = utils.FindFile(name, [base_dir], os.path.isdir) @@ -2220,7 +2573,7 @@ def _TryOSFromDisk(name, base_dir=None): del os_files[filename] continue return False, ("File '%s' under path '%s' is missing (%s)" % - (filename, os_dir, _ErrnoOrStr(err))) + (filename, os_dir, utils.ErrnoOrStr(err))) if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): return False, ("File '%s' under path '%s' is not a regular file" % @@ -2235,12 +2588,13 @@ def _TryOSFromDisk(name, base_dir=None): if constants.OS_VARIANTS_FILE in os_files: variants_file = os_files[constants.OS_VARIANTS_FILE] try: - variants = utils.ReadFile(variants_file).splitlines() + variants = \ + utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file)) except EnvironmentError, err: # we accept missing files, but not other errors if err.errno != errno.ENOENT: return False, ("Error while reading the OS variants file at %s: %s" % - (variants_file, _ErrnoOrStr(err))) + (variants_file, utils.ErrnoOrStr(err))) parameters = [] if constants.OS_PARAMETERS_FILE in os_files: @@ -2249,7 +2603,7 @@ def _TryOSFromDisk(name, base_dir=None): parameters = utils.ReadFile(parameters_file).splitlines() except EnvironmentError, err: return False, ("Error while reading the OS parameters file at %s: %s" % - (parameters_file, _ErrnoOrStr(err))) + (parameters_file, utils.ErrnoOrStr(err))) parameters = [v.split(None, 1) for v in parameters] os_obj = objects.OS(name=name, path=os_dir, @@ -2329,6 +2683,11 @@ def OSCoreEnv(os_name, inst_os, os_params, debug=0): for pname, pvalue in os_params.items(): result["OSP_%s" % pname.upper()] = pvalue + # Set a default path otherwise programs called by OS scripts (or + # even hooks called from OS scripts) might break, and we don't want + # to have each script require setting a PATH variable + result["PATH"] = constants.HOOKS_PATH + return result @@ -2382,6 +2741,9 @@ def OSEnvironment(instance, inst_os, debug=0): result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK] if 
nic.nicparams[constants.NIC_LINK]: result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK] + if nic.netinfo: + nobj = objects.Network.FromDict(nic.netinfo) + result.update(nobj.HooksDict("NIC_%d_" % idx)) if constants.HV_NIC_TYPE in instance.hvparams: result["NIC_%d_FRONTEND_TYPE" % idx] = \ instance.hvparams[constants.HV_NIC_TYPE] @@ -2394,7 +2756,52 @@ def OSEnvironment(instance, inst_os, debug=0): return result -def BlockdevGrow(disk, amount, dryrun): +def DiagnoseExtStorage(top_dirs=None): + """Compute the validity for all ExtStorage Providers. + + @type top_dirs: list + @param top_dirs: the list of directories in which to + search (if not given defaults to + L{pathutils.ES_SEARCH_PATH}) + @rtype: list of L{objects.ExtStorage} + @return: a list of tuples (name, path, status, diagnose, parameters) + for all (potential) ExtStorage Providers under all + search paths, where: + - name is the (potential) ExtStorage Provider + - path is the full path to the ExtStorage Provider + - status True/False is the validity of the ExtStorage Provider + - diagnose is the error message for an invalid ExtStorage Provider, + otherwise empty + - parameters is a list of (name, help) parameters, if any + + """ + if top_dirs is None: + top_dirs = pathutils.ES_SEARCH_PATH + + result = [] + for dir_name in top_dirs: + if os.path.isdir(dir_name): + try: + f_names = utils.ListVisibleFiles(dir_name) + except EnvironmentError, err: + logging.exception("Can't list the ExtStorage directory %s: %s", + dir_name, err) + break + for name in f_names: + es_path = utils.PathJoin(dir_name, name) + status, es_inst = bdev.ExtStorageFromDisk(name, base_dir=dir_name) + if status: + diagnose = "" + parameters = es_inst.supported_parameters + else: + diagnose = es_inst + parameters = [] + result.append((name, es_path, status, diagnose, parameters)) + + return result + + +def BlockdevGrow(disk, amount, dryrun, backingstore): """Grow a stack of block devices. This function is called recursively, with the childrens being the @@ -2407,6 +2814,9 @@ def BlockdevGrow(disk, amount, dryrun): @type dryrun: boolean @param dryrun: whether to execute the operation in simulation mode only, without actually increasing the size + @param backingstore: whether to execute the operation on backing storage + only, or on "logical" storage only; e.g. DRBD is logical storage, + whereas LVM, file, RBD are backing storage @rtype: (status, result) @return: a tuple with the status of the operation (True/False), and the errors message if status is False @@ -2417,7 +2827,7 @@ def BlockdevGrow(disk, amount, dryrun): _Fail("Cannot find block device %s", disk) try: - r_dev.Grow(amount, dryrun) + r_dev.Grow(amount, dryrun, backingstore) except errors.BlockDeviceError, err: _Fail("Failed to grow block device: %s", err, exc=True) @@ -2452,6 +2862,32 @@ def BlockdevSnapshot(disk): disk.unique_id, disk.dev_type) +def BlockdevSetInfo(disk, info): + """Sets 'metadata' information on block devices. + + This function sets 'info' metadata on block devices. Initial + information is set at device creation; this function should be used + for example after renames. 
+ + @type disk: L{objects.Disk} + @param disk: the disk to be grown + @type info: string + @param info: new 'info' metadata + @rtype: (status, result) + @return: a tuple with the status of the operation (True/False), and + the errors message if status is False + + """ + r_dev = _RecursiveFindBD(disk) + if r_dev is None: + _Fail("Cannot find block device %s", disk) + + try: + r_dev.SetInfo(info) + except errors.BlockDeviceError, err: + _Fail("Failed to set information on block device: %s", err, exc=True) + + def FinalizeExport(instance, snap_disks): """Write out the export configuration information. @@ -2465,8 +2901,8 @@ def FinalizeExport(instance, snap_disks): @rtype: None """ - destdir = utils.PathJoin(constants.EXPORT_DIR, instance.name + ".new") - finaldestdir = utils.PathJoin(constants.EXPORT_DIR, instance.name) + destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new") + finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name) config = objects.SerializableConfigParser() @@ -2479,8 +2915,13 @@ def FinalizeExport(instance, snap_disks): config.add_section(constants.INISECT_INS) config.set(constants.INISECT_INS, "name", instance.name) + config.set(constants.INISECT_INS, "maxmem", "%d" % + instance.beparams[constants.BE_MAXMEM]) + config.set(constants.INISECT_INS, "minmem", "%d" % + instance.beparams[constants.BE_MINMEM]) + # "memory" is deprecated, but useful for exporting to old ganeti versions config.set(constants.INISECT_INS, "memory", "%d" % - instance.beparams[constants.BE_MEMORY]) + instance.beparams[constants.BE_MAXMEM]) config.set(constants.INISECT_INS, "vcpus", "%d" % instance.beparams[constants.BE_VCPUS]) config.set(constants.INISECT_INS, "disk_template", instance.disk_template) @@ -2493,6 +2934,8 @@ def FinalizeExport(instance, snap_disks): config.set(constants.INISECT_INS, "nic%d_mac" % nic_count, "%s" % nic.mac) config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip) + config.set(constants.INISECT_INS, "nic%d_network" % nic_count, + "%s" % nic.network) for param in constants.NICS_PARAMETER_TYPES: config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param), "%s" % nic.nicparams.get(param, None)) @@ -2563,8 +3006,8 @@ def ListExports(): @return: list of the exports """ - if os.path.isdir(constants.EXPORT_DIR): - return sorted(utils.ListVisibleFiles(constants.EXPORT_DIR)) + if os.path.isdir(pathutils.EXPORT_DIR): + return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR)) else: _Fail("No exports directory") @@ -2577,7 +3020,7 @@ def RemoveExport(export): @rtype: None """ - target = utils.PathJoin(constants.EXPORT_DIR, export) + target = utils.PathJoin(pathutils.EXPORT_DIR, export) try: shutil.rmtree(target) @@ -2639,18 +3082,13 @@ def _TransformFileStorageDir(fs_dir): @return: the normalized path if valid, None otherwise """ - if not constants.ENABLE_FILE_STORAGE: + if not (constants.ENABLE_FILE_STORAGE or + constants.ENABLE_SHARED_FILE_STORAGE): _Fail("File storage disabled at configure time") - cfg = _GetConfig() - fs_dir = os.path.normpath(fs_dir) - base_fstore = cfg.GetFileStorageDir() - base_shared = cfg.GetSharedFileStorageDir() - if not (utils.IsBelowDir(base_fstore, fs_dir) or - utils.IsBelowDir(base_shared, fs_dir)): - _Fail("File storage directory '%s' is not under base file" - " storage directory '%s' or shared storage directory '%s'", - fs_dir, base_fstore, base_shared) - return fs_dir + + bdev.CheckFileStoragePath(fs_dir) + + return os.path.normpath(fs_dir) def CreateFileStorageDir(file_storage_dir): @@ -2741,12 
+3179,9 @@ def _EnsureJobQueueFile(file_name): @raises RPCFail: if the file is not valid """ - queue_dir = os.path.normpath(constants.QUEUE_DIR) - result = (os.path.commonprefix([queue_dir, file_name]) == queue_dir) - - if not result: + if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name): _Fail("Passed job queue file '%s' does not belong to" - " the queue directory '%s'", file_name, queue_dir) + " the queue directory '%s'", file_name, pathutils.QUEUE_DIR) def JobQueueUpdate(file_name, content): @@ -2763,12 +3198,14 @@ def JobQueueUpdate(file_name, content): @return: the success of the operation """ + file_name = vcluster.LocalizeVirtualPath(file_name) + _EnsureJobQueueFile(file_name) getents = runtime.GetEnts() # Write and replace the file atomically utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid, - gid=getents.masterd_gid) + gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS) def JobQueueRename(old, new): @@ -2784,13 +3221,16 @@ def JobQueueRename(old, new): @return: the success of the operation and payload """ + old = vcluster.LocalizeVirtualPath(old) + new = vcluster.LocalizeVirtualPath(new) + _EnsureJobQueueFile(old) _EnsureJobQueueFile(new) getents = runtime.GetEnts() - utils.RenameFile(old, new, mkdir=True, mkdir_mode=0700, - dir_uid=getents.masterd_uid, dir_gid=getents.masterd_gid) + utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750, + dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid) def BlockdevClose(instance_name, disks): @@ -2920,18 +3360,18 @@ def DemoteFromMC(): if master == myself: _Fail("ssconf status shows I'm the master node, will not demote") - result = utils.RunCmd([constants.DAEMON_UTIL, "check", constants.MASTERD]) + result = utils.RunCmd([pathutils.DAEMON_UTIL, "check", constants.MASTERD]) if not result.failed: _Fail("The master daemon is running, will not demote") try: - if os.path.isfile(constants.CLUSTER_CONF_FILE): - utils.CreateBackup(constants.CLUSTER_CONF_FILE) + if os.path.isfile(pathutils.CLUSTER_CONF_FILE): + utils.CreateBackup(pathutils.CLUSTER_CONF_FILE) except EnvironmentError, err: if err.errno != errno.ENOENT: _Fail("Error while backing up cluster file: %s", err, exc=True) - utils.RemoveFile(constants.CLUSTER_CONF_FILE) + utils.RemoveFile(pathutils.CLUSTER_CONF_FILE) def _GetX509Filenames(cryptodir, name): @@ -2943,7 +3383,7 @@ def _GetX509Filenames(cryptodir, name): utils.PathJoin(cryptodir, name, _X509_CERT_FILE)) -def CreateX509Certificate(validity, cryptodir=constants.CRYPTO_KEYS_DIR): +def CreateX509Certificate(validity, cryptodir=pathutils.CRYPTO_KEYS_DIR): """Creates a new X509 certificate for SSL/TLS. @type validity: int @@ -2974,7 +3414,7 @@ def CreateX509Certificate(validity, cryptodir=constants.CRYPTO_KEYS_DIR): raise -def RemoveX509Certificate(name, cryptodir=constants.CRYPTO_KEYS_DIR): +def RemoveX509Certificate(name, cryptodir=pathutils.CRYPTO_KEYS_DIR): """Removes a X509 certificate. 
@type name: string @@ -3019,9 +3459,9 @@ def _GetImportExportIoCommand(instance, mode, ieio, ieargs): real_filename = os.path.realpath(filename) directory = os.path.dirname(real_filename) - if not utils.IsBelowDir(constants.EXPORT_DIR, real_filename): + if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename): _Fail("File '%s' is not under exports directory '%s': %s", - filename, constants.EXPORT_DIR, real_filename) + filename, pathutils.EXPORT_DIR, real_filename) # Create directory utils.Makedirs(directory, mode=0750) @@ -3108,7 +3548,7 @@ def _CreateImportExportStatusDir(prefix): """Creates status directory for import/export. """ - return tempfile.mkdtemp(dir=constants.IMPORT_EXPORT_DIR, + return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR, prefix=("%s-%s-" % (prefix, utils.TimestampForFilename()))) @@ -3156,11 +3596,11 @@ def StartImportExportDaemon(mode, opts, host, port, instance, component, if opts.key_name is None: # Use server.pem - key_path = constants.NODED_CERT_FILE - cert_path = constants.NODED_CERT_FILE + key_path = pathutils.NODED_CERT_FILE + cert_path = pathutils.NODED_CERT_FILE assert opts.ca_pem is None else: - (_, key_path, cert_path) = _GetX509Filenames(constants.CRYPTO_KEYS_DIR, + (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR, opts.key_name) assert opts.ca_pem is not None @@ -3176,7 +3616,7 @@ def StartImportExportDaemon(mode, opts, host, port, instance, component, if opts.ca_pem is None: # Use server.pem - ca = utils.ReadFile(constants.NODED_CERT_FILE) + ca = utils.ReadFile(pathutils.NODED_CERT_FILE) else: ca = opts.ca_pem @@ -3184,7 +3624,7 @@ def StartImportExportDaemon(mode, opts, host, port, instance, component, utils.WriteFile(ca_file, data=ca, mode=0400) cmd = [ - constants.IMPORT_EXPORT_DAEMON, + pathutils.IMPORT_EXPORT_DAEMON, status_file, mode, "--key=%s" % key_path, "--cert=%s" % cert_path, @@ -3254,7 +3694,7 @@ def GetImportExportStatus(names): result = [] for name in names: - status_file = utils.PathJoin(constants.IMPORT_EXPORT_DIR, name, + status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name, _IES_STATUS_FILE) try: @@ -3279,7 +3719,7 @@ def AbortImportExport(name): """ logging.info("Abort import/export %s", name) - status_dir = utils.PathJoin(constants.IMPORT_EXPORT_DIR, name) + status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) if pid: @@ -3297,7 +3737,7 @@ def CleanupImportExport(name): """ logging.info("Finalizing import/export %s", name) - status_dir = utils.PathJoin(constants.IMPORT_EXPORT_DIR, name) + status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) @@ -3440,7 +3880,7 @@ def GetDrbdUsermodeHelper(): """ try: - return bdev.BaseDRBD.GetUsermodeHelper() + return drbd.DRBD8.GetUsermodeHelper() except errors.BlockDeviceError, err: _Fail(str(err)) @@ -3469,6 +3909,209 @@ def PowercycleNode(hypervisor_type): hyper.PowercycleNode() +def _VerifyRestrictedCmdName(cmd): + """Verifies a restricted command name. 
+ + @type cmd: string + @param cmd: Command name + @rtype: tuple; (boolean, string or None) + @return: The tuple's first element is the status; if C{False}, the second + element is an error message string, otherwise it's C{None} + + """ + if not cmd.strip(): + return (False, "Missing command name") + + if os.path.basename(cmd) != cmd: + return (False, "Invalid command name") + + if not constants.EXT_PLUGIN_MASK.match(cmd): + return (False, "Command name contains forbidden characters") + + return (True, None) + + +def _CommonRestrictedCmdCheck(path, owner): + """Common checks for restricted command file system directories and files. + + @type path: string + @param path: Path to check + @param owner: C{None} or tuple containing UID and GID + @rtype: tuple; (boolean, string or C{os.stat} result) + @return: The tuple's first element is the status; if C{False}, the second + element is an error message string, otherwise it's the result of C{os.stat} + + """ + if owner is None: + # Default to root as owner + owner = (0, 0) + + try: + st = os.stat(path) + except EnvironmentError, err: + return (False, "Can't stat(2) '%s': %s" % (path, err)) + + if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE): + return (False, "Permissions on '%s' are too permissive" % path) + + if (st.st_uid, st.st_gid) != owner: + (owner_uid, owner_gid) = owner + return (False, "'%s' is not owned by %s:%s" % (path, owner_uid, owner_gid)) + + return (True, st) + + +def _VerifyRestrictedCmdDirectory(path, _owner=None): + """Verifies restricted command directory. + + @type path: string + @param path: Path to check + @rtype: tuple; (boolean, string or None) + @return: The tuple's first element is the status; if C{False}, the second + element is an error message string, otherwise it's C{None} + + """ + (status, value) = _CommonRestrictedCmdCheck(path, _owner) + + if not status: + return (False, value) + + if not stat.S_ISDIR(value.st_mode): + return (False, "Path '%s' is not a directory" % path) + + return (True, None) + + +def _VerifyRestrictedCmd(path, cmd, _owner=None): + """Verifies a whole restricted command and returns its executable filename. + + @type path: string + @param path: Directory containing restricted commands + @type cmd: string + @param cmd: Command name + @rtype: tuple; (boolean, string) + @return: The tuple's first element is the status; if C{False}, the second + element is an error message string, otherwise the second element is the + absolute path to the executable + + """ + executable = utils.PathJoin(path, cmd) + + (status, msg) = _CommonRestrictedCmdCheck(executable, _owner) + + if not status: + return (False, msg) + + if not utils.IsExecutable(executable): + return (False, "access(2) thinks '%s' can't be executed" % executable) + + return (True, executable) + + +def _PrepareRestrictedCmd(path, cmd, + _verify_dir=_VerifyRestrictedCmdDirectory, + _verify_name=_VerifyRestrictedCmdName, + _verify_cmd=_VerifyRestrictedCmd): + """Performs a number of tests on a restricted command. 
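# Small self-contained check (stdlib only, mask copied from the patch) of the
# mode test used by _CommonRestrictedCmdCheck above: any bit outside 0755 --
# group/other write, but also setuid/setgid, which stat.S_IMODE() preserves --
# marks the path as too permissive.
import stat

_MAX_MODE = (stat.S_IRWXU |
             stat.S_IRGRP | stat.S_IXGRP |
             stat.S_IROTH | stat.S_IXOTH)  # 0755, as _RCMD_MAX_MODE above

def _too_permissive(mode):
  # A non-zero result means a bit outside the allowed mask is set.
  return bool(stat.S_IMODE(mode) & ~_MAX_MODE)

assert not _too_permissive(0755)             # the allowed maximum
assert not _too_permissive(0700)             # stricter is fine
assert _too_permissive(0775)                 # group-writable
assert _too_permissive(0755 | stat.S_ISUID)  # setuid exceeds the mask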
+ + @type path: string + @param path: Directory containing restricted commands + @type cmd: string + @param cmd: Command name + @return: Same as L{_VerifyRestrictedCmd} + + """ + # Verify the directory first + (status, msg) = _verify_dir(path) + if status: + # Check command if everything was alright + (status, msg) = _verify_name(cmd) + + if not status: + return (False, msg) + + # Check actual executable + return _verify_cmd(path, cmd) + + +def RunRestrictedCmd(cmd, + _lock_timeout=_RCMD_LOCK_TIMEOUT, + _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE, + _path=pathutils.RESTRICTED_COMMANDS_DIR, + _sleep_fn=time.sleep, + _prepare_fn=_PrepareRestrictedCmd, + _runcmd_fn=utils.RunCmd, + _enabled=constants.ENABLE_RESTRICTED_COMMANDS): + """Executes a restricted command after performing strict tests. + + @type cmd: string + @param cmd: Command name + @rtype: string + @return: Command output + @raise RPCFail: In case of an error + + """ + logging.info("Preparing to run restricted command '%s'", cmd) + + if not _enabled: + _Fail("Restricted commands disabled at configure time") + + lock = None + try: + cmdresult = None + try: + lock = utils.FileLock.Open(_lock_file) + lock.Exclusive(blocking=True, timeout=_lock_timeout) + + (status, value) = _prepare_fn(_path, cmd) + + if status: + cmdresult = _runcmd_fn([value], env={}, reset_env=True, + postfork_fn=lambda _: lock.Unlock()) + else: + logging.error(value) + except Exception: # pylint: disable=W0703 + # Keep original error in log + logging.exception("Caught exception") + + if cmdresult is None: + logging.info("Sleeping for %0.1f seconds before returning", + _RCMD_INVALID_DELAY) + _sleep_fn(_RCMD_INVALID_DELAY) + + # Do not include original error message in returned error + _Fail("Executing command '%s' failed" % cmd) + elif cmdresult.failed or cmdresult.fail_reason: + _Fail("Restricted command '%s' failed: %s; output: %s", + cmd, cmdresult.fail_reason, cmdresult.output) + else: + return cmdresult.output + finally: + if lock is not None: + # Release lock at last + lock.Close() + lock = None + + +def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSEFILE): + """Creates or removes the watcher pause file. + + @type until: None or number + @param until: Unix timestamp saying until when the watcher shouldn't run + + """ + if until is None: + logging.info("Received request to no longer pause watcher") + utils.RemoveFile(_filename) + else: + logging.info("Received request to pause watcher until %s", until) + + if not ht.TNumber(until): + _Fail("Duration must be numeric") + + utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0644) + + class HooksRunner(object): """Hook runner. 
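# RunRestrictedCmd above sleeps a fixed _RCMD_INVALID_DELAY and returns one
# generic message for every pre-execution failure, so a caller can probe
# neither which commands exist nor how they are misconfigured, whether by
# error text or by response time.  A stripped-down sketch of that pattern
# (function and constant names here are illustrative, not Ganeti's):
import logging
import time

_INVALID_DELAY = 10  # mirrors _RCMD_INVALID_DELAY

def _fail_uniformly(cmd, reason):
  # The real cause stays in the local log only; callers always see the
  # same delay and the same message, whatever actually went wrong.
  logging.error("Restricted command %r rejected: %s", cmd, reason)
  time.sleep(_INVALID_DELAY)
  raise RuntimeError("Executing command '%s' failed" % cmd)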
@@ -3481,11 +4124,11 @@ class HooksRunner(object):
 
     @type hooks_base_dir: str or None
     @param hooks_base_dir: if not None, this overrides the
-        L{constants.HOOKS_BASE_DIR} (useful for unittests)
+        L{pathutils.HOOKS_BASE_DIR} (useful for unittests)
 
     """
     if hooks_base_dir is None:
-      hooks_base_dir = constants.HOOKS_BASE_DIR
+      hooks_base_dir = pathutils.HOOKS_BASE_DIR
     # yeah, _BASE_DIR is not valid for attributes, we use it like a
     # constant
     self._BASE_DIR = hooks_base_dir # pylint: disable=C0103
@@ -3545,7 +4188,7 @@
 
     runparts_results = utils.RunParts(dir_name, env=env, reset_env=True)
 
-    for (relname, relstatus, runresult) in  runparts_results:
+    for (relname, relstatus, runresult) in runparts_results:
       if relstatus == constants.RUNPARTS_SKIP:
         rrval = constants.HKR_SKIP
         output = ""
@@ -3609,7 +4252,7 @@ class DevCacheManager(object):
 
   """
   _DEV_PREFIX = "/dev/"
-  _ROOT_DIR = constants.BDEV_CACHE_DIR
+  _ROOT_DIR = pathutils.BDEV_CACHE_DIR
 
   @classmethod
   def _ConvertPath(cls, dev_path):
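# The RunHooks loop above consumes utils.RunParts(), which runs every file
# in a hooks directory in lexical order under a controlled environment and
# reports non-runnable entries as skipped.  A rough stdlib-only stand-in for
# that contract (status strings simplified here; the real values are the
# constants.RUNPARTS_* constants):
import os
import subprocess

def run_parts(dir_name, env):
  results = []
  for relname in sorted(os.listdir(dir_name)):
    path = os.path.join(dir_name, relname)
    if not os.access(path, os.X_OK):
      # Corresponds to the RUNPARTS_SKIP branch handled in RunHooks.
      results.append((relname, "SKIP", None))
      continue
    rc = subprocess.call([path], env=env)
    results.append((relname, "RUN", rc))
  return results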