X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/a3a5f850731eadc4a158dccf4bff56a06ce9f809..ca83454f36da2226fe84b32f2cce81610f938568:/lib/backend.py diff --git a/lib/backend.py b/lib/backend.py index f1474b7..bc833e9 100644 --- a/lib/backend.py +++ b/lib/backend.py @@ -1,7 +1,7 @@ # # -# Copyright (C) 2006, 2007 Google Inc. +# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -28,7 +28,7 @@ """ -# pylint: disable-msg=E1103 +# pylint: disable=E1103 # E1103: %s %r has no %r member (but some types could not be # inferred), because the _TryOSFromDisk returns either (True, os_obj) @@ -47,6 +47,7 @@ import logging import tempfile import zlib import base64 +import signal from ganeti import errors from ganeti import utils @@ -56,14 +57,49 @@ from ganeti import constants from ganeti import bdev from ganeti import objects from ganeti import ssconf +from ganeti import serializer +from ganeti import netutils +from ganeti import runtime +from ganeti import compat +from ganeti import pathutils +from ganeti import vcluster +from ganeti import ht +from ganeti import hooksmaster _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id" -_ALLOWED_CLEAN_DIRS = frozenset([ - constants.DATA_DIR, - constants.JOB_QUEUE_ARCHIVE_DIR, - constants.QUEUE_DIR, +_ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([ + pathutils.DATA_DIR, + pathutils.JOB_QUEUE_ARCHIVE_DIR, + pathutils.QUEUE_DIR, + pathutils.CRYPTO_KEYS_DIR, ]) +_MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60 +_X509_KEY_FILE = "key" +_X509_CERT_FILE = "cert" +_IES_STATUS_FILE = "status" +_IES_PID_FILE = "pid" +_IES_CA_FILE = "ca" + +#: Valid LVS output line regex +_LVSLINE_REGEX = re.compile("^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$") + +# Actions for the master setup script +_MASTER_START = "start" +_MASTER_STOP = "stop" + +#: Maximum file permissions for restricted command directory and executables +_RCMD_MAX_MODE = (stat.S_IRWXU | + stat.S_IRGRP | stat.S_IXGRP | + stat.S_IROTH | stat.S_IXOTH) + +#: Delay before returning an error for restricted commands +_RCMD_INVALID_DELAY = 10 + +#: How long to wait to acquire lock for restricted commands (shorter than +#: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many +#: command requests arrive +_RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8 class RPCFail(Exception): @@ -74,6 +110,34 @@ class RPCFail(Exception): """ +def _GetInstReasonFilename(instance_name): + """Path of the file containing the reason of the instance status change. + + @type instance_name: string + @param instance_name: The name of the instance + @rtype: string + @return: The path of the file + + """ + return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name) + + +def _StoreInstReasonTrail(instance_name, trail): + """Serialize a reason trail related to an instance change of state to file. + + The exact location of the file depends on the name of the instance and on + the configuration of the Ganeti cluster defined at deploy time. + + @type instance_name: string + @param instance_name: The name of the instance + @rtype: None + + """ + json = serializer.DumpJson(trail) + filename = _GetInstReasonFilename(instance_name) + utils.WriteFile(filename, data=json) + + def _Fail(msg, *args, **kwargs): """Log an error and the raise an RPCFail exception. 
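For reference, the reason trail handled by _StoreInstReasonTrail above is written as plain JSON to a per-instance file under pathutils.INSTANCE_REASON_DIR. A minimal read-back sketch built on the same modules (the _LoadInstReasonTrail helper is hypothetical and not part of this change):

    from ganeti import pathutils
    from ganeti import serializer
    from ganeti import utils

    def _LoadInstReasonTrail(instance_name):
      """Hypothetical helper: return the stored reason trail for an instance."""
      # Mirrors _GetInstReasonFilename() above: one JSON file per instance name.
      filename = utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)
      return serializer.LoadJson(utils.ReadFile(filename))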
@@ -177,18 +241,24 @@ def _BuildUploadFileList(): """ allowed_files = set([ - constants.CLUSTER_CONF_FILE, - constants.ETC_HOSTS, - constants.SSH_KNOWN_HOSTS_FILE, - constants.VNC_PASSWORD_FILE, - constants.RAPI_CERT_FILE, - constants.RAPI_USERS_FILE, - constants.CONFD_HMAC_KEY, + pathutils.CLUSTER_CONF_FILE, + pathutils.ETC_HOSTS, + pathutils.SSH_KNOWN_HOSTS_FILE, + pathutils.VNC_PASSWORD_FILE, + pathutils.RAPI_CERT_FILE, + pathutils.SPICE_CERT_FILE, + pathutils.SPICE_CACERT_FILE, + pathutils.RAPI_USERS_FILE, + pathutils.CONFD_HMAC_KEY, + pathutils.CLUSTER_DOMAIN_SECRET_FILE, ]) for hv_name in constants.HYPER_TYPES: hv_class = hypervisor.GetHypervisorClass(hv_name) - allowed_files.update(hv_class.GetAncillaryFiles()) + allowed_files.update(hv_class.GetAncillaryFiles()[0]) + + assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \ + "Allowed file storage paths should never be uploaded via RPC" return frozenset(allowed_files) @@ -203,8 +273,8 @@ def JobQueuePurge(): @return: True, None """ - _CleanDirectory(constants.QUEUE_DIR, exclude=[constants.JOB_QUEUE_LOCK_FILE]) - _CleanDirectory(constants.JOB_QUEUE_ARCHIVE_DIR) + _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE]) + _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR) def GetMasterInfo(): @@ -214,7 +284,8 @@ def GetMasterInfo(): for consumption here or from the node daemon. @rtype: tuple - @return: master_netdev, master_ip, master_name + @return: master_netdev, master_ip, master_name, primary_ip_family, + master_netmask @raise RPCFail: in case of errors """ @@ -222,153 +293,234 @@ def GetMasterInfo(): cfg = _GetConfig() master_netdev = cfg.GetMasterNetdev() master_ip = cfg.GetMasterIP() + master_netmask = cfg.GetMasterNetmask() master_node = cfg.GetMasterNode() + primary_ip_family = cfg.GetPrimaryIPFamily() except errors.ConfigurationError, err: _Fail("Cluster configuration incomplete: %s", err, exc=True) - return (master_netdev, master_ip, master_node) + return (master_netdev, master_ip, master_node, primary_ip_family, + master_netmask) + + +def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn): + """Decorator that runs hooks before and after the decorated function. + + @type hook_opcode: string + @param hook_opcode: opcode of the hook + @type hooks_path: string + @param hooks_path: path of the hooks + @type env_builder_fn: function + @param env_builder_fn: function that returns a dictionary containing the + environment variables for the hooks. Will get all the parameters of the + decorated function. + @raise RPCFail: in case of pre-hook failure + + """ + def decorator(fn): + def wrapper(*args, **kwargs): + _, myself = ssconf.GetMasterAndMyself() + nodes = ([myself], [myself]) # these hooks run locally + + env_fn = compat.partial(env_builder_fn, *args, **kwargs) + + cfg = _GetConfig() + hr = HooksRunner() + hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes, + hr.RunLocalHooks, None, env_fn, + logging.warning, cfg.GetClusterName(), + cfg.GetMasterNode()) + hm.RunPhase(constants.HOOKS_PHASE_PRE) + result = fn(*args, **kwargs) + hm.RunPhase(constants.HOOKS_PHASE_POST) + + return result + return wrapper + return decorator + + +def _BuildMasterIpEnv(master_params, use_external_mip_script=None): + """Builds environment variables for master IP hooks. 
+ + @type master_params: L{objects.MasterNetworkParameters} + @param master_params: network parameters of the master + @type use_external_mip_script: boolean + @param use_external_mip_script: whether to use an external master IP + address setup script (unused, but necessary per the implementation of the + _RunLocalHooks decorator) + + """ + # pylint: disable=W0613 + ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family) + env = { + "MASTER_NETDEV": master_params.netdev, + "MASTER_IP": master_params.ip, + "MASTER_NETMASK": str(master_params.netmask), + "CLUSTER_IP_VERSION": str(ver), + } + + return env + + +def _RunMasterSetupScript(master_params, action, use_external_mip_script): + """Execute the master IP address setup script. + + @type master_params: L{objects.MasterNetworkParameters} + @param master_params: network parameters of the master + @type action: string + @param action: action to pass to the script. Must be one of + L{backend._MASTER_START} or L{backend._MASTER_STOP} + @type use_external_mip_script: boolean + @param use_external_mip_script: whether to use an external master IP + address setup script + @raise backend.RPCFail: if there are errors during the execution of the + script + + """ + env = _BuildMasterIpEnv(master_params) + + if use_external_mip_script: + setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT + else: + setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT + + result = utils.RunCmd([setup_script, action], env=env, reset_env=True) + + if result.failed: + _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" % + (action, result.exit_code, result.output), log=True) + + +@RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup", + _BuildMasterIpEnv) +def ActivateMasterIp(master_params, use_external_mip_script): + """Activate the IP address of the master daemon. + + @type master_params: L{objects.MasterNetworkParameters} + @param master_params: network parameters of the master + @type use_external_mip_script: boolean + @param use_external_mip_script: whether to use an external master IP + address setup script + @raise RPCFail: in case of errors during the IP startup + + """ + _RunMasterSetupScript(master_params, _MASTER_START, + use_external_mip_script) -def StartMaster(start_daemons, no_voting): +def StartMasterDaemons(no_voting): """Activate local node as master node. - The function will always try activate the IP address of the master - (unless someone else has it). It will also start the master daemons, - based on the start_daemons parameter. + The function will start the master daemons (ganeti-masterd and ganeti-rapi). 
- @type start_daemons: boolean - @param start_daemons: whether to also start the master - daemons (ganeti-masterd and ganeti-rapi) @type no_voting: boolean @param no_voting: whether to start ganeti-masterd without a node vote - (if start_daemons is True), but still non-interactively + but still non-interactively @rtype: None """ - # GetMasterInfo will raise an exception if not able to return data - master_netdev, master_ip, _ = GetMasterInfo() - err_msgs = [] - if utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT): - if utils.OwnIpAddress(master_ip): - # we already have the ip: - logging.debug("Master IP already configured, doing nothing") - else: - msg = "Someone else has the master ip, not activating" - logging.error(msg) - err_msgs.append(msg) + if no_voting: + masterd_args = "--no-voting --yes-do-it" else: - result = utils.RunCmd(["ip", "address", "add", "%s/32" % master_ip, - "dev", master_netdev, "label", - "%s:0" % master_netdev]) - if result.failed: - msg = "Can't activate master IP: %s" % result.output - logging.error(msg) - err_msgs.append(msg) + masterd_args = "" - result = utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev, - "-s", master_ip, master_ip]) - # we'll ignore the exit code of arping + env = { + "EXTRA_MASTERD_ARGS": masterd_args, + } - # and now start the master and rapi daemons - if start_daemons: - if no_voting: - masterd_args = "--no-voting --yes-do-it" - else: - masterd_args = "" + result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env) + if result.failed: + msg = "Can't start Ganeti master: %s" % result.output + logging.error(msg) + _Fail(msg) - env = { - "EXTRA_MASTERD_ARGS": masterd_args, - } - result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"], env=env) - if result.failed: - msg = "Can't start Ganeti master: %s" % result.output - logging.error(msg) - err_msgs.append(msg) +@RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown", + _BuildMasterIpEnv) +def DeactivateMasterIp(master_params, use_external_mip_script): + """Deactivate the master IP on this node. - if err_msgs: - _Fail("; ".join(err_msgs)) + @type master_params: L{objects.MasterNetworkParameters} + @param master_params: network parameters of the master + @type use_external_mip_script: boolean + @param use_external_mip_script: whether to use an external master IP + address setup script + @raise RPCFail: in case of errors during the IP turndown + + """ + _RunMasterSetupScript(master_params, _MASTER_STOP, + use_external_mip_script) -def StopMaster(stop_daemons): - """Deactivate this node as master. +def StopMasterDaemons(): + """Stop the master daemons on this node. - The function will always try to deactivate the IP address of the - master. It will also stop the master daemons depending on the - stop_daemons parameter. + Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node. 
- @type stop_daemons: boolean - @param stop_daemons: whether to also stop the master daemons - (ganeti-masterd and ganeti-rapi) @rtype: None """ # TODO: log and report back to the caller the error failures; we # need to decide in which case we fail the RPC for this - # GetMasterInfo will raise an exception if not able to return data - master_netdev, master_ip, _ = GetMasterInfo() - - result = utils.RunCmd(["ip", "address", "del", "%s/32" % master_ip, - "dev", master_netdev]) + result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"]) if result.failed: - logging.error("Can't remove the master IP, error: %s", result.output) - # but otherwise ignore the failure - - if stop_daemons: - result = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"]) - if result.failed: - logging.error("Could not stop Ganeti master, command %s had exitcode %s" - " and error %s", - result.cmd, result.exit_code, result.output) - - -def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub): - """Joins this node to the cluster. - - This does the following: - - updates the hostkeys of the machine (rsa and dsa) - - adds the ssh private key to the user - - adds the ssh public key to the users' authorized_keys file - - @type dsa: str - @param dsa: the DSA private key to write - @type dsapub: str - @param dsapub: the DSA public key to write - @type rsa: str - @param rsa: the RSA private key to write - @type rsapub: str - @param rsapub: the RSA public key to write - @type sshkey: str - @param sshkey: the SSH private key to write - @type sshpub: str - @param sshpub: the SSH public key to write - @rtype: boolean - @return: the success of the operation + logging.error("Could not stop Ganeti master, command %s had exitcode %s" + " and error %s", + result.cmd, result.exit_code, result.output) - """ - sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600), - (constants.SSH_HOST_RSA_PUB, rsapub, 0644), - (constants.SSH_HOST_DSA_PRIV, dsa, 0600), - (constants.SSH_HOST_DSA_PUB, dsapub, 0644)] - for name, content, mode in sshd_keys: - utils.WriteFile(name, data=content, mode=mode) - try: - priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS, - mkdir=True) - except errors.OpExecError, err: - _Fail("Error while processing user ssh files: %s", err, exc=True) +def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev): + """Change the netmask of the master IP. 
+ + @param old_netmask: the old value of the netmask + @param netmask: the new value of the netmask + @param master_ip: the master IP + @param master_netdev: the master network device + + """ + if old_netmask == netmask: + return - for name, content in [(priv_key, sshkey), (pub_key, sshpub)]: - utils.WriteFile(name, data=content, mode=0600) + if not netutils.IPAddress.Own(master_ip): + _Fail("The master IP address is not up, not attempting to change its" + " netmask") - utils.AddAuthorizedKey(auth_keys, sshpub) + result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add", + "%s/%s" % (master_ip, netmask), + "dev", master_netdev, "label", + "%s:0" % master_netdev]) + if result.failed: + _Fail("Could not set the new netmask on the master IP address") - result = utils.RunCmd([constants.DAEMON_UTIL, "reload-ssh-keys"]) + result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del", + "%s/%s" % (master_ip, old_netmask), + "dev", master_netdev, "label", + "%s:0" % master_netdev]) if result.failed: - _Fail("Unable to reload SSH keys (command %r, exit code %s, output %r)", - result.cmd, result.exit_code, result.output) + _Fail("Could not bring down the master IP address with the old netmask") + + +def EtcHostsModify(mode, host, ip): + """Modify a host entry in /etc/hosts. + + @param mode: The mode to operate. Either add or remove entry + @param host: The host to operate on + @param ip: The ip associated with the entry + + """ + if mode == constants.ETC_HOSTS_ADD: + if not ip: + RPCFail("Mode 'add' needs 'ip' parameter, but parameter not" + " present") + utils.AddHostToEtcHosts(host, ip) + elif mode == constants.ETC_HOSTS_REMOVE: + if ip: + RPCFail("Mode 'remove' does not allow 'ip' parameter, but" + " parameter is present") + utils.RemoveHostFromEtcHosts(host) + else: + RPCFail("Mode not supported") def LeaveCluster(modify_ssh_setup): @@ -384,12 +536,13 @@ def LeaveCluster(modify_ssh_setup): @param modify_ssh_setup: boolean """ - _CleanDirectory(constants.DATA_DIR) + _CleanDirectory(pathutils.DATA_DIR) + _CleanDirectory(pathutils.CRYPTO_KEYS_DIR) JobQueuePurge() if modify_ssh_setup: try: - priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS) + priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER) utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key)) @@ -399,51 +552,107 @@ def LeaveCluster(modify_ssh_setup): logging.exception("Error while processing ssh files") try: - utils.RemoveFile(constants.CONFD_HMAC_KEY) - utils.RemoveFile(constants.RAPI_CERT_FILE) - utils.RemoveFile(constants.NODED_CERT_FILE) - except: # pylint: disable-msg=W0702 + utils.RemoveFile(pathutils.CONFD_HMAC_KEY) + utils.RemoveFile(pathutils.RAPI_CERT_FILE) + utils.RemoveFile(pathutils.SPICE_CERT_FILE) + utils.RemoveFile(pathutils.SPICE_CACERT_FILE) + utils.RemoveFile(pathutils.NODED_CERT_FILE) + except: # pylint: disable=W0702 logging.exception("Error while removing cluster secrets") - result = utils.RunCmd([constants.DAEMON_UTIL, "stop", constants.CONFD]) + result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop", constants.CONFD]) if result.failed: logging.error("Command %s failed with exitcode %s and error %s", result.cmd, result.exit_code, result.output) # Raise a custom exception (handled in ganeti-noded) - raise errors.QuitGanetiException(True, 'Shutdown scheduled') + raise errors.QuitGanetiException(True, "Shutdown scheduled") + +def _GetVgInfo(name, excl_stor): + """Retrieves information about a LVM volume group. 
-def GetNodeInfo(vgname, hypervisor_type): + """ + # TODO: GetVGInfo supports returning information for multiple VGs at once + vginfo = bdev.LogicalVolume.GetVGInfo([name], excl_stor) + if vginfo: + vg_free = int(round(vginfo[0][0], 0)) + vg_size = int(round(vginfo[0][1], 0)) + else: + vg_free = None + vg_size = None + + return { + "name": name, + "vg_free": vg_free, + "vg_size": vg_size, + } + + +def _GetHvInfo(name): + """Retrieves node information from a hypervisor. + + The information returned depends on the hypervisor. Common items: + + - vg_size is the size of the configured volume group in MiB + - vg_free is the free size of the volume group in MiB + - memory_dom0 is the memory allocated for domain0 in MiB + - memory_free is the currently available (free) ram in MiB + - memory_total is the total number of ram in MiB + - hv_version: the hypervisor version, if available + + """ + return hypervisor.GetHypervisor(name).GetNodeInfo() + + +def _GetNamedNodeInfo(names, fn): + """Calls C{fn} for all names in C{names} and returns a dictionary. + + @rtype: None or dict + + """ + if names is None: + return None + else: + return map(fn, names) + + +def GetNodeInfo(vg_names, hv_names, excl_stor): """Gives back a hash with different information about the node. - @type vgname: C{string} - @param vgname: the name of the volume group to ask for disk space information - @type hypervisor_type: C{str} - @param hypervisor_type: the name of the hypervisor to ask for - memory information - @rtype: C{dict} - @return: dictionary with the following keys: - - vg_size is the size of the configured volume group in MiB - - vg_free is the free size of the volume group in MiB - - memory_dom0 is the memory allocated for domain0 in MiB - - memory_free is the currently available (free) ram in MiB - - memory_total is the total number of ram in MiB + @type vg_names: list of string + @param vg_names: Names of the volume groups to ask for disk space information + @type hv_names: list of string + @param hv_names: Names of the hypervisors to ask for node information + @type excl_stor: boolean + @param excl_stor: Whether exclusive_storage is active + @rtype: tuple; (string, None/dict, None/dict) + @return: Tuple containing boot ID, volume group information and hypervisor + information """ - outputarray = {} - vginfo = _GetVGInfo(vgname) - outputarray['vg_size'] = vginfo['vg_size'] - outputarray['vg_free'] = vginfo['vg_free'] + bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n") + vg_info = _GetNamedNodeInfo(vg_names, (lambda vg: _GetVgInfo(vg, excl_stor))) + hv_info = _GetNamedNodeInfo(hv_names, _GetHvInfo) - hyper = hypervisor.GetHypervisor(hypervisor_type) - hyp_info = hyper.GetNodeInfo() - if hyp_info is not None: - outputarray.update(hyp_info) + return (bootid, vg_info, hv_info) - outputarray["bootid"] = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n") - return outputarray +def _CheckExclusivePvs(pvi_list): + """Check that PVs are not shared among LVs + + @type pvi_list: list of L{objects.LvmPvInfo} objects + @param pvi_list: information about the PVs + + @rtype: list of tuples (string, list of strings) + @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...]) + + """ + res = [] + for pvi in pvi_list: + if len(pvi.lv_list) > 1: + res.append((pvi.name, pvi.lv_list)) + return res def VerifyNode(what, cluster_name): @@ -476,10 +685,11 @@ def VerifyNode(what, cluster_name): """ result = {} - my_name = utils.HostInfo().name - port = utils.GetDaemonPort(constants.NODED) + my_name = 
netutils.Hostname.GetSysName() + port = netutils.GetDaemonPort(constants.NODED) + vm_capable = my_name not in what.get(constants.NV_VMNODES, []) - if constants.NV_HYPERVISOR in what: + if constants.NV_HYPERVISOR in what and vm_capable: result[constants.NV_HYPERVISOR] = tmp = {} for hv_name in what[constants.NV_HYPERVISOR]: try: @@ -488,17 +698,42 @@ def VerifyNode(what, cluster_name): val = "Error while checking hypervisor: %s" % str(err) tmp[hv_name] = val + if constants.NV_HVPARAMS in what and vm_capable: + result[constants.NV_HVPARAMS] = tmp = [] + for source, hv_name, hvparms in what[constants.NV_HVPARAMS]: + try: + logging.info("Validating hv %s, %s", hv_name, hvparms) + hypervisor.GetHypervisor(hv_name).ValidateParameters(hvparms) + except errors.HypervisorError, err: + tmp.append((source, hv_name, str(err))) + if constants.NV_FILELIST in what: - result[constants.NV_FILELIST] = utils.FingerprintFiles( - what[constants.NV_FILELIST]) + fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath, + what[constants.NV_FILELIST])) + result[constants.NV_FILELIST] = \ + dict((vcluster.MakeVirtualPath(key), value) + for (key, value) in fingerprints.items()) if constants.NV_NODELIST in what: - result[constants.NV_NODELIST] = tmp = {} - random.shuffle(what[constants.NV_NODELIST]) - for node in what[constants.NV_NODELIST]: + (nodes, bynode) = what[constants.NV_NODELIST] + + # Add nodes from other groups (different for each node) + try: + nodes.extend(bynode[my_name]) + except KeyError: + pass + + # Use a random order + random.shuffle(nodes) + + # Try to contact all nodes + val = {} + for node in nodes: success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node) if not success: - tmp[node] = message + val[node] = message + + result[constants.NV_NODELIST] = val if constants.NV_NODENETTEST in what: result[constants.NV_NODENETTEST] = tmp = {} @@ -514,10 +749,10 @@ def VerifyNode(what, cluster_name): else: for name, pip, sip in what[constants.NV_NODENETTEST]: fail = [] - if not utils.TcpPing(pip, port, source=my_pip): + if not netutils.TcpPing(pip, port, source=my_pip): fail.append("primary") if sip != pip: - if not utils.TcpPing(sip, port, source=my_sip): + if not netutils.TcpPing(sip, port, source=my_sip): fail.append("secondary") if fail: tmp[name] = ("failure using the %s interface(s)" % @@ -528,20 +763,41 @@ def VerifyNode(what, cluster_name): # rest of the function) master_name, master_ip = what[constants.NV_MASTERIP] if master_name == my_name: - source = constants.LOCALHOST_IP_ADDRESS + source = constants.IP4_ADDRESS_LOCALHOST else: source = None - result[constants.NV_MASTERIP] = utils.TcpPing(master_ip, port, - source=source) + result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port, + source=source) - if constants.NV_LVLIST in what: + if constants.NV_USERSCRIPTS in what: + result[constants.NV_USERSCRIPTS] = \ + [script for script in what[constants.NV_USERSCRIPTS] + if not utils.IsExecutable(script)] + + if constants.NV_OOB_PATHS in what: + result[constants.NV_OOB_PATHS] = tmp = [] + for path in what[constants.NV_OOB_PATHS]: + try: + st = os.stat(path) + except OSError, err: + tmp.append("error stating out of band helper: %s" % err) + else: + if stat.S_ISREG(st.st_mode): + if stat.S_IMODE(st.st_mode) & stat.S_IXUSR: + tmp.append(None) + else: + tmp.append("out of band helper %s is not executable" % path) + else: + tmp.append("out of band helper %s is not a file" % path) + + if constants.NV_LVLIST in what and vm_capable: try: - val = 
GetVolumeList(what[constants.NV_LVLIST]) + val = GetVolumeList(utils.ListVolumeGroups().keys()) except RPCFail, err: val = str(err) result[constants.NV_LVLIST] = val - if constants.NV_INSTANCELIST in what: + if constants.NV_INSTANCELIST in what and vm_capable: # GetInstanceList can fail try: val = GetInstanceList(what[constants.NV_INSTANCELIST]) @@ -549,23 +805,30 @@ def VerifyNode(what, cluster_name): val = str(err) result[constants.NV_INSTANCELIST] = val - if constants.NV_VGLIST in what: + if constants.NV_VGLIST in what and vm_capable: result[constants.NV_VGLIST] = utils.ListVolumeGroups() - if constants.NV_PVLIST in what: - result[constants.NV_PVLIST] = \ - bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST], - filter_allocatable=False) + if constants.NV_PVLIST in what and vm_capable: + check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what + val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST], + filter_allocatable=False, + include_lvs=check_exclusive_pvs) + if check_exclusive_pvs: + result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val) + for pvi in val: + # Avoid sending useless data on the wire + pvi.lv_list = [] + result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val) if constants.NV_VERSION in what: result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION, constants.RELEASE_VERSION) - if constants.NV_HVINFO in what: + if constants.NV_HVINFO in what and vm_capable: hyper = hypervisor.GetHypervisor(what[constants.NV_HVINFO]) result[constants.NV_HVINFO] = hyper.GetNodeInfo() - if constants.NV_DRBDLIST in what: + if constants.NV_DRBDLIST in what and vm_capable: try: used_minors = bdev.DRBD8.GetUsedDevs().keys() except errors.BlockDeviceError, err: @@ -573,6 +836,16 @@ def VerifyNode(what, cluster_name): used_minors = str(err) result[constants.NV_DRBDLIST] = used_minors + if constants.NV_DRBDHELPER in what and vm_capable: + status = True + try: + payload = bdev.BaseDRBD.GetUsermodeHelper() + except errors.BlockDeviceError, err: + logging.error("Can't get DRBD usermode helper: %s", str(err)) + status = False + payload = str(err) + result[constants.NV_DRBDHELPER] = (status, payload) + if constants.NV_NODESETUP in what: result[constants.NV_NODESETUP] = tmpr = [] if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"): @@ -588,49 +861,101 @@ def VerifyNode(what, cluster_name): if constants.NV_TIME in what: result[constants.NV_TIME] = utils.SplitTime(time.time()) + if constants.NV_OSLIST in what and vm_capable: + result[constants.NV_OSLIST] = DiagnoseOS() + + if constants.NV_BRIDGES in what and vm_capable: + result[constants.NV_BRIDGES] = [bridge + for bridge in what[constants.NV_BRIDGES] + if not utils.BridgeExists(bridge)] + + if what.get(constants.NV_FILE_STORAGE_PATHS) == my_name: + result[constants.NV_FILE_STORAGE_PATHS] = \ + bdev.ComputeWrongFileStoragePaths() + return result -def GetVolumeList(vg_name): +def GetBlockDevSizes(devices): + """Return the size of the given block devices + + @type devices: list + @param devices: list of block device nodes to query + @rtype: dict + @return: + dictionary of all block devices under /dev (key). The value is their + size in MiB. 
+ + {'/dev/disk/by-uuid/123456-12321231-312312-312': 124} + + """ + DEV_PREFIX = "/dev/" + blockdevs = {} + + for devpath in devices: + if not utils.IsBelowDir(DEV_PREFIX, devpath): + continue + + try: + st = os.stat(devpath) + except EnvironmentError, err: + logging.warning("Error stat()'ing device %s: %s", devpath, str(err)) + continue + + if stat.S_ISBLK(st.st_mode): + result = utils.RunCmd(["blockdev", "--getsize64", devpath]) + if result.failed: + # We don't want to fail, just do not list this device as available + logging.warning("Cannot get size for block device %s", devpath) + continue + + size = int(result.stdout) / (1024 * 1024) + blockdevs[devpath] = size + return blockdevs + + +def GetVolumeList(vg_names): """Compute list of logical volumes and their size. - @type vg_name: str - @param vg_name: the volume group whose LVs we should list + @type vg_names: list + @param vg_names: the volume groups whose LVs we should list, or + empty for all volume groups @rtype: dict @return: dictionary of all partions (key) with value being a tuple of their size (in MiB), inactive and online status:: - {'test1': ('20.06', True, True)} + {'xenvg/test1': ('20.06', True, True)} in case of errors, a string is returned with the error details. """ lvs = {} - sep = '|' + sep = "|" + if not vg_names: + vg_names = [] result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", "--separator=%s" % sep, - "-olv_name,lv_size,lv_attr", vg_name]) + "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names) if result.failed: _Fail("Failed to list logical volumes, lvs output: %s", result.output) - valid_line_re = re.compile("^ *([^|]+)\|([0-9.]+)\|([^|]{6})\|?$") for line in result.stdout.splitlines(): line = line.strip() - match = valid_line_re.match(line) + match = _LVSLINE_REGEX.match(line) if not match: logging.error("Invalid line returned from lvs output: '%s'", line) continue - name, size, attr = match.groups() - inactive = attr[4] == '-' - online = attr[5] == 'o' - virtual = attr[0] == 'v' + vg_name, name, size, attr = match.groups() + inactive = attr[4] == "-" + online = attr[5] == "o" + virtual = attr[0] == "v" if virtual: # we don't want to report such volumes as existing, since they # don't really hold data continue - lvs[name] = (size, inactive, online) + lvs[vg_name + "/" + name] = (size, inactive, online) return lvs @@ -673,20 +998,20 @@ def NodeVolumes(): result.output) def parse_dev(dev): - return dev.split('(')[0] + return dev.split("(")[0] def handle_dev(dev): return [parse_dev(x) for x in dev.split(",")] def map_line(line): line = [v.strip() for v in line] - return [{'name': line[0], 'size': line[1], - 'dev': dev, 'vg': line[3]} for dev in handle_dev(line[2])] + return [{"name": line[0], "size": line[1], + "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])] all_devs = [] for line in result.stdout.splitlines(): - if line.count('|') >= 3: - all_devs.extend(map_line(line.split('|'))) + if line.count("|") >= 3: + all_devs.extend(map_line(line.split("|"))) else: logging.warning("Strange line in the output from lvs: '%s'", line) return all_devs @@ -745,15 +1070,17 @@ def GetInstanceInfo(instance, hname): - memory: memory size of instance (int) - state: xen state of instance (string) - time: cpu time of instance (float) + - vcpus: the number of vcpus (int) """ output = {} iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance) if iinfo is not None: - output['memory'] = iinfo[2] - output['state'] = iinfo[4] - output['time'] = iinfo[5] + output["memory"] = iinfo[2] + output["vcpus"] 
= iinfo[3] + output["state"] = iinfo[4] + output["time"] = iinfo[5] return output @@ -778,7 +1105,8 @@ def GetInstanceMigratable(instance): for idx in range(len(instance.disks)): link_name = _GetBlockDevSymlinkPath(iname, idx) if not os.path.islink(link_name): - _Fail("Instance %s was not restarted since ganeti 1.2.5", iname) + logging.warning("Instance %s is missing symlink %s for disk %d", + iname, link_name, idx) def GetAllInstancesInfo(hypervisor_list): @@ -806,16 +1134,16 @@ def GetAllInstancesInfo(hypervisor_list): if iinfo: for name, _, memory, vcpus, state, times in iinfo: value = { - 'memory': memory, - 'vcpus': vcpus, - 'state': state, - 'time': times, + "memory": memory, + "vcpus": vcpus, + "state": state, + "time": times, } if name in output: # we only check static parameters, like memory and vcpus, # and not state and time which can change between the # invocations of the different hypervisors - for key in 'memory', 'vcpus': + for key in "memory", "vcpus": if value[key] != output[name][key]: _Fail("Instance %s is running twice" " with different parameters", name) @@ -824,7 +1152,7 @@ def GetAllInstancesInfo(hypervisor_list): return output -def _InstanceLogName(kind, os_name, instance): +def _InstanceLogName(kind, os_name, instance, component): """Compute the OS log filename for a given instance and operation. The instance name and os name are passed in as strings since not all @@ -836,11 +1164,20 @@ def _InstanceLogName(kind, os_name, instance): @param os_name: the os name @type instance: string @param instance: the name of the instance being imported/added/etc. + @type component: string or None + @param component: the name of the component of the instance being + transferred """ - base = ("%s-%s-%s-%s.log" % - (kind, os_name, instance, utils.TimestampForFilename())) - return utils.PathJoin(constants.LOG_OS_DIR, base) + # TODO: Use tempfile.mkstemp to create unique filename + if component: + assert "/" not in component + c_msg = "-%s" % component + else: + c_msg = "" + base = ("%s-%s-%s%s-%s.log" % + (kind, os_name, instance, c_msg, utils.TimestampForFilename())) + return utils.PathJoin(pathutils.LOG_OS_DIR, base) def InstanceOsAdd(instance, reinstall, debug): @@ -859,12 +1196,12 @@ def InstanceOsAdd(instance, reinstall, debug): create_env = OSEnvironment(instance, inst_os, debug) if reinstall: - create_env['INSTANCE_REINSTALL'] = "1" + create_env["INSTANCE_REINSTALL"] = "1" - logfile = _InstanceLogName("add", instance.os, instance.name) + logfile = _InstanceLogName("add", instance.os, instance.name, None) result = utils.RunCmd([inst_os.create_script], env=create_env, - cwd=inst_os.path, output=logfile,) + cwd=inst_os.path, output=logfile, reset_env=True) if result.failed: logging.error("os create command '%s' returned error: %s, logfile: %s," " output: %s", result.cmd, result.fail_reason, logfile, @@ -891,13 +1228,13 @@ def RunRenameInstance(instance, old_name, debug): inst_os = OSFromDisk(instance.os) rename_env = OSEnvironment(instance, inst_os, debug) - rename_env['OLD_INSTANCE_NAME'] = old_name + rename_env["OLD_INSTANCE_NAME"] = old_name logfile = _InstanceLogName("rename", instance.os, - "%s-%s" % (old_name, instance.name)) + "%s-%s" % (old_name, instance.name), None) result = utils.RunCmd([inst_os.rename_script], env=rename_env, - cwd=inst_os.path, output=logfile) + cwd=inst_os.path, output=logfile, reset_env=True) if result.failed: logging.error("os create command '%s' returned error: %s output: %s", @@ -908,49 +1245,16 @@ def RunRenameInstance(instance, old_name, 
debug): " log file:\n%s", result.fail_reason, "\n".join(lines), log=False) -def _GetVGInfo(vg_name): - """Get information about the volume group. - - @type vg_name: str - @param vg_name: the volume group which we query - @rtype: dict - @return: - A dictionary with the following keys: - - C{vg_size} is the total size of the volume group in MiB - - C{vg_free} is the free size of the volume group in MiB - - C{pv_count} are the number of physical disks in that VG - - If an error occurs during gathering of data, we return the same dict - with keys all set to None. +def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None): + """Returns symlink path for block device. """ - retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"]) - - retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings", - "--nosuffix", "--units=m", "--separator=:", vg_name]) - - if retval.failed: - logging.error("volume group %s not present", vg_name) - return retdic - valarr = retval.stdout.strip().rstrip(':').split(':') - if len(valarr) == 3: - try: - retdic = { - "vg_size": int(round(float(valarr[0]), 0)), - "vg_free": int(round(float(valarr[1]), 0)), - "pv_count": int(valarr[2]), - } - except (TypeError, ValueError), err: - logging.exception("Fail to parse vgs output: %s", err) - else: - logging.error("vgs output has the wrong number of fields (expected" - " three): %s", str(valarr)) - return retdic + if _dir is None: + _dir = pathutils.DISK_LINKS_DIR - -def _GetBlockDevSymlinkPath(instance_name, idx): - return utils.PathJoin(constants.DISK_LINKS_DIR, - "%s:%d" % (instance_name, idx)) + return utils.PathJoin(_dir, + ("%s%s%s" % + (instance_name, constants.DISK_SEPARATOR, idx))) def _SymlinkBlockDev(instance_name, device_path, idx): @@ -1024,11 +1328,17 @@ def _GatherAndLinkBlockDevs(instance): return block_devices -def StartInstance(instance): +def StartInstance(instance, startup_paused, reason, store_reason=True): """Start an instance. @type instance: L{objects.Instance} @param instance: the instance object + @type startup_paused: bool + @param instance: pause instance at startup? + @type reason: list of reasons + @param reason: the reason trail for this startup + @type store_reason: boolean + @param store_reason: whether to store the shutdown reason trail on file @rtype: None """ @@ -1041,7 +1351,9 @@ def StartInstance(instance): try: block_devices = _GatherAndLinkBlockDevs(instance) hyper = hypervisor.GetHypervisor(instance.hypervisor) - hyper.StartInstance(instance, block_devices) + hyper.StartInstance(instance, block_devices, startup_paused) + if store_reason: + _StoreInstReasonTrail(instance.name, reason) except errors.BlockDeviceError, err: _Fail("Block device error: %s", err, exc=True) except errors.HypervisorError, err: @@ -1049,7 +1361,7 @@ def StartInstance(instance): _Fail("Hypervisor error: %s", err, exc=True) -def InstanceShutdown(instance, timeout): +def InstanceShutdown(instance, timeout, reason, store_reason=True): """Shut an instance down. @note: this functions uses polling with a hardcoded timeout. 
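The new optional _dir argument on _GetBlockDevSymlinkPath above is a small test hook: callers normally get links under pathutils.DISK_LINKS_DIR, but tests can point the function at a throwaway directory. A minimal sketch of that use (the instance name and temporary directory are made up):

    import tempfile

    tmpdir = tempfile.mkdtemp()
    # Yields "<tmpdir>/<instance><constants.DISK_SEPARATOR><index>"; with the
    # default _dir=None the link would live under pathutils.DISK_LINKS_DIR.
    link = _GetBlockDevSymlinkPath("inst1.example.com", 0, _dir=tmpdir)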
@@ -1058,6 +1370,10 @@ def InstanceShutdown(instance, timeout): @param instance: the instance object @type timeout: integer @param timeout: maximum timeout for soft shutdown + @type reason: list of reasons + @param reason: the reason trail for this shutdown + @type store_reason: boolean + @param store_reason: whether to store the shutdown reason trail on file @rtype: None """ @@ -1079,6 +1395,8 @@ def InstanceShutdown(instance, timeout): try: hyper.StopInstance(instance, retry=self.tried_once) + if store_reason: + _StoreInstReasonTrail(instance.name, reason) except errors.HypervisorError, err: if iname not in hyper.ListInstances(): # if the instance is no longer existing, consider this a @@ -1118,7 +1436,7 @@ def InstanceShutdown(instance, timeout): _RemoveBlockDevLinks(iname, instance.disks) -def InstanceReboot(instance, reboot_type, shutdown_timeout): +def InstanceReboot(instance, reboot_type, shutdown_timeout, reason): """Reboot an instance. @type instance: L{objects.Instance} @@ -1136,6 +1454,8 @@ def InstanceReboot(instance, reboot_type, shutdown_timeout): instance (instead of a call_instance_reboot RPC) @type shutdown_timeout: integer @param shutdown_timeout: maximum timeout for soft shutdown + @type reason: list of reasons + @param reason: the reason trail for this reboot @rtype: None """ @@ -1152,14 +1472,37 @@ def InstanceReboot(instance, reboot_type, shutdown_timeout): _Fail("Failed to soft reboot instance %s: %s", instance.name, err) elif reboot_type == constants.INSTANCE_REBOOT_HARD: try: - InstanceShutdown(instance, shutdown_timeout) - return StartInstance(instance) + InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False) + result = StartInstance(instance, False, reason, store_reason=False) + _StoreInstReasonTrail(instance.name, reason) + return result except errors.HypervisorError, err: _Fail("Failed to hard reboot instance %s: %s", instance.name, err) else: _Fail("Invalid reboot_type received: %s", reboot_type) +def InstanceBalloonMemory(instance, memory): + """Resize an instance's memory. + + @type instance: L{objects.Instance} + @param instance: the instance object + @type memory: int + @param memory: new memory amount in MB + @rtype: None + + """ + hyper = hypervisor.GetHypervisor(instance.hypervisor) + running = hyper.ListInstances() + if instance.name not in running: + logging.info("Instance %s is not running, cannot balloon", instance.name) + return + try: + hyper.BalloonInstanceMemory(instance, memory) + except errors.HypervisorError, err: + _Fail("Failed to balloon instance memory: %s", err, exc=True) + + def MigrationInfo(instance): """Gather information about an instance to be migrated. @@ -1186,14 +1529,25 @@ def AcceptInstance(instance, info, target): @param target: target host (usually ip), on this node """ + # TODO: why is this required only for DTS_EXT_MIRROR? 
+ if instance.disk_template in constants.DTS_EXT_MIRROR: + # Create the symlinks, as the disks are not active + # in any way + try: + _GatherAndLinkBlockDevs(instance) + except errors.BlockDeviceError, err: + _Fail("Block device error: %s", err, exc=True) + hyper = hypervisor.GetHypervisor(instance.hypervisor) try: hyper.AcceptInstance(instance, info, target) except errors.HypervisorError, err: + if instance.disk_template in constants.DTS_EXT_MIRROR: + _RemoveBlockDevLinks(instance.name, instance.disks) _Fail("Failed to accept instance: %s", err, exc=True) -def FinalizeMigration(instance, info, success): +def FinalizeMigrationDst(instance, info, success): """Finalize any preparation to accept an instance. @type instance: L{objects.Instance} @@ -1206,9 +1560,9 @@ def FinalizeMigration(instance, info, success): """ hyper = hypervisor.GetHypervisor(instance.hypervisor) try: - hyper.FinalizeMigration(instance, info, success) + hyper.FinalizeMigrationDst(instance, info, success) except errors.HypervisorError, err: - _Fail("Failed to finalize migration: %s", err, exc=True) + _Fail("Failed to finalize migration on the target node: %s", err, exc=True) def MigrateInstance(instance, target, live): @@ -1221,10 +1575,7 @@ def MigrateInstance(instance, target, live): @type live: boolean @param live: whether the migration should be done live or not (the interpretation of this parameter is left to the hypervisor) - @rtype: tuple - @return: a tuple of (success, msg) where: - - succes is a boolean denoting the success/failure of the operation - - msg is a string with details in case of failure + @raise RPCFail: if migration fails for some reason """ hyper = hypervisor.GetHypervisor(instance.hypervisor) @@ -1235,7 +1586,47 @@ def MigrateInstance(instance, target, live): _Fail("Failed to migrate instance: %s", err, exc=True) -def BlockdevCreate(disk, size, owner, on_primary, info): +def FinalizeMigrationSource(instance, success, live): + """Finalize the instance migration on the source node. + + @type instance: L{objects.Instance} + @param instance: the instance definition of the migrated instance + @type success: bool + @param success: whether the migration succeeded or not + @type live: bool + @param live: whether the user requested a live migration or not + @raise RPCFail: If the execution fails for some reason + + """ + hyper = hypervisor.GetHypervisor(instance.hypervisor) + + try: + hyper.FinalizeMigrationSource(instance, success, live) + except Exception, err: # pylint: disable=W0703 + _Fail("Failed to finalize the migration on the source node: %s", err, + exc=True) + + +def GetMigrationStatus(instance): + """Get the migration status + + @type instance: L{objects.Instance} + @param instance: the instance that is being migrated + @rtype: L{objects.MigrationStatus} + @return: the status of the current migration (one of + L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional + progress info that can be retrieved from the hypervisor + @raise RPCFail: If the migration status cannot be retrieved + + """ + hyper = hypervisor.GetHypervisor(instance.hypervisor) + try: + return hyper.GetMigrationStatus(instance) + except Exception, err: # pylint: disable=W0703 + _Fail("Failed to get migration status: %s", err, exc=True) + + +def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor): """Creates a block device for an instance. 
@type disk: L{objects.Disk} @@ -1250,14 +1641,16 @@ def BlockdevCreate(disk, size, owner, on_primary, info): @type info: string @param info: string that will be sent to the physical device creation, used for example to set (LVM) tags on LVs + @type excl_stor: boolean + @param excl_stor: Whether exclusive_storage is active @return: the new unique_id of the device (this can sometime be computed only after creation), or None. On secondary nodes, it's not required to return anything. """ - # TODO: remove the obsolete 'size' argument - # pylint: disable-msg=W0613 + # TODO: remove the obsolete "size" argument + # pylint: disable=W0613 clist = [] if disk.children: for child in disk.children: @@ -1269,14 +1662,14 @@ def BlockdevCreate(disk, size, owner, on_primary, info): # we need the children open in case the device itself has to # be assembled try: - # pylint: disable-msg=E1103 + # pylint: disable=E1103 crdev.Open() except errors.BlockDeviceError, err: _Fail("Can't make child '%s' read-write: %s", child, err) clist.append(crdev) try: - device = bdev.Create(disk.dev_type, disk.physical_id, clist, disk.size) + device = bdev.Create(disk, clist, excl_stor) except errors.BlockDeviceError, err: _Fail("Can't create block device: %s", err) @@ -1285,7 +1678,6 @@ def BlockdevCreate(disk, size, owner, on_primary, info): device.Assemble() except errors.BlockDeviceError, err: _Fail("Can't assemble device after creation, unusual event: %s", err) - device.SetSyncSpeed(constants.SYNC_SPEED) if on_primary or disk.OpenOnSecondary(): try: device.Open(force=True) @@ -1299,6 +1691,96 @@ def BlockdevCreate(disk, size, owner, on_primary, info): return device.unique_id +def _WipeDevice(path, offset, size): + """This function actually wipes the device. + + @param path: The path to the device to wipe + @param offset: The offset in MiB in the file + @param size: The size in MiB to write + + """ + # Internal sizes are always in Mebibytes; if the following "dd" command + # should use a different block size the offset and size given to this + # function must be adjusted accordingly before being passed to "dd". + block_size = 1024 * 1024 + + cmd = [constants.DD_CMD, "if=/dev/zero", "seek=%d" % offset, + "bs=%s" % block_size, "oflag=direct", "of=%s" % path, + "count=%d" % size] + result = utils.RunCmd(cmd) + + if result.failed: + _Fail("Wipe command '%s' exited with error: %s; output: %s", result.cmd, + result.fail_reason, result.output) + + +def BlockdevWipe(disk, offset, size): + """Wipes a block device. + + @type disk: L{objects.Disk} + @param disk: the disk object we want to wipe + @type offset: int + @param offset: The offset in MiB in the file + @type size: int + @param size: The size in MiB to write + + """ + try: + rdev = _RecursiveFindBD(disk) + except errors.BlockDeviceError: + rdev = None + + if not rdev: + _Fail("Cannot execute wipe for device %s: device not found", disk.iv_name) + + # Do cross verify some of the parameters + if offset < 0: + _Fail("Negative offset") + if size < 0: + _Fail("Negative size") + if offset > rdev.size: + _Fail("Offset is bigger than device size") + if (offset + size) > rdev.size: + _Fail("The provided offset and size to wipe is bigger than device size") + + _WipeDevice(rdev.dev_path, offset, size) + + +def BlockdevPauseResumeSync(disks, pause): + """Pause or resume the sync of the block device. 
+ + @type disks: list of L{objects.Disk} + @param disks: the disks object we want to pause/resume + @type pause: bool + @param pause: Wheater to pause or resume + + """ + success = [] + for disk in disks: + try: + rdev = _RecursiveFindBD(disk) + except errors.BlockDeviceError: + rdev = None + + if not rdev: + success.append((False, ("Cannot change sync for device %s:" + " device not found" % disk.iv_name))) + continue + + result = rdev.PauseResumeSync(pause) + + if result: + success.append((result, None)) + else: + if pause: + msg = "Pause" + else: + msg = "Resume" + success.append((result, "%s for device %s failed" % (msg, disk.iv_name))) + + return success + + def BlockdevRemove(disk): """Remove a block device. @@ -1378,8 +1860,7 @@ def _RecursiveAssembleBD(disk, owner, as_primary): children.append(cdev) if as_primary or disk.AssembleOnSecondary(): - r_dev = bdev.Assemble(disk.dev_type, disk.physical_id, children, disk.size) - r_dev.SetSyncSpeed(constants.SYNC_SPEED) + r_dev = bdev.Assemble(disk, children) result = r_dev if as_primary or disk.OpenOnSecondary(): r_dev.Open() @@ -1391,7 +1872,7 @@ def _RecursiveAssembleBD(disk, owner, as_primary): return result -def BlockdevAssemble(disk, owner, as_primary): +def BlockdevAssemble(disk, owner, as_primary, idx): """Activate a block device for an instance. This is a wrapper over _RecursiveAssembleBD. @@ -1404,10 +1885,14 @@ def BlockdevAssemble(disk, owner, as_primary): try: result = _RecursiveAssembleBD(disk, owner, as_primary) if isinstance(result, bdev.BlockDev): - # pylint: disable-msg=E1103 + # pylint: disable=E1103 result = result.dev_path + if as_primary: + _SymlinkBlockDev(owner, result, idx) except errors.BlockDeviceError, err: _Fail("Error while assembling disk: %s", err, exc=True) + except OSError, err: + _Fail("Error while symlinking disk: %s", err, exc=True) return result @@ -1504,9 +1989,7 @@ def BlockdevGetmirrorstatus(disks): @type disks: list of L{objects.Disk} @param disks: the list of disks which we should query @rtype: disk - @return: - a list of (mirror_done, estimated_time) tuples, which - are the result of L{bdev.BlockDev.CombinedSyncStatus} + @return: List of L{objects.BlockDevStatus}, one for each disk @raise errors.BlockDeviceError: if any of the disks cannot be found @@ -1522,6 +2005,37 @@ def BlockdevGetmirrorstatus(disks): return stats +def BlockdevGetmirrorstatusMulti(disks): + """Get the mirroring status of a list of devices. + + @type disks: list of L{objects.Disk} + @param disks: the list of disks which we should query + @rtype: disk + @return: List of tuples, (bool, status), one for each disk; bool denotes + success/failure, status is L{objects.BlockDevStatus} on success, string + otherwise + + """ + result = [] + for disk in disks: + try: + rbd = _RecursiveFindBD(disk) + if rbd is None: + result.append((False, "Can't find device %s" % disk)) + continue + + status = rbd.CombinedSyncStatus() + except errors.BlockDeviceError, err: + logging.exception("Error while getting disk status") + result.append((False, str(err))) + else: + result.append((True, status)) + + assert len(disks) == len(result) + + return result + + def _RecursiveFindBD(disk): """Check if a device is activated. 
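BlockdevGetmirrorstatusMulti above reports one (success, payload) pair per disk instead of aborting on the first lookup error. A hedged consumer sketch, reusing the module's logging import (the disk list and log messages are illustrative):

    status_list = BlockdevGetmirrorstatusMulti(disks)
    for disk, (success, payload) in zip(disks, status_list):
      if success:
        # On success the payload is an objects.BlockDevStatus instance.
        logging.info("disk %s: sync_percent=%s",
                     disk.iv_name, payload.sync_percent)
      else:
        # On failure the payload is the error message string.
        logging.warning("disk %s: %s", disk.iv_name, payload)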
@@ -1539,7 +2053,7 @@ def _RecursiveFindBD(disk): for chdisk in disk.children: children.append(_RecursiveFindBD(chdisk)) - return bdev.FindDevice(disk.dev_type, disk.physical_id, children, disk.size) + return bdev.FindDevice(disk, children) def _OpenRealBD(disk): @@ -1638,11 +2152,11 @@ def BlockdevExport(disk, dest_node, dest_path, cluster_name): " oflag=dsync", dest_path) remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node, - constants.GANETI_RUNAS, + constants.SSH_LOGIN_USER, destcmd) # all commands have been checked, so we're safe to combine them - command = '|'.join([expcmd, utils.ShellQuoteArgs(remotecmd)]) + command = "|".join([expcmd, utils.ShellQuoteArgs(remotecmd)]) result = utils.RunCmd(["bash", "-c", command]) @@ -1663,10 +2177,10 @@ def UploadFile(file_name, data, mode, uid, gid, atime, mtime): @param data: the new contents of the file @type mode: int @param mode: the mode to give the file (can be None) - @type uid: int - @param uid: the owner of the file (can be -1 for default) - @type gid: int - @param gid: the group of the file (can be -1 for default) + @type uid: string + @param uid: the owner of the file + @type gid: string + @param gid: the group of the file @type atime: float @param atime: the atime to set on the file (can be None) @type mtime: float @@ -1674,6 +2188,8 @@ def UploadFile(file_name, data, mode, uid, gid, atime, mtime): @rtype: None """ + file_name = vcluster.LocalizeVirtualPath(file_name) + if not os.path.isabs(file_name): _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name) @@ -1683,35 +2199,37 @@ def UploadFile(file_name, data, mode, uid, gid, atime, mtime): raw_data = _Decompress(data) - utils.WriteFile(file_name, data=raw_data, mode=mode, uid=uid, gid=gid, - atime=atime, mtime=mtime) + if not (isinstance(uid, basestring) and isinstance(gid, basestring)): + _Fail("Invalid username/groupname type") + getents = runtime.GetEnts() + uid = getents.LookupUser(uid) + gid = getents.LookupGroup(gid) -def WriteSsconfFiles(values): - """Update all ssconf files. + utils.SafeWriteFile(file_name, None, + data=raw_data, mode=mode, uid=uid, gid=gid, + atime=atime, mtime=mtime) - Wrapper around the SimpleStore.WriteFiles. - """ - ssconf.SimpleStore().WriteFiles(values) +def RunOob(oob_program, command, node, timeout): + """Executes oob_program with given command on given node. + @param oob_program: The path to the executable oob_program + @param command: The command to invoke on oob_program + @param node: The node given as an argument to the program + @param timeout: Timeout after which we kill the oob program -def _ErrnoOrStr(err): - """Format an EnvironmentError exception. + @return: stdout + @raise RPCFail: If execution fails for some reason - If the L{err} argument has an errno attribute, it will be looked up - and converted into a textual C{E...} description. Otherwise the - string representation of the error will be returned. 
+ """ + result = utils.RunCmd([oob_program, command, node], timeout=timeout) - @type err: L{EnvironmentError} - @param err: the exception to format + if result.failed: + _Fail("'%s' failed with reason '%s'; output: %s", result.cmd, + result.fail_reason, result.output) - """ - if hasattr(err, 'errno'): - detail = errno.errorcode[err.errno] - else: - detail = str(err) - return detail + return result.stdout def _OSOndiskAPIVersion(os_dir): @@ -1733,7 +2251,7 @@ def _OSOndiskAPIVersion(os_dir): st = os.stat(api_file) except EnvironmentError, err: return False, ("Required file '%s' not found under path %s: %s" % - (constants.OS_API_FILE, os_dir, _ErrnoOrStr(err))) + (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err))) if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): return False, ("File '%s' in %s is not a regular file" % @@ -1743,7 +2261,7 @@ def _OSOndiskAPIVersion(os_dir): api_versions = utils.ReadFile(api_file).splitlines() except EnvironmentError, err: return False, ("Error while reading the API version file at %s: %s" % - (api_file, _ErrnoOrStr(err))) + (api_file, utils.ErrnoOrStr(err))) try: api_versions = [int(version.strip()) for version in api_versions] @@ -1760,19 +2278,22 @@ def DiagnoseOS(top_dirs=None): @type top_dirs: list @param top_dirs: the list of directories in which to search (if not given defaults to - L{constants.OS_SEARCH_PATH}) + L{pathutils.OS_SEARCH_PATH}) @rtype: list of L{objects.OS} - @return: a list of tuples (name, path, status, diagnose, variants) - for all (potential) OSes under all search paths, where: + @return: a list of tuples (name, path, status, diagnose, variants, + parameters, api_version) for all (potential) OSes under all + search paths, where: - name is the (potential) OS name - path is the full path to the OS - status True/False is the validity of the OS - diagnose is the error message for an invalid OS, otherwise empty - variants is a list of supported OS variants, if any + - parameters is a list of (name, help) parameters, if any + - api_version is a list of support OS API versions """ if top_dirs is None: - top_dirs = constants.OS_SEARCH_PATH + top_dirs = pathutils.OS_SEARCH_PATH result = [] for dir_name in top_dirs: @@ -1788,10 +2309,13 @@ def DiagnoseOS(top_dirs=None): if status: diagnose = "" variants = os_inst.supported_variants + parameters = os_inst.supported_parameters + api_versions = os_inst.api_versions else: diagnose = os_inst - variants = [] - result.append((name, os_path, status, diagnose, variants)) + variants = parameters = api_versions = [] + result.append((name, os_path, status, diagnose, variants, + parameters, api_versions)) return result @@ -1811,7 +2335,7 @@ def _TryOSFromDisk(name, base_dir=None): """ if base_dir is None: - os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir) + os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir) else: os_dir = utils.FindFile(name, [base_dir], os.path.isdir) @@ -1827,20 +2351,30 @@ def _TryOSFromDisk(name, base_dir=None): return False, ("API version mismatch for path '%s': found %s, want %s." 
% (os_dir, api_versions, constants.OS_API_VERSIONS)) - # OS Files dictionary, we will populate it with the absolute path names - os_files = dict.fromkeys(constants.OS_SCRIPTS) + # OS Files dictionary, we will populate it with the absolute path + # names; if the value is True, then it is a required file, otherwise + # an optional one + os_files = dict.fromkeys(constants.OS_SCRIPTS, True) if max(api_versions) >= constants.OS_API_V15: - os_files[constants.OS_VARIANTS_FILE] = '' + os_files[constants.OS_VARIANTS_FILE] = False + + if max(api_versions) >= constants.OS_API_V20: + os_files[constants.OS_PARAMETERS_FILE] = True + else: + del os_files[constants.OS_SCRIPT_VERIFY] - for filename in os_files: + for (filename, required) in os_files.items(): os_files[filename] = utils.PathJoin(os_dir, filename) try: st = os.stat(os_files[filename]) except EnvironmentError, err: + if err.errno == errno.ENOENT and not required: + del os_files[filename] + continue return False, ("File '%s' under path '%s' is missing (%s)" % - (filename, os_dir, _ErrnoOrStr(err))) + (filename, os_dir, utils.ErrnoOrStr(err))) if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): return False, ("File '%s' under path '%s' is not a regular file" % @@ -1851,23 +2385,37 @@ def _TryOSFromDisk(name, base_dir=None): return False, ("File '%s' under path '%s' is not executable" % (filename, os_dir)) - variants = None + variants = [] if constants.OS_VARIANTS_FILE in os_files: variants_file = os_files[constants.OS_VARIANTS_FILE] try: - variants = utils.ReadFile(variants_file).splitlines() + variants = \ + utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file)) + except EnvironmentError, err: + # we accept missing files, but not other errors + if err.errno != errno.ENOENT: + return False, ("Error while reading the OS variants file at %s: %s" % + (variants_file, utils.ErrnoOrStr(err))) + + parameters = [] + if constants.OS_PARAMETERS_FILE in os_files: + parameters_file = os_files[constants.OS_PARAMETERS_FILE] + try: + parameters = utils.ReadFile(parameters_file).splitlines() except EnvironmentError, err: - return False, ("Error while reading the OS variants file at %s: %s" % - (variants_file, _ErrnoOrStr(err))) - if not variants: - return False, ("No supported os variant found") + return False, ("Error while reading the OS parameters file at %s: %s" % + (parameters_file, utils.ErrnoOrStr(err))) + parameters = [v.split(None, 1) for v in parameters] os_obj = objects.OS(name=name, path=os_dir, create_script=os_files[constants.OS_SCRIPT_CREATE], export_script=os_files[constants.OS_SCRIPT_EXPORT], import_script=os_files[constants.OS_SCRIPT_IMPORT], rename_script=os_files[constants.OS_SCRIPT_RENAME], + verify_script=os_files.get(constants.OS_SCRIPT_VERIFY, + None), supported_variants=variants, + supported_parameters=parameters, api_versions=api_versions) return True, os_obj @@ -1890,7 +2438,7 @@ def OSFromDisk(name, base_dir=None): @raise RPCFail: if we don't find a valid OS """ - name_only = name.split("+", 1)[0] + name_only = objects.OS.GetName(name) status, payload = _TryOSFromDisk(name_only, base_dir) if not status: @@ -1899,13 +2447,15 @@ def OSFromDisk(name, base_dir=None): return payload -def OSEnvironment(instance, inst_os, debug=0): - """Calculate the environment for an os script. +def OSCoreEnv(os_name, inst_os, os_params, debug=0): + """Calculate the basic environment for an os script. 
- @type instance: L{objects.Instance} - @param instance: target instance for the os script run + @type os_name: str + @param os_name: full operating system name (including variant) @type inst_os: L{objects.OS} @param inst_os: operating system for which the environment is being built + @type os_params: dict + @param os_params: the OS parameters @type debug: integer @param debug: debug level (0 or 1, for OS Api 10) @rtype: dict @@ -1917,63 +2467,160 @@ def OSEnvironment(instance, inst_os, debug=0): result = {} api_version = \ max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions)) - result['OS_API_VERSION'] = '%d' % api_version - result['INSTANCE_NAME'] = instance.name - result['INSTANCE_OS'] = instance.os - result['HYPERVISOR'] = instance.hypervisor - result['DISK_COUNT'] = '%d' % len(instance.disks) - result['NIC_COUNT'] = '%d' % len(instance.nics) - result['DEBUG_LEVEL'] = '%d' % debug - if api_version >= constants.OS_API_V15: - try: - variant = instance.os.split('+', 1)[1] - except IndexError: + result["OS_API_VERSION"] = "%d" % api_version + result["OS_NAME"] = inst_os.name + result["DEBUG_LEVEL"] = "%d" % debug + + # OS variants + if api_version >= constants.OS_API_V15 and inst_os.supported_variants: + variant = objects.OS.GetVariant(os_name) + if not variant: variant = inst_os.supported_variants[0] - result['OS_VARIANT'] = variant - for idx, disk in enumerate(instance.disks): - real_disk = _OpenRealBD(disk) - result['DISK_%d_PATH' % idx] = real_disk.dev_path - result['DISK_%d_ACCESS' % idx] = disk.mode - if constants.HV_DISK_TYPE in instance.hvparams: - result['DISK_%d_FRONTEND_TYPE' % idx] = \ - instance.hvparams[constants.HV_DISK_TYPE] - if disk.dev_type in constants.LDS_BLOCK: - result['DISK_%d_BACKEND_TYPE' % idx] = 'block' - elif disk.dev_type == constants.LD_FILE: - result['DISK_%d_BACKEND_TYPE' % idx] = \ - 'file:%s' % disk.physical_id[0] - for idx, nic in enumerate(instance.nics): - result['NIC_%d_MAC' % idx] = nic.mac - if nic.ip: - result['NIC_%d_IP' % idx] = nic.ip - result['NIC_%d_MODE' % idx] = nic.nicparams[constants.NIC_MODE] - if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: - result['NIC_%d_BRIDGE' % idx] = nic.nicparams[constants.NIC_LINK] - if nic.nicparams[constants.NIC_LINK]: - result['NIC_%d_LINK' % idx] = nic.nicparams[constants.NIC_LINK] - if constants.HV_NIC_TYPE in instance.hvparams: - result['NIC_%d_FRONTEND_TYPE' % idx] = \ - instance.hvparams[constants.HV_NIC_TYPE] + else: + variant = "" + result["OS_VARIANT"] = variant - for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]: - for key, value in source.items(): - result["INSTANCE_%s_%s" % (kind, key)] = str(value) + # OS params + for pname, pvalue in os_params.items(): + result["OSP_%s" % pname.upper()] = pvalue + + # Set a default path otherwise programs called by OS scripts (or + # even hooks called from OS scripts) might break, and we don't want + # to have each script require setting a PATH variable + result["PATH"] = constants.HOOKS_PATH return result -def BlockdevGrow(disk, amount): - """Grow a stack of block devices. +def OSEnvironment(instance, inst_os, debug=0): + """Calculate the environment for an os script. 
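# --- Editor's note: illustrative sketch, not part of this patch.  This is the
# --- core of what OSCoreEnv computes, with plain values standing in for the
# --- Ganeti objects; the PATH value is a typical default standing in for
# --- constants.HOOKS_PATH.
def build_core_os_env(base_name, variant, os_params, api_version, debug=0):
    env = {
        "OS_API_VERSION": "%d" % api_version,
        "OS_NAME": base_name,
        "OS_VARIANT": variant or "",
        "DEBUG_LEVEL": "%d" % debug,
        # Default PATH so programs called by OS scripts (or their hooks) work.
        "PATH": "/sbin:/bin:/usr/sbin:/usr/bin",
    }
    # Every OS parameter is exported with an OSP_ prefix and upper-cased name.
    for pname, pvalue in os_params.items():
        env["OSP_%s" % pname.upper()] = pvalue
    return env

# Example: build_core_os_env("debootstrap", "default", {"dhcp": "yes"}, 20)
# yields {"OSP_DHCP": "yes", "OS_VARIANT": "default", "OS_API_VERSION": "20", ...}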
+ + @type instance: L{objects.Instance} + @param instance: target instance for the os script run + @type inst_os: L{objects.OS} + @param inst_os: operating system for which the environment is being built + @type debug: integer + @param debug: debug level (0 or 1, for OS Api 10) + @rtype: dict + @return: dict of environment variables + @raise errors.BlockDeviceError: if the block device + cannot be found + + """ + result = OSCoreEnv(instance.os, inst_os, instance.osparams, debug=debug) + + for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]: + result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr)) + + result["HYPERVISOR"] = instance.hypervisor + result["DISK_COUNT"] = "%d" % len(instance.disks) + result["NIC_COUNT"] = "%d" % len(instance.nics) + result["INSTANCE_SECONDARY_NODES"] = \ + ("%s" % " ".join(instance.secondary_nodes)) + + # Disks + for idx, disk in enumerate(instance.disks): + real_disk = _OpenRealBD(disk) + result["DISK_%d_PATH" % idx] = real_disk.dev_path + result["DISK_%d_ACCESS" % idx] = disk.mode + if constants.HV_DISK_TYPE in instance.hvparams: + result["DISK_%d_FRONTEND_TYPE" % idx] = \ + instance.hvparams[constants.HV_DISK_TYPE] + if disk.dev_type in constants.LDS_BLOCK: + result["DISK_%d_BACKEND_TYPE" % idx] = "block" + elif disk.dev_type == constants.LD_FILE: + result["DISK_%d_BACKEND_TYPE" % idx] = \ + "file:%s" % disk.physical_id[0] + + # NICs + for idx, nic in enumerate(instance.nics): + result["NIC_%d_MAC" % idx] = nic.mac + if nic.ip: + result["NIC_%d_IP" % idx] = nic.ip + result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE] + if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: + result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK] + if nic.nicparams[constants.NIC_LINK]: + result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK] + if nic.netinfo: + nobj = objects.Network.FromDict(nic.netinfo) + result.update(nobj.HooksDict("NIC_%d_" % idx)) + if constants.HV_NIC_TYPE in instance.hvparams: + result["NIC_%d_FRONTEND_TYPE" % idx] = \ + instance.hvparams[constants.HV_NIC_TYPE] + + # HV/BE params + for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]: + for key, value in source.items(): + result["INSTANCE_%s_%s" % (kind, key)] = str(value) + + return result + + +def DiagnoseExtStorage(top_dirs=None): + """Compute the validity for all ExtStorage Providers. 
+ + @type top_dirs: list + @param top_dirs: the list of directories in which to + search (if not given defaults to + L{pathutils.ES_SEARCH_PATH}) + @rtype: list of L{objects.ExtStorage} + @return: a list of tuples (name, path, status, diagnose, parameters) + for all (potential) ExtStorage Providers under all + search paths, where: + - name is the (potential) ExtStorage Provider + - path is the full path to the ExtStorage Provider + - status True/False is the validity of the ExtStorage Provider + - diagnose is the error message for an invalid ExtStorage Provider, + otherwise empty + - parameters is a list of (name, help) parameters, if any + + """ + if top_dirs is None: + top_dirs = pathutils.ES_SEARCH_PATH + + result = [] + for dir_name in top_dirs: + if os.path.isdir(dir_name): + try: + f_names = utils.ListVisibleFiles(dir_name) + except EnvironmentError, err: + logging.exception("Can't list the ExtStorage directory %s: %s", + dir_name, err) + break + for name in f_names: + es_path = utils.PathJoin(dir_name, name) + status, es_inst = bdev.ExtStorageFromDisk(name, base_dir=dir_name) + if status: + diagnose = "" + parameters = es_inst.supported_parameters + else: + diagnose = es_inst + parameters = [] + result.append((name, es_path, status, diagnose, parameters)) + + return result + + +def BlockdevGrow(disk, amount, dryrun, backingstore): + """Grow a stack of block devices. This function is called recursively, with the childrens being the first ones to resize. @type disk: L{objects.Disk} @param disk: the disk to be grown + @type amount: integer + @param amount: the amount (in mebibytes) to grow with + @type dryrun: boolean + @param dryrun: whether to execute the operation in simulation mode + only, without actually increasing the size + @param backingstore: whether to execute the operation on backing storage + only, or on "logical" storage only; e.g. DRBD is logical storage, + whereas LVM, file, RBD are backing storage @rtype: (status, result) - @return: a tuple with the status of the operation - (True/False), and the errors message if status - is False + @return: a tuple with the status of the operation (True/False), and + the errors message if status is False """ r_dev = _RecursiveFindBD(disk) @@ -1981,7 +2628,7 @@ def BlockdevGrow(disk, amount): _Fail("Cannot find block device %s", disk) try: - r_dev.Grow(amount) + r_dev.Grow(amount, dryrun, backingstore) except errors.BlockDeviceError, err: _Fail("Failed to grow block device: %s", err, exc=True) @@ -1995,7 +2642,7 @@ def BlockdevSnapshot(disk): @type disk: L{objects.Disk} @param disk: the disk to be snapshotted @rtype: string - @return: snapshot disk path + @return: snapshot disk ID as (vg, lv) """ if disk.dev_type == constants.LD_DRBD8: @@ -2016,62 +2663,30 @@ def BlockdevSnapshot(disk): disk.unique_id, disk.dev_type) -def ExportSnapshot(disk, dest_node, instance, cluster_name, idx, debug): - """Export a block device snapshot to a remote node. +def BlockdevSetInfo(disk, info): + """Sets 'metadata' information on block devices. + + This function sets 'info' metadata on block devices. Initial + information is set at device creation; this function should be used + for example after renames. 
@type disk: L{objects.Disk} - @param disk: the description of the disk to export - @type dest_node: str - @param dest_node: the destination node to export to - @type instance: L{objects.Instance} - @param instance: the instance object to whom the disk belongs - @type cluster_name: str - @param cluster_name: the cluster name, needed for SSH hostalias - @type idx: int - @param idx: the index of the disk in the instance's disk list, - used to export to the OS scripts environment - @type debug: integer - @param debug: debug level, passed to the OS scripts - @rtype: None + @param disk: the disk to be grown + @type info: string + @param info: new 'info' metadata + @rtype: (status, result) + @return: a tuple with the status of the operation (True/False), and + the errors message if status is False """ - inst_os = OSFromDisk(instance.os) - export_env = OSEnvironment(instance, inst_os, debug) - - export_script = inst_os.export_script - - logfile = _InstanceLogName("export", inst_os.name, instance.name) - - real_disk = _OpenRealBD(disk) - - export_env['EXPORT_DEVICE'] = real_disk.dev_path - export_env['EXPORT_INDEX'] = str(idx) - - destdir = utils.PathJoin(constants.EXPORT_DIR, instance.name + ".new") - destfile = disk.physical_id[1] - - # the target command is built out of three individual commands, - # which are joined by pipes; we check each individual command for - # valid parameters - expcmd = utils.BuildShellCmd("set -e; set -o pipefail; cd %s; %s 2>%s", - inst_os.path, export_script, logfile) - - comprcmd = "gzip" - - destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s", - destdir, utils.PathJoin(destdir, destfile)) - remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node, - constants.GANETI_RUNAS, - destcmd) - - # all commands have been checked, so we're safe to combine them - command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)]) - - result = utils.RunCmd(["bash", "-c", command], env=export_env) + r_dev = _RecursiveFindBD(disk) + if r_dev is None: + _Fail("Cannot find block device %s", disk) - if result.failed: - _Fail("OS snapshot export command '%s' returned error: %s" - " output: %s", command, result.fail_reason, result.output) + try: + r_dev.SetInfo(info) + except errors.BlockDeviceError, err: + _Fail("Failed to set information on block device: %s", err, exc=True) def FinalizeExport(instance, snap_disks): @@ -2087,51 +2702,59 @@ def FinalizeExport(instance, snap_disks): @rtype: None """ - destdir = utils.PathJoin(constants.EXPORT_DIR, instance.name + ".new") - finaldestdir = utils.PathJoin(constants.EXPORT_DIR, instance.name) + destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new") + finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name) config = objects.SerializableConfigParser() config.add_section(constants.INISECT_EXP) - config.set(constants.INISECT_EXP, 'version', '0') - config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time())) - config.set(constants.INISECT_EXP, 'source', instance.primary_node) - config.set(constants.INISECT_EXP, 'os', instance.os) - config.set(constants.INISECT_EXP, 'compression', 'gzip') + config.set(constants.INISECT_EXP, "version", "0") + config.set(constants.INISECT_EXP, "timestamp", "%d" % int(time.time())) + config.set(constants.INISECT_EXP, "source", instance.primary_node) + config.set(constants.INISECT_EXP, "os", instance.os) + config.set(constants.INISECT_EXP, "compression", "none") config.add_section(constants.INISECT_INS) - config.set(constants.INISECT_INS, 'name', instance.name) - 
config.set(constants.INISECT_INS, 'memory', '%d' % - instance.beparams[constants.BE_MEMORY]) - config.set(constants.INISECT_INS, 'vcpus', '%d' % + config.set(constants.INISECT_INS, "name", instance.name) + config.set(constants.INISECT_INS, "maxmem", "%d" % + instance.beparams[constants.BE_MAXMEM]) + config.set(constants.INISECT_INS, "minmem", "%d" % + instance.beparams[constants.BE_MINMEM]) + # "memory" is deprecated, but useful for exporting to old ganeti versions + config.set(constants.INISECT_INS, "memory", "%d" % + instance.beparams[constants.BE_MAXMEM]) + config.set(constants.INISECT_INS, "vcpus", "%d" % instance.beparams[constants.BE_VCPUS]) - config.set(constants.INISECT_INS, 'disk_template', instance.disk_template) - config.set(constants.INISECT_INS, 'hypervisor', instance.hypervisor) + config.set(constants.INISECT_INS, "disk_template", instance.disk_template) + config.set(constants.INISECT_INS, "hypervisor", instance.hypervisor) + config.set(constants.INISECT_INS, "tags", " ".join(instance.GetTags())) nic_total = 0 for nic_count, nic in enumerate(instance.nics): nic_total += 1 - config.set(constants.INISECT_INS, 'nic%d_mac' % - nic_count, '%s' % nic.mac) - config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip) + config.set(constants.INISECT_INS, "nic%d_mac" % + nic_count, "%s" % nic.mac) + config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip) + config.set(constants.INISECT_INS, "nic%d_network" % nic_count, + "%s" % nic.network) for param in constants.NICS_PARAMETER_TYPES: - config.set(constants.INISECT_INS, 'nic%d_%s' % (nic_count, param), - '%s' % nic.nicparams.get(param, None)) + config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param), + "%s" % nic.nicparams.get(param, None)) # TODO: redundant: on load can read nics until it doesn't exist - config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_total) + config.set(constants.INISECT_INS, "nic_count", "%d" % nic_total) disk_total = 0 for disk_count, disk in enumerate(snap_disks): if disk: disk_total += 1 - config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count, - ('%s' % disk.iv_name)) - config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count, - ('%s' % disk.physical_id[1])) - config.set(constants.INISECT_INS, 'disk%d_size' % disk_count, - ('%d' % disk.size)) + config.set(constants.INISECT_INS, "disk%d_ivname" % disk_count, + ("%s" % disk.iv_name)) + config.set(constants.INISECT_INS, "disk%d_dump" % disk_count, + ("%s" % disk.physical_id[1])) + config.set(constants.INISECT_INS, "disk%d_size" % disk_count, + ("%d" % disk.size)) - config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_total) + config.set(constants.INISECT_INS, "disk_count", "%d" % disk_total) # New-style hypervisor/backend parameters @@ -2144,6 +2767,10 @@ def FinalizeExport(instance, snap_disks): for name, value in instance.beparams.items(): config.set(constants.INISECT_BEP, name, str(value)) + config.add_section(constants.INISECT_OSP) + for name, value in instance.osparams.items(): + config.set(constants.INISECT_OSP, name, str(value)) + utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE), data=config.Dumps()) shutil.rmtree(finaldestdir, ignore_errors=True) @@ -2173,53 +2800,6 @@ def ExportInfo(dest): return config.Dumps() -def ImportOSIntoInstance(instance, src_node, src_images, cluster_name, debug): - """Import an os image into an instance. 
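# --- Editor's note: illustrative sketch, not part of this patch.  Shape of the
# --- export description FinalizeExport writes; the stdlib ConfigParser stands
# --- in for objects.SerializableConfigParser, and the literal section names
# --- are assumed to match constants.INISECT_EXP / constants.INISECT_INS.
import time
import ConfigParser  # "configparser" on Python 3

def build_export_config(name, primary_node, os_name, maxmem, minmem, vcpus):
    config = ConfigParser.RawConfigParser()
    config.add_section("export")
    config.set("export", "version", "0")
    config.set("export", "timestamp", "%d" % int(time.time()))
    config.set("export", "source", primary_node)
    config.set("export", "os", os_name)
    config.set("export", "compression", "none")
    config.add_section("instance")
    config.set("instance", "name", name)
    config.set("instance", "maxmem", "%d" % maxmem)
    config.set("instance", "minmem", "%d" % minmem)
    # "memory" is deprecated but kept so older Ganeti versions can import it.
    config.set("instance", "memory", "%d" % maxmem)
    config.set("instance", "vcpus", "%d" % vcpus)
    return config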
- - @type instance: L{objects.Instance} - @param instance: instance to import the disks into - @type src_node: string - @param src_node: source node for the disk images - @type src_images: list of string - @param src_images: absolute paths of the disk images - @type debug: integer - @param debug: debug level, passed to the OS scripts - @rtype: list of boolean - @return: each boolean represent the success of importing the n-th disk - - """ - inst_os = OSFromDisk(instance.os) - import_env = OSEnvironment(instance, inst_os, debug) - import_script = inst_os.import_script - - logfile = _InstanceLogName("import", instance.os, instance.name) - - comprcmd = "gunzip" - impcmd = utils.BuildShellCmd("(cd %s; %s >%s 2>&1)", inst_os.path, - import_script, logfile) - - final_result = [] - for idx, image in enumerate(src_images): - if image: - destcmd = utils.BuildShellCmd('cat %s', image) - remotecmd = _GetSshRunner(cluster_name).BuildCmd(src_node, - constants.GANETI_RUNAS, - destcmd) - command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd]) - import_env['IMPORT_DEVICE'] = import_env['DISK_%d_PATH' % idx] - import_env['IMPORT_INDEX'] = str(idx) - result = utils.RunCmd(command, env=import_env) - if result.failed: - logging.error("Disk import command '%s' returned error: %s" - " output: %s", command, result.fail_reason, - result.output) - final_result.append("error importing disk %d: %s, %s" % - (idx, result.fail_reason, result.output[-100])) - - if final_result: - _Fail("; ".join(final_result), log=False) - - def ListExports(): """Return a list of exports currently available on this machine. @@ -2227,8 +2807,8 @@ def ListExports(): @return: list of the exports """ - if os.path.isdir(constants.EXPORT_DIR): - return utils.ListVisibleFiles(constants.EXPORT_DIR) + if os.path.isdir(pathutils.EXPORT_DIR): + return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR)) else: _Fail("No exports directory") @@ -2241,7 +2821,7 @@ def RemoveExport(export): @rtype: None """ - target = utils.PathJoin(constants.EXPORT_DIR, export) + target = utils.PathJoin(pathutils.EXPORT_DIR, export) try: shutil.rmtree(target) @@ -2290,29 +2870,26 @@ def BlockdevRename(devlist): _Fail("; ".join(msgs)) -def _TransformFileStorageDir(file_storage_dir): +def _TransformFileStorageDir(fs_dir): """Checks whether given file_storage_dir is valid. - Checks wheter the given file_storage_dir is within the cluster-wide - default file_storage_dir stored in SimpleStore. Only paths under that - directory are allowed. + Checks wheter the given fs_dir is within the cluster-wide default + file_storage_dir or the shared_file_storage_dir, which are stored in + SimpleStore. Only paths under those directories are allowed. 
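# --- Editor's note: illustrative sketch, not part of this patch.  It shows why
# --- the os.path.commonprefix() checks were replaced: commonprefix() compares
# --- character by character, so "/srv/ganeti/queue-old" would pass a check
# --- against "/srv/ganeti/queue".  A containment test along the lines of
# --- utils.IsBelowDir (exact semantics assumed) avoids that:
import os.path

def is_below_dir(root, path):
    # Normalize both sides, then require the root plus a separator to be a
    # literal prefix of the candidate path.  The real helper also handles
    # corner cases such as root == "/" and trailing separators.
    root = os.path.normpath(root)
    path = os.path.normpath(path)
    return path.startswith(root + os.sep)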
- @type file_storage_dir: str - @param file_storage_dir: the path to check + @type fs_dir: str + @param fs_dir: the path to check @return: the normalized path if valid, None otherwise """ - if not constants.ENABLE_FILE_STORAGE: + if not (constants.ENABLE_FILE_STORAGE or + constants.ENABLE_SHARED_FILE_STORAGE): _Fail("File storage disabled at configure time") - cfg = _GetConfig() - file_storage_dir = os.path.normpath(file_storage_dir) - base_file_storage_dir = cfg.GetFileStorageDir() - if (os.path.commonprefix([file_storage_dir, base_file_storage_dir]) != - base_file_storage_dir): - _Fail("File storage directory '%s' is not under base file" - " storage directory '%s'", file_storage_dir, base_file_storage_dir) - return file_storage_dir + + bdev.CheckFileStoragePath(fs_dir) + + return os.path.normpath(fs_dir) def CreateFileStorageDir(file_storage_dir): @@ -2403,18 +2980,15 @@ def _EnsureJobQueueFile(file_name): @raises RPCFail: if the file is not valid """ - queue_dir = os.path.normpath(constants.QUEUE_DIR) - result = (os.path.commonprefix([queue_dir, file_name]) == queue_dir) - - if not result: + if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name): _Fail("Passed job queue file '%s' does not belong to" - " the queue directory '%s'", file_name, queue_dir) + " the queue directory '%s'", file_name, pathutils.QUEUE_DIR) def JobQueueUpdate(file_name, content): """Updates a file in the queue directory. - This is just a wrapper over L{utils.WriteFile}, with proper + This is just a wrapper over L{utils.io.WriteFile}, with proper checking. @type file_name: str @@ -2425,10 +2999,14 @@ def JobQueueUpdate(file_name, content): @return: the success of the operation """ + file_name = vcluster.LocalizeVirtualPath(file_name) + _EnsureJobQueueFile(file_name) + getents = runtime.GetEnts() # Write and replace the file atomically - utils.WriteFile(file_name, data=_Decompress(content)) + utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid, + gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS) def JobQueueRename(old, new): @@ -2444,28 +3022,16 @@ def JobQueueRename(old, new): @return: the success of the operation and payload """ + old = vcluster.LocalizeVirtualPath(old) + new = vcluster.LocalizeVirtualPath(new) + _EnsureJobQueueFile(old) _EnsureJobQueueFile(new) - utils.RenameFile(old, new, mkdir=True) - + getents = runtime.GetEnts() -def JobQueueSetDrainFlag(drain_flag): - """Set the drain flag for the queue. - - This will set or unset the queue drain flag. - - @type drain_flag: boolean - @param drain_flag: if True, will set the drain flag, otherwise reset it. - @rtype: truple - @return: always True, None - @warning: the function always returns True - - """ - if drain_flag: - utils.WriteFile(constants.JOB_QUEUE_DRAIN_FILE, data="", close=True) - else: - utils.RemoveFile(constants.JOB_QUEUE_DRAIN_FILE) + utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750, + dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid) def BlockdevClose(instance_name, disks): @@ -2522,6 +3088,70 @@ def ValidateHVParams(hvname, hvparams): _Fail(str(err), log=False) +def _CheckOSPList(os_obj, parameters): + """Check whether a list of parameters is supported by the OS. 
+ + @type os_obj: L{objects.OS} + @param os_obj: OS object to check + @type parameters: list + @param parameters: the list of parameters to check + + """ + supported = [v[0] for v in os_obj.supported_parameters] + delta = frozenset(parameters).difference(supported) + if delta: + _Fail("The following parameters are not supported" + " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta))) + + +def ValidateOS(required, osname, checks, osparams): + """Validate the given OS' parameters. + + @type required: boolean + @param required: whether absence of the OS should translate into + failure or not + @type osname: string + @param osname: the OS to be validated + @type checks: list + @param checks: list of the checks to run (currently only 'parameters') + @type osparams: dict + @param osparams: dictionary with OS parameters + @rtype: boolean + @return: True if the validation passed, or False if the OS was not + found and L{required} was false + + """ + if not constants.OS_VALIDATE_CALLS.issuperset(checks): + _Fail("Unknown checks required for OS %s: %s", osname, + set(checks).difference(constants.OS_VALIDATE_CALLS)) + + name_only = objects.OS.GetName(osname) + status, tbv = _TryOSFromDisk(name_only, None) + + if not status: + if required: + _Fail(tbv) + else: + return False + + if max(tbv.api_versions) < constants.OS_API_V20: + return True + + if constants.OS_VALIDATE_PARAMETERS in checks: + _CheckOSPList(tbv, osparams.keys()) + + validate_env = OSCoreEnv(osname, tbv, osparams) + result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env, + cwd=tbv.path, reset_env=True) + if result.failed: + logging.error("os validate command '%s' returned error: %s output: %s", + result.cmd, result.fail_reason, result.output) + _Fail("OS validation script failed (%s), output: %s", + result.fail_reason, result.output, log=False) + + return True + + def DemoteFromMC(): """Demotes the current node from master candidate role. @@ -2531,18 +3161,393 @@ def DemoteFromMC(): if master == myself: _Fail("ssconf status shows I'm the master node, will not demote") - result = utils.RunCmd([constants.DAEMON_UTIL, "check", constants.MASTERD]) + result = utils.RunCmd([pathutils.DAEMON_UTIL, "check", constants.MASTERD]) if not result.failed: _Fail("The master daemon is running, will not demote") try: - if os.path.isfile(constants.CLUSTER_CONF_FILE): - utils.CreateBackup(constants.CLUSTER_CONF_FILE) + if os.path.isfile(pathutils.CLUSTER_CONF_FILE): + utils.CreateBackup(pathutils.CLUSTER_CONF_FILE) except EnvironmentError, err: if err.errno != errno.ENOENT: _Fail("Error while backing up cluster file: %s", err, exc=True) - utils.RemoveFile(constants.CLUSTER_CONF_FILE) + utils.RemoveFile(pathutils.CLUSTER_CONF_FILE) + + +def _GetX509Filenames(cryptodir, name): + """Returns the full paths for the private key and certificate. + + """ + return (utils.PathJoin(cryptodir, name), + utils.PathJoin(cryptodir, name, _X509_KEY_FILE), + utils.PathJoin(cryptodir, name, _X509_CERT_FILE)) + + +def CreateX509Certificate(validity, cryptodir=pathutils.CRYPTO_KEYS_DIR): + """Creates a new X509 certificate for SSL/TLS. 
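# --- Editor's note: illustrative sketch, not part of this patch.  On-disk
# --- layout used by the X509 helpers -- one temporary directory per
# --- certificate holding a "key" and a "cert" file -- together with the
# --- clean-up-on-failure pattern CreateX509Certificate follows.  The PEM
# --- material is taken as input here because its generation is
# --- Ganeti-specific.
import os
import shutil
import tempfile

def create_cert_dir(cryptodir, key_pem, cert_pem):
    cert_dir = tempfile.mkdtemp(dir=cryptodir, prefix="x509-")
    try:
        for (fname, data) in (("key", key_pem), ("cert", cert_pem)):
            fd = os.open(os.path.join(cert_dir, fname),
                         os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0400)
            try:
                os.write(fd, data)
            finally:
                os.close(fd)
        # Only the directory name and the public certificate leave the node.
        return (os.path.basename(cert_dir), cert_pem)
    except Exception:
        # Never leave a half-built certificate directory behind.
        shutil.rmtree(cert_dir, ignore_errors=True)
        raise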
+ + @type validity: int + @param validity: Validity in seconds + @rtype: tuple; (string, string) + @return: Certificate name and public part + + """ + (key_pem, cert_pem) = \ + utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(), + min(validity, _MAX_SSL_CERT_VALIDITY)) + + cert_dir = tempfile.mkdtemp(dir=cryptodir, + prefix="x509-%s-" % utils.TimestampForFilename()) + try: + name = os.path.basename(cert_dir) + assert len(name) > 5 + + (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name) + + utils.WriteFile(key_file, mode=0400, data=key_pem) + utils.WriteFile(cert_file, mode=0400, data=cert_pem) + + # Never return private key as it shouldn't leave the node + return (name, cert_pem) + except Exception: + shutil.rmtree(cert_dir, ignore_errors=True) + raise + + +def RemoveX509Certificate(name, cryptodir=pathutils.CRYPTO_KEYS_DIR): + """Removes a X509 certificate. + + @type name: string + @param name: Certificate name + + """ + (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name) + + utils.RemoveFile(key_file) + utils.RemoveFile(cert_file) + + try: + os.rmdir(cert_dir) + except EnvironmentError, err: + _Fail("Cannot remove certificate directory '%s': %s", + cert_dir, err) + + +def _GetImportExportIoCommand(instance, mode, ieio, ieargs): + """Returns the command for the requested input/output. + + @type instance: L{objects.Instance} + @param instance: The instance object + @param mode: Import/export mode + @param ieio: Input/output type + @param ieargs: Input/output arguments + + """ + assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT) + + env = None + prefix = None + suffix = None + exp_size = None + + if ieio == constants.IEIO_FILE: + (filename, ) = ieargs + + if not utils.IsNormAbsPath(filename): + _Fail("Path '%s' is not normalized or absolute", filename) + + real_filename = os.path.realpath(filename) + directory = os.path.dirname(real_filename) + + if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename): + _Fail("File '%s' is not under exports directory '%s': %s", + filename, pathutils.EXPORT_DIR, real_filename) + + # Create directory + utils.Makedirs(directory, mode=0750) + + quoted_filename = utils.ShellQuote(filename) + + if mode == constants.IEM_IMPORT: + suffix = "> %s" % quoted_filename + elif mode == constants.IEM_EXPORT: + suffix = "< %s" % quoted_filename + + # Retrieve file size + try: + st = os.stat(filename) + except EnvironmentError, err: + logging.error("Can't stat(2) %s: %s", filename, err) + else: + exp_size = utils.BytesToMebibyte(st.st_size) + + elif ieio == constants.IEIO_RAW_DISK: + (disk, ) = ieargs + + real_disk = _OpenRealBD(disk) + + if mode == constants.IEM_IMPORT: + # we set here a smaller block size as, due to transport buffering, more + # than 64-128k will mostly ignored; we use nocreat to fail if the device + # is not already there or we pass a wrong path; we use notrunc to no + # attempt truncate on an LV device; we use oflag=dsync to not buffer too + # much memory; this means that at best, we flush every 64k, which will + # not be very fast + suffix = utils.BuildShellCmd(("| dd of=%s conv=nocreat,notrunc" + " bs=%s oflag=dsync"), + real_disk.dev_path, + str(64 * 1024)) + + elif mode == constants.IEM_EXPORT: + # the block size on the read dd is 1MiB to match our units + prefix = utils.BuildShellCmd("dd if=%s bs=%s count=%s |", + real_disk.dev_path, + str(1024 * 1024), # 1 MB + str(disk.size)) + exp_size = disk.size + + elif ieio == constants.IEIO_SCRIPT: + (disk, disk_index, ) = ieargs + + assert 
isinstance(disk_index, (int, long)) + + real_disk = _OpenRealBD(disk) + + inst_os = OSFromDisk(instance.os) + env = OSEnvironment(instance, inst_os) + + if mode == constants.IEM_IMPORT: + env["IMPORT_DEVICE"] = env["DISK_%d_PATH" % disk_index] + env["IMPORT_INDEX"] = str(disk_index) + script = inst_os.import_script + + elif mode == constants.IEM_EXPORT: + env["EXPORT_DEVICE"] = real_disk.dev_path + env["EXPORT_INDEX"] = str(disk_index) + script = inst_os.export_script + + # TODO: Pass special environment only to script + script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script) + + if mode == constants.IEM_IMPORT: + suffix = "| %s" % script_cmd + + elif mode == constants.IEM_EXPORT: + prefix = "%s |" % script_cmd + + # Let script predict size + exp_size = constants.IE_CUSTOM_SIZE + + else: + _Fail("Invalid %s I/O mode %r", mode, ieio) + + return (env, prefix, suffix, exp_size) + + +def _CreateImportExportStatusDir(prefix): + """Creates status directory for import/export. + + """ + return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR, + prefix=("%s-%s-" % + (prefix, utils.TimestampForFilename()))) + + +def StartImportExportDaemon(mode, opts, host, port, instance, component, + ieio, ieioargs): + """Starts an import or export daemon. + + @param mode: Import/output mode + @type opts: L{objects.ImportExportOptions} + @param opts: Daemon options + @type host: string + @param host: Remote host for export (None for import) + @type port: int + @param port: Remote port for export (None for import) + @type instance: L{objects.Instance} + @param instance: Instance object + @type component: string + @param component: which part of the instance is transferred now, + e.g. 'disk/0' + @param ieio: Input/output type + @param ieioargs: Input/output arguments + + """ + if mode == constants.IEM_IMPORT: + prefix = "import" + + if not (host is None and port is None): + _Fail("Can not specify host or port on import") + + elif mode == constants.IEM_EXPORT: + prefix = "export" + + if host is None or port is None: + _Fail("Host and port must be specified for an export") + + else: + _Fail("Invalid mode %r", mode) + + if (opts.key_name is None) ^ (opts.ca_pem is None): + _Fail("Cluster certificate can only be used for both key and CA") + + (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \ + _GetImportExportIoCommand(instance, mode, ieio, ieioargs) + + if opts.key_name is None: + # Use server.pem + key_path = pathutils.NODED_CERT_FILE + cert_path = pathutils.NODED_CERT_FILE + assert opts.ca_pem is None + else: + (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR, + opts.key_name) + assert opts.ca_pem is not None + + for i in [key_path, cert_path]: + if not os.path.exists(i): + _Fail("File '%s' does not exist" % i) + + status_dir = _CreateImportExportStatusDir("%s-%s" % (prefix, component)) + try: + status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE) + pid_file = utils.PathJoin(status_dir, _IES_PID_FILE) + ca_file = utils.PathJoin(status_dir, _IES_CA_FILE) + + if opts.ca_pem is None: + # Use server.pem + ca = utils.ReadFile(pathutils.NODED_CERT_FILE) + else: + ca = opts.ca_pem + + # Write CA file + utils.WriteFile(ca_file, data=ca, mode=0400) + + cmd = [ + pathutils.IMPORT_EXPORT_DAEMON, + status_file, mode, + "--key=%s" % key_path, + "--cert=%s" % cert_path, + "--ca=%s" % ca_file, + ] + + if host: + cmd.append("--host=%s" % host) + + if port: + cmd.append("--port=%s" % port) + + if opts.ipv6: + cmd.append("--ipv6") + else: + cmd.append("--ipv4") + + if opts.compress: + 
cmd.append("--compress=%s" % opts.compress) + + if opts.magic: + cmd.append("--magic=%s" % opts.magic) + + if exp_size is not None: + cmd.append("--expected-size=%s" % exp_size) + + if cmd_prefix: + cmd.append("--cmd-prefix=%s" % cmd_prefix) + + if cmd_suffix: + cmd.append("--cmd-suffix=%s" % cmd_suffix) + + if mode == constants.IEM_EXPORT: + # Retry connection a few times when connecting to remote peer + cmd.append("--connect-retries=%s" % constants.RIE_CONNECT_RETRIES) + cmd.append("--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT) + elif opts.connect_timeout is not None: + assert mode == constants.IEM_IMPORT + # Overall timeout for establishing connection while listening + cmd.append("--connect-timeout=%s" % opts.connect_timeout) + + logfile = _InstanceLogName(prefix, instance.os, instance.name, component) + + # TODO: Once _InstanceLogName uses tempfile.mkstemp, StartDaemon has + # support for receiving a file descriptor for output + utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file, + output=logfile) + + # The import/export name is simply the status directory name + return os.path.basename(status_dir) + + except Exception: + shutil.rmtree(status_dir, ignore_errors=True) + raise + + +def GetImportExportStatus(names): + """Returns import/export daemon status. + + @type names: sequence + @param names: List of names + @rtype: List of dicts + @return: Returns a list of the state of each named import/export or None if a + status couldn't be read + + """ + result = [] + + for name in names: + status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name, + _IES_STATUS_FILE) + + try: + data = utils.ReadFile(status_file) + except EnvironmentError, err: + if err.errno != errno.ENOENT: + raise + data = None + + if not data: + result.append(None) + continue + + result.append(serializer.LoadJson(data)) + + return result + + +def AbortImportExport(name): + """Sends SIGTERM to a running import/export daemon. + + """ + logging.info("Abort import/export %s", name) + + status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) + pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) + + if pid: + logging.info("Import/export %s is running with PID %s, sending SIGTERM", + name, pid) + utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM) + + +def CleanupImportExport(name): + """Cleanup after an import or export. + + If the import/export daemon is still running it's killed. Afterwards the + whole status directory is removed. + + """ + logging.info("Finalizing import/export %s", name) + + status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) + + pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) + + if pid: + logging.info("Import/export %s is still running with PID %s", + name, pid) + utils.KillProcess(pid, waitpid=False) + + shutil.rmtree(status_dir, ignore_errors=True) def _FindDisks(nodes_ip, disks): @@ -2550,7 +3555,7 @@ def _FindDisks(nodes_ip, disks): """ # set the correct physical ID - my_name = utils.HostInfo().name + my_name = netutils.Hostname.GetSysName() for cf in disks: cf.SetPhysicalID(my_name, nodes_ip) @@ -2671,6 +3676,16 @@ def DrbdWaitSync(nodes_ip, disks): return (alldone, min_resync) +def GetDrbdUsermodeHelper(): + """Returns DRBD usermode helper currently configured. + + """ + try: + return bdev.BaseDRBD.GetUsermodeHelper() + except errors.BlockDeviceError, err: + _Fail(str(err)) + + def PowercycleNode(hypervisor_type): """Hard-powercycle the node. 
@@ -2689,12 +3704,215 @@ def PowercycleNode(hypervisor_type): # ensure the child is running on ram try: utils.Mlockall() - except Exception: # pylint: disable-msg=W0703 + except Exception: # pylint: disable=W0703 pass time.sleep(5) hyper.PowercycleNode() +def _VerifyRestrictedCmdName(cmd): + """Verifies a restricted command name. + + @type cmd: string + @param cmd: Command name + @rtype: tuple; (boolean, string or None) + @return: The tuple's first element is the status; if C{False}, the second + element is an error message string, otherwise it's C{None} + + """ + if not cmd.strip(): + return (False, "Missing command name") + + if os.path.basename(cmd) != cmd: + return (False, "Invalid command name") + + if not constants.EXT_PLUGIN_MASK.match(cmd): + return (False, "Command name contains forbidden characters") + + return (True, None) + + +def _CommonRestrictedCmdCheck(path, owner): + """Common checks for restricted command file system directories and files. + + @type path: string + @param path: Path to check + @param owner: C{None} or tuple containing UID and GID + @rtype: tuple; (boolean, string or C{os.stat} result) + @return: The tuple's first element is the status; if C{False}, the second + element is an error message string, otherwise it's the result of C{os.stat} + + """ + if owner is None: + # Default to root as owner + owner = (0, 0) + + try: + st = os.stat(path) + except EnvironmentError, err: + return (False, "Can't stat(2) '%s': %s" % (path, err)) + + if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE): + return (False, "Permissions on '%s' are too permissive" % path) + + if (st.st_uid, st.st_gid) != owner: + (owner_uid, owner_gid) = owner + return (False, "'%s' is not owned by %s:%s" % (path, owner_uid, owner_gid)) + + return (True, st) + + +def _VerifyRestrictedCmdDirectory(path, _owner=None): + """Verifies restricted command directory. + + @type path: string + @param path: Path to check + @rtype: tuple; (boolean, string or None) + @return: The tuple's first element is the status; if C{False}, the second + element is an error message string, otherwise it's C{None} + + """ + (status, value) = _CommonRestrictedCmdCheck(path, _owner) + + if not status: + return (False, value) + + if not stat.S_ISDIR(value.st_mode): + return (False, "Path '%s' is not a directory" % path) + + return (True, None) + + +def _VerifyRestrictedCmd(path, cmd, _owner=None): + """Verifies a whole restricted command and returns its executable filename. + + @type path: string + @param path: Directory containing restricted commands + @type cmd: string + @param cmd: Command name + @rtype: tuple; (boolean, string) + @return: The tuple's first element is the status; if C{False}, the second + element is an error message string, otherwise the second element is the + absolute path to the executable + + """ + executable = utils.PathJoin(path, cmd) + + (status, msg) = _CommonRestrictedCmdCheck(executable, _owner) + + if not status: + return (False, msg) + + if not utils.IsExecutable(executable): + return (False, "access(2) thinks '%s' can't be executed" % executable) + + return (True, executable) + + +def _PrepareRestrictedCmd(path, cmd, + _verify_dir=_VerifyRestrictedCmdDirectory, + _verify_name=_VerifyRestrictedCmdName, + _verify_cmd=_VerifyRestrictedCmd): + """Performs a number of tests on a restricted command. 
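# --- Editor's note: illustrative sketch, not part of this patch.  The
# --- ownership and permission test behind _CommonRestrictedCmdCheck, written
# --- out with the stat module only; rwxr-xr-x is used as the maximum mode
# --- here and is only an assumption about _RCMD_MAX_MODE.
import os
import stat

_MAX_MODE = (stat.S_IRWXU |
             stat.S_IRGRP | stat.S_IXGRP |
             stat.S_IROTH | stat.S_IXOTH)

def check_owner_and_mode(path, owner_uid=0, owner_gid=0):
    st = os.stat(path)
    # Reject anything writable by group/other (or with extra mode bits set).
    if stat.S_IMODE(st.st_mode) & ~_MAX_MODE:
        return (False, "Permissions on '%s' are too permissive" % path)
    if (st.st_uid, st.st_gid) != (owner_uid, owner_gid):
        return (False, "'%s' is not owned by %d:%d" % (path, owner_uid, owner_gid))
    return (True, st)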
+ + @type path: string + @param path: Directory containing restricted commands + @type cmd: string + @param cmd: Command name + @return: Same as L{_VerifyRestrictedCmd} + + """ + # Verify the directory first + (status, msg) = _verify_dir(path) + if status: + # Check command if everything was alright + (status, msg) = _verify_name(cmd) + + if not status: + return (False, msg) + + # Check actual executable + return _verify_cmd(path, cmd) + + +def RunRestrictedCmd(cmd, + _lock_timeout=_RCMD_LOCK_TIMEOUT, + _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE, + _path=pathutils.RESTRICTED_COMMANDS_DIR, + _sleep_fn=time.sleep, + _prepare_fn=_PrepareRestrictedCmd, + _runcmd_fn=utils.RunCmd, + _enabled=constants.ENABLE_RESTRICTED_COMMANDS): + """Executes a restricted command after performing strict tests. + + @type cmd: string + @param cmd: Command name + @rtype: string + @return: Command output + @raise RPCFail: In case of an error + + """ + logging.info("Preparing to run restricted command '%s'", cmd) + + if not _enabled: + _Fail("Restricted commands disabled at configure time") + + lock = None + try: + cmdresult = None + try: + lock = utils.FileLock.Open(_lock_file) + lock.Exclusive(blocking=True, timeout=_lock_timeout) + + (status, value) = _prepare_fn(_path, cmd) + + if status: + cmdresult = _runcmd_fn([value], env={}, reset_env=True, + postfork_fn=lambda _: lock.Unlock()) + else: + logging.error(value) + except Exception: # pylint: disable=W0703 + # Keep original error in log + logging.exception("Caught exception") + + if cmdresult is None: + logging.info("Sleeping for %0.1f seconds before returning", + _RCMD_INVALID_DELAY) + _sleep_fn(_RCMD_INVALID_DELAY) + + # Do not include original error message in returned error + _Fail("Executing command '%s' failed" % cmd) + elif cmdresult.failed or cmdresult.fail_reason: + _Fail("Restricted command '%s' failed: %s; output: %s", + cmd, cmdresult.fail_reason, cmdresult.output) + else: + return cmdresult.output + finally: + if lock is not None: + # Release lock at last + lock.Close() + lock = None + + +def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSEFILE): + """Creates or removes the watcher pause file. + + @type until: None or number + @param until: Unix timestamp saying until when the watcher shouldn't run + + """ + if until is None: + logging.info("Received request to no longer pause watcher") + utils.RemoveFile(_filename) + else: + logging.info("Received request to pause watcher until %s", until) + + if not ht.TNumber(until): + _Fail("Duration must be numeric") + + utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0644) + + class HooksRunner(object): """Hook runner. @@ -2707,14 +3925,28 @@ class HooksRunner(object): @type hooks_base_dir: str or None @param hooks_base_dir: if not None, this overrides the - L{constants.HOOKS_BASE_DIR} (useful for unittests) + L{pathutils.HOOKS_BASE_DIR} (useful for unittests) """ if hooks_base_dir is None: - hooks_base_dir = constants.HOOKS_BASE_DIR + hooks_base_dir = pathutils.HOOKS_BASE_DIR # yeah, _BASE_DIR is not valid for attributes, we use it like a # constant - self._BASE_DIR = hooks_base_dir # pylint: disable-msg=C0103 + self._BASE_DIR = hooks_base_dir # pylint: disable=C0103 + + def RunLocalHooks(self, node_list, hpath, phase, env): + """Check that the hooks will be run only locally and then run them. 
+ + """ + assert len(node_list) == 1 + node = node_list[0] + _, myself = ssconf.GetMasterAndMyself() + assert node == myself + + results = self.RunHooks(hpath, phase, env) + + # Return values in the form expected by HooksMaster + return {node: (None, False, results)} def RunHooks(self, hpath, phase, env): """Run the scripts in the hooks directory. @@ -2745,7 +3977,6 @@ class HooksRunner(object): else: _Fail("Unknown hooks phase '%s'", phase) - subdir = "%s-%s.d" % (hpath, suffix) dir_name = utils.PathJoin(self._BASE_DIR, subdir) @@ -2758,7 +3989,7 @@ class HooksRunner(object): runparts_results = utils.RunParts(dir_name, env=env, reset_env=True) - for (relname, relstatus, runresult) in runparts_results: + for (relname, relstatus, runresult) in runparts_results: if relstatus == constants.RUNPARTS_SKIP: rrval = constants.HKR_SKIP output = "" @@ -2822,7 +4053,7 @@ class DevCacheManager(object): """ _DEV_PREFIX = "/dev/" - _ROOT_DIR = constants.BDEV_CACHE_DIR + _ROOT_DIR = pathutils.BDEV_CACHE_DIR @classmethod def _ConvertPath(cls, dev_path): @@ -2881,7 +4112,7 @@ class DevCacheManager(object): def RemoveCache(cls, dev_path): """Remove data for a dev_path. - This is just a wrapper over L{utils.RemoveFile} with a converted + This is just a wrapper over L{utils.io.RemoveFile} with a converted path name and logging. @type dev_path: str