diff --git a/lib/backend.py b/lib/backend.py
index beeca61..4bf94fb 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -1,7 +1,7 @@
 #
 #
 
-# Copyright (C) 2006, 2007 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -47,6 +47,7 @@ import logging
 import tempfile
 import zlib
 import base64
+import signal
 
 from ganeti import errors
 from ganeti import utils
@@ -57,6 +58,8 @@ from ganeti import bdev
 from ganeti import objects
 from ganeti import ssconf
 from ganeti import serializer
+from ganeti import netutils
+from ganeti import runtime
 
 
 _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
@@ -192,6 +195,7 @@ def _BuildUploadFileList():
     constants.RAPI_CERT_FILE,
     constants.RAPI_USERS_FILE,
     constants.CONFD_HMAC_KEY,
+    constants.CLUSTER_DOMAIN_SECRET_FILE,
     ])
 
   for hv_name in constants.HYPER_TYPES:
@@ -222,7 +226,7 @@ def GetMasterInfo():
   for consumption here or from the node daemon.
 
   @rtype: tuple
-  @return: master_netdev, master_ip, master_name
+  @return: master_netdev, master_ip, master_name, primary_ip_family
   @raise RPCFail: in case of errors
 
   """
@@ -231,21 +235,23 @@ def GetMasterInfo():
     master_netdev = cfg.GetMasterNetdev()
     master_ip = cfg.GetMasterIP()
     master_node = cfg.GetMasterNode()
+    primary_ip_family = cfg.GetPrimaryIPFamily()
   except errors.ConfigurationError, err:
     _Fail("Cluster configuration incomplete: %s", err, exc=True)
-  return (master_netdev, master_ip, master_node)
+  return (master_netdev, master_ip, master_node, primary_ip_family)
 
 
 def StartMaster(start_daemons, no_voting):
   """Activate local node as master node.
 
-  The function will always try activate the IP address of the master
-  (unless someone else has it). It will also start the master daemons,
-  based on the start_daemons parameter.
+  The function will either try to activate the IP address of the
+  master (unless someone else has it) or start the master daemons,
+  based on the start_daemons parameter.
   @type start_daemons: boolean
-  @param start_daemons: whether to also start the master
-      daemons (ganeti-masterd and ganeti-rapi)
+  @param start_daemons: whether to start the master daemons
+      (ganeti-masterd and ganeti-rapi), or (if False) activate the
+      master IP
   @type no_voting: boolean
   @param no_voting: whether to start ganeti-masterd without a node vote
       (if start_daemons is True), but still non-interactively
@@ -253,31 +259,10 @@ def StartMaster(start_daemons, no_voting):
 
   """
   # GetMasterInfo will raise an exception if not able to return data
-  master_netdev, master_ip, _ = GetMasterInfo()
+  master_netdev, master_ip, _, family = GetMasterInfo()
 
   err_msgs = []
-  if utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
-    if utils.OwnIpAddress(master_ip):
-      # we already have the ip:
-      logging.debug("Master IP already configured, doing nothing")
-    else:
-      msg = "Someone else has the master ip, not activating"
-      logging.error(msg)
-      err_msgs.append(msg)
-  else:
-    result = utils.RunCmd(["ip", "address", "add", "%s/32" % master_ip,
-                           "dev", master_netdev, "label",
-                           "%s:0" % master_netdev])
-    if result.failed:
-      msg = "Can't activate master IP: %s" % result.output
-      logging.error(msg)
-      err_msgs.append(msg)
-
-    result = utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev,
-                           "-s", master_ip, master_ip])
-    # we'll ignore the exit code of arping
-
-  # and now start the master and rapi daemons
+  # either start the master and rapi daemons
   if start_daemons:
     if no_voting:
       masterd_args = "--no-voting --yes-do-it"
@@ -293,6 +278,40 @@ def StartMaster(start_daemons, no_voting):
       msg = "Can't start Ganeti master: %s" % result.output
       logging.error(msg)
       err_msgs.append(msg)
+  # or activate the IP
+  else:
+    if netutils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
+      if netutils.IPAddress.Own(master_ip):
+        # we already have the ip:
+        logging.debug("Master IP already configured, doing nothing")
+      else:
+        msg = "Someone else has the master ip, not activating"
+        logging.error(msg)
+        err_msgs.append(msg)
+    else:
+      ipcls = netutils.IP4Address
+      if family == netutils.IP6Address.family:
+        ipcls = netutils.IP6Address
+
+      result = utils.RunCmd(["ip", "address", "add",
+                             "%s/%d" % (master_ip, ipcls.iplen),
+                             "dev", master_netdev, "label",
+                             "%s:0" % master_netdev])
+      if result.failed:
+        msg = "Can't activate master IP: %s" % result.output
+        logging.error(msg)
+        err_msgs.append(msg)
+
+      # we ignore the exit code of the following cmds
+      if ipcls == netutils.IP4Address:
+        utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev, "-s",
+                      master_ip, master_ip])
+      elif ipcls == netutils.IP6Address:
+        try:
+          utils.RunCmd(["ndisc6", "-q", "-r 3", master_ip, master_netdev])
+        except errors.OpExecError:
+          # TODO: Better error reporting
+          logging.warning("Can't execute ndisc6, please install if missing")
 
   if err_msgs:
     _Fail("; ".join(err_msgs))
@@ -315,9 +334,14 @@ def StopMaster(stop_daemons):
   # need to decide in which case we fail the RPC for this
 
   # GetMasterInfo will raise an exception if not able to return data
-  master_netdev, master_ip, _ = GetMasterInfo()
+  master_netdev, master_ip, _, family = GetMasterInfo()
 
-  result = utils.RunCmd(["ip", "address", "del", "%s/32" % master_ip,
+  ipcls = netutils.IP4Address
+  if family == netutils.IP6Address.family:
+    ipcls = netutils.IP6Address
+
+  result = utils.RunCmd(["ip", "address", "del",
+                         "%s/%d" % (master_ip, ipcls.iplen),
                          "dev", master_netdev])
   if result.failed:
     logging.error("Can't remove the master IP, error: %s", result.output)
@@ -331,52 +355,26 @@ def StopMaster(stop_daemons):
                   result.cmd, result.exit_code, result.output)
 
 
-def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
-  """Joins this node to the cluster.
-
-  This does the following:
-      - updates the hostkeys of the machine (rsa and dsa)
-      - adds the ssh private key to the user
-      - adds the ssh public key to the users' authorized_keys file
-
-  @type dsa: str
-  @param dsa: the DSA private key to write
-  @type dsapub: str
-  @param dsapub: the DSA public key to write
-  @type rsa: str
-  @param rsa: the RSA private key to write
-  @type rsapub: str
-  @param rsapub: the RSA public key to write
-  @type sshkey: str
-  @param sshkey: the SSH private key to write
-  @type sshpub: str
-  @param sshpub: the SSH public key to write
-  @rtype: boolean
-  @return: the success of the operation
-
-  """
-  sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
-               (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
-               (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
-               (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
-  for name, content, mode in sshd_keys:
-    utils.WriteFile(name, data=content, mode=mode)
-
-  try:
-    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
-                                                    mkdir=True)
-  except errors.OpExecError, err:
-    _Fail("Error while processing user ssh files: %s", err, exc=True)
-
-  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
-    utils.WriteFile(name, data=content, mode=0600)
+def EtcHostsModify(mode, host, ip):
+  """Modify a host entry in /etc/hosts.
 
-  utils.AddAuthorizedKey(auth_keys, sshpub)
+  @param mode: The mode to operate in; either add or remove an entry
+  @param host: The host to operate on
+  @param ip: The IP associated with the entry
 
-  result = utils.RunCmd([constants.DAEMON_UTIL, "reload-ssh-keys"])
-  if result.failed:
-    _Fail("Unable to reload SSH keys (command %r, exit code %s, output %r)",
-          result.cmd, result.exit_code, result.output)
+  """
+  if mode == constants.ETC_HOSTS_ADD:
+    if not ip:
+      _Fail("Mode 'add' needs 'ip' parameter, but parameter not"
+            " present")
+    utils.AddHostToEtcHosts(host, ip)
+  elif mode == constants.ETC_HOSTS_REMOVE:
+    if ip:
+      _Fail("Mode 'remove' does not allow 'ip' parameter, but"
+            " parameter is present")
+    utils.RemoveHostFromEtcHosts(host)
+  else:
+    _Fail("Mode not supported")
 
 
 def LeaveCluster(modify_ssh_setup):
@@ -441,9 +439,15 @@ def GetNodeInfo(vgname, hypervisor_type):
 
   """
   outputarray = {}
-  vginfo = _GetVGInfo(vgname)
-  outputarray['vg_size'] = vginfo['vg_size']
-  outputarray['vg_free'] = vginfo['vg_free']
+
+  vginfo = bdev.LogicalVolume.GetVGInfo([vgname])
+  vg_free = vg_size = None
+  if vginfo:
+    vg_free = int(round(vginfo[0][0], 0))
+    vg_size = int(round(vginfo[0][1], 0))
+
+  outputarray['vg_size'] = vg_size
+  outputarray['vg_free'] = vg_free
 
   hyper = hypervisor.GetHypervisor(hypervisor_type)
   hyp_info = hyper.GetNodeInfo()
@@ -485,8 +489,11 @@ def VerifyNode(what, cluster_name):
 
   """
   result = {}
+  my_name = netutils.Hostname.GetSysName()
+  port = netutils.GetDaemonPort(constants.NODED)
+  vm_capable = my_name not in what.get(constants.NV_VMNODES, [])
 
-  if constants.NV_HYPERVISOR in what:
+  if constants.NV_HYPERVISOR in what and vm_capable:
     result[constants.NV_HYPERVISOR] = tmp = {}
     for hv_name in what[constants.NV_HYPERVISOR]:
       try:
@@ -509,7 +516,6 @@ def VerifyNode(what, cluster_name):
 
   if constants.NV_NODENETTEST in what:
     result[constants.NV_NODENETTEST] = tmp = {}
-    my_name = utils.HostInfo().name
     my_pip = my_sip = None
     for name, pip, sip in what[constants.NV_NODENETTEST]:
       if name == my_name:
@@ -520,26 +526,36 @@ def VerifyNode(what, cluster_name):
       tmp[my_name] = ("Can't find my own primary/secondary IP"
                       " in the node list")
     else:
-      port = utils.GetDaemonPort(constants.NODED)
       for name, pip, sip in what[constants.NV_NODENETTEST]:
         fail = []
-        if not utils.TcpPing(pip, port, source=my_pip):
+        if not netutils.TcpPing(pip, port, source=my_pip):
           fail.append("primary")
         if sip != pip:
-          if not utils.TcpPing(sip, port, source=my_sip):
+          if not netutils.TcpPing(sip, port, source=my_sip):
             fail.append("secondary")
         if fail:
           tmp[name] = ("failure using the %s interface(s)" %
                        " and ".join(fail))
 
-  if constants.NV_LVLIST in what:
+  if constants.NV_MASTERIP in what:
+    # FIXME: add checks on incoming data structures (here and in the
+    # rest of the function)
+    master_name, master_ip = what[constants.NV_MASTERIP]
+    if master_name == my_name:
+      source = constants.IP4_ADDRESS_LOCALHOST
+    else:
+      source = None
+    result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
+                                                     source=source)
+
+  if constants.NV_LVLIST in what and vm_capable:
     try:
       val = GetVolumeList(what[constants.NV_LVLIST])
     except RPCFail, err:
       val = str(err)
     result[constants.NV_LVLIST] = val
 
-  if constants.NV_INSTANCELIST in what:
+  if constants.NV_INSTANCELIST in what and vm_capable:
     # GetInstanceList can fail
     try:
       val = GetInstanceList(what[constants.NV_INSTANCELIST])
@@ -547,10 +563,10 @@ def VerifyNode(what, cluster_name):
       val = str(err)
     result[constants.NV_INSTANCELIST] = val
 
-  if constants.NV_VGLIST in what:
+  if constants.NV_VGLIST in what and vm_capable:
     result[constants.NV_VGLIST] = utils.ListVolumeGroups()
 
-  if constants.NV_PVLIST in what:
+  if constants.NV_PVLIST in what and vm_capable:
     result[constants.NV_PVLIST] = \
       bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
                                    filter_allocatable=False)
@@ -559,11 +575,11 @@ def VerifyNode(what, cluster_name):
     result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
                                     constants.RELEASE_VERSION)
 
-  if constants.NV_HVINFO in what:
+  if constants.NV_HVINFO in what and vm_capable:
     hyper = hypervisor.GetHypervisor(what[constants.NV_HVINFO])
     result[constants.NV_HVINFO] = hyper.GetNodeInfo()
 
-  if constants.NV_DRBDLIST in what:
+  if constants.NV_DRBDLIST in what and vm_capable:
     try:
       used_minors = bdev.DRBD8.GetUsedDevs().keys()
     except errors.BlockDeviceError, err:
@@ -571,6 +587,16 @@ def VerifyNode(what, cluster_name):
       used_minors = str(err)
     result[constants.NV_DRBDLIST] = used_minors
 
+  if constants.NV_DRBDHELPER in what and vm_capable:
+    status = True
+    try:
+      payload = bdev.BaseDRBD.GetUsermodeHelper()
+    except errors.BlockDeviceError, err:
+      logging.error("Can't get DRBD usermode helper: %s", str(err))
+      status = False
+      payload = str(err)
+    result[constants.NV_DRBDHELPER] = (status, payload)
+
   if constants.NV_NODESETUP in what:
     result[constants.NV_NODESETUP] = tmpr = []
     if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
@@ -586,6 +612,9 @@ def VerifyNode(what, cluster_name):
   if constants.NV_TIME in what:
     result[constants.NV_TIME] = utils.SplitTime(time.time())
 
+  if constants.NV_OSLIST in what and vm_capable:
+    result[constants.NV_OSLIST] = DiagnoseOS()
+
   return result
 
 
@@ -776,7 +805,8 @@ def GetInstanceMigratable(instance):
   for idx in range(len(instance.disks)):
     link_name = _GetBlockDevSymlinkPath(iname, idx)
     if not os.path.islink(link_name):
-      _Fail("Instance %s was not restarted since ganeti 1.2.5", iname)
+      logging.warning("Instance %s is missing symlink %s for disk %d",
+                      iname, link_name, idx)
 
 
 def GetAllInstancesInfo(hypervisor_list):
@@ -907,46 +937,6 @@ def RunRenameInstance(instance, old_name, debug):
           " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
 
 
-def _GetVGInfo(vg_name):
-  """Get information about the volume group.
-
-  @type vg_name: str
-  @param vg_name: the volume group which we query
-  @rtype: dict
-  @return:
-    A dictionary with the following keys:
-      - C{vg_size} is the total size of the volume group in MiB
-      - C{vg_free} is the free size of the volume group in MiB
-      - C{pv_count} are the number of physical disks in that VG
-
-    If an error occurs during gathering of data, we return the same dict
-    with keys all set to None.
-
-  """
-  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
-
-  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
-                         "--nosuffix", "--units=m", "--separator=:", vg_name])
-
-  if retval.failed:
-    logging.error("volume group %s not present", vg_name)
-    return retdic
-  valarr = retval.stdout.strip().rstrip(':').split(':')
-  if len(valarr) == 3:
-    try:
-      retdic = {
-        "vg_size": int(round(float(valarr[0]), 0)),
-        "vg_free": int(round(float(valarr[1]), 0)),
-        "pv_count": int(valarr[2]),
-        }
-    except (TypeError, ValueError), err:
-      logging.exception("Fail to parse vgs output: %s", err)
-  else:
-    logging.error("vgs output has the wrong number of fields (expected"
-                  " three): %s", str(valarr))
-  return retdic
-
-
 def _GetBlockDevSymlinkPath(instance_name, idx):
   return utils.PathJoin(constants.DISK_LINKS_DIR, "%s:%d" %
                         (instance_name, idx))
@@ -1298,6 +1288,52 @@ def BlockdevCreate(disk, size, owner, on_primary, info):
   return device.unique_id
 
 
+def _WipeDevice(path, offset, size):
+  """This function actually wipes the device.
+
+  @param path: The path to the device to wipe
+  @param offset: The offset in MiB in the file
+  @param size: The size in MiB to write
+
+  """
+  cmd = [constants.DD_CMD, "if=/dev/zero", "seek=%d" % offset,
+         "bs=%d" % constants.WIPE_BLOCK_SIZE, "oflag=direct", "of=%s" % path,
+         "count=%d" % size]
+  result = utils.RunCmd(cmd)
+
+  if result.failed:
+    _Fail("Wipe command '%s' exited with error: %s; output: %s", result.cmd,
+          result.fail_reason, result.output)
+
+
+def BlockdevWipe(disk, offset, size):
+  """Wipes a block device.
+
+  @type disk: L{objects.Disk}
+  @param disk: the disk object we want to wipe
+  @type offset: int
+  @param offset: The offset in MiB in the file
+  @type size: int
+  @param size: The size in MiB to write
+
+  """
+  try:
+    rdev = _RecursiveFindBD(disk)
+  except errors.BlockDeviceError:
+    rdev = None
+
+  if not rdev:
+    _Fail("Cannot execute wipe for device %s: device not found", disk.iv_name)
+
+  # Cross-verify some of the parameters
+  if offset > rdev.size:
+    _Fail("Offset is bigger than device size")
+  if (offset + size) > rdev.size:
+    _Fail("The provided offset and size to wipe are bigger than device size")
+
+  _WipeDevice(rdev.dev_path, offset, size)
+
+
 def BlockdevRemove(disk):
   """Remove a block device.
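
For reference, the dd invocation assembled by _WipeDevice above is a plain argument vector. The following standalone sketch shows the same construction, assuming constants.DD_CMD is simply "dd" and constants.WIPE_BLOCK_SIZE is one MiB (the names come from the patch; the values here are only illustrative). Because bs sets dd's input and output block size, both seek= and count= end up expressed in MiB, matching the docstrings.

# Standalone sketch of the command _WipeDevice builds; values illustrative.
DD_CMD = "dd"                  # assumed value of constants.DD_CMD
WIPE_BLOCK_SIZE = 1024 * 1024  # assumed value of constants.WIPE_BLOCK_SIZE

def BuildWipeCmd(path, offset, size):
  # offset and size are in MiB; with bs set to 1 MiB, dd's seek= and
  # count= operate in the same unit as the arguments
  return [DD_CMD, "if=/dev/zero", "seek=%d" % offset,
          "bs=%d" % WIPE_BLOCK_SIZE, "oflag=direct", "of=%s" % path,
          "count=%d" % size]

# Example: BuildWipeCmd("/dev/xenvg/disk0", 0, 128) zeroes the first
# 128 MiB of the (hypothetical) device /dev/xenvg/disk0.
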
@@ -1682,8 +1718,9 @@ def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
 
   raw_data = _Decompress(data)
 
-  utils.WriteFile(file_name, data=raw_data, mode=mode, uid=uid, gid=gid,
-                  atime=atime, mtime=mtime)
+  utils.SafeWriteFile(file_name, None,
+                      data=raw_data, mode=mode, uid=uid, gid=gid,
+                      atime=atime, mtime=mtime)
 
 
 def WriteSsconfFiles(values):
@@ -1761,13 +1798,16 @@ def DiagnoseOS(top_dirs=None):
       search (if not given defaults to
      L{constants.OS_SEARCH_PATH})
   @rtype: list of L{objects.OS}
-  @return: a list of tuples (name, path, status, diagnose, variants)
-      for all (potential) OSes under all search paths, where:
+  @return: a list of tuples (name, path, status, diagnose, variants,
+      parameters, api_version) for all (potential) OSes under all
+      search paths, where:
          - name is the (potential) OS name
          - path is the full path to the OS
          - status True/False is the validity of the OS
          - diagnose is the error message for an invalid OS, otherwise empty
          - variants is a list of supported OS variants, if any
+         - parameters is a list of (name, help) parameters, if any
+         - api_version is a list of supported OS API versions
 
   """
   if top_dirs is None:
@@ -1787,10 +1827,13 @@ def DiagnoseOS(top_dirs=None):
         if status:
           diagnose = ""
           variants = os_inst.supported_variants
+          parameters = os_inst.supported_parameters
+          api_versions = os_inst.api_versions
         else:
           diagnose = os_inst
-          variants = []
-        result.append((name, os_path, status, diagnose, variants))
+          variants = parameters = api_versions = []
+        result.append((name, os_path, status, diagnose, variants,
+                       parameters, api_versions))
 
   return result
 
@@ -1832,6 +1875,11 @@ def _TryOSFromDisk(name, base_dir=None):
   if max(api_versions) >= constants.OS_API_V15:
     os_files[constants.OS_VARIANTS_FILE] = ''
 
+  if max(api_versions) >= constants.OS_API_V20:
+    os_files[constants.OS_PARAMETERS_FILE] = ''
+  else:
+    del os_files[constants.OS_SCRIPT_VERIFY]
+
   for filename in os_files:
     os_files[filename] = utils.PathJoin(os_dir, filename)
 
@@ -1850,7 +1898,7 @@ def _TryOSFromDisk(name, base_dir=None):
       return False, ("File '%s' under path '%s' is not executable" %
                      (filename, os_dir))
 
-  variants = None
+  variants = []
   if constants.OS_VARIANTS_FILE in os_files:
     variants_file = os_files[constants.OS_VARIANTS_FILE]
     try:
@@ -1861,12 +1909,25 @@ def _TryOSFromDisk(name, base_dir=None):
     if not variants:
       return False, ("No supported os variant found")
 
+  parameters = []
+  if constants.OS_PARAMETERS_FILE in os_files:
+    parameters_file = os_files[constants.OS_PARAMETERS_FILE]
+    try:
+      parameters = utils.ReadFile(parameters_file).splitlines()
+    except EnvironmentError, err:
+      return False, ("Error while reading the OS parameters file at %s: %s" %
+                     (parameters_file, _ErrnoOrStr(err)))
+    parameters = [v.split(None, 1) for v in parameters]
+
   os_obj = objects.OS(name=name, path=os_dir,
                       create_script=os_files[constants.OS_SCRIPT_CREATE],
                       export_script=os_files[constants.OS_SCRIPT_EXPORT],
                       import_script=os_files[constants.OS_SCRIPT_IMPORT],
                       rename_script=os_files[constants.OS_SCRIPT_RENAME],
+                      verify_script=os_files.get(constants.OS_SCRIPT_VERIFY,
+                                                 None),
                       supported_variants=variants,
+                      supported_parameters=parameters,
                       api_versions=api_versions)
   return True, os_obj
 
@@ -1889,7 +1950,7 @@ def OSFromDisk(name, base_dir=None):
   @raise RPCFail: if we don't find a valid OS
 
   """
-  name_only = name.split("+", 1)[0]
+  name_only = objects.OS.GetName(name)
  status, payload = _TryOSFromDisk(name_only, base_dir)
 
   if not status:
@@ -1898,13 +1959,15 @@ def OSFromDisk(name, base_dir=None):
   return payload
 
 
-def OSEnvironment(instance, inst_os, debug=0):
-  """Calculate the environment for an os script.
+def OSCoreEnv(os_name, inst_os, os_params, debug=0):
+  """Calculate the basic environment for an os script.
 
-  @type instance: L{objects.Instance}
-  @param instance: target instance for the os script run
+  @type os_name: str
+  @param os_name: full operating system name (including variant)
   @type inst_os: L{objects.OS}
   @param inst_os: operating system for which the environment is being built
+  @type os_params: dict
+  @param os_params: the OS parameters
   @type debug: integer
   @param debug: debug level (0 or 1, for OS Api 10)
   @rtype: dict
@@ -1917,18 +1980,48 @@ def OSEnvironment(instance, inst_os, debug=0):
   api_version = \
     max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions))
   result['OS_API_VERSION'] = '%d' % api_version
-  result['INSTANCE_NAME'] = instance.name
-  result['INSTANCE_OS'] = instance.os
-  result['HYPERVISOR'] = instance.hypervisor
-  result['DISK_COUNT'] = '%d' % len(instance.disks)
-  result['NIC_COUNT'] = '%d' % len(instance.nics)
+  result['OS_NAME'] = inst_os.name
   result['DEBUG_LEVEL'] = '%d' % debug
+
+  # OS variants
   if api_version >= constants.OS_API_V15:
-    try:
-      variant = instance.os.split('+', 1)[1]
-    except IndexError:
+    variant = objects.OS.GetVariant(os_name)
+    if not variant:
       variant = inst_os.supported_variants[0]
     result['OS_VARIANT'] = variant
+
+  # OS params
+  for pname, pvalue in os_params.items():
+    result['OSP_%s' % pname.upper()] = pvalue
+
+  return result
+
+
+def OSEnvironment(instance, inst_os, debug=0):
+  """Calculate the environment for an os script.
+
+  @type instance: L{objects.Instance}
+  @param instance: target instance for the os script run
+  @type inst_os: L{objects.OS}
+  @param inst_os: operating system for which the environment is being built
+  @type debug: integer
+  @param debug: debug level (0 or 1, for OS API 10)
+  @rtype: dict
+  @return: dict of environment variables
+  @raise errors.BlockDeviceError: if the block device
+      cannot be found
+
+  """
+  result = OSCoreEnv(instance.os, inst_os, instance.osparams, debug=debug)
+
+  for attr in ["name", "os", "uuid", "ctime", "mtime"]:
+    result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr))
+
+  result['HYPERVISOR'] = instance.hypervisor
+  result['DISK_COUNT'] = '%d' % len(instance.disks)
+  result['NIC_COUNT'] = '%d' % len(instance.nics)
+
+  # Disks
   for idx, disk in enumerate(instance.disks):
     real_disk = _OpenRealBD(disk)
     result['DISK_%d_PATH' % idx] = real_disk.dev_path
@@ -1941,6 +2034,8 @@ def OSEnvironment(instance, inst_os, debug=0):
     elif disk.dev_type == constants.LD_FILE:
       result['DISK_%d_BACKEND_TYPE' % idx] = \
         'file:%s' % disk.physical_id[0]
+
+  # NICs
   for idx, nic in enumerate(instance.nics):
     result['NIC_%d_MAC' % idx] = nic.mac
     if nic.ip:
@@ -1954,6 +2049,7 @@ def OSEnvironment(instance, inst_os, debug=0):
       result['NIC_%d_FRONTEND_TYPE' % idx] = \
         instance.hvparams[constants.HV_NIC_TYPE]
 
+  # HV/BE params
   for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]:
     for key, value in source.items():
       result["INSTANCE_%s_%s" % (kind, key)] = str(value)
@@ -2015,66 +2111,6 @@ def BlockdevSnapshot(disk):
                 disk.unique_id, disk.dev_type)
 
 
-def ExportSnapshot(disk, dest_node, instance, cluster_name, idx, debug):
-  """Export a block device snapshot to a remote node.
-
-  @type disk: L{objects.Disk}
-  @param disk: the description of the disk to export
-  @type dest_node: str
-  @param dest_node: the destination node to export to
-  @type instance: L{objects.Instance}
-  @param instance: the instance object to whom the disk belongs
-  @type cluster_name: str
-  @param cluster_name: the cluster name, needed for SSH hostalias
-  @type idx: int
-  @param idx: the index of the disk in the instance's disk list,
-      used to export to the OS scripts environment
-  @type debug: integer
-  @param debug: debug level, passed to the OS scripts
-  @rtype: None
-
-  """
-  inst_os = OSFromDisk(instance.os)
-  export_env = OSEnvironment(instance, inst_os, debug)
-
-  export_script = inst_os.export_script
-
-  logfile = _InstanceLogName("export", inst_os.name, instance.name)
-  if not os.path.exists(constants.LOG_OS_DIR):
-    os.mkdir(constants.LOG_OS_DIR, 0750)
-
-  real_disk = _OpenRealBD(disk)
-
-  export_env['EXPORT_DEVICE'] = real_disk.dev_path
-  export_env['EXPORT_INDEX'] = str(idx)
-
-  destdir = utils.PathJoin(constants.EXPORT_DIR, instance.name + ".new")
-  destfile = disk.physical_id[1]
-
-  # the target command is built out of three individual commands,
-  # which are joined by pipes; we check each individual command for
-  # valid parameters
-  expcmd = utils.BuildShellCmd("set -e; set -o pipefail; cd %s; %s 2>%s",
-                               inst_os.path, export_script, logfile)
-
-  comprcmd = "gzip"
-
-  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s",
-                                destdir, utils.PathJoin(destdir, destfile))
-  remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node,
-                                                   constants.GANETI_RUNAS,
-                                                   destcmd)
-
-  # all commands have been checked, so we're safe to combine them
-  command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
-
-  result = utils.RunCmd(["bash", "-c", command], env=export_env)
-
-  if result.failed:
-    _Fail("OS snapshot export command '%s' returned error: %s"
-          " output: %s", command, result.fail_reason, result.output)
-
-
 def FinalizeExport(instance, snap_disks):
   """Write out the export configuration information.
 
@@ -2145,6 +2181,10 @@ def FinalizeExport(instance, snap_disks):
   for name, value in instance.beparams.items():
     config.set(constants.INISECT_BEP, name, str(value))
 
+  config.add_section(constants.INISECT_OSP)
+  for name, value in instance.osparams.items():
+    config.set(constants.INISECT_OSP, name, str(value))
+
   utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE),
                   data=config.Dumps())
   shutil.rmtree(finaldestdir, ignore_errors=True)
@@ -2174,55 +2214,6 @@ def ExportInfo(dest):
   return config.Dumps()
 
 
-def ImportOSIntoInstance(instance, src_node, src_images, cluster_name, debug):
-  """Import an os image into an instance.
-
-  @type instance: L{objects.Instance}
-  @param instance: instance to import the disks into
-  @type src_node: string
-  @param src_node: source node for the disk images
-  @type src_images: list of string
-  @param src_images: absolute paths of the disk images
-  @type debug: integer
-  @param debug: debug level, passed to the OS scripts
-  @rtype: list of boolean
-  @return: each boolean represent the success of importing the n-th disk
-
-  """
-  inst_os = OSFromDisk(instance.os)
-  import_env = OSEnvironment(instance, inst_os, debug)
-  import_script = inst_os.import_script
-
-  logfile = _InstanceLogName("import", instance.os, instance.name)
-  if not os.path.exists(constants.LOG_OS_DIR):
-    os.mkdir(constants.LOG_OS_DIR, 0750)
-
-  comprcmd = "gunzip"
-  impcmd = utils.BuildShellCmd("(cd %s; %s >%s 2>&1)", inst_os.path,
-                               import_script, logfile)
-
-  final_result = []
-  for idx, image in enumerate(src_images):
-    if image:
-      destcmd = utils.BuildShellCmd('cat %s', image)
-      remotecmd = _GetSshRunner(cluster_name).BuildCmd(src_node,
-                                                       constants.GANETI_RUNAS,
-                                                       destcmd)
-      command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
-      import_env['IMPORT_DEVICE'] = import_env['DISK_%d_PATH' % idx]
-      import_env['IMPORT_INDEX'] = str(idx)
-      result = utils.RunCmd(command, env=import_env)
-      if result.failed:
-        logging.error("Disk import command '%s' returned error: %s"
-                      " output: %s", command, result.fail_reason,
-                      result.output)
-        final_result.append("error importing disk %d: %s, %s" %
-                            (idx, result.fail_reason, result.output[-100]))
-
-  if final_result:
-    _Fail("; ".join(final_result), log=False)
-
-
 def ListExports():
   """Return a list of exports currently available on this machine.
 
@@ -2231,7 +2222,7 @@ def ListExports():
 
   """
   if os.path.isdir(constants.EXPORT_DIR):
-    return utils.ListVisibleFiles(constants.EXPORT_DIR)
+    return sorted(utils.ListVisibleFiles(constants.EXPORT_DIR))
   else:
     _Fail("No exports directory")
 
@@ -2429,9 +2420,11 @@ def JobQueueUpdate(file_name, content):
 
   """
   _EnsureJobQueueFile(file_name)
+  getents = runtime.GetEnts()
 
   # Write and replace the file atomically
-  utils.WriteFile(file_name, data=_Decompress(content))
+  utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid,
+                  gid=getents.masterd_gid)
 
 
 def JobQueueRename(old, new):
@@ -2453,24 +2446,6 @@ def JobQueueRename(old, new):
   utils.RenameFile(old, new, mkdir=True)
 
 
-def JobQueueSetDrainFlag(drain_flag):
-  """Set the drain flag for the queue.
-
-  This will set or unset the queue drain flag.
-
-  @type drain_flag: boolean
-  @param drain_flag: if True, will set the drain flag, otherwise reset it.
-  @rtype: truple
-  @return: always True, None
-  @warning: the function always returns True
-
-  """
-  if drain_flag:
-    utils.WriteFile(constants.JOB_QUEUE_DRAIN_FILE, data="", close=True)
-  else:
-    utils.RemoveFile(constants.JOB_QUEUE_DRAIN_FILE)
-
-
 def BlockdevClose(instance_name, disks):
   """Closes the given block devices.
 
@@ -2525,6 +2500,70 @@ def ValidateHVParams(hvname, hvparams):
     _Fail(str(err), log=False)
 
 
+def _CheckOSPList(os_obj, parameters):
+  """Check whether a list of parameters is supported by the OS.
+
+  @type os_obj: L{objects.OS}
+  @param os_obj: OS object to check
+  @type parameters: list
+  @param parameters: the list of parameters to check
+
+  """
+  supported = [v[0] for v in os_obj.supported_parameters]
+  delta = frozenset(parameters).difference(supported)
+  if delta:
+    _Fail("The following parameters are not supported"
+          " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta)))
+
+
+def ValidateOS(required, osname, checks, osparams):
+  """Validate the given OS' parameters.
+
+  @type required: boolean
+  @param required: whether absence of the OS should translate into
+      failure or not
+  @type osname: string
+  @param osname: the OS to be validated
+  @type checks: list
+  @param checks: list of the checks to run (currently only 'parameters')
+  @type osparams: dict
+  @param osparams: dictionary with OS parameters
+  @rtype: boolean
+  @return: True if the validation passed, or False if the OS was not
+      found and L{required} was false
+
+  """
+  if not constants.OS_VALIDATE_CALLS.issuperset(checks):
+    _Fail("Unknown checks required for OS %s: %s", osname,
+          set(checks).difference(constants.OS_VALIDATE_CALLS))
+
+  name_only = objects.OS.GetName(osname)
+  status, tbv = _TryOSFromDisk(name_only, None)
+
+  if not status:
+    if required:
+      _Fail(tbv)
+    else:
+      return False
+
+  if max(tbv.api_versions) < constants.OS_API_V20:
+    return True
+
+  if constants.OS_VALIDATE_PARAMETERS in checks:
+    _CheckOSPList(tbv, osparams.keys())
+
+  validate_env = OSCoreEnv(osname, tbv, osparams)
+  result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env,
+                        cwd=tbv.path)
+  if result.failed:
+    logging.error("os validate command '%s' returned error: %s output: %s",
+                  result.cmd, result.fail_reason, result.output)
+    _Fail("OS validation script failed (%s), output: %s",
+          result.fail_reason, result.output, log=False)
+
+  return True
+
+
 def DemoteFromMC():
   """Demotes the current node from master candidate role.
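
To make the new parameter plumbing concrete: OSCoreEnv above exports every OS parameter as an OSP_<NAME> environment variable, and ValidateOS then runs the OS's verify script with that environment. A minimal standalone sketch of the naming rule (the parameter names below are made up for illustration):

# Sketch of the OSP_* naming rule implemented in OSCoreEnv; the
# parameter names are purely illustrative.
os_params = {"dhcp": "yes", "root_size": "10G"}

validate_env = {}
for pname, pvalue in os_params.items():
  validate_env["OSP_%s" % pname.upper()] = pvalue

assert validate_env == {"OSP_DHCP": "yes", "OSP_ROOT_SIZE": "10G"}

# ValidateOS then runs, roughly:
#   utils.RunCmd([verify_script, "parameters"], env=validate_env, cwd=os_dir)
# where "parameters" is the check named by constants.OS_VALIDATE_PARAMETERS,
# currently the only one defined.
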
@@ -2567,7 +2606,7 @@ def CreateX509Certificate(validity, cryptodir=constants.CRYPTO_KEYS_DIR):
 
   """
   (key_pem, cert_pem) = \
-    utils.GenerateSelfSignedX509Cert(utils.HostInfo.SysName(),
+    utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(),
                                      min(validity, _MAX_SSL_CERT_VALIDITY))
 
   cert_dir = tempfile.mkdtemp(dir=cryptodir,
@@ -2622,6 +2661,7 @@ def _GetImportExportIoCommand(instance, mode, ieio, ieargs):
   env = None
   prefix = None
   suffix = None
+  exp_size = None
 
   if ieio == constants.IEIO_FILE:
     (filename, ) = ieargs
@@ -2646,6 +2686,14 @@ def _GetImportExportIoCommand(instance, mode, ieio, ieargs):
     elif mode == constants.IEM_EXPORT:
       suffix = "< %s" % quoted_filename
 
+    # Retrieve file size
+    try:
+      st = os.stat(filename)
+    except EnvironmentError, err:
+      logging.error("Can't stat(2) %s: %s", filename, err)
+    else:
+      exp_size = utils.BytesToMebibyte(st.st_size)
+
   elif ieio == constants.IEIO_RAW_DISK:
     (disk, ) = ieargs
 
@@ -2669,6 +2717,7 @@ def _GetImportExportIoCommand(instance, mode, ieio, ieargs):
                                    real_disk.dev_path,
                                    str(1024 * 1024), # 1 MB
                                    str(disk.size))
+    exp_size = disk.size
 
   elif ieio == constants.IEIO_SCRIPT:
     (disk, disk_index, ) = ieargs
@@ -2699,10 +2748,13 @@ def _GetImportExportIoCommand(instance, mode, ieio, ieargs):
     elif mode == constants.IEM_EXPORT:
       prefix = "%s |" % script_cmd
 
+    # Let script predict size
+    exp_size = constants.IE_CUSTOM_SIZE
+
   else:
     _Fail("Invalid %s I/O mode %r", mode, ieio)
 
-  return (env, prefix, suffix)
+  return (env, prefix, suffix, exp_size)
 
 
 def _CreateImportExportStatusDir(prefix):
@@ -2714,15 +2766,12 @@ def _CreateImportExportStatusDir(prefix):
                                    (prefix, utils.TimestampForFilename())))
 
 
-def StartImportExportDaemon(mode, key_name, ca, host, port, instance,
-                            ieio, ieioargs):
+def StartImportExportDaemon(mode, opts, host, port, instance, ieio, ieioargs):
   """Starts an import or export daemon.
 
   @param mode: Import/export mode
-  @type key_name: string
-  @param key_name: RSA key name (None to use cluster certificate)
-  @type ca: string:
-  @param ca: Remote CA in PEM format (None to use cluster certificate)
+  @type opts: L{objects.ImportExportOptions}
+  @param opts: Daemon options
   @type host: string
   @param host: Remote host for export (None for import)
   @type port: int
@@ -2748,42 +2797,47 @@ def StartImportExportDaemon(mode, opts, host, port, instance, ieio, ieioargs):
   else:
     _Fail("Invalid mode %r", mode)
 
-  if (key_name is None) ^ (ca is None):
+  if (opts.key_name is None) ^ (opts.ca_pem is None):
     _Fail("Cluster certificate can only be used for both key and CA")
 
-  (cmd_env, cmd_prefix, cmd_suffix) = \
+  (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \
     _GetImportExportIoCommand(instance, mode, ieio, ieioargs)
 
-  if key_name is None:
+  if opts.key_name is None:
     # Use server.pem
     key_path = constants.NODED_CERT_FILE
     cert_path = constants.NODED_CERT_FILE
-    assert ca is None
+    assert opts.ca_pem is None
   else:
     (_, key_path, cert_path) = _GetX509Filenames(constants.CRYPTO_KEYS_DIR,
-                                                 key_name)
-    assert ca is not None
+                                                 opts.key_name)
+    assert opts.ca_pem is not None
+
+  for i in [key_path, cert_path]:
+    if not os.path.exists(i):
+      _Fail("File '%s' does not exist" % i)
 
   status_dir = _CreateImportExportStatusDir(prefix)
   try:
     status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE)
     pid_file = utils.PathJoin(status_dir, _IES_PID_FILE)
+    ca_file = utils.PathJoin(status_dir, _IES_CA_FILE)
 
-    if ca is None:
+    if opts.ca_pem is None:
       # Use server.pem
-      # TODO: If socat runs as a non-root user, this might need to be copied to
-      # a separate file
-      ca_path = constants.NODED_CERT_FILE
+      ca = utils.ReadFile(constants.NODED_CERT_FILE)
     else:
-      ca_path = utils.PathJoin(status_dir, _IES_CA_FILE)
-      utils.WriteFile(ca_path, data=ca, mode=0400)
+      ca = opts.ca_pem
+
+    # Write CA file
+    utils.WriteFile(ca_file, data=ca, mode=0400)
 
     cmd = [
       constants.IMPORT_EXPORT_DAEMON,
       status_file, mode,
       "--key=%s" % key_path,
      "--cert=%s" % cert_path,
-      "--ca=%s" % ca_path,
+      "--ca=%s" % ca_file,
      ]
 
     if host:
@@ -2792,6 +2846,15 @@ def StartImportExportDaemon(mode, opts, host, port, instance, ieio, ieioargs):
     if port:
       cmd.append("--port=%s" % port)
 
+    if opts.compress:
+      cmd.append("--compress=%s" % opts.compress)
+
+    if opts.magic:
+      cmd.append("--magic=%s" % opts.magic)
+
+    if exp_size is not None:
+      cmd.append("--expected-size=%s" % exp_size)
+
     if cmd_prefix:
       cmd.append("--cmd-prefix=%s" % cmd_prefix)
 
@@ -2845,6 +2908,21 @@ def GetImportExportStatus(names):
   return result
 
 
+def AbortImportExport(name):
+  """Sends SIGTERM to a running import/export daemon.
+
+  """
+  logging.info("Abort import/export %s", name)
+
+  status_dir = utils.PathJoin(constants.IMPORT_EXPORT_DIR, name)
+  pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE))
+
+  if pid:
+    logging.info("Import/export %s is running with PID %s, sending SIGTERM",
+                 name, pid)
+    utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM)
+
+
 def CleanupImportExport(name):
   """Cleanup after an import or export.
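
AbortImportExport (and CleanupImportExport below) rely on utils.ReadLockedPidFile, which returns a PID only while the daemon still holds the lock on its PID file, so a stale file left behind by a dead daemon is never signalled. A minimal sketch of that convention using plain fcntl; utils.ReadLockedPidFile is the real implementation, this only illustrates the idea:

import errno
import fcntl
import os

def ReadLockedPidSketch(path):
  # Return the PID only if the file is still exclusively locked by the
  # daemon that wrote it, i.e. the daemon is still alive.
  try:
    fd = os.open(path, os.O_RDONLY)
  except OSError, err:
    if err.errno != errno.ENOENT:
      raise
    return None  # no PID file at all
  try:
    try:
      fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
      # Lock is held elsewhere: the daemon is running, the PID is valid
      return int(os.read(fd, 100))
    return None  # we acquired the lock, so the writer is gone
  finally:
    os.close(fd)
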
@@ -2855,25 +2933,8 @@ def CleanupImportExport(name):
   logging.info("Finalizing import/export %s", name)
 
   status_dir = utils.PathJoin(constants.IMPORT_EXPORT_DIR, name)
-  pid_file = utils.PathJoin(status_dir, _IES_PID_FILE)
 
-  pid = None
-  try:
-    fd = os.open(pid_file, os.O_RDONLY)
-  except EnvironmentError, err:
-    if err.errno != errno.ENOENT:
-      raise
-    # PID file doesn't exist
-  else:
-    try:
-      try:
-        # Try to acquire lock
-        utils.LockFile(fd)
-      except errors.LockError:
-        # Couldn't lock, daemon is running
-        pid = int(os.read(fd, 100))
-    finally:
-      os.close(fd)
+  pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE))
 
   if pid:
     logging.info("Import/export %s is still running with PID %s",
@@ -2888,7 +2949,7 @@ def _FindDisks(nodes_ip, disks):
 
   """
   # set the correct physical ID
-  my_name = utils.HostInfo().name
+  my_name = netutils.Hostname.GetSysName()
   for cf in disks:
     cf.SetPhysicalID(my_name, nodes_ip)
 
@@ -3009,6 +3070,16 @@ def DrbdWaitSync(nodes_ip, disks):
   return (alldone, min_resync)
 
 
+def GetDrbdUsermodeHelper():
+  """Returns the DRBD usermode helper currently configured.
+
+  """
+  try:
+    return bdev.BaseDRBD.GetUsermodeHelper()
+  except errors.BlockDeviceError, err:
+    _Fail(str(err))
+
+
 def PowercycleNode(hypervisor_type):
   """Hard-powercycle the node.
 
@@ -3024,6 +3095,11 @@ def PowercycleNode(hypervisor_type):
     pid = 0
   if pid > 0:
     return "Reboot scheduled in 5 seconds"
+  # ensure the child is running in RAM
+  try:
+    utils.Mlockall()
+  except Exception: # pylint: disable-msg=W0703
+    pass
  time.sleep(5)
   hyper.PowercycleNode()
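
The powercycle path above forks so that the RPC can return an answer before the node goes down, and the new Mlockall call pins the child into RAM, since a powercycle may be requested precisely because storage has become unresponsive. A condensed standalone sketch of the pattern (the hypervisor call is stubbed out; the mlockall flags are the standard Linux MCL_CURRENT | MCL_FUTURE):

import ctypes
import os
import time

def PowercycleSketch():
  # The parent returns immediately so the caller gets an answer before
  # the machine reboots; the child carries out the actual powercycle.
  try:
    pid = os.fork()
  except OSError:
    pid = 0  # fork failed: powercycle from this process anyway
  if pid > 0:
    return "Reboot scheduled in 5 seconds"
  # Best effort to keep the child resident in RAM (same intent as
  # utils.Mlockall above); MCL_CURRENT | MCL_FUTURE == 3 on Linux.
  try:
    libc = ctypes.CDLL("libc.so.6", use_errno=True)
    libc.mlockall(3)
  except Exception:
    pass
  time.sleep(5)
  # hyper.PowercycleNode() would go here; stubbed out in this sketch.
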