#
#
-# Copyright (C) 2006, 2007 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
from ganeti import objects
from ganeti import ssconf
from ganeti import serializer
+from ganeti import netutils
+from ganeti import runtime
_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
_IES_PID_FILE = "pid"
_IES_CA_FILE = "ca"
+#: Valid LVS output line regex
+_LVSLINE_REGEX = re.compile("^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6})\|?$")
+
class RPCFail(Exception):
"""Class denoting RPC failure.
constants.RAPI_CERT_FILE,
constants.RAPI_USERS_FILE,
constants.CONFD_HMAC_KEY,
+ constants.CLUSTER_DOMAIN_SECRET_FILE,
])
for hv_name in constants.HYPER_TYPES:
for consumption here or from the node daemon.
@rtype: tuple
- @return: master_netdev, master_ip, master_name
+ @return: master_netdev, master_ip, master_name, primary_ip_family
@raise RPCFail: in case of errors
"""
master_netdev = cfg.GetMasterNetdev()
master_ip = cfg.GetMasterIP()
master_node = cfg.GetMasterNode()
+ primary_ip_family = cfg.GetPrimaryIPFamily()
except errors.ConfigurationError, err:
_Fail("Cluster configuration incomplete: %s", err, exc=True)
- return (master_netdev, master_ip, master_node)
+ return (master_netdev, master_ip, master_node, primary_ip_family)
def StartMaster(start_daemons, no_voting):
"""Activate local node as master node.
- The function will always try activate the IP address of the master
- (unless someone else has it). It will also start the master daemons,
- based on the start_daemons parameter.
+ The function will either start the master daemons or try to activate
+ the IP address of the master (unless someone else has it), depending
+ on the start_daemons parameter.
@type start_daemons: boolean
- @param start_daemons: whether to also start the master
- daemons (ganeti-masterd and ganeti-rapi)
+ @param start_daemons: whether to start the master daemons
+ (ganeti-masterd and ganeti-rapi), or (if false) activate the
+ master ip
@type no_voting: boolean
@param no_voting: whether to start ganeti-masterd without a node vote
(if start_daemons is True), but still non-interactively
"""
# GetMasterInfo will raise an exception if not able to return data
- master_netdev, master_ip, _ = GetMasterInfo()
+ master_netdev, master_ip, _, family = GetMasterInfo()
err_msgs = []
- if utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
- if utils.OwnIpAddress(master_ip):
- # we already have the ip:
- logging.debug("Master IP already configured, doing nothing")
- else:
- msg = "Someone else has the master ip, not activating"
- logging.error(msg)
- err_msgs.append(msg)
- else:
- result = utils.RunCmd(["ip", "address", "add", "%s/32" % master_ip,
- "dev", master_netdev, "label",
- "%s:0" % master_netdev])
- if result.failed:
- msg = "Can't activate master IP: %s" % result.output
- logging.error(msg)
- err_msgs.append(msg)
-
- result = utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev,
- "-s", master_ip, master_ip])
- # we'll ignore the exit code of arping
-
- # and now start the master and rapi daemons
+ # either start the master and rapi daemons
if start_daemons:
if no_voting:
masterd_args = "--no-voting --yes-do-it"
msg = "Can't start Ganeti master: %s" % result.output
logging.error(msg)
err_msgs.append(msg)
+ # or activate the IP
+ else:
+ if netutils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
+ if netutils.IPAddress.Own(master_ip):
+ # we already have the ip:
+ logging.debug("Master IP already configured, doing nothing")
+ else:
+ msg = "Someone else has the master ip, not activating"
+ logging.error(msg)
+ err_msgs.append(msg)
+ else:
+ ipcls = netutils.IP4Address
+ if family == netutils.IP6Address.family:
+ ipcls = netutils.IP6Address
+
+ result = utils.RunCmd(["ip", "address", "add",
+ "%s/%d" % (master_ip, ipcls.iplen),
+ "dev", master_netdev, "label",
+ "%s:0" % master_netdev])
+ if result.failed:
+ msg = "Can't activate master IP: %s" % result.output
+ logging.error(msg)
+ err_msgs.append(msg)
+
+ # we ignore the exit code of the following cmds
+ if ipcls == netutils.IP4Address:
+ utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev, "-s",
+ master_ip, master_ip])
+ elif ipcls == netutils.IP6Address:
+ try:
+ utils.RunCmd(["ndisc6", "-q", "-r 3", master_ip, master_netdev])
+ except errors.OpExecError:
+ # TODO: Better error reporting
+ logging.warning("Can't execute ndisc6, please install if missing")
if err_msgs:
_Fail("; ".join(err_msgs))
# need to decide in which case we fail the RPC for this
# GetMasterInfo will raise an exception if not able to return data
- master_netdev, master_ip, _ = GetMasterInfo()
+ master_netdev, master_ip, _, family = GetMasterInfo()
- result = utils.RunCmd(["ip", "address", "del", "%s/32" % master_ip,
+ ipcls = netutils.IP4Address
+ if family == netutils.IP6Address.family:
+ ipcls = netutils.IP6Address
+
+ result = utils.RunCmd(["ip", "address", "del",
+ "%s/%d" % (master_ip, ipcls.iplen),
"dev", master_netdev])
if result.failed:
logging.error("Can't remove the master IP, error: %s", result.output)
result.cmd, result.exit_code, result.output)
-def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
- """Joins this node to the cluster.
-
- This does the following:
- - updates the hostkeys of the machine (rsa and dsa)
- - adds the ssh private key to the user
- - adds the ssh public key to the users' authorized_keys file
-
- @type dsa: str
- @param dsa: the DSA private key to write
- @type dsapub: str
- @param dsapub: the DSA public key to write
- @type rsa: str
- @param rsa: the RSA private key to write
- @type rsapub: str
- @param rsapub: the RSA public key to write
- @type sshkey: str
- @param sshkey: the SSH private key to write
- @type sshpub: str
- @param sshpub: the SSH public key to write
- @rtype: boolean
- @return: the success of the operation
+def EtcHostsModify(mode, host, ip):
+ """Modify a host entry in /etc/hosts.
- """
- sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
- (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
- (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
- (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
- for name, content, mode in sshd_keys:
- utils.WriteFile(name, data=content, mode=mode)
+ @param mode: The mode to operate; either L{constants.ETC_HOSTS_ADD} or
+ L{constants.ETC_HOSTS_REMOVE}
+ @param host: The host to operate on
+ @param ip: The ip associated with the entry; required when adding,
+ must be empty when removing
+ @raise RPCFail: if the mode is not supported or the 'ip' parameter does
+ not match the requested mode
- try:
- priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
- mkdir=True)
- except errors.OpExecError, err:
- _Fail("Error while processing user ssh files: %s", err, exc=True)
-
- for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
- utils.WriteFile(name, data=content, mode=0600)
-
- utils.AddAuthorizedKey(auth_keys, sshpub)
-
- result = utils.RunCmd([constants.DAEMON_UTIL, "reload-ssh-keys"])
- if result.failed:
- _Fail("Unable to reload SSH keys (command %r, exit code %s, output %r)",
- result.cmd, result.exit_code, result.output)
+ """
+ # Validation failures go through _Fail so that an invalid request aborts
+ # before /etc/hosts is touched; merely instantiating RPCFail would not
+ # interrupt the call.
+ if mode == constants.ETC_HOSTS_ADD:
+ if not ip:
+ _Fail("Mode 'add' needs 'ip' parameter, but parameter not"
+ " present")
+ utils.AddHostToEtcHosts(host, ip)
+ elif mode == constants.ETC_HOSTS_REMOVE:
+ if ip:
+ _Fail("Mode 'remove' does not allow 'ip' parameter, but"
+ " parameter is present")
+ utils.RemoveHostFromEtcHosts(host)
+ else:
+ _Fail("Mode not supported")
def LeaveCluster(modify_ssh_setup):
"""
outputarray = {}
- vginfo = _GetVGInfo(vgname)
- outputarray['vg_size'] = vginfo['vg_size']
- outputarray['vg_free'] = vginfo['vg_free']
- hyper = hypervisor.GetHypervisor(hypervisor_type)
- hyp_info = hyper.GetNodeInfo()
- if hyp_info is not None:
- outputarray.update(hyp_info)
+ if vgname is not None:
+ vginfo = bdev.LogicalVolume.GetVGInfo([vgname])
+ vg_free = vg_size = None
+ if vginfo:
+ vg_free = int(round(vginfo[0][0], 0))
+ vg_size = int(round(vginfo[0][1], 0))
+ outputarray['vg_size'] = vg_size
+ outputarray['vg_free'] = vg_free
+
+ if hypervisor_type is not None:
+ hyper = hypervisor.GetHypervisor(hypervisor_type)
+ hyp_info = hyper.GetNodeInfo()
+ if hyp_info is not None:
+ outputarray.update(hyp_info)
outputarray["bootid"] = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
"""
result = {}
+ my_name = netutils.Hostname.GetSysName()
+ port = netutils.GetDaemonPort(constants.NODED)
+ vm_capable = my_name not in what.get(constants.NV_VMNODES, [])
- if constants.NV_HYPERVISOR in what:
+ if constants.NV_HYPERVISOR in what and vm_capable:
result[constants.NV_HYPERVISOR] = tmp = {}
for hv_name in what[constants.NV_HYPERVISOR]:
try:
val = "Error while checking hypervisor: %s" % str(err)
tmp[hv_name] = val
+ if constants.NV_HVPARAMS in what and vm_capable:
+ result[constants.NV_HVPARAMS] = tmp = []
+ for source, hv_name, hvparms in what[constants.NV_HVPARAMS]:
+ try:
+ logging.info("Validating hv %s, %s", hv_name, hvparms)
+ hypervisor.GetHypervisor(hv_name).ValidateParameters(hvparms)
+ except errors.HypervisorError, err:
+ tmp.append((source, hv_name, str(err)))
+
if constants.NV_FILELIST in what:
result[constants.NV_FILELIST] = utils.FingerprintFiles(
what[constants.NV_FILELIST])
if constants.NV_NODENETTEST in what:
result[constants.NV_NODENETTEST] = tmp = {}
- my_name = utils.HostInfo().name
my_pip = my_sip = None
for name, pip, sip in what[constants.NV_NODENETTEST]:
if name == my_name:
tmp[my_name] = ("Can't find my own primary/secondary IP"
" in the node list")
else:
- port = utils.GetDaemonPort(constants.NODED)
for name, pip, sip in what[constants.NV_NODENETTEST]:
fail = []
- if not utils.TcpPing(pip, port, source=my_pip):
+ if not netutils.TcpPing(pip, port, source=my_pip):
fail.append("primary")
if sip != pip:
- if not utils.TcpPing(sip, port, source=my_sip):
+ if not netutils.TcpPing(sip, port, source=my_sip):
fail.append("secondary")
if fail:
tmp[name] = ("failure using the %s interface(s)" %
" and ".join(fail))
- if constants.NV_LVLIST in what:
+ if constants.NV_MASTERIP in what:
+ # FIXME: add checks on incoming data structures (here and in the
+ # rest of the function)
+ master_name, master_ip = what[constants.NV_MASTERIP]
+ if master_name == my_name:
+ source = constants.IP4_ADDRESS_LOCALHOST
+ else:
+ source = None
+ result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
+ source=source)
+
+ if constants.NV_OOB_PATHS in what:
+ result[constants.NV_OOB_PATHS] = tmp = []
+ for path in what[constants.NV_OOB_PATHS]:
+ try:
+ st = os.stat(path)
+ except OSError, err:
+ tmp.append("error stating out of band helper: %s" % err)
+ else:
+ if stat.S_ISREG(st.st_mode):
+ if stat.S_IMODE(st.st_mode) & stat.S_IXUSR:
+ tmp.append(None)
+ else:
+ tmp.append("out of band helper %s is not executable" % path)
+ else:
+ tmp.append("out of band helper %s is not a file" % path)
+
+ if constants.NV_LVLIST in what and vm_capable:
try:
- val = GetVolumeList(what[constants.NV_LVLIST])
+ val = GetVolumeList(utils.ListVolumeGroups().keys())
except RPCFail, err:
val = str(err)
result[constants.NV_LVLIST] = val
- if constants.NV_INSTANCELIST in what:
+ if constants.NV_INSTANCELIST in what and vm_capable:
# GetInstanceList can fail
try:
val = GetInstanceList(what[constants.NV_INSTANCELIST])
val = str(err)
result[constants.NV_INSTANCELIST] = val
- if constants.NV_VGLIST in what:
+ if constants.NV_VGLIST in what and vm_capable:
result[constants.NV_VGLIST] = utils.ListVolumeGroups()
- if constants.NV_PVLIST in what:
+ if constants.NV_PVLIST in what and vm_capable:
result[constants.NV_PVLIST] = \
bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
filter_allocatable=False)
result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
constants.RELEASE_VERSION)
- if constants.NV_HVINFO in what:
+ if constants.NV_HVINFO in what and vm_capable:
hyper = hypervisor.GetHypervisor(what[constants.NV_HVINFO])
result[constants.NV_HVINFO] = hyper.GetNodeInfo()
- if constants.NV_DRBDLIST in what:
+ if constants.NV_DRBDLIST in what and vm_capable:
try:
used_minors = bdev.DRBD8.GetUsedDevs().keys()
except errors.BlockDeviceError, err:
used_minors = str(err)
result[constants.NV_DRBDLIST] = used_minors
+ if constants.NV_DRBDHELPER in what and vm_capable:
+ status = True
+ try:
+ payload = bdev.BaseDRBD.GetUsermodeHelper()
+ except errors.BlockDeviceError, err:
+ logging.error("Can't get DRBD usermode helper: %s", str(err))
+ status = False
+ payload = str(err)
+ result[constants.NV_DRBDHELPER] = (status, payload)
+
if constants.NV_NODESETUP in what:
result[constants.NV_NODESETUP] = tmpr = []
if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
if constants.NV_TIME in what:
result[constants.NV_TIME] = utils.SplitTime(time.time())
+ if constants.NV_OSLIST in what and vm_capable:
+ result[constants.NV_OSLIST] = DiagnoseOS()
+
return result
-def GetVolumeList(vg_name):
+def GetVolumeList(vg_names):
"""Compute list of logical volumes and their size.
- @type vg_name: str
- @param vg_name: the volume group whose LVs we should list
+ @type vg_names: list
+ @param vg_names: the volume groups whose LVs we should list, or
+ empty for all volume groups
@rtype: dict
@return:
dictionary of all partions (key) with value being a tuple of
their size (in MiB), inactive and online status::
- {'test1': ('20.06', True, True)}
+ {'xenvg/test1': ('20.06', True, True)}
in case of errors, a string is returned with the error
details.
"""
lvs = {}
sep = '|'
+ if not vg_names:
+ vg_names = []
result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
"--separator=%s" % sep,
- "-olv_name,lv_size,lv_attr", vg_name])
+ "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names)
if result.failed:
_Fail("Failed to list logical volumes, lvs output: %s", result.output)
- valid_line_re = re.compile("^ *([^|]+)\|([0-9.]+)\|([^|]{6})\|?$")
for line in result.stdout.splitlines():
line = line.strip()
- match = valid_line_re.match(line)
+ match = _LVSLINE_REGEX.match(line)
if not match:
logging.error("Invalid line returned from lvs output: '%s'", line)
continue
- name, size, attr = match.groups()
+ vg_name, name, size, attr = match.groups()
inactive = attr[4] == '-'
online = attr[5] == 'o'
virtual = attr[0] == 'v'
# we don't want to report such volumes as existing, since they
# don't really hold data
continue
- lvs[name] = (size, inactive, online)
+ lvs[vg_name+"/"+name] = (size, inactive, online)
return lvs
for idx in range(len(instance.disks)):
link_name = _GetBlockDevSymlinkPath(iname, idx)
if not os.path.islink(link_name):
- _Fail("Instance %s was not restarted since ganeti 1.2.5", iname)
+ logging.warning("Instance %s is missing symlink %s for disk %d",
+ iname, link_name, idx)
def GetAllInstancesInfo(hypervisor_list):
" log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
-def _GetVGInfo(vg_name):
- """Get information about the volume group.
-
- @type vg_name: str
- @param vg_name: the volume group which we query
- @rtype: dict
- @return:
- A dictionary with the following keys:
- - C{vg_size} is the total size of the volume group in MiB
- - C{vg_free} is the free size of the volume group in MiB
- - C{pv_count} are the number of physical disks in that VG
-
- If an error occurs during gathering of data, we return the same dict
- with keys all set to None.
-
- """
- retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
-
- retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
- "--nosuffix", "--units=m", "--separator=:", vg_name])
-
- if retval.failed:
- logging.error("volume group %s not present", vg_name)
- return retdic
- valarr = retval.stdout.strip().rstrip(':').split(':')
- if len(valarr) == 3:
- try:
- retdic = {
- "vg_size": int(round(float(valarr[0]), 0)),
- "vg_free": int(round(float(valarr[1]), 0)),
- "pv_count": int(valarr[2]),
- }
- except (TypeError, ValueError), err:
- logging.exception("Fail to parse vgs output: %s", err)
- else:
- logging.error("vgs output has the wrong number of fields (expected"
- " three): %s", str(valarr))
- return retdic
-
-
def _GetBlockDevSymlinkPath(instance_name, idx):
- return utils.PathJoin(constants.DISK_LINKS_DIR,
- "%s:%d" % (instance_name, idx))
+ return utils.PathJoin(constants.DISK_LINKS_DIR, "%s%s%d" %
+ (instance_name, constants.DISK_SEPARATOR, idx))
def _SymlinkBlockDev(instance_name, device_path, idx):
return device.unique_id
+def _WipeDevice(path, offset, size):
+ """This function actually wipes the device.
+
+ The wipe is done by running C{dd} (L{constants.DD_CMD}) with
+ C{/dev/zero} as input and the device as output, using direct I/O.
+
+ @param path: The path to the device to wipe
+ @param offset: The offset in MiB in the file
+ @param size: The size in MiB to write
+ @raise RPCFail: if the wipe command fails (reported through L{_Fail})
+
+ """
+ # offset and size are handed to dd as "seek" and "count", i.e. as numbers
+ # of blocks of constants.WIPE_BLOCK_SIZE bytes.
+ # NOTE(review): the MiB units documented above presumably rely on
+ # WIPE_BLOCK_SIZE being 1 MiB - confirm against constants.
+ cmd = [constants.DD_CMD, "if=/dev/zero", "seek=%d" % offset,
+ "bs=%d" % constants.WIPE_BLOCK_SIZE, "oflag=direct", "of=%s" % path,
+ "count=%d" % size]
+ result = utils.RunCmd(cmd)
+
+ if result.failed:
+ _Fail("Wipe command '%s' exited with error: %s; output: %s", result.cmd,
+ result.fail_reason, result.output)
+
+
+def BlockdevWipe(disk, offset, size):
+ """Wipes a block device.
+
+ The disk is looked up with L{_RecursiveFindBD}, the requested range is
+ checked against the size of the device found, and the actual wipe is
+ delegated to L{_WipeDevice}.
+
+ @type disk: L{objects.Disk}
+ @param disk: the disk object we want to wipe
+ @type offset: int
+ @param offset: The offset in MiB in the file
+ @type size: int
+ @param size: The size in MiB to write
+ @raise RPCFail: if the device cannot be found, if the requested range
+ does not fit into the device, or if the wipe command fails
+
+ """
+ # A lookup error is handled the same way as a device that was not found
+ try:
+ rdev = _RecursiveFindBD(disk)
+ except errors.BlockDeviceError:
+ rdev = None
+
+ if not rdev:
+ _Fail("Cannot execute wipe for device %s: device not found", disk.iv_name)
+
+ # Cross-check the requested range against the size of the device
+ # NOTE(review): assumes rdev.size uses the same unit (MiB) as offset and
+ # size - confirm against the block device classes.
+ if offset > rdev.size:
+ _Fail("Offset is bigger than device size")
+ if (offset + size) > rdev.size:
+ _Fail("The provided offset and size to wipe is bigger than device size")
+
+ _WipeDevice(rdev.dev_path, offset, size)
+
+
+def BlockdevPauseResumeSync(disks, pause):
+ """Pause or resume the sync of the block device.
+
+ Every disk is handled on its own: a disk that cannot be found or whose
+ operation fails is recorded in the result and the remaining disks are
+ still processed.
+
+ @type disks: list of L{objects.Disk}
+ @param disks: the disks object we want to pause/resume
+ @type pause: bool
+ @param pause: Whether to pause (true) or resume (false) the sync
+ @rtype: list
+ @return: one (status, message) tuple per disk, in the order of
+ C{disks}; message is None when the operation succeeded and an
+ error description otherwise
+
+ """
+ success = []
+ for disk in disks:
+ # A lookup error is handled the same way as a device that was not found
+ try:
+ rdev = _RecursiveFindBD(disk)
+ except errors.BlockDeviceError:
+ rdev = None
+
+ if not rdev:
+ success.append((False, ("Cannot change sync for device %s:"
+ " device not found" % disk.iv_name)))
+ continue
+
+ result = rdev.PauseResumeSync(pause)
+
+ if result:
+ success.append((result, None))
+ else:
+ # Name the attempted operation in the error message
+ if pause:
+ msg = "Pause"
+ else:
+ msg = "Resume"
+ success.append((result, "%s for device %s failed" % (msg, disk.iv_name)))
+
+ return success
+
+
def BlockdevRemove(disk):
"""Remove a block device.
return result
-def BlockdevAssemble(disk, owner, as_primary):
+def BlockdevAssemble(disk, owner, as_primary, idx):
"""Activate a block device for an instance.
This is a wrapper over _RecursiveAssembleBD.
if isinstance(result, bdev.BlockDev):
# pylint: disable-msg=E1103
result = result.dev_path
+ if as_primary:
+ _SymlinkBlockDev(owner, result, idx)
except errors.BlockDeviceError, err:
_Fail("Error while assembling disk: %s", err, exc=True)
+ except OSError, err:
+ _Fail("Error while symlinking disk: %s", err, exc=True)
return result
@type disks: list of L{objects.Disk}
@param disks: the list of disks which we should query
@rtype: disk
- @return:
- a list of (mirror_done, estimated_time) tuples, which
- are the result of L{bdev.BlockDev.CombinedSyncStatus}
+ @return: List of L{objects.BlockDevStatus}, one for each disk
@raise errors.BlockDeviceError: if any of the disks cannot be
found
return stats
+def BlockdevGetmirrorstatusMulti(disks):
+ """Get the mirroring status of a list of devices.
+
+ Errors are reported per disk in the returned list and do not abort
+ the processing of the remaining disks.
+
+ @type disks: list of L{objects.Disk}
+ @param disks: the list of disks which we should query
+ @rtype: list
+ @return: List of tuples, (bool, status), one for each disk; bool denotes
+ success/failure, status is L{objects.BlockDevStatus} on success, string
+ otherwise
+
+ """
+ result = []
+ for disk in disks:
+ try:
+ rbd = _RecursiveFindBD(disk)
+ if rbd is None:
+ result.append((False, "Can't find device %s" % disk))
+ continue
+
+ status = rbd.CombinedSyncStatus()
+ except errors.BlockDeviceError, err:
+ logging.exception("Error while getting disk status")
+ result.append((False, str(err)))
+ else:
+ result.append((True, status))
+
+ # Every disk must have produced exactly one entry
+ assert len(disks) == len(result)
+
+ return result
+
+
def _RecursiveFindBD(disk):
"""Check if a device is activated.
raw_data = _Decompress(data)
- utils.WriteFile(file_name, data=raw_data, mode=mode, uid=uid, gid=gid,
- atime=atime, mtime=mtime)
+ utils.SafeWriteFile(file_name, None,
+ data=raw_data, mode=mode, uid=uid, gid=gid,
+ atime=atime, mtime=mtime)
+
+
+def RunOob(oob_program, command, node, timeout):
+ """Executes oob_program with given command on given node.
+
+ The program is invoked as C{oob_program command node}.
+
+ @param oob_program: The path to the executable oob_program
+ @param command: The command to invoke on oob_program
+ @param node: The node given as an argument to the program
+ @param timeout: Timeout after which we kill the oob program
+
+ @return: stdout
+ @raise RPCFail: If execution fails for some reason
+
+ """
+ # The timeout is enforced by utils.RunCmd
+ result = utils.RunCmd([oob_program, command, node], timeout=timeout)
+
+ if result.failed:
+ _Fail("'%s' failed with reason '%s'; output: %s", result.cmd,
+ result.fail_reason, result.output)
+
+ return result.stdout
def WriteSsconfFiles(values):
search (if not given defaults to
L{constants.OS_SEARCH_PATH})
@rtype: list of L{objects.OS}
- @return: a list of tuples (name, path, status, diagnose, variants)
- for all (potential) OSes under all search paths, where:
+ @return: a list of tuples (name, path, status, diagnose, variants,
+ parameters, api_version) for all (potential) OSes under all
+ search paths, where:
- name is the (potential) OS name
- path is the full path to the OS
- status True/False is the validity of the OS
- diagnose is the error message for an invalid OS, otherwise empty
- variants is a list of supported OS variants, if any
+ - parameters is a list of (name, help) parameters, if any
+ - api_version is a list of supported OS API versions
"""
if top_dirs is None:
if status:
diagnose = ""
variants = os_inst.supported_variants
+ parameters = os_inst.supported_parameters
+ api_versions = os_inst.api_versions
else:
diagnose = os_inst
- variants = []
- result.append((name, os_path, status, diagnose, variants))
+ variants = parameters = api_versions = []
+ result.append((name, os_path, status, diagnose, variants,
+ parameters, api_versions))
return result
if max(api_versions) >= constants.OS_API_V15:
os_files[constants.OS_VARIANTS_FILE] = ''
+ if max(api_versions) >= constants.OS_API_V20:
+ os_files[constants.OS_PARAMETERS_FILE] = ''
+ else:
+ del os_files[constants.OS_SCRIPT_VERIFY]
+
for filename in os_files:
os_files[filename] = utils.PathJoin(os_dir, filename)
return False, ("File '%s' under path '%s' is not executable" %
(filename, os_dir))
- variants = None
+ variants = []
if constants.OS_VARIANTS_FILE in os_files:
variants_file = os_files[constants.OS_VARIANTS_FILE]
try:
if not variants:
return False, ("No supported os variant found")
+ parameters = []
+ if constants.OS_PARAMETERS_FILE in os_files:
+ parameters_file = os_files[constants.OS_PARAMETERS_FILE]
+ try:
+ parameters = utils.ReadFile(parameters_file).splitlines()
+ except EnvironmentError, err:
+ return False, ("Error while reading the OS parameters file at %s: %s" %
+ (parameters_file, _ErrnoOrStr(err)))
+ parameters = [v.split(None, 1) for v in parameters]
+
os_obj = objects.OS(name=name, path=os_dir,
create_script=os_files[constants.OS_SCRIPT_CREATE],
export_script=os_files[constants.OS_SCRIPT_EXPORT],
import_script=os_files[constants.OS_SCRIPT_IMPORT],
rename_script=os_files[constants.OS_SCRIPT_RENAME],
+ verify_script=os_files.get(constants.OS_SCRIPT_VERIFY,
+ None),
supported_variants=variants,
+ supported_parameters=parameters,
api_versions=api_versions)
return True, os_obj
@raise RPCFail: if we don't find a valid OS
"""
- name_only = name.split("+", 1)[0]
+ name_only = objects.OS.GetName(name)
status, payload = _TryOSFromDisk(name_only, base_dir)
if not status:
return payload
-def OSEnvironment(instance, inst_os, debug=0):
- """Calculate the environment for an os script.
+def OSCoreEnv(os_name, inst_os, os_params, debug=0):
+ """Calculate the basic environment for an os script.
- @type instance: L{objects.Instance}
- @param instance: target instance for the os script run
+ @type os_name: str
+ @param os_name: full operating system name (including variant)
@type inst_os: L{objects.OS}
@param inst_os: operating system for which the environment is being built
+ @type os_params: dict
+ @param os_params: the OS parameters
@type debug: integer
@param debug: debug level (0 or 1, for OS Api 10)
@rtype: dict
api_version = \
max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions))
result['OS_API_VERSION'] = '%d' % api_version
- result['INSTANCE_NAME'] = instance.name
- result['INSTANCE_OS'] = instance.os
- result['HYPERVISOR'] = instance.hypervisor
- result['DISK_COUNT'] = '%d' % len(instance.disks)
- result['NIC_COUNT'] = '%d' % len(instance.nics)
+ result['OS_NAME'] = inst_os.name
result['DEBUG_LEVEL'] = '%d' % debug
+
+ # OS variants
if api_version >= constants.OS_API_V15:
- try:
- variant = instance.os.split('+', 1)[1]
- except IndexError:
+ variant = objects.OS.GetVariant(os_name)
+ if not variant:
variant = inst_os.supported_variants[0]
result['OS_VARIANT'] = variant
+
+ # OS params
+ for pname, pvalue in os_params.items():
+ result['OSP_%s' % pname.upper()] = pvalue
+
+ return result
+
+
+def OSEnvironment(instance, inst_os, debug=0):
+ """Calculate the environment for an os script.
+
+ @type instance: L{objects.Instance}
+ @param instance: target instance for the os script run
+ @type inst_os: L{objects.OS}
+ @param inst_os: operating system for which the environment is being built
+ @type debug: integer
+ @param debug: debug level (0 or 1, for OS Api 10)
+ @rtype: dict
+ @return: dict of environment variables
+ @raise errors.BlockDeviceError: if the block device
+ cannot be found
+
+ """
+ result = OSCoreEnv(instance.os, inst_os, instance.osparams, debug=debug)
+
+ for attr in ["name", "os", "uuid", "ctime", "mtime"]:
+ result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr))
+
+ result['HYPERVISOR'] = instance.hypervisor
+ result['DISK_COUNT'] = '%d' % len(instance.disks)
+ result['NIC_COUNT'] = '%d' % len(instance.nics)
+
+ # Disks
for idx, disk in enumerate(instance.disks):
real_disk = _OpenRealBD(disk)
result['DISK_%d_PATH' % idx] = real_disk.dev_path
elif disk.dev_type == constants.LD_FILE:
result['DISK_%d_BACKEND_TYPE' % idx] = \
'file:%s' % disk.physical_id[0]
+
+ # NICs
for idx, nic in enumerate(instance.nics):
result['NIC_%d_MAC' % idx] = nic.mac
if nic.ip:
result['NIC_%d_FRONTEND_TYPE' % idx] = \
instance.hvparams[constants.HV_NIC_TYPE]
+ # HV/BE params
for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]:
for key, value in source.items():
result["INSTANCE_%s_%s" % (kind, key)] = str(value)
@type disk: L{objects.Disk}
@param disk: the disk to be snapshotted
@rtype: string
- @return: snapshot disk path
+ @return: snapshot disk ID as (vg, lv)
"""
if disk.dev_type == constants.LD_DRBD8:
for name, value in instance.beparams.items():
config.set(constants.INISECT_BEP, name, str(value))
+ config.add_section(constants.INISECT_OSP)
+ for name, value in instance.osparams.items():
+ config.set(constants.INISECT_OSP, name, str(value))
+
utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE),
data=config.Dumps())
shutil.rmtree(finaldestdir, ignore_errors=True)
"""
if os.path.isdir(constants.EXPORT_DIR):
- return utils.ListVisibleFiles(constants.EXPORT_DIR)
+ return sorted(utils.ListVisibleFiles(constants.EXPORT_DIR))
else:
_Fail("No exports directory")
def JobQueueUpdate(file_name, content):
"""Updates a file in the queue directory.
- This is just a wrapper over L{utils.WriteFile}, with proper
+ This is just a wrapper over L{utils.io.WriteFile}, with proper
checking.
@type file_name: str
"""
_EnsureJobQueueFile(file_name)
+ getents = runtime.GetEnts()
# Write and replace the file atomically
- utils.WriteFile(file_name, data=_Decompress(content))
+ utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid,
+ gid=getents.masterd_gid)
def JobQueueRename(old, new):
utils.RenameFile(old, new, mkdir=True)
-def JobQueueSetDrainFlag(drain_flag):
- """Set the drain flag for the queue.
-
- This will set or unset the queue drain flag.
-
- @type drain_flag: boolean
- @param drain_flag: if True, will set the drain flag, otherwise reset it.
- @rtype: truple
- @return: always True, None
- @warning: the function always returns True
-
- """
- if drain_flag:
- utils.WriteFile(constants.JOB_QUEUE_DRAIN_FILE, data="", close=True)
- else:
- utils.RemoveFile(constants.JOB_QUEUE_DRAIN_FILE)
-
-
def BlockdevClose(instance_name, disks):
"""Closes the given block devices.
_Fail(str(err), log=False)
+def _CheckOSPList(os_obj, parameters):
+ """Check whether a list of parameters is supported by the OS.
+
+ @type os_obj: L{objects.OS}
+ @param os_obj: OS object to check
+ @type parameters: list
+ @param parameters: the list of parameters to check
+ @raise RPCFail: if at least one of the given parameters is not among
+ the parameters supported by the OS (reported through L{_Fail})
+
+ """
+ # Only the first element of each supported_parameters entry (the
+ # parameter name) is relevant for the comparison
+ supported = [v[0] for v in os_obj.supported_parameters]
+ delta = frozenset(parameters).difference(supported)
+ if delta:
+ _Fail("The following parameters are not supported"
+ " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta)))
+
+
+def ValidateOS(required, osname, checks, osparams):
+ """Validate the given OS' parameters.
+
+ The OS definition is loaded from disk, the names of the given
+ parameters are compared with the ones the OS declares, and the verify
+ script of the OS is run with the requested checks as arguments.
+
+ @type required: boolean
+ @param required: whether absence of the OS should translate into
+ failure or not
+ @type osname: string
+ @param osname: the OS to be validated
+ @type checks: list
+ @param checks: list of the checks to run (currently only 'parameters')
+ @type osparams: dict
+ @param osparams: dictionary with OS parameters
+ @rtype: boolean
+ @return: True if the validation passed, or False if the OS was not
+ found and L{required} was false
+ @raise RPCFail: if unknown checks are requested, if the OS is required
+ but cannot be loaded, if unsupported parameters are passed, or if
+ the verify script fails
+
+ """
+ if not constants.OS_VALIDATE_CALLS.issuperset(checks):
+ _Fail("Unknown checks required for OS %s: %s", osname,
+ set(checks).difference(constants.OS_VALIDATE_CALLS))
+
+ # The OS is looked up by its base name, without the variant
+ name_only = objects.OS.GetName(osname)
+ status, tbv = _TryOSFromDisk(name_only, None)
+
+ # On failure, tbv holds the error message instead of the OS object
+ if not status:
+ if required:
+ _Fail(tbv)
+ else:
+ return False
+
+ # OSes below API version 20 are accepted without running any check
+ if max(tbv.api_versions) < constants.OS_API_V20:
+ return True
+
+ # Reject parameter names the OS does not declare before running its script
+ if constants.OS_VALIDATE_PARAMETERS in checks:
+ _CheckOSPList(tbv, osparams.keys())
+
+ # The verify script runs in the OS directory with the core OS environment
+ validate_env = OSCoreEnv(osname, tbv, osparams)
+ result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env,
+ cwd=tbv.path)
+ if result.failed:
+ logging.error("os validate command '%s' returned error: %s output: %s",
+ result.cmd, result.fail_reason, result.output)
+ # Already logged above with the full command, hence log=False
+ _Fail("OS validation script failed (%s), output: %s",
+ result.fail_reason, result.output, log=False)
+
+ return True
+
+
def DemoteFromMC():
"""Demotes the current node from master candidate role.
"""
(key_pem, cert_pem) = \
- utils.GenerateSelfSignedX509Cert(utils.HostInfo.SysName(),
+ utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(),
min(validity, _MAX_SSL_CERT_VALIDITY))
cert_dir = tempfile.mkdtemp(dir=cryptodir,
env = None
prefix = None
suffix = None
+ exp_size = None
if ieio == constants.IEIO_FILE:
(filename, ) = ieargs
elif mode == constants.IEM_EXPORT:
suffix = "< %s" % quoted_filename
+ # Retrieve file size
+ try:
+ st = os.stat(filename)
+ except EnvironmentError, err:
+ logging.error("Can't stat(2) %s: %s", filename, err)
+ else:
+ exp_size = utils.BytesToMebibyte(st.st_size)
+
elif ieio == constants.IEIO_RAW_DISK:
(disk, ) = ieargs
real_disk.dev_path,
str(1024 * 1024), # 1 MB
str(disk.size))
+ exp_size = disk.size
elif ieio == constants.IEIO_SCRIPT:
(disk, disk_index, ) = ieargs
elif mode == constants.IEM_EXPORT:
prefix = "%s |" % script_cmd
+ # Let script predict size
+ exp_size = constants.IE_CUSTOM_SIZE
+
else:
_Fail("Invalid %s I/O mode %r", mode, ieio)
- return (env, prefix, suffix)
+ return (env, prefix, suffix, exp_size)
def _CreateImportExportStatusDir(prefix):
(prefix, utils.TimestampForFilename())))
-def StartImportExportDaemon(mode, key_name, ca, host, port, instance,
- ieio, ieioargs):
+def StartImportExportDaemon(mode, opts, host, port, instance, ieio, ieioargs):
"""Starts an import or export daemon.
@param mode: Import/output mode
- @type key_name: string
- @param key_name: RSA key name (None to use cluster certificate)
- @type ca: string:
- @param ca: Remote CA in PEM format (None to use cluster certificate)
+ @type opts: L{objects.ImportExportOptions}
+ @param opts: Daemon options
@type host: string
@param host: Remote host for export (None for import)
@type port: int
else:
_Fail("Invalid mode %r", mode)
- if (key_name is None) ^ (ca is None):
+ if (opts.key_name is None) ^ (opts.ca_pem is None):
_Fail("Cluster certificate can only be used for both key and CA")
- (cmd_env, cmd_prefix, cmd_suffix) = \
+ (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \
_GetImportExportIoCommand(instance, mode, ieio, ieioargs)
- if key_name is None:
+ if opts.key_name is None:
# Use server.pem
key_path = constants.NODED_CERT_FILE
cert_path = constants.NODED_CERT_FILE
- assert ca is None
+ assert opts.ca_pem is None
else:
(_, key_path, cert_path) = _GetX509Filenames(constants.CRYPTO_KEYS_DIR,
- key_name)
- assert ca is not None
+ opts.key_name)
+ assert opts.ca_pem is not None
for i in [key_path, cert_path]:
if not os.path.exists(i):
pid_file = utils.PathJoin(status_dir, _IES_PID_FILE)
ca_file = utils.PathJoin(status_dir, _IES_CA_FILE)
- if ca is None:
+ if opts.ca_pem is None:
# Use server.pem
ca = utils.ReadFile(constants.NODED_CERT_FILE)
+ else:
+ ca = opts.ca_pem
+ # Write CA file
utils.WriteFile(ca_file, data=ca, mode=0400)
cmd = [
if port:
cmd.append("--port=%s" % port)
+ if opts.ipv6:
+ cmd.append("--ipv6")
+ else:
+ cmd.append("--ipv4")
+
+ if opts.compress:
+ cmd.append("--compress=%s" % opts.compress)
+
+ if opts.magic:
+ cmd.append("--magic=%s" % opts.magic)
+
+ if exp_size is not None:
+ cmd.append("--expected-size=%s" % exp_size)
+
if cmd_prefix:
cmd.append("--cmd-prefix=%s" % cmd_prefix)
if cmd_suffix:
cmd.append("--cmd-suffix=%s" % cmd_suffix)
+ if mode == constants.IEM_EXPORT:
+ # Retry connection a few times when connecting to remote peer
+ cmd.append("--connect-retries=%s" % constants.RIE_CONNECT_RETRIES)
+ cmd.append("--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT)
+ elif opts.connect_timeout is not None:
+ assert mode == constants.IEM_IMPORT
+ # Overall timeout for establishing connection while listening
+ cmd.append("--connect-timeout=%s" % opts.connect_timeout)
+
logfile = _InstanceLogName(prefix, instance.os, instance.name)
# TODO: Once _InstanceLogName uses tempfile.mkstemp, StartDaemon has
if pid:
logging.info("Import/export %s is running with PID %s, sending SIGTERM",
name, pid)
- os.kill(pid, signal.SIGTERM)
+ utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM)
def CleanupImportExport(name):
"""
# set the correct physical ID
- my_name = utils.HostInfo().name
+ my_name = netutils.Hostname.GetSysName()
for cf in disks:
cf.SetPhysicalID(my_name, nodes_ip)
return (alldone, min_resync)
+def GetDrbdUsermodeHelper():
+ """Returns DRBD usermode helper currently configured.
+
+ @return: the value returned by L{bdev.BaseDRBD.GetUsermodeHelper}
+ @raise RPCFail: if the helper cannot be determined; the message is the
+ text of the underlying L{errors.BlockDeviceError}
+
+ """
+ try:
+ return bdev.BaseDRBD.GetUsermodeHelper()
+ except errors.BlockDeviceError, err:
+ _Fail(str(err))
+
+
def PowercycleNode(hypervisor_type):
"""Hard-powercycle the node.
pid = 0
if pid > 0:
return "Reboot scheduled in 5 seconds"
+ # ensure the child is running on ram
+ try:
+ utils.Mlockall()
+ except Exception: # pylint: disable-msg=W0703
+ pass
time.sleep(5)
hyper.PowercycleNode()
def RemoveCache(cls, dev_path):
"""Remove data for a dev_path.
- This is just a wrapper over L{utils.RemoveFile} with a converted
+ This is just a wrapper over L{utils.io.RemoveFile} with a converted
path name and logging.
@type dev_path: str