from ganeti import ssconf
+_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
+
+
class RPCFail(Exception):
  """Class denoting RPC failure.

  Raised by helpers such as _Fail to signal an error that should be
  reported back to the remote (RPC) caller.

  """
+
def _Fail(msg, *args, **kwargs):
"""Log an error and the raise an RPCFail exception.
try:
priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
- f = open(pub_key, 'r')
- try:
- utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
- finally:
- f.close()
+ utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))
utils.RemoveFile(priv_key)
utils.RemoveFile(pub_key)
except errors.OpExecError:
logging.exception("Error while processing ssh files")
+ try:
+ utils.RemoveFile(constants.HMAC_CLUSTER_KEY)
+ utils.RemoveFile(constants.RAPI_CERT_FILE)
+ utils.RemoveFile(constants.SSL_CERT_FILE)
+ except:
+ logging.exception("Error while removing cluster secrets")
+
+ confd_pid = utils.ReadPidFile(utils.DaemonPidFileName(constants.CONFD))
+
+ if confd_pid:
+ utils.KillProcess(confd_pid, timeout=2)
+
# Raise a custom exception (handled in ganeti-noded)
raise errors.QuitGanetiException(True, 'Shutdown scheduled')
if hyp_info is not None:
outputarray.update(hyp_info)
- f = open("/proc/sys/kernel/random/boot_id", 'r')
- try:
- outputarray["bootid"] = f.read(128).rstrip("\n")
- finally:
- f.close()
+ outputarray["bootid"] = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
return outputarray
name, size, attr = match.groups()
inactive = attr[4] == '-'
online = attr[5] == 'o'
+ virtual = attr[0] == 'v'
+ if virtual:
+ # we don't want to report such volumes as existing, since they
+ # don't really hold data
+ continue
lvs[name] = (size, inactive, online)
return lvs
_Fail("Hypervisor error: %s", err, exc=True)
-def InstanceShutdown(instance):
+def InstanceShutdown(instance, timeout):
"""Shut an instance down.
@note: this functions uses polling with a hardcoded timeout.
@type instance: L{objects.Instance}
@param instance: the instance object
+ @type timeout: integer
+ @param timeout: maximum timeout for soft shutdown
@rtype: None
"""
hv_name = instance.hypervisor
- running_instances = GetInstanceList([hv_name])
+ hyper = hypervisor.GetHypervisor(hv_name)
+ running_instances = hyper.ListInstances()
iname = instance.name
if iname not in running_instances:
logging.info("Instance %s not running, doing nothing", iname)
return
- hyper = hypervisor.GetHypervisor(hv_name)
- try:
- hyper.StopInstance(instance)
- except errors.HypervisorError, err:
- _Fail("Failed to stop instance %s: %s", iname, err)
+ start = time.time()
+ end = start + timeout
+ sleep_time = 1
- # test every 10secs for 2min
-
- time.sleep(1)
- for _ in range(11):
- if instance.name not in GetInstanceList([hv_name]):
+ tried_once = False
+ while not tried_once and time.time() < end:
+ try:
+ hyper.StopInstance(instance, retry=tried_once)
+ except errors.HypervisorError, err:
+ _Fail("Failed to stop instance %s: %s", iname, err)
+ tried_once = True
+ time.sleep(sleep_time)
+ if instance.name not in hyper.ListInstances():
break
- time.sleep(10)
+ if sleep_time < 5:
+ # 1.2 behaves particularly well for our case:
+ # it gives us 10 increasing steps and caps just slightly above 5 seconds
+ sleep_time *= 1.2
else:
# the shutdown did not succeed
- logging.error("Shutdown of '%s' unsuccessful, using destroy", iname)
+ logging.error("Shutdown of '%s' unsuccessful, forcing", iname)
try:
hyper.StopInstance(instance, force=True)
_RemoveBlockDevLinks(iname, instance.disks)
-def InstanceReboot(instance, reboot_type):
+def InstanceReboot(instance, reboot_type, shutdown_timeout):
"""Reboot an instance.
@type instance: L{objects.Instance}
not accepted here, since that mode is handled differently, in
cmdlib, and translates into full stop and start of the
instance (instead of a call_instance_reboot RPC)
+ @type shutdown_timeout: integer
+ @param shutdown_timeout: maximum timeout for soft shutdown
@rtype: None
"""
_Fail("Failed to soft reboot instance %s: %s", instance.name, err)
elif reboot_type == constants.INSTANCE_REBOOT_HARD:
try:
- InstanceShutdown(instance)
+ InstanceShutdown(instance, shutdown_timeout)
return StartInstance(instance)
except errors.HypervisorError, err:
_Fail("Failed to hard reboot instance %s: %s", instance.name, err)
rbd = _RecursiveFindBD(dsk)
if rbd is None:
_Fail("Can't find device %s", dsk)
+
stats.append(rbd.CombinedSyncStatus())
+
return stats
@type disk: L{objects.Disk}
@param disk: the disk to find
- @rtype: None or tuple
- @return: None if the disk cannot be found, otherwise a
- tuple (device_path, major, minor, sync_percent,
- estimated_time, is_degraded)
+ @rtype: None or objects.BlockDevStatus
+ @return: None if the disk cannot be found, otherwise the current
+ information
"""
try:
rbd = _RecursiveFindBD(disk)
except errors.BlockDeviceError, err:
_Fail("Failed to find device: %s", err, exc=True)
+
if rbd is None:
return None
- return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
+
+ return rbd.GetSyncStatus()
+
+
def BlockdevGetsize(disks):
  """Computes the size of the given disks.

  If a disk is not found, returns None instead.

  @type disks: list of L{objects.Disk}
  @param disks: the list of disks to compute the size for
  @rtype: list
  @return: list with elements None if the disk cannot be found,
      otherwise the size

  """
  result = []
  for cf in disks:
    try:
      rbd = _RecursiveFindBD(cf)
    except errors.BlockDeviceError:
      # best-effort semantics: a disk we cannot look up is reported as
      # None instead of aborting the whole request, but we log the
      # failure instead of silently discarding it
      logging.exception("Error while trying to find device %s", cf)
      result.append(None)
      continue
    if rbd is None:
      result.append(None)
    else:
      result.append(rbd.GetActualSize())
  return result
+
+
def BlockdevExport(disk, dest_node, dest_path, cluster_name):
  """Export a block device to a remote node.

  @type disk: L{objects.Disk}
  @param disk: the description of the disk to export
  @type dest_node: str
  @param dest_node: the destination node to export to
  @type dest_path: str
  @param dest_path: the destination path on the target node
  @type cluster_name: str
  @param cluster_name: the cluster name, needed for SSH hostalias
  @rtype: None

  """
  dev = _RecursiveFindBD(disk)
  if dev is None:
    _Fail("Block device '%s' is not set up", disk)

  dev.Open()

  # reading side: a 1MiB dd block size, matching our size units
  read_cmd = utils.BuildShellCmd("set -e; set -o pipefail; "
                                 "dd if=%s bs=1048576 count=%s",
                                 dev.dev_path, str(disk.size))

  # writing side: a smaller block size, since due to ssh buffering
  # anything beyond 64-128k is mostly ignored; nocreat makes dd fail
  # if the device is not already there or the path is wrong, notrunc
  # avoids attempting truncation on an LV device, and oflag=dsync
  # limits memory buffering -- at best we flush every 64k, so this
  # will not be very fast
  write_cmd = utils.BuildShellCmd("dd of=%s conv=nocreat,notrunc bs=65536"
                                  " oflag=dsync", dest_path)

  remote_cmd = _GetSshRunner(cluster_name).BuildCmd(dest_node,
                                                    constants.GANETI_RUNAS,
                                                    write_cmd)

  # every piece was validated above, so combining them is safe
  pipeline = "|".join([read_cmd, utils.ShellQuoteArgs(remote_cmd)])

  result = utils.RunCmd(["bash", "-c", pipeline])

  if result.failed:
    _Fail("Disk copy command '%s' returned error: %s"
          " output: %s", pipeline, result.fail_reason, result.output)
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
data holding either the vaid versions or an error message
"""
- api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
+ api_file = os.path.sep.join([os_dir, constants.OS_API_FILE])
try:
st = os.stat(api_file)
except EnvironmentError, err:
- return False, ("Required file 'ganeti_api_version' file not"
- " found under path %s: %s" % (os_dir, _ErrnoOrStr(err)))
+ return False, ("Required file '%s' not found under path %s: %s" %
+ (constants.OS_API_FILE, os_dir, _ErrnoOrStr(err)))
if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
- return False, ("File 'ganeti_api_version' file at %s is not"
- " a regular file" % os_dir)
+ return False, ("File '%s' in %s is not a regular file" %
+ (constants.OS_API_FILE, os_dir))
try:
api_versions = utils.ReadFile(api_file).splitlines()
search (if not given defaults to
L{constants.OS_SEARCH_PATH})
@rtype: list of L{objects.OS}
- @return: a list of tuples (name, path, status, diagnose)
+ @return: a list of tuples (name, path, status, diagnose, variants)
for all (potential) OSes under all search paths, where:
- name is the (potential) OS name
- path is the full path to the OS
- status True/False is the validity of the OS
- diagnose is the error message for an invalid OS, otherwise empty
+ - variants is a list of supported OS variants, if any
"""
if top_dirs is None:
status, os_inst = _TryOSFromDisk(name, base_dir=dir_name)
if status:
diagnose = ""
+ variants = os_inst.supported_variants
else:
diagnose = os_inst
- result.append((name, os_path, status, diagnose))
+ variants = []
+ result.append((name, os_path, status, diagnose, variants))
return result
return False, ("API version mismatch for path '%s': found %s, want %s." %
(os_dir, api_versions, constants.OS_API_VERSIONS))
- # OS Scripts dictionary, we will populate it with the actual script names
- os_scripts = dict.fromkeys(constants.OS_SCRIPTS)
+ # OS Files dictionary, we will populate it with the absolute path names
+ os_files = dict.fromkeys(constants.OS_SCRIPTS)
+
+ if max(api_versions) >= constants.OS_API_V15:
+ os_files[constants.OS_VARIANTS_FILE] = ''
- for script in os_scripts:
- os_scripts[script] = os.path.sep.join([os_dir, script])
+ for name in os_files:
+ os_files[name] = os.path.sep.join([os_dir, name])
try:
- st = os.stat(os_scripts[script])
+ st = os.stat(os_files[name])
except EnvironmentError, err:
- return False, ("Script '%s' under path '%s' is missing (%s)" %
- (script, os_dir, _ErrnoOrStr(err)))
-
- if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
- return False, ("Script '%s' under path '%s' is not executable" %
- (script, os_dir))
+ return False, ("File '%s' under path '%s' is missing (%s)" %
+ (name, os_dir, _ErrnoOrStr(err)))
if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
- return False, ("Script '%s' under path '%s' is not a regular file" %
- (script, os_dir))
+ return False, ("File '%s' under path '%s' is not a regular file" %
+ (name, os_dir))
+
+ if name in constants.OS_SCRIPTS:
+ if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
+ return False, ("File '%s' under path '%s' is not executable" %
+ (name, os_dir))
+
+ variants = None
+ if constants.OS_VARIANTS_FILE in os_files:
+ variants_file = os_files[constants.OS_VARIANTS_FILE]
+ try:
+ variants = utils.ReadFile(variants_file).splitlines()
+ except EnvironmentError, err:
+ return False, ("Error while reading the OS variants file at %s: %s" %
+ (variants_file, _ErrnoOrStr(err)))
+ if not variants:
+ return False, ("No supported os variant found")
os_obj = objects.OS(name=name, path=os_dir,
- create_script=os_scripts[constants.OS_SCRIPT_CREATE],
- export_script=os_scripts[constants.OS_SCRIPT_EXPORT],
- import_script=os_scripts[constants.OS_SCRIPT_IMPORT],
- rename_script=os_scripts[constants.OS_SCRIPT_RENAME],
+ create_script=os_files[constants.OS_SCRIPT_CREATE],
+ export_script=os_files[constants.OS_SCRIPT_EXPORT],
+ import_script=os_files[constants.OS_SCRIPT_IMPORT],
+ rename_script=os_files[constants.OS_SCRIPT_RENAME],
+ supported_variants=variants,
api_versions=api_versions)
return True, os_obj
@raise RPCFail: if we don't find a valid OS
"""
- status, payload = _TryOSFromDisk(name, base_dir)
+ name_only = name.split("+", 1)[0]
+ status, payload = _TryOSFromDisk(name_only, base_dir)
if not status:
_Fail(payload)
result['DISK_COUNT'] = '%d' % len(instance.disks)
result['NIC_COUNT'] = '%d' % len(instance.nics)
result['DEBUG_LEVEL'] = '%d' % debug
+ if api_version >= constants.OS_API_V15:
+ try:
+ variant = instance.os.split('+', 1)[1]
+ except IndexError:
+ variant = os.supported_variants[0]
+ result['OS_VARIANT'] = variant
for idx, disk in enumerate(instance.disks):
real_disk = _RecursiveFindBD(disk)
if real_disk is None:
# the target command is built out of three individual commands,
# which are joined by pipes; we check each individual command for
# valid parameters
- expcmd = utils.BuildShellCmd("cd %s; %s 2>%s", inst_os.path,
- export_script, logfile)
+ expcmd = utils.BuildShellCmd("set -e; set -o pipefail; cd %s; %s 2>%s",
+ inst_os.path, export_script, logfile)
comprcmd = "gzip"
# all commands have been checked, so we're safe to combine them
command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
- result = utils.RunCmd(command, env=export_env)
+ result = utils.RunCmd(["bash", "-c", command], env=export_env)
if result.failed:
_Fail("OS snapshot export command '%s' returned error: %s"