@var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
the L{UploadFile} function
+@var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
+ in the L{_CleanDirectory} function
"""
+# pylint: disable-msg=E1103
+
+# E1103: %s %r has no %r member (but some types could not be
+# inferred), because the _TryOSFromDisk returns either (True, os_obj)
+# or (False, "string") which confuses pylint
+
import os
import os.path
import stat
import errno
import re
-import subprocess
import random
import logging
import tempfile
import zlib
import base64
+import signal
from ganeti import errors
from ganeti import utils
from ganeti import bdev
from ganeti import objects
from ganeti import ssconf
+from ganeti import serializer
_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
+_ALLOWED_CLEAN_DIRS = frozenset([
+ constants.DATA_DIR,
+ constants.JOB_QUEUE_ARCHIVE_DIR,
+ constants.QUEUE_DIR,
+ constants.CRYPTO_KEYS_DIR,
+ ])
+_MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60
+_X509_KEY_FILE = "key"
+_X509_CERT_FILE = "cert"
+_IES_STATUS_FILE = "status"
+_IES_PID_FILE = "pid"
+_IES_CA_FILE = "ca"
class RPCFail(Exception):
to the empty list
"""
+ if path not in _ALLOWED_CLEAN_DIRS:
+ _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
+ path)
+
if not os.path.isdir(path):
return
if exclude is None:
exclude = [os.path.normpath(i) for i in exclude]
for rel_name in utils.ListVisibleFiles(path):
- full_name = os.path.normpath(os.path.join(path, rel_name))
+ full_name = utils.PathJoin(path, rel_name)
if full_name in exclude:
continue
if os.path.isfile(full_name) and not os.path.islink(full_name):
constants.VNC_PASSWORD_FILE,
constants.RAPI_CERT_FILE,
constants.RAPI_USERS_FILE,
- constants.HMAC_CLUSTER_KEY,
+ constants.CONFD_HMAC_KEY,
+ constants.CLUSTER_DOMAIN_SECRET_FILE,
])
for hv_name in constants.HYPER_TYPES:
# and now start the master and rapi daemons
if start_daemons:
- daemons_params = {
- 'ganeti-masterd': [],
- 'ganeti-rapi': [],
- }
if no_voting:
- daemons_params['ganeti-masterd'].append('--no-voting')
- daemons_params['ganeti-masterd'].append('--yes-do-it')
- for daemon in daemons_params:
- cmd = [daemon]
- cmd.extend(daemons_params[daemon])
- result = utils.RunCmd(cmd)
- if result.failed:
- msg = "Can't start daemon %s: %s" % (daemon, result.output)
- logging.error(msg)
- err_msgs.append(msg)
+ masterd_args = "--no-voting --yes-do-it"
+ else:
+ masterd_args = ""
+
+ env = {
+ "EXTRA_MASTERD_ARGS": masterd_args,
+ }
+
+ result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"], env=env)
+ if result.failed:
+ msg = "Can't start Ganeti master: %s" % result.output
+ logging.error(msg)
+ err_msgs.append(msg)
if err_msgs:
_Fail("; ".join(err_msgs))
# but otherwise ignore the failure
if stop_daemons:
- # stop/kill the rapi and the master daemon
- for daemon in constants.RAPI, constants.MASTERD:
- utils.KillProcess(utils.ReadPidFile(utils.DaemonPidFileName(daemon)))
+ result = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"])
+ if result.failed:
+ logging.error("Could not stop Ganeti master, command %s had exitcode %s"
+ " and error %s",
+ result.cmd, result.exit_code, result.output)
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
utils.AddAuthorizedKey(auth_keys, sshpub)
- utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
+ result = utils.RunCmd([constants.DAEMON_UTIL, "reload-ssh-keys"])
+ if result.failed:
+ _Fail("Unable to reload SSH keys (command %r, exit code %s, output %r)",
+ result.cmd, result.exit_code, result.output)
-def LeaveCluster():
+def LeaveCluster(modify_ssh_setup):
"""Cleans up and remove the current node.
This function cleans up and prepares the current node to be removed
L{errors.QuitGanetiException} which is used as a special case to
shutdown the node daemon.
+ @param modify_ssh_setup: boolean
+
"""
_CleanDirectory(constants.DATA_DIR)
+ _CleanDirectory(constants.CRYPTO_KEYS_DIR)
JobQueuePurge()
- try:
- priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
+ if modify_ssh_setup:
+ try:
+ priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
- utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))
+ utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))
- utils.RemoveFile(priv_key)
- utils.RemoveFile(pub_key)
- except errors.OpExecError:
- logging.exception("Error while processing ssh files")
+ utils.RemoveFile(priv_key)
+ utils.RemoveFile(pub_key)
+ except errors.OpExecError:
+ logging.exception("Error while processing ssh files")
try:
- utils.RemoveFile(constants.HMAC_CLUSTER_KEY)
+ utils.RemoveFile(constants.CONFD_HMAC_KEY)
utils.RemoveFile(constants.RAPI_CERT_FILE)
- utils.RemoveFile(constants.SSL_CERT_FILE)
- except:
+ utils.RemoveFile(constants.NODED_CERT_FILE)
+ except: # pylint: disable-msg=W0702
logging.exception("Error while removing cluster secrets")
- confd_pid = utils.ReadPidFile(utils.DaemonPidFileName(constants.CONFD))
-
- if confd_pid:
- utils.KillProcess(confd_pid, timeout=2)
+ result = utils.RunCmd([constants.DAEMON_UTIL, "stop", constants.CONFD])
+ if result.failed:
+ logging.error("Command %s failed with exitcode %s and error %s",
+ result.cmd, result.exit_code, result.output)
# Raise a custom exception (handled in ganeti-noded)
raise errors.QuitGanetiException(True, 'Shutdown scheduled')
"""
result = {}
+ my_name = utils.HostInfo().name
+ port = utils.GetDaemonPort(constants.NODED)
if constants.NV_HYPERVISOR in what:
result[constants.NV_HYPERVISOR] = tmp = {}
for hv_name in what[constants.NV_HYPERVISOR]:
- tmp[hv_name] = hypervisor.GetHypervisor(hv_name).Verify()
+ try:
+ val = hypervisor.GetHypervisor(hv_name).Verify()
+ except errors.HypervisorError, err:
+ val = "Error while checking hypervisor: %s" % str(err)
+ tmp[hv_name] = val
if constants.NV_FILELIST in what:
result[constants.NV_FILELIST] = utils.FingerprintFiles(
if constants.NV_NODENETTEST in what:
result[constants.NV_NODENETTEST] = tmp = {}
- my_name = utils.HostInfo().name
my_pip = my_sip = None
for name, pip, sip in what[constants.NV_NODENETTEST]:
if name == my_name:
tmp[my_name] = ("Can't find my own primary/secondary IP"
" in the node list")
else:
- port = utils.GetDaemonPort(constants.NODED)
for name, pip, sip in what[constants.NV_NODENETTEST]:
fail = []
if not utils.TcpPing(pip, port, source=my_pip):
tmp[name] = ("failure using the %s interface(s)" %
" and ".join(fail))
+ if constants.NV_MASTERIP in what:
+ # FIXME: add checks on incoming data structures (here and in the
+ # rest of the function)
+ master_name, master_ip = what[constants.NV_MASTERIP]
+ if master_name == my_name:
+ source = constants.LOCALHOST_IP_ADDRESS
+ else:
+ source = None
+ result[constants.NV_MASTERIP] = utils.TcpPing(master_ip, port,
+ source=source)
+
if constants.NV_LVLIST in what:
- result[constants.NV_LVLIST] = GetVolumeList(what[constants.NV_LVLIST])
+ try:
+ val = GetVolumeList(what[constants.NV_LVLIST])
+ except RPCFail, err:
+ val = str(err)
+ result[constants.NV_LVLIST] = val
if constants.NV_INSTANCELIST in what:
- result[constants.NV_INSTANCELIST] = GetInstanceList(
- what[constants.NV_INSTANCELIST])
+ # GetInstanceList can fail
+ try:
+ val = GetInstanceList(what[constants.NV_INSTANCELIST])
+ except RPCFail, err:
+ val = str(err)
+ result[constants.NV_INSTANCELIST] = val
if constants.NV_VGLIST in what:
result[constants.NV_VGLIST] = utils.ListVolumeGroups()
+ if constants.NV_PVLIST in what:
+ result[constants.NV_PVLIST] = \
+ bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
+ filter_allocatable=False)
+
if constants.NV_VERSION in what:
result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
constants.RELEASE_VERSION)
used_minors = str(err)
result[constants.NV_DRBDLIST] = used_minors
+ if constants.NV_NODESETUP in what:
+ result[constants.NV_NODESETUP] = tmpr = []
+ if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
+ tmpr.append("The sysfs filesytem doesn't seem to be mounted"
+ " under /sys, missing required directories /sys/block"
+ " and /sys/class/net")
+ if (not os.path.isdir("/proc/sys") or
+ not os.path.isfile("/proc/sysrq-trigger")):
+ tmpr.append("The procfs filesystem doesn't seem to be mounted"
+ " under /proc, missing required directory /proc/sys and"
+ " the file /proc/sysrq-trigger")
+
+ if constants.NV_TIME in what:
+ result[constants.NV_TIME] = utils.SplitTime(time.time())
+
return result
result.output)
def parse_dev(dev):
- if '(' in dev:
- return dev.split('(')[0]
- else:
- return dev
+ return dev.split('(')[0]
+
+ def handle_dev(dev):
+ return [parse_dev(x) for x in dev.split(",")]
def map_line(line):
- return {
- 'name': line[0].strip(),
- 'size': line[1].strip(),
- 'dev': parse_dev(line[2].strip()),
- 'vg': line[3].strip(),
- }
+ line = [v.strip() for v in line]
+ return [{'name': line[0], 'size': line[1],
+ 'dev': dev, 'vg': line[3]} for dev in handle_dev(line[2])]
- return [map_line(line.split('|')) for line in result.stdout.splitlines()
- if line.count('|') >= 3]
+ all_devs = []
+ for line in result.stdout.splitlines():
+ if line.count('|') >= 3:
+ all_devs.extend(map_line(line.split('|')))
+ else:
+ logging.warning("Strange line in the output from lvs: '%s'", line)
+ return all_devs
def BridgesExist(bridges_list):
missing.append(bridge)
if missing:
- _Fail("Missing bridges %s", ", ".join(missing))
+ _Fail("Missing bridges %s", utils.CommaJoin(missing))
def GetInstanceList(hypervisor_list):
return output
-def InstanceOsAdd(instance, reinstall):
+def _InstanceLogName(kind, os_name, instance):
+ """Compute the OS log filename for a given instance and operation.
+
+ The instance name and os name are passed in as strings since not all
+ operations have these as part of an instance object.
+
+ @type kind: string
+ @param kind: the operation type (e.g. add, import, etc.)
+ @type os_name: string
+ @param os_name: the os name
+ @type instance: string
+ @param instance: the name of the instance being imported/added/etc.
+
+ """
+ # TODO: Use tempfile.mkstemp to create unique filename
+ base = ("%s-%s-%s-%s.log" %
+ (kind, os_name, instance, utils.TimestampForFilename()))
+ return utils.PathJoin(constants.LOG_OS_DIR, base)
+
+
+def InstanceOsAdd(instance, reinstall, debug):
"""Add an OS to an instance.
@type instance: L{objects.Instance}
@param instance: Instance whose OS is to be installed
@type reinstall: boolean
@param reinstall: whether this is an instance reinstall
+ @type debug: integer
+ @param debug: debug level, passed to the OS scripts
@rtype: None
"""
inst_os = OSFromDisk(instance.os)
- create_env = OSEnvironment(instance, inst_os)
+ create_env = OSEnvironment(instance, inst_os, debug)
if reinstall:
create_env['INSTANCE_REINSTALL'] = "1"
- logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
- instance.name, int(time.time()))
+ logfile = _InstanceLogName("add", instance.os, instance.name)
result = utils.RunCmd([inst_os.create_script], env=create_env,
cwd=inst_os.path, output=logfile,)
" log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
-def RunRenameInstance(instance, old_name):
+def RunRenameInstance(instance, old_name, debug):
"""Run the OS rename script for an instance.
@type instance: L{objects.Instance}
@param instance: Instance whose OS is to be installed
@type old_name: string
@param old_name: previous instance name
+ @type debug: integer
+ @param debug: debug level, passed to the OS scripts
@rtype: boolean
@return: the success of the operation
"""
inst_os = OSFromDisk(instance.os)
- rename_env = OSEnvironment(instance, inst_os)
+ rename_env = OSEnvironment(instance, inst_os, debug)
rename_env['OLD_INSTANCE_NAME'] = old_name
- logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
- old_name,
- instance.name, int(time.time()))
+ logfile = _InstanceLogName("rename", instance.os,
+ "%s-%s" % (old_name, instance.name))
result = utils.RunCmd([inst_os.rename_script], env=rename_env,
cwd=inst_os.path, output=logfile)
"vg_free": int(round(float(valarr[1]), 0)),
"pv_count": int(valarr[2]),
}
- except ValueError, err:
+ except (TypeError, ValueError), err:
logging.exception("Fail to parse vgs output: %s", err)
else:
logging.error("vgs output has the wrong number of fields (expected"
def _GetBlockDevSymlinkPath(instance_name, idx):
- return os.path.join(constants.DISK_LINKS_DIR,
- "%s:%d" % (instance_name, idx))
+ return utils.PathJoin(constants.DISK_LINKS_DIR,
+ "%s:%d" % (instance_name, idx))
def _SymlinkBlockDev(instance_name, device_path, idx):
"""
hv_name = instance.hypervisor
hyper = hypervisor.GetHypervisor(hv_name)
- running_instances = hyper.ListInstances()
iname = instance.name
- if iname not in running_instances:
+ if instance.name not in hyper.ListInstances():
logging.info("Instance %s not running, doing nothing", iname)
return
- start = time.time()
- end = start + timeout
- sleep_time = 1
+ class _TryShutdown:
+ def __init__(self):
+ self.tried_once = False
- tried_once = False
- while not tried_once and time.time() < end:
- try:
- hyper.StopInstance(instance, retry=tried_once)
- except errors.HypervisorError, err:
- _Fail("Failed to stop instance %s: %s", iname, err)
- tried_once = True
- time.sleep(sleep_time)
- if instance.name not in hyper.ListInstances():
- break
- if sleep_time < 5:
- # 1.2 behaves particularly good for our case:
- # it gives us 10 increasing steps and caps just slightly above 5 seconds
- sleep_time *= 1.2
- else:
+ def __call__(self):
+ if iname not in hyper.ListInstances():
+ return
+
+ try:
+ hyper.StopInstance(instance, retry=self.tried_once)
+ except errors.HypervisorError, err:
+ if iname not in hyper.ListInstances():
+ # if the instance is no longer existing, consider this a
+ # success and go to cleanup
+ return
+
+ _Fail("Failed to stop instance %s: %s", iname, err)
+
+ self.tried_once = True
+
+ raise utils.RetryAgain()
+
+ try:
+ utils.Retry(_TryShutdown(), 5, timeout)
+ except utils.RetryTimeout:
# the shutdown did not succeed
logging.error("Shutdown of '%s' unsuccessful, forcing", iname)
try:
hyper.StopInstance(instance, force=True)
except errors.HypervisorError, err:
- _Fail("Failed to force stop instance %s: %s", iname, err)
+ if iname in hyper.ListInstances():
+ # only raise an error if the instance still exists, otherwise
+ # the error could simply be "instance ... unknown"!
+ _Fail("Failed to force stop instance %s: %s", iname, err)
time.sleep(1)
- if instance.name in GetInstanceList([hv_name]):
+
+ if iname in hyper.ListInstances():
_Fail("Could not shutdown instance %s even by destroy", iname)
+ try:
+ hyper.CleanupInstance(instance.name)
+ except errors.HypervisorError, err:
+ logging.warning("Failed to execute post-shutdown cleanup step: %s", err)
+
_RemoveBlockDevLinks(iname, instance.disks)
-def InstanceReboot(instance, reboot_type):
+def InstanceReboot(instance, reboot_type, shutdown_timeout):
"""Reboot an instance.
@type instance: L{objects.Instance}
not accepted here, since that mode is handled differently, in
cmdlib, and translates into full stop and start of the
instance (instead of a call_instance_reboot RPC)
+ @type shutdown_timeout: integer
+ @param shutdown_timeout: maximum timeout for soft shutdown
@rtype: None
"""
_Fail("Failed to soft reboot instance %s: %s", instance.name, err)
elif reboot_type == constants.INSTANCE_REBOOT_HARD:
try:
- InstanceShutdown(instance)
+ InstanceShutdown(instance, shutdown_timeout)
return StartInstance(instance)
except errors.HypervisorError, err:
_Fail("Failed to hard reboot instance %s: %s", instance.name, err)
hyper = hypervisor.GetHypervisor(instance.hypervisor)
try:
- hyper.MigrateInstance(instance.name, target, live)
+ hyper.MigrateInstance(instance, target, live)
except errors.HypervisorError, err:
_Fail("Failed to migrate instance: %s", err, exc=True)
it's not required to return anything.
"""
+ # TODO: remove the obsolete 'size' argument
+ # pylint: disable-msg=W0613
clist = []
if disk.children:
for child in disk.children:
# we need the children open in case the device itself has to
# be assembled
try:
+ # pylint: disable-msg=E1103
crdev.Open()
except errors.BlockDeviceError, err:
_Fail("Can't make child '%s' read-write: %s", child, err)
try:
result = _RecursiveAssembleBD(disk, owner, as_primary)
if isinstance(result, bdev.BlockDev):
+ # pylint: disable-msg=E1103
result = result.dev_path
except errors.BlockDeviceError, err:
_Fail("Error while assembling disk: %s", err, exc=True)
else:
devs.append(bd.dev_path)
else:
+ if not utils.IsNormAbsPath(rpath):
+ _Fail("Strange path returned from StaticDevPath: '%s'", rpath)
devs.append(rpath)
parent_bdev.RemoveChildren(devs)
return bdev.FindDevice(disk.dev_type, disk.physical_id, children, disk.size)
+def _OpenRealBD(disk):
+ """Opens the underlying block device of a disk.
+
+ @type disk: L{objects.Disk}
+ @param disk: the disk object we want to open
+
+ """
+ real_disk = _RecursiveFindBD(disk)
+ if real_disk is None:
+ _Fail("Block device '%s' is not set up", disk)
+
+ real_disk.Open()
+
+ return real_disk
+
+
def BlockdevFind(disk):
"""Check if a device is activated.
for cf in disks:
try:
rbd = _RecursiveFindBD(cf)
- except errors.BlockDeviceError, err:
+ except errors.BlockDeviceError:
result.append(None)
continue
if rbd is None:
@rtype: None
"""
- real_disk = _RecursiveFindBD(disk)
- if real_disk is None:
- _Fail("Block device '%s' is not set up", disk)
-
- real_disk.Open()
+ real_disk = _OpenRealBD(disk)
# the block size on the read dd is 1MiB to match our units
expcmd = utils.BuildShellCmd("set -e; set -o pipefail; "
return detail
-def _OSOndiskAPIVersion(name, os_dir):
+def _OSOndiskAPIVersion(os_dir):
"""Compute and return the API version of a given OS.
- This function will try to read the API version of the OS given by
- the 'name' parameter and residing in the 'os_dir' directory.
+ This function will try to read the API version of the OS residing in
+ the 'os_dir' directory.
- @type name: str
- @param name: the OS name we should look for
@type os_dir: str
- @param os_dir: the directory inwhich we should look for the OS
+ @param os_dir: the directory in which we should look for the OS
@rtype: tuple
@return: tuple (status, data) with status denoting the validity and
data holding either the vaid versions or an error message
"""
- api_file = os.path.sep.join([os_dir, constants.OS_API_FILE])
+ api_file = utils.PathJoin(os_dir, constants.OS_API_FILE)
try:
st = os.stat(api_file)
logging.exception("Can't list the OS directory %s: %s", dir_name, err)
break
for name in f_names:
- os_path = os.path.sep.join([dir_name, name])
+ os_path = utils.PathJoin(dir_name, name)
status, os_inst = _TryOSFromDisk(name, base_dir=dir_name)
if status:
diagnose = ""
"""
if base_dir is None:
os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
- if os_dir is None:
- return False, "Directory for OS %s not found in search path" % name
else:
- os_dir = os.path.sep.join([base_dir, name])
+ os_dir = utils.FindFile(name, [base_dir], os.path.isdir)
- status, api_versions = _OSOndiskAPIVersion(name, os_dir)
+ if os_dir is None:
+ return False, "Directory for OS %s not found in search path" % name
+
+ status, api_versions = _OSOndiskAPIVersion(os_dir)
if not status:
# push the error up
return status, api_versions
if max(api_versions) >= constants.OS_API_V15:
os_files[constants.OS_VARIANTS_FILE] = ''
- for name in os_files:
- os_files[name] = os.path.sep.join([os_dir, name])
+ for filename in os_files:
+ os_files[filename] = utils.PathJoin(os_dir, filename)
try:
- st = os.stat(os_files[name])
+ st = os.stat(os_files[filename])
except EnvironmentError, err:
return False, ("File '%s' under path '%s' is missing (%s)" %
- (name, os_dir, _ErrnoOrStr(err)))
+ (filename, os_dir, _ErrnoOrStr(err)))
if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
return False, ("File '%s' under path '%s' is not a regular file" %
- (name, os_dir))
+ (filename, os_dir))
- if name in constants.OS_SCRIPTS:
+ if filename in constants.OS_SCRIPTS:
if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
return False, ("File '%s' under path '%s' is not executable" %
- (name, os_dir))
+ (filename, os_dir))
- variants = None
+ variants = []
if constants.OS_VARIANTS_FILE in os_files:
variants_file = os_files[constants.OS_VARIANTS_FILE]
try:
@raise RPCFail: if we don't find a valid OS
"""
- name_only = name.split('+',1)[0]
+ name_only = name.split("+", 1)[0]
status, payload = _TryOSFromDisk(name_only, base_dir)
if not status:
return payload
-def OSEnvironment(instance, os, debug=0):
+def OSCoreEnv(inst_os, debug=0):
+ """Calculate the basic environment for an os script.
+
+ @type inst_os: L{objects.OS}
+ @param inst_os: operating system for which the environment is being built
+ @type debug: integer
+ @param debug: debug level (0 or 1, for OS Api 10)
+ @rtype: dict
+ @return: dict of environment variables
+ @raise errors.BlockDeviceError: if the block device
+ cannot be found
+
+ """
+ result = {}
+ api_version = \
+ max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions))
+ result['OS_API_VERSION'] = '%d' % api_version
+ result['OS_NAME'] = inst_os.name
+ result['DEBUG_LEVEL'] = '%d' % debug
+
+ # OS variants
+ if api_version >= constants.OS_API_V15:
+ try:
+ variant = inst_os.name.split('+', 1)[1]
+ except IndexError:
+ variant = inst_os.supported_variants[0]
+ result['OS_VARIANT'] = variant
+
+ return result
+
+
+def OSEnvironment(instance, inst_os, debug=0):
"""Calculate the environment for an os script.
@type instance: L{objects.Instance}
@param instance: target instance for the os script run
- @type os: L{objects.OS}
- @param os: operating system for which the environment is being built
+ @type inst_os: L{objects.OS}
+ @param inst_os: operating system for which the environment is being built
@type debug: integer
@param debug: debug level (0 or 1, for OS Api 10)
@rtype: dict
cannot be found
"""
- result = {}
- api_version = max(constants.OS_API_VERSIONS.intersection(os.api_versions))
- result['OS_API_VERSION'] = '%d' % api_version
+ result = OSCoreEnv(inst_os, debug)
+
result['INSTANCE_NAME'] = instance.name
result['INSTANCE_OS'] = instance.os
result['HYPERVISOR'] = instance.hypervisor
result['DISK_COUNT'] = '%d' % len(instance.disks)
result['NIC_COUNT'] = '%d' % len(instance.nics)
- result['DEBUG_LEVEL'] = '%d' % debug
- if api_version >= constants.OS_API_V15:
- try:
- variant = instance.os.split('+', 1)[1]
- except IndexError:
- variant = os.supported_variants[0]
- result['OS_VARIANT'] = variant
+
+ # Disks
for idx, disk in enumerate(instance.disks):
- real_disk = _RecursiveFindBD(disk)
- if real_disk is None:
- raise errors.BlockDeviceError("Block device '%s' is not set up" %
- str(disk))
- real_disk.Open()
+ real_disk = _OpenRealBD(disk)
result['DISK_%d_PATH' % idx] = real_disk.dev_path
result['DISK_%d_ACCESS' % idx] = disk.mode
if constants.HV_DISK_TYPE in instance.hvparams:
elif disk.dev_type == constants.LD_FILE:
result['DISK_%d_BACKEND_TYPE' % idx] = \
'file:%s' % disk.physical_id[0]
+
+ # NICs
for idx, nic in enumerate(instance.nics):
result['NIC_%d_MAC' % idx] = nic.mac
if nic.ip:
result['NIC_%d_FRONTEND_TYPE' % idx] = \
instance.hvparams[constants.HV_NIC_TYPE]
+ # HV/BE params
for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]:
for key, value in source.items():
result["INSTANCE_%s_%s" % (kind, key)] = str(value)
return result
+
def BlockdevGrow(disk, amount):
"""Grow a stack of block devices.
@return: snapshot disk path
"""
- if disk.children:
- if len(disk.children) == 1:
- # only one child, let's recurse on it
- return BlockdevSnapshot(disk.children[0])
- else:
- # more than one child, choose one that matches
- for child in disk.children:
- if child.size == disk.size:
- # return implies breaking the loop
- return BlockdevSnapshot(child)
+ if disk.dev_type == constants.LD_DRBD8:
+ if not disk.children:
+ _Fail("DRBD device '%s' without backing storage cannot be snapshotted",
+ disk.unique_id)
+ return BlockdevSnapshot(disk.children[0])
elif disk.dev_type == constants.LD_LV:
r_dev = _RecursiveFindBD(disk)
if r_dev is not None:
+ # FIXME: choose a saner value for the snapshot size
# let's stay on the safe side and ask for the full size, for now
return r_dev.Snapshot(disk.size)
else:
disk.unique_id, disk.dev_type)
-def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
- """Export a block device snapshot to a remote node.
-
- @type disk: L{objects.Disk}
- @param disk: the description of the disk to export
- @type dest_node: str
- @param dest_node: the destination node to export to
- @type instance: L{objects.Instance}
- @param instance: the instance object to whom the disk belongs
- @type cluster_name: str
- @param cluster_name: the cluster name, needed for SSH hostalias
- @type idx: int
- @param idx: the index of the disk in the instance's disk list,
- used to export to the OS scripts environment
- @rtype: None
-
- """
- inst_os = OSFromDisk(instance.os)
- export_env = OSEnvironment(instance, inst_os)
-
- export_script = inst_os.export_script
-
- logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
- instance.name, int(time.time()))
- if not os.path.exists(constants.LOG_OS_DIR):
- os.mkdir(constants.LOG_OS_DIR, 0750)
- real_disk = _RecursiveFindBD(disk)
- if real_disk is None:
- _Fail("Block device '%s' is not set up", disk)
-
- real_disk.Open()
-
- export_env['EXPORT_DEVICE'] = real_disk.dev_path
- export_env['EXPORT_INDEX'] = str(idx)
-
- destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
- destfile = disk.physical_id[1]
-
- # the target command is built out of three individual commands,
- # which are joined by pipes; we check each individual command for
- # valid parameters
- expcmd = utils.BuildShellCmd("set -e; set -o pipefail; cd %s; %s 2>%s",
- inst_os.path, export_script, logfile)
-
- comprcmd = "gzip"
-
- destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
- destdir, destdir, destfile)
- remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node,
- constants.GANETI_RUNAS,
- destcmd)
-
- # all commands have been checked, so we're safe to combine them
- command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
-
- result = utils.RunCmd(["bash", "-c", command], env=export_env)
-
- if result.failed:
- _Fail("OS snapshot export command '%s' returned error: %s"
- " output: %s", command, result.fail_reason, result.output)
-
-
def FinalizeExport(instance, snap_disks):
"""Write out the export configuration information.
@rtype: None
"""
- destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
- finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
+ destdir = utils.PathJoin(constants.EXPORT_DIR, instance.name + ".new")
+ finaldestdir = utils.PathJoin(constants.EXPORT_DIR, instance.name)
config = objects.SerializableConfigParser()
config.set(constants.INISECT_INS, 'vcpus', '%d' %
instance.beparams[constants.BE_VCPUS])
config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
+ config.set(constants.INISECT_INS, 'hypervisor', instance.hypervisor)
nic_total = 0
for nic_count, nic in enumerate(instance.nics):
config.set(constants.INISECT_INS, 'nic%d_mac' %
nic_count, '%s' % nic.mac)
config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
- config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count,
- '%s' % nic.bridge)
+ for param in constants.NICS_PARAMETER_TYPES:
+ config.set(constants.INISECT_INS, 'nic%d_%s' % (nic_count, param),
+ '%s' % nic.nicparams.get(param, None))
# TODO: redundant: on load can read nics until it doesn't exist
config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_total)
config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_total)
- utils.WriteFile(os.path.join(destdir, constants.EXPORT_CONF_FILE),
+ # New-style hypervisor/backend parameters
+
+ config.add_section(constants.INISECT_HYP)
+ for name, value in instance.hvparams.items():
+ if name not in constants.HVC_GLOBALS:
+ config.set(constants.INISECT_HYP, name, str(value))
+
+ config.add_section(constants.INISECT_BEP)
+ for name, value in instance.beparams.items():
+ config.set(constants.INISECT_BEP, name, str(value))
+
+ utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE),
data=config.Dumps())
- shutil.rmtree(finaldestdir, True)
+ shutil.rmtree(finaldestdir, ignore_errors=True)
shutil.move(destdir, finaldestdir)
export info
"""
- cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
+ cff = utils.PathJoin(dest, constants.EXPORT_CONF_FILE)
config = objects.SerializableConfigParser()
config.read(cff)
return config.Dumps()
-def ImportOSIntoInstance(instance, src_node, src_images, cluster_name):
- """Import an os image into an instance.
-
- @type instance: L{objects.Instance}
- @param instance: instance to import the disks into
- @type src_node: string
- @param src_node: source node for the disk images
- @type src_images: list of string
- @param src_images: absolute paths of the disk images
- @rtype: list of boolean
- @return: each boolean represent the success of importing the n-th disk
-
- """
- inst_os = OSFromDisk(instance.os)
- import_env = OSEnvironment(instance, inst_os)
- import_script = inst_os.import_script
-
- logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
- instance.name, int(time.time()))
- if not os.path.exists(constants.LOG_OS_DIR):
- os.mkdir(constants.LOG_OS_DIR, 0750)
-
- comprcmd = "gunzip"
- impcmd = utils.BuildShellCmd("(cd %s; %s >%s 2>&1)", inst_os.path,
- import_script, logfile)
-
- final_result = []
- for idx, image in enumerate(src_images):
- if image:
- destcmd = utils.BuildShellCmd('cat %s', image)
- remotecmd = _GetSshRunner(cluster_name).BuildCmd(src_node,
- constants.GANETI_RUNAS,
- destcmd)
- command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
- import_env['IMPORT_DEVICE'] = import_env['DISK_%d_PATH' % idx]
- import_env['IMPORT_INDEX'] = str(idx)
- result = utils.RunCmd(command, env=import_env)
- if result.failed:
- logging.error("Disk import command '%s' returned error: %s"
- " output: %s", command, result.fail_reason,
- result.output)
- final_result.append("error importing disk %d: %s, %s" %
- (idx, result.fail_reason, result.output[-100]))
-
- if final_result:
- _Fail("; ".join(final_result), log=False)
-
-
def ListExports():
"""Return a list of exports currently available on this machine.
"""
if os.path.isdir(constants.EXPORT_DIR):
- return utils.ListVisibleFiles(constants.EXPORT_DIR)
+ return sorted(utils.ListVisibleFiles(constants.EXPORT_DIR))
else:
_Fail("No exports directory")
@rtype: None
"""
- target = os.path.join(constants.EXPORT_DIR, export)
+ target = utils.PathJoin(constants.EXPORT_DIR, export)
try:
shutil.rmtree(target)
@return: the normalized path if valid, None otherwise
"""
+ if not constants.ENABLE_FILE_STORAGE:
+ _Fail("File storage disabled at configure time")
cfg = _GetConfig()
file_storage_dir = os.path.normpath(file_storage_dir)
base_file_storage_dir = cfg.GetFileStorageDir()
- if (not os.path.commonprefix([file_storage_dir, base_file_storage_dir]) ==
+ if (os.path.commonprefix([file_storage_dir, base_file_storage_dir]) !=
base_file_storage_dir):
_Fail("File storage directory '%s' is not under base file"
" storage directory '%s'", file_storage_dir, base_file_storage_dir)
utils.RenameFile(old, new, mkdir=True)
-def JobQueueSetDrainFlag(drain_flag):
- """Set the drain flag for the queue.
-
- This will set or unset the queue drain flag.
-
- @type drain_flag: boolean
- @param drain_flag: if True, will set the drain flag, otherwise reset it.
- @rtype: truple
- @return: always True, None
- @warning: the function always returns True
-
- """
- if drain_flag:
- utils.WriteFile(constants.JOB_QUEUE_DRAIN_FILE, data="", close=True)
- else:
- utils.RemoveFile(constants.JOB_QUEUE_DRAIN_FILE)
-
-
def BlockdevClose(instance_name, disks):
"""Closes the given block devices.
master, myself = ssconf.GetMasterAndMyself()
if master == myself:
_Fail("ssconf status shows I'm the master node, will not demote")
- pid_file = utils.DaemonPidFileName(constants.MASTERD)
- if utils.IsProcessAlive(utils.ReadPidFile(pid_file)):
+
+ result = utils.RunCmd([constants.DAEMON_UTIL, "check", constants.MASTERD])
+ if not result.failed:
_Fail("The master daemon is running, will not demote")
+
try:
if os.path.isfile(constants.CLUSTER_CONF_FILE):
utils.CreateBackup(constants.CLUSTER_CONF_FILE)
except EnvironmentError, err:
if err.errno != errno.ENOENT:
_Fail("Error while backing up cluster file: %s", err, exc=True)
+
utils.RemoveFile(constants.CLUSTER_CONF_FILE)
+def _GetX509Filenames(cryptodir, name):
+ """Returns the full paths for the private key and certificate.
+
+ """
+ return (utils.PathJoin(cryptodir, name),
+ utils.PathJoin(cryptodir, name, _X509_KEY_FILE),
+ utils.PathJoin(cryptodir, name, _X509_CERT_FILE))
+
+
+def CreateX509Certificate(validity, cryptodir=constants.CRYPTO_KEYS_DIR):
+ """Creates a new X509 certificate for SSL/TLS.
+
+ @type validity: int
+ @param validity: Validity in seconds
+ @rtype: tuple; (string, string)
+ @return: Certificate name and public part
+
+ """
+ (key_pem, cert_pem) = \
+ utils.GenerateSelfSignedX509Cert(utils.HostInfo.SysName(),
+ min(validity, _MAX_SSL_CERT_VALIDITY))
+
+ cert_dir = tempfile.mkdtemp(dir=cryptodir,
+ prefix="x509-%s-" % utils.TimestampForFilename())
+ try:
+ name = os.path.basename(cert_dir)
+ assert len(name) > 5
+
+ (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name)
+
+ utils.WriteFile(key_file, mode=0400, data=key_pem)
+ utils.WriteFile(cert_file, mode=0400, data=cert_pem)
+
+ # Never return private key as it shouldn't leave the node
+ return (name, cert_pem)
+ except Exception:
+ shutil.rmtree(cert_dir, ignore_errors=True)
+ raise
+
+
+def RemoveX509Certificate(name, cryptodir=constants.CRYPTO_KEYS_DIR):
+ """Removes a X509 certificate.
+
+ @type name: string
+ @param name: Certificate name
+
+ """
+ (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name)
+
+ utils.RemoveFile(key_file)
+ utils.RemoveFile(cert_file)
+
+ try:
+ os.rmdir(cert_dir)
+ except EnvironmentError, err:
+ _Fail("Cannot remove certificate directory '%s': %s",
+ cert_dir, err)
+
+
+def _GetImportExportIoCommand(instance, mode, ieio, ieargs):
+ """Returns the command for the requested input/output.
+
+ @type instance: L{objects.Instance}
+ @param instance: The instance object
+ @param mode: Import/export mode
+ @param ieio: Input/output type
+ @param ieargs: Input/output arguments
+
+ """
+ assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT)
+
+ env = None
+ prefix = None
+ suffix = None
+ exp_size = None
+
+ if ieio == constants.IEIO_FILE:
+ (filename, ) = ieargs
+
+ if not utils.IsNormAbsPath(filename):
+ _Fail("Path '%s' is not normalized or absolute", filename)
+
+ directory = os.path.normpath(os.path.dirname(filename))
+
+ if (os.path.commonprefix([constants.EXPORT_DIR, directory]) !=
+ constants.EXPORT_DIR):
+ _Fail("File '%s' is not under exports directory '%s'",
+ filename, constants.EXPORT_DIR)
+
+ # Create directory
+ utils.Makedirs(directory, mode=0750)
+
+ quoted_filename = utils.ShellQuote(filename)
+
+ if mode == constants.IEM_IMPORT:
+ suffix = "> %s" % quoted_filename
+ elif mode == constants.IEM_EXPORT:
+ suffix = "< %s" % quoted_filename
+
+ # Retrieve file size
+ try:
+ st = os.stat(filename)
+ except EnvironmentError, err:
+ logging.error("Can't stat(2) %s: %s", filename, err)
+ else:
+ exp_size = utils.BytesToMebibyte(st.st_size)
+
+ elif ieio == constants.IEIO_RAW_DISK:
+ (disk, ) = ieargs
+
+ real_disk = _OpenRealBD(disk)
+
+ if mode == constants.IEM_IMPORT:
+ # we set here a smaller block size as, due to transport buffering, more
+ # than 64-128k will mostly ignored; we use nocreat to fail if the device
+ # is not already there or we pass a wrong path; we use notrunc to no
+ # attempt truncate on an LV device; we use oflag=dsync to not buffer too
+ # much memory; this means that at best, we flush every 64k, which will
+ # not be very fast
+ suffix = utils.BuildShellCmd(("| dd of=%s conv=nocreat,notrunc"
+ " bs=%s oflag=dsync"),
+ real_disk.dev_path,
+ str(64 * 1024))
+
+ elif mode == constants.IEM_EXPORT:
+ # the block size on the read dd is 1MiB to match our units
+ prefix = utils.BuildShellCmd("dd if=%s bs=%s count=%s |",
+ real_disk.dev_path,
+ str(1024 * 1024), # 1 MB
+ str(disk.size))
+ exp_size = disk.size
+
+ elif ieio == constants.IEIO_SCRIPT:
+ (disk, disk_index, ) = ieargs
+
+ assert isinstance(disk_index, (int, long))
+
+ real_disk = _OpenRealBD(disk)
+
+ inst_os = OSFromDisk(instance.os)
+ env = OSEnvironment(instance, inst_os)
+
+ if mode == constants.IEM_IMPORT:
+ env["IMPORT_DEVICE"] = env["DISK_%d_PATH" % disk_index]
+ env["IMPORT_INDEX"] = str(disk_index)
+ script = inst_os.import_script
+
+ elif mode == constants.IEM_EXPORT:
+ env["EXPORT_DEVICE"] = real_disk.dev_path
+ env["EXPORT_INDEX"] = str(disk_index)
+ script = inst_os.export_script
+
+ # TODO: Pass special environment only to script
+ script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script)
+
+ if mode == constants.IEM_IMPORT:
+ suffix = "| %s" % script_cmd
+
+ elif mode == constants.IEM_EXPORT:
+ prefix = "%s |" % script_cmd
+
+ # Let script predict size
+ exp_size = constants.IE_CUSTOM_SIZE
+
+ else:
+ _Fail("Invalid %s I/O mode %r", mode, ieio)
+
+ return (env, prefix, suffix, exp_size)
+
+
+def _CreateImportExportStatusDir(prefix):
+ """Creates status directory for import/export.
+
+ """
+ return tempfile.mkdtemp(dir=constants.IMPORT_EXPORT_DIR,
+ prefix=("%s-%s-" %
+ (prefix, utils.TimestampForFilename())))
+
+
+def StartImportExportDaemon(mode, opts, host, port, instance, ieio, ieioargs):
+ """Starts an import or export daemon.
+
+ @param mode: Import/output mode
+ @type opts: L{objects.ImportExportOptions}
+ @param opts: Daemon options
+ @type host: string
+ @param host: Remote host for export (None for import)
+ @type port: int
+ @param port: Remote port for export (None for import)
+ @type instance: L{objects.Instance}
+ @param instance: Instance object
+ @param ieio: Input/output type
+ @param ieioargs: Input/output arguments
+
+ """
+ if mode == constants.IEM_IMPORT:
+ prefix = "import"
+
+ if not (host is None and port is None):
+ _Fail("Can not specify host or port on import")
+
+ elif mode == constants.IEM_EXPORT:
+ prefix = "export"
+
+ if host is None or port is None:
+ _Fail("Host and port must be specified for an export")
+
+ else:
+ _Fail("Invalid mode %r", mode)
+
+ if (opts.key_name is None) ^ (opts.ca_pem is None):
+ _Fail("Cluster certificate can only be used for both key and CA")
+
+ (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \
+ _GetImportExportIoCommand(instance, mode, ieio, ieioargs)
+
+ if opts.key_name is None:
+ # Use server.pem
+ key_path = constants.NODED_CERT_FILE
+ cert_path = constants.NODED_CERT_FILE
+ assert opts.ca_pem is None
+ else:
+ (_, key_path, cert_path) = _GetX509Filenames(constants.CRYPTO_KEYS_DIR,
+ opts.key_name)
+ assert opts.ca_pem is not None
+
+ for i in [key_path, cert_path]:
+ if not os.path.exists(i):
+ _Fail("File '%s' does not exist" % i)
+
+ status_dir = _CreateImportExportStatusDir(prefix)
+ try:
+ status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE)
+ pid_file = utils.PathJoin(status_dir, _IES_PID_FILE)
+ ca_file = utils.PathJoin(status_dir, _IES_CA_FILE)
+
+ if opts.ca_pem is None:
+ # Use server.pem
+ ca = utils.ReadFile(constants.NODED_CERT_FILE)
+ else:
+ ca = opts.ca_pem
+
+ # Write CA file
+ utils.WriteFile(ca_file, data=ca, mode=0400)
+
+ cmd = [
+ constants.IMPORT_EXPORT_DAEMON,
+ status_file, mode,
+ "--key=%s" % key_path,
+ "--cert=%s" % cert_path,
+ "--ca=%s" % ca_file,
+ ]
+
+ if host:
+ cmd.append("--host=%s" % host)
+
+ if port:
+ cmd.append("--port=%s" % port)
+
+ if opts.compress:
+ cmd.append("--compress=%s" % opts.compress)
+
+ if opts.magic:
+ cmd.append("--magic=%s" % opts.magic)
+
+ if exp_size is not None:
+ cmd.append("--expected-size=%s" % exp_size)
+
+ if cmd_prefix:
+ cmd.append("--cmd-prefix=%s" % cmd_prefix)
+
+ if cmd_suffix:
+ cmd.append("--cmd-suffix=%s" % cmd_suffix)
+
+ logfile = _InstanceLogName(prefix, instance.os, instance.name)
+
+ # TODO: Once _InstanceLogName uses tempfile.mkstemp, StartDaemon has
+ # support for receiving a file descriptor for output
+ utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file,
+ output=logfile)
+
+ # The import/export name is simply the status directory name
+ return os.path.basename(status_dir)
+
+ except Exception:
+ shutil.rmtree(status_dir, ignore_errors=True)
+ raise
+
+
+def GetImportExportStatus(names):
+ """Returns import/export daemon status.
+
+ @type names: sequence
+ @param names: List of names
+ @rtype: List of dicts
+ @return: Returns a list of the state of each named import/export or None if a
+ status couldn't be read
+
+ """
+ result = []
+
+ for name in names:
+ status_file = utils.PathJoin(constants.IMPORT_EXPORT_DIR, name,
+ _IES_STATUS_FILE)
+
+ try:
+ data = utils.ReadFile(status_file)
+ except EnvironmentError, err:
+ if err.errno != errno.ENOENT:
+ raise
+ data = None
+
+ if not data:
+ result.append(None)
+ continue
+
+ result.append(serializer.LoadJson(data))
+
+ return result
+
+
+def AbortImportExport(name):
+ """Sends SIGTERM to a running import/export daemon.
+
+ """
+ logging.info("Abort import/export %s", name)
+
+ status_dir = utils.PathJoin(constants.IMPORT_EXPORT_DIR, name)
+ pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE))
+
+ if pid:
+ logging.info("Import/export %s is running with PID %s, sending SIGTERM",
+ name, pid)
+ utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM)
+
+
+def CleanupImportExport(name):
+ """Cleanup after an import or export.
+
+ If the import/export daemon is still running it's killed. Afterwards the
+ whole status directory is removed.
+
+ """
+ logging.info("Finalizing import/export %s", name)
+
+ status_dir = utils.PathJoin(constants.IMPORT_EXPORT_DIR, name)
+
+ pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE))
+
+ if pid:
+ logging.info("Import/export %s is still running with PID %s",
+ name, pid)
+ utils.KillProcess(pid, waitpid=False)
+
+ shutil.rmtree(status_dir, ignore_errors=True)
+
+
def _FindDisks(nodes_ip, disks):
"""Sets the physical ID on disks and returns the block devices.
rd.AttachNet(multimaster)
except errors.BlockDeviceError, err:
_Fail("Can't change network configuration: %s", err)
+
# wait until the disks are connected; we need to retry the re-attach
# if the device becomes standalone, as this might happen if the one
# node disconnects and reconnects in a different mode before the
# other node reconnects; in this case, one or both of the nodes will
# decide it has wrong configuration and switch to standalone
- RECONNECT_TIMEOUT = 2 * 60
- sleep_time = 0.100 # start with 100 miliseconds
- timeout_limit = time.time() + RECONNECT_TIMEOUT
- while time.time() < timeout_limit:
+
+ def _Attach():
all_connected = True
+
for rd in bdevs:
stats = rd.GetProcStatus()
- if not (stats.is_connected or stats.is_in_resync):
- all_connected = False
+
+ all_connected = (all_connected and
+ (stats.is_connected or stats.is_in_resync))
+
if stats.is_standalone:
# peer had different config info and this node became
# standalone, even though this should not happen with the
rd.AttachNet(multimaster)
except errors.BlockDeviceError, err:
_Fail("Can't change network configuration: %s", err)
- if all_connected:
- break
- time.sleep(sleep_time)
- sleep_time = min(5, sleep_time * 1.5)
- if not all_connected:
+
+ if not all_connected:
+ raise utils.RetryAgain()
+
+ try:
+ # Start with a delay of 100 miliseconds and go up to 5 seconds
+ utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60)
+ except utils.RetryTimeout:
_Fail("Timeout in disk reconnecting")
+
if multimaster:
# change to primary mode
for rd in bdevs:
"""Wait until DRBDs have synchronized.
"""
+ def _helper(rd):
+ stats = rd.GetProcStatus()
+ if not (stats.is_connected or stats.is_in_resync):
+ raise utils.RetryAgain()
+ return stats
+
bdevs = _FindDisks(nodes_ip, disks)
min_resync = 100
alldone = True
for rd in bdevs:
- stats = rd.GetProcStatus()
- if not (stats.is_connected or stats.is_in_resync):
- _Fail("DRBD device %s is not in sync: stats=%s", rd, stats)
+ try:
+ # poll each second for 15 seconds
+ stats = utils.Retry(_helper, 1, 15, args=[rd])
+ except utils.RetryTimeout:
+ stats = rd.GetProcStatus()
+ # last check
+ if not (stats.is_connected or stats.is_in_resync):
+ _Fail("DRBD device %s is not in sync: stats=%s", rd, stats)
alldone = alldone and (not stats.is_in_resync)
if stats.sync_percent is not None:
min_resync = min(min_resync, stats.sync_percent)
pid = 0
if pid > 0:
return "Reboot scheduled in 5 seconds"
+ # ensure the child is running on ram
+ try:
+ utils.Mlockall()
+ except Exception: # pylint: disable-msg=W0703
+ pass
time.sleep(5)
hyper.PowercycleNode()
on the master side.
"""
- RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
-
def __init__(self, hooks_base_dir=None):
"""Constructor for hooks runner.
"""
if hooks_base_dir is None:
hooks_base_dir = constants.HOOKS_BASE_DIR
- self._BASE_DIR = hooks_base_dir
-
- @staticmethod
- def ExecHook(script, env):
- """Exec one hook script.
-
- @type script: str
- @param script: the full path to the script
- @type env: dict
- @param env: the environment with which to exec the script
- @rtype: tuple (success, message)
- @return: a tuple of success and message, where success
- indicates the succes of the operation, and message
- which will contain the error details in case we
- failed
-
- """
- # exec the process using subprocess and log the output
- fdstdin = None
- try:
- fdstdin = open("/dev/null", "r")
- child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT, close_fds=True,
- shell=False, cwd="/", env=env)
- output = ""
- try:
- output = child.stdout.read(4096)
- child.stdout.close()
- except EnvironmentError, err:
- output += "Hook script error: %s" % str(err)
-
- while True:
- try:
- result = child.wait()
- break
- except EnvironmentError, err:
- if err.errno == errno.EINTR:
- continue
- raise
- finally:
- # try not to leak fds
- for fd in (fdstdin, ):
- if fd is not None:
- try:
- fd.close()
- except EnvironmentError, err:
- # just log the error
- #logging.exception("Error while closing fd %s", fd)
- pass
-
- return result == 0, utils.SafeEncode(output.strip())
+ # yeah, _BASE_DIR is not valid for attributes, we use it like a
+ # constant
+ self._BASE_DIR = hooks_base_dir # pylint: disable-msg=C0103
def RunHooks(self, hpath, phase, env):
"""Run the scripts in the hooks directory.
else:
_Fail("Unknown hooks phase '%s'", phase)
- rr = []
subdir = "%s-%s.d" % (hpath, suffix)
- dir_name = "%s/%s" % (self._BASE_DIR, subdir)
- try:
- dir_contents = utils.ListVisibleFiles(dir_name)
- except OSError:
- # FIXME: must log output in case of failures
- return rr
-
- # we use the standard python sort order,
- # so 00name is the recommended naming scheme
- dir_contents.sort()
- for relname in dir_contents:
- fname = os.path.join(dir_name, relname)
- if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
- self.RE_MASK.match(relname) is not None):
+ dir_name = utils.PathJoin(self._BASE_DIR, subdir)
+
+ results = []
+
+ if not os.path.isdir(dir_name):
+ # for non-existing/non-dirs, we simply exit instead of logging a
+ # warning at every operation
+ return results
+
+ runparts_results = utils.RunParts(dir_name, env=env, reset_env=True)
+
+ for (relname, relstatus, runresult) in runparts_results:
+ if relstatus == constants.RUNPARTS_SKIP:
rrval = constants.HKR_SKIP
output = ""
- else:
- result, output = self.ExecHook(fname, env)
- if not result:
+ elif relstatus == constants.RUNPARTS_ERR:
+ rrval = constants.HKR_FAIL
+ output = "Hook script execution error: %s" % runresult
+ elif relstatus == constants.RUNPARTS_RUN:
+ if runresult.failed:
rrval = constants.HKR_FAIL
else:
rrval = constants.HKR_SUCCESS
- rr.append(("%s/%s" % (subdir, relname), rrval, output))
+ output = utils.SafeEncode(runresult.output.strip())
+ results.append(("%s/%s" % (subdir, relname), rrval, output))
- return rr
+ return results
class IAllocatorRunner(object):
the master side.
"""
- def Run(self, name, idata):
+ @staticmethod
+ def Run(name, idata):
"""Run an iallocator script.
@type name: str
if dev_path.startswith(cls._DEV_PREFIX):
dev_path = dev_path[len(cls._DEV_PREFIX):]
dev_path = dev_path.replace("/", "_")
- fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
+ fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path)
return fpath
@classmethod