# 02110-1301, USA.
-"""Functions used by the node daemon"""
+"""Functions used by the node daemon
+
+@var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
+ the L{UploadFile} function
+@var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
+ in the L{_CleanDirectory} function
+
+"""
+
+# pylint: disable-msg=E1103
+
+# E1103: %s %r has no %r member (but some types could not be
+# inferred), because the _TryOSFromDisk returns either (True, os_obj)
+# or (False, "string") which confuses pylint
import os
import stat
import errno
import re
-import subprocess
import random
import logging
import tempfile
from ganeti import ssconf
+# File from which GetNodeInfo reads the node's boot id
+_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
+# The only directories _CleanDirectory agrees to clean; any other path
+# makes it fail the RPC instead of removing files
+_ALLOWED_CLEAN_DIRS = frozenset([
+ constants.DATA_DIR,
+ constants.JOB_QUEUE_ARCHIVE_DIR,
+ constants.QUEUE_DIR,
+ ])
+
+
+class RPCFail(Exception):
+ """Class denoting RPC failure.
+
+ Raised by L{_Fail}; its argument is the error message.
+
+ """
+
+
+def _Fail(msg, *args, **kwargs):
+ """Log an error and then raise an RPCFail exception.
+
+ This exception is then handled specially in the ganeti daemon and
+ turned into a 'failed' return type. As such, this function is a
+ useful shortcut for logging the error and returning it to the master
+ daemon.
+
+ @type msg: string
+ @param msg: the text of the exception; if positional arguments are
+ given, it is used as a format string and interpolated with them
+ @param args: optional values interpolated into C{msg} via the C{%}
+ operator
+ @keyword log: if present and false, the message is not logged
+ (by default the message is logged)
+ @keyword exc: if true (and logging is not disabled), the message is
+ logged via C{logging.exception} instead of C{logging.error}
+ @raise RPCFail: always, with the final message as its argument
+
+ """
+ if args:
+ msg = msg % args
+ if "log" not in kwargs or kwargs["log"]: # if we should log this error
+ if "exc" in kwargs and kwargs["exc"]:
+ logging.exception(msg)
+ else:
+ logging.error(msg)
+ raise RPCFail(msg)
+
+
def _GetConfig():
"""Simple wrapper to return a SimpleStore.
to the empty list
"""
+ if path not in _ALLOWED_CLEAN_DIRS:
+ _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
+ path)
+
if not os.path.isdir(path):
return
if exclude is None:
exclude = [os.path.normpath(i) for i in exclude]
for rel_name in utils.ListVisibleFiles(path):
- full_name = os.path.normpath(os.path.join(path, rel_name))
+ full_name = utils.PathJoin(path, rel_name)
if full_name in exclude:
continue
if os.path.isfile(full_name) and not os.path.islink(full_name):
utils.RemoveFile(full_name)
+def _BuildUploadFileList():
+ """Build the list of allowed upload files.
+
+ This is abstracted so that it's built only once at module import time.
+
+ The result contains a fixed set of cluster-wide files plus the
+ ancillary files reported by the class of every hypervisor listed in
+ C{constants.HYPER_TYPES}.
+
+ @rtype: frozenset
+ @return: the file names accepted for upload
+
+ """
+ allowed_files = set([
+ constants.CLUSTER_CONF_FILE,
+ constants.ETC_HOSTS,
+ constants.SSH_KNOWN_HOSTS_FILE,
+ constants.VNC_PASSWORD_FILE,
+ constants.RAPI_CERT_FILE,
+ constants.RAPI_USERS_FILE,
+ constants.HMAC_CLUSTER_KEY,
+ ])
+
+ # each hypervisor type may contribute additional files of its own
+ for hv_name in constants.HYPER_TYPES:
+ hv_class = hypervisor.GetHypervisorClass(hv_name)
+ allowed_files.update(hv_class.GetAncillaryFiles())
+
+ return frozenset(allowed_files)
+
+
+_ALLOWED_UPLOAD_FILES = _BuildUploadFileList()
+
+
def JobQueuePurge():
"""Removes job queue files and archived jobs.
- @rtype: None
+ @rtype: tuple
+ @return: True, None
"""
_CleanDirectory(constants.QUEUE_DIR, exclude=[constants.JOB_QUEUE_LOCK_FILE])
for consumption here or from the node daemon.
@rtype: tuple
- @return: (master_netdev, master_ip, master_name) if we have a good
- configuration, otherwise (None, None, None)
+ @return: master_netdev, master_ip, master_name
+ @raise RPCFail: in case of errors
"""
try:
master_ip = cfg.GetMasterIP()
master_node = cfg.GetMasterNode()
except errors.ConfigurationError, err:
- logging.exception("Cluster configuration incomplete")
- return (None, None, None)
+ _Fail("Cluster configuration incomplete: %s", err, exc=True)
return (master_netdev, master_ip, master_node)
based on the start_daemons parameter.
@type start_daemons: boolean
- @param start_daemons: whther to also start the master
+ @param start_daemons: whether to also start the master
daemons (ganeti-masterd and ganeti-rapi)
@type no_voting: boolean
@param no_voting: whether to start ganeti-masterd without a node vote
@rtype: None
"""
- ok = True
+ # GetMasterInfo will raise an exception if not able to return data
master_netdev, master_ip, _ = GetMasterInfo()
- if not master_netdev:
- return False
+ err_msgs = []
if utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
if utils.OwnIpAddress(master_ip):
# we already have the ip:
- logging.debug("Already started")
+ logging.debug("Master IP already configured, doing nothing")
else:
- logging.error("Someone else has the master ip, not activating")
- ok = False
+ msg = "Someone else has the master ip, not activating"
+ logging.error(msg)
+ err_msgs.append(msg)
else:
result = utils.RunCmd(["ip", "address", "add", "%s/32" % master_ip,
"dev", master_netdev, "label",
"%s:0" % master_netdev])
if result.failed:
- logging.error("Can't activate master IP: %s", result.output)
- ok = False
+ msg = "Can't activate master IP: %s" % result.output
+ logging.error(msg)
+ err_msgs.append(msg)
result = utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev,
"-s", master_ip, master_ip])
# and now start the master and rapi daemons
if start_daemons:
- daemons_params = {
- 'ganeti-masterd': [],
- 'ganeti-rapi': [],
- }
if no_voting:
- daemons_params['ganeti-masterd'].append('--no-voting')
- daemons_params['ganeti-masterd'].append('--yes-do-it')
- for daemon in daemons_params:
- cmd = [daemon]
- cmd.extend(daemons_params[daemon])
- result = utils.RunCmd(cmd)
- if result.failed:
- logging.error("Can't start daemon %s: %s", daemon, result.output)
- ok = False
- return ok
+ masterd_args = "--no-voting --yes-do-it"
+ else:
+ masterd_args = ""
+
+ env = {
+ "EXTRA_MASTERD_ARGS": masterd_args,
+ }
+
+ result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"], env=env)
+ if result.failed:
+ msg = "Can't start Ganeti master: %s" % result.output
+ logging.error(msg)
+ err_msgs.append(msg)
+
+ if err_msgs:
+ _Fail("; ".join(err_msgs))
def StopMaster(stop_daemons):
@rtype: None
"""
+ # TODO: log and report back to the caller the error failures; we
+ # need to decide in which case we fail the RPC for this
+
+ # GetMasterInfo will raise an exception if not able to return data
master_netdev, master_ip, _ = GetMasterInfo()
- if not master_netdev:
- return False
result = utils.RunCmd(["ip", "address", "del", "%s/32" % master_ip,
"dev", master_netdev])
# but otherwise ignore the failure
if stop_daemons:
- # stop/kill the rapi and the master daemon
- for daemon in constants.RAPI_PID, constants.MASTERD_PID:
- utils.KillProcess(utils.ReadPidFile(utils.DaemonPidFileName(daemon)))
-
- return True
+ result = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"])
+ if result.failed:
+ logging.error("Could not stop Ganeti master, command %s had exitcode %s"
+ " and error %s",
+ result.cmd, result.exit_code, result.output)
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
mkdir=True)
except errors.OpExecError, err:
- msg = "Error while processing user ssh files"
- logging.exception(msg)
- return (False, "%s: %s" % (msg, err))
+ _Fail("Error while processing user ssh files: %s", err, exc=True)
for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
utils.WriteFile(name, data=content, mode=0600)
utils.AddAuthorizedKey(auth_keys, sshpub)
- utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
-
- return (True, "Node added successfully")
+ result = utils.RunCmd([constants.DAEMON_UTIL, "reload-ssh-keys"])
+ if result.failed:
+ _Fail("Unable to reload SSH keys (command %r, exit code %s, output %r)",
+ result.cmd, result.exit_code, result.output)
-def LeaveCluster():
+def LeaveCluster(modify_ssh_setup):
"""Cleans up and remove the current node.
This function cleans up and prepares the current node to be removed
L{errors.QuitGanetiException} which is used as a special case to
shutdown the node daemon.
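+ @type modify_ssh_setup: boolean
+ @param modify_ssh_setup: whether to also remove this node's SSH key
+ from the authorized keys and delete its key files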
+
"""
_CleanDirectory(constants.DATA_DIR)
JobQueuePurge()
- try:
- priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
- except errors.OpExecError:
- logging.exception("Error while processing ssh files")
- return
+ if modify_ssh_setup:
+ try:
+ priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
+
+ utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))
+
+ utils.RemoveFile(priv_key)
+ utils.RemoveFile(pub_key)
+ except errors.OpExecError:
+ logging.exception("Error while processing ssh files")
- f = open(pub_key, 'r')
try:
- utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
- finally:
- f.close()
+ utils.RemoveFile(constants.HMAC_CLUSTER_KEY)
+ utils.RemoveFile(constants.RAPI_CERT_FILE)
+ utils.RemoveFile(constants.SSL_CERT_FILE)
+ except: # pylint: disable-msg=W0702
+ logging.exception("Error while removing cluster secrets")
- utils.RemoveFile(priv_key)
- utils.RemoveFile(pub_key)
+ result = utils.RunCmd([constants.DAEMON_UTIL, "stop", constants.CONFD])
+ if result.failed:
+ logging.error("Command %s failed with exitcode %s and error %s",
+ result.cmd, result.exit_code, result.output)
- # Return a reassuring string to the caller, and quit
- raise errors.QuitGanetiException(False, 'Shutdown scheduled')
+ # Raise a custom exception (handled in ganeti-noded)
+ raise errors.QuitGanetiException(True, 'Shutdown scheduled')
def GetNodeInfo(vgname, hypervisor_type):
- """Gives back a hash with different informations about the node.
+ """Gives back a hash with different information about the node.
@type vgname: C{string}
@param vgname: the name of the volume group to ask for disk space information
if hyp_info is not None:
outputarray.update(hyp_info)
- f = open("/proc/sys/kernel/random/boot_id", 'r')
- try:
- outputarray["bootid"] = f.read(128).rstrip("\n")
- finally:
- f.close()
+ outputarray["bootid"] = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
return outputarray
tmp[my_name] = ("Can't find my own primary/secondary IP"
" in the node list")
else:
- port = utils.GetNodeDaemonPort()
+ port = utils.GetDaemonPort(constants.NODED)
for name, pip, sip in what[constants.NV_NODENETTEST]:
fail = []
if not utils.TcpPing(pip, port, source=my_pip):
what[constants.NV_INSTANCELIST])
if constants.NV_VGLIST in what:
- result[constants.NV_VGLIST] = ListVolumeGroups()
+ result[constants.NV_VGLIST] = utils.ListVolumeGroups()
+
+ if constants.NV_PVLIST in what:
+ result[constants.NV_PVLIST] = \
+ bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
+ filter_allocatable=False)
if constants.NV_VERSION in what:
result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
used_minors = str(err)
result[constants.NV_DRBDLIST] = used_minors
+ if constants.NV_NODESETUP in what:
+ result[constants.NV_NODESETUP] = tmpr = []
+ if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
+ tmpr.append("The sysfs filesytem doesn't seem to be mounted"
+ " under /sys, missing required directories /sys/block"
+ " and /sys/class/net")
+ if (not os.path.isdir("/proc/sys") or
+ not os.path.isfile("/proc/sysrq-trigger")):
+ tmpr.append("The procfs filesystem doesn't seem to be mounted"
+ " under /proc, missing required directory /proc/sys and"
+ " the file /proc/sysrq-trigger")
+
+ if constants.NV_TIME in what:
+ result[constants.NV_TIME] = utils.SplitTime(time.time())
+
return result
"--separator=%s" % sep,
"-olv_name,lv_size,lv_attr", vg_name])
if result.failed:
- logging.error("Failed to list logical volumes, lvs output: %s",
- result.output)
- return result.output
+ _Fail("Failed to list logical volumes, lvs output: %s", result.output)
valid_line_re = re.compile("^ *([^|]+)\|([0-9.]+)\|([^|]{6})\|?$")
for line in result.stdout.splitlines():
name, size, attr = match.groups()
inactive = attr[4] == '-'
online = attr[5] == 'o'
+ virtual = attr[0] == 'v'
+ if virtual:
+ # we don't want to report such volumes as existing, since they
+ # don't really hold data
+ continue
lvs[name] = (size, inactive, online)
return lvs
"--separator=|",
"--options=lv_name,lv_size,devices,vg_name"])
if result.failed:
- logging.error("Failed to list logical volumes, lvs output: %s",
- result.output)
- return []
+ _Fail("Failed to list logical volumes, lvs output: %s",
+ result.output)
def parse_dev(dev):
if '(' in dev:
@return: C{True} if all of them exist, C{False} otherwise
"""
+ missing = []
for bridge in bridges_list:
if not utils.BridgeExists(bridge):
- return False
+ missing.append(bridge)
- return True
+ if missing:
+ _Fail("Missing bridges %s", utils.CommaJoin(missing))
def GetInstanceList(hypervisor_list):
names = hypervisor.GetHypervisor(hname).ListInstances()
results.extend(names)
except errors.HypervisorError, err:
- logging.exception("Error enumerating instances for hypevisor %s", hname)
- raise
+ _Fail("Error enumerating instances (hypervisor %s): %s",
+ hname, err, exc=True)
return results
def GetInstanceInfo(instance, hname):
- """Gives back the informations about an instance as a dictionary.
+ """Gives back the information about an instance as a dictionary.
@type instance: string
@param instance: the instance name
"""
hyper = hypervisor.GetHypervisor(instance.hypervisor)
- if instance.name not in hyper.ListInstances():
- return (False, 'not running')
+ iname = instance.name
+ if iname not in hyper.ListInstances():
+ _Fail("Instance %s is not running", iname)
for idx in range(len(instance.disks)):
- link_name = _GetBlockDevSymlinkPath(instance.name, idx)
+ link_name = _GetBlockDevSymlinkPath(iname, idx)
if not os.path.islink(link_name):
- return (False, 'not restarted since ganeti 1.2.5')
-
- return (True, '')
+ _Fail("Instance %s was not restarted since ganeti 1.2.5", iname)
def GetAllInstancesInfo(hypervisor_list):
for hname in hypervisor_list:
iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo()
if iinfo:
- for name, inst_id, memory, vcpus, state, times in iinfo:
+ for name, _, memory, vcpus, state, times in iinfo:
value = {
'memory': memory,
'vcpus': vcpus,
# invocations of the different hypervisors
for key in 'memory', 'vcpus':
if value[key] != output[name][key]:
- raise errors.HypervisorError("Instance %s is running twice"
- " with different parameters" % name)
+ _Fail("Instance %s is running twice"
+ " with different parameters", name)
output[name] = value
return output
-def InstanceOsAdd(instance):
+def _InstanceLogName(kind, os_name, instance):
+ """Compute the OS log filename for a given instance and operation.
+
+ The instance name and os name are passed in as strings since not all
+ operations have these as part of an instance object.
+
+ @type kind: string
+ @param kind: the operation type (e.g. add, import, etc.)
+ @type os_name: string
+ @param os_name: the os name
+ @type instance: string
+ @param instance: the name of the instance being imported/added/etc.
+ @rtype: string
+ @return: a path under C{constants.LOG_OS_DIR} whose file name has the
+ form C{<kind>-<os_name>-<instance>-<timestamp>.log}, the timestamp
+ being the current time truncated to whole seconds
+
+ """
+ base = "%s-%s-%s-%d.log" % (kind, os_name, instance, int(time.time()))
+ return utils.PathJoin(constants.LOG_OS_DIR, base)
+
+
+def InstanceOsAdd(instance, reinstall, debug):
"""Add an OS to an instance.
@type instance: L{objects.Instance}
@param instance: Instance whose OS is to be installed
- @rtype: boolean
- @return: the success of the operation
+ @type reinstall: boolean
+ @param reinstall: whether this is an instance reinstall
+ @type debug: integer
+ @param debug: debug level, passed to the OS scripts
+ @rtype: None
"""
- try:
- inst_os = OSFromDisk(instance.os)
- except errors.InvalidOS, err:
- os_name, os_dir, os_err = err.args
- if os_dir is None:
- return (False, "Can't find OS '%s': %s" % (os_name, os_err))
- else:
- return (False, "Error parsing OS '%s' in directory %s: %s" %
- (os_name, os_dir, os_err))
+ inst_os = OSFromDisk(instance.os)
- create_env = OSEnvironment(instance)
+ create_env = OSEnvironment(instance, inst_os, debug)
+ if reinstall:
+ create_env['INSTANCE_REINSTALL'] = "1"
- logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
- instance.name, int(time.time()))
+ logfile = _InstanceLogName("add", instance.os, instance.name)
result = utils.RunCmd([inst_os.create_script], env=create_env,
cwd=inst_os.path, output=logfile,)
result.output)
lines = [utils.SafeEncode(val)
for val in utils.TailFile(logfile, lines=20)]
- return (False, "OS create script failed (%s), last lines in the"
- " log file:\n%s" % (result.fail_reason, "\n".join(lines)))
-
- return (True, "Successfully installed")
+ _Fail("OS create script failed (%s), last lines in the"
+ " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
-def RunRenameInstance(instance, old_name):
+def RunRenameInstance(instance, old_name, debug):
"""Run the OS rename script for an instance.
@type instance: L{objects.Instance}
@param instance: Instance whose OS is to be installed
@type old_name: string
@param old_name: previous instance name
+ @type debug: integer
+ @param debug: debug level, passed to the OS scripts
@rtype: boolean
@return: the success of the operation
"""
inst_os = OSFromDisk(instance.os)
- rename_env = OSEnvironment(instance)
+ rename_env = OSEnvironment(instance, inst_os, debug)
rename_env['OLD_INSTANCE_NAME'] = old_name
- logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
- old_name,
- instance.name, int(time.time()))
+ logfile = _InstanceLogName("rename", instance.os,
+ "%s-%s" % (old_name, instance.name))
result = utils.RunCmd([inst_os.rename_script], env=rename_env,
cwd=inst_os.path, output=logfile)
result.cmd, result.fail_reason, result.output)
lines = [utils.SafeEncode(val)
for val in utils.TailFile(logfile, lines=20)]
- return (False, "OS rename script failed (%s), last lines in the"
- " log file:\n%s" % (result.fail_reason, "\n".join(lines)))
-
- return (True, "Rename successful")
+ _Fail("OS rename script failed (%s), last lines in the"
+ " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
def _GetVGInfo(vg_name):
- """Get informations about the volume group.
+ """Get information about the volume group.
@type vg_name: str
@param vg_name: the volume group which we query
"vg_free": int(round(float(valarr[1]), 0)),
"pv_count": int(valarr[2]),
}
- except ValueError, err:
- logging.exception("Fail to parse vgs output")
+ except (TypeError, ValueError), err:
+ logging.exception("Fail to parse vgs output: %s", err)
else:
logging.error("vgs output has the wrong number of fields (expected"
" three): %s", str(valarr))
def _GetBlockDevSymlinkPath(instance_name, idx):
- return os.path.join(constants.DISK_LINKS_DIR,
- "%s:%d" % (instance_name, idx))
+ return utils.PathJoin(constants.DISK_LINKS_DIR,
+ "%s:%d" % (instance_name, idx))
def _SymlinkBlockDev(instance_name, device_path, idx):
"""Remove the block device symlinks belonging to the given instance.
"""
- for idx, disk in enumerate(disks):
+ for idx, _ in enumerate(disks):
link_name = _GetBlockDevSymlinkPath(instance_name, idx)
if os.path.islink(link_name):
try:
@type instance: L{objects.Instance}
@param instance: the instance object
- @rtype: boolean
- @return: whether the startup was successful or not
+ @rtype: None
"""
running_instances = GetInstanceList([instance.hypervisor])
if instance.name in running_instances:
- return (True, "Already running")
+ logging.info("Instance %s already running, not starting", instance.name)
+ return
try:
block_devices = _GatherAndLinkBlockDevs(instance)
hyper = hypervisor.GetHypervisor(instance.hypervisor)
hyper.StartInstance(instance, block_devices)
except errors.BlockDeviceError, err:
- logging.exception("Failed to start instance")
- return (False, "Block device error: %s" % str(err))
+ _Fail("Block device error: %s", err, exc=True)
except errors.HypervisorError, err:
- logging.exception("Failed to start instance")
_RemoveBlockDevLinks(instance.name, instance.disks)
- return (False, "Hypervisor error: %s" % str(err))
+ _Fail("Hypervisor error: %s", err, exc=True)
- return (True, "Instance started successfully")
-
-def InstanceShutdown(instance):
+def InstanceShutdown(instance, timeout):
"""Shut an instance down.
@note: this functions uses polling with a hardcoded timeout.
@type instance: L{objects.Instance}
@param instance: the instance object
- @rtype: boolean
- @return: whether the startup was successful or not
+ @type timeout: integer
+ @param timeout: maximum timeout for soft shutdown
+ @rtype: None
"""
hv_name = instance.hypervisor
- running_instances = GetInstanceList([hv_name])
+ hyper = hypervisor.GetHypervisor(hv_name)
+ iname = instance.name
- if instance.name not in running_instances:
- return (True, "Instance already stopped")
+ if instance.name not in hyper.ListInstances():
+ logging.info("Instance %s not running, doing nothing", iname)
+ return
- hyper = hypervisor.GetHypervisor(hv_name)
- try:
- hyper.StopInstance(instance)
- except errors.HypervisorError, err:
- msg = "Failed to stop instance %s: %s" % (instance.name, err)
- logging.error(msg)
- return (False, msg)
+ class _TryShutdown:
+ def __init__(self):
+ self.tried_once = False
- # test every 10secs for 2min
+ def __call__(self):
+ if iname not in hyper.ListInstances():
+ return
- time.sleep(1)
- for dummy in range(11):
- if instance.name not in GetInstanceList([hv_name]):
- break
- time.sleep(10)
- else:
+ try:
+ hyper.StopInstance(instance, retry=self.tried_once)
+ except errors.HypervisorError, err:
+ if iname not in hyper.ListInstances():
+ # if the instance is no longer existing, consider this a
+ # success and go to cleanup
+ return
+
+ _Fail("Failed to stop instance %s: %s", iname, err)
+
+ self.tried_once = True
+
+ raise utils.RetryAgain()
+
+ try:
+ utils.Retry(_TryShutdown(), 5, timeout)
+ except utils.RetryTimeout:
# the shutdown did not succeed
- logging.error("Shutdown of '%s' unsuccessful, using destroy",
- instance.name)
+ logging.error("Shutdown of '%s' unsuccessful, forcing", iname)
try:
hyper.StopInstance(instance, force=True)
except errors.HypervisorError, err:
- msg = "Failed to force stop instance %s: %s" % (instance.name, err)
- logging.error(msg)
- return (False, msg)
+ if iname in hyper.ListInstances():
+ # only raise an error if the instance still exists, otherwise
+ # the error could simply be "instance ... unknown"!
+ _Fail("Failed to force stop instance %s: %s", iname, err)
time.sleep(1)
- if instance.name in GetInstanceList([hv_name]):
- msg = ("Could not shutdown instance %s even by destroy" %
- instance.name)
- logging.error(msg)
- return (False, msg)
- _RemoveBlockDevLinks(instance.name, instance.disks)
+ if iname in hyper.ListInstances():
+ _Fail("Could not shutdown instance %s even by destroy", iname)
- return (True, "Instance has been shutdown successfully")
+ _RemoveBlockDevLinks(iname, instance.disks)
-def InstanceReboot(instance, reboot_type):
+def InstanceReboot(instance, reboot_type, shutdown_timeout):
"""Reboot an instance.
@type instance: L{objects.Instance}
not accepted here, since that mode is handled differently, in
cmdlib, and translates into full stop and start of the
instance (instead of a call_instance_reboot RPC)
- @rtype: boolean
- @return: the success of the operation
+ @type shutdown_timeout: integer
+ @param shutdown_timeout: maximum timeout for soft shutdown
+ @rtype: None
"""
running_instances = GetInstanceList([instance.hypervisor])
if instance.name not in running_instances:
- msg = "Cannot reboot instance %s that is not running" % instance.name
- logging.error(msg)
- return (False, msg)
+ _Fail("Cannot reboot instance %s that is not running", instance.name)
hyper = hypervisor.GetHypervisor(instance.hypervisor)
if reboot_type == constants.INSTANCE_REBOOT_SOFT:
try:
hyper.RebootInstance(instance)
except errors.HypervisorError, err:
- msg = "Failed to soft reboot instance %s: %s" % (instance.name, err)
- logging.error(msg)
- return (False, msg)
+ _Fail("Failed to soft reboot instance %s: %s", instance.name, err)
elif reboot_type == constants.INSTANCE_REBOOT_HARD:
try:
- stop_result = InstanceShutdown(instance)
- if not stop_result[0]:
- return stop_result
+ InstanceShutdown(instance, shutdown_timeout)
return StartInstance(instance)
except errors.HypervisorError, err:
- msg = "Failed to hard reboot instance %s: %s" % (instance.name, err)
- logging.error(msg)
- return (False, msg)
+ _Fail("Failed to hard reboot instance %s: %s", instance.name, err)
else:
- return (False, "Invalid reboot_type received: %s" % (reboot_type,))
-
- return (True, "Reboot successful")
+ _Fail("Invalid reboot_type received: %s", reboot_type)
def MigrationInfo(instance):
try:
info = hyper.MigrationInfo(instance)
except errors.HypervisorError, err:
- msg = "Failed to fetch migration information"
- logging.exception(msg)
- return (False, '%s: %s' % (msg, err))
- return (True, info)
+ _Fail("Failed to fetch migration information: %s", err, exc=True)
+ return info
def AcceptInstance(instance, info, target):
try:
hyper.AcceptInstance(instance, info, target)
except errors.HypervisorError, err:
- msg = "Failed to accept instance"
- logging.exception(msg)
- return (False, '%s: %s' % (msg, err))
- return (True, "Accept successfull")
+ _Fail("Failed to accept instance: %s", err, exc=True)
def FinalizeMigration(instance, info, success):
try:
hyper.FinalizeMigration(instance, info, success)
except errors.HypervisorError, err:
- msg = "Failed to finalize migration"
- logging.exception(msg)
- return (False, '%s: %s' % (msg, err))
- return (True, "Migration Finalized")
+ _Fail("Failed to finalize migration: %s", err, exc=True)
def MigrateInstance(instance, target, live):
hyper = hypervisor.GetHypervisor(instance.hypervisor)
try:
- hyper.MigrateInstance(instance.name, target, live)
+ hyper.MigrateInstance(instance, target, live)
except errors.HypervisorError, err:
- msg = "Failed to migrate instance"
- logging.exception(msg)
- return (False, "%s: %s" % (msg, err))
- return (True, "Migration successfull")
+ _Fail("Failed to migrate instance: %s", err, exc=True)
def BlockdevCreate(disk, size, owner, on_primary, info):
it's not required to return anything.
"""
+ # TODO: remove the obsolete 'size' argument
+ # pylint: disable-msg=W0613
clist = []
if disk.children:
for child in disk.children:
try:
crdev = _RecursiveAssembleBD(child, owner, on_primary)
except errors.BlockDeviceError, err:
- errmsg = "Can't assemble device %s: %s" % (child, err)
- logging.error(errmsg)
- return False, errmsg
+ _Fail("Can't assemble device %s: %s", child, err)
if on_primary or disk.AssembleOnSecondary():
# we need the children open in case the device itself has to
# be assembled
try:
+ # pylint: disable-msg=E1103
crdev.Open()
except errors.BlockDeviceError, err:
- errmsg = "Can't make child '%s' read-write: %s" % (child, err)
- logging.error(errmsg)
- return False, errmsg
+ _Fail("Can't make child '%s' read-write: %s", child, err)
clist.append(crdev)
try:
device = bdev.Create(disk.dev_type, disk.physical_id, clist, disk.size)
except errors.BlockDeviceError, err:
- return False, "Can't create block device: %s" % str(err)
+ _Fail("Can't create block device: %s", err)
if on_primary or disk.AssembleOnSecondary():
try:
device.Assemble()
except errors.BlockDeviceError, err:
- errmsg = ("Can't assemble device after creation, very"
- " unusual event: %s" % str(err))
- logging.error(errmsg)
- return False, errmsg
+ _Fail("Can't assemble device after creation, unusual event: %s", err)
device.SetSyncSpeed(constants.SYNC_SPEED)
if on_primary or disk.OpenOnSecondary():
try:
device.Open(force=True)
except errors.BlockDeviceError, err:
- errmsg = ("Can't make device r/w after creation, very"
- " unusual event: %s" % str(err))
- logging.error(errmsg)
- return False, errmsg
+ _Fail("Can't make device r/w after creation, unusual event: %s", err)
DevCacheManager.UpdateCache(device.dev_path, owner,
on_primary, disk.iv_name)
device.SetInfo(info)
- physical_id = device.unique_id
- return True, physical_id
+ return device.unique_id
def BlockdevRemove(disk):
"""
msgs = []
- result = True
try:
rdev = _RecursiveFindBD(disk)
except errors.BlockDeviceError, err:
rdev.Remove()
except errors.BlockDeviceError, err:
msgs.append(str(err))
- result = False
- if result:
+ if not msgs:
DevCacheManager.RemoveCache(r_path)
if disk.children:
for child in disk.children:
- c_status, c_msg = BlockdevRemove(child)
- result = result and c_status
- if c_msg: # not an empty message
- msgs.append(c_msg)
+ try:
+ BlockdevRemove(child)
+ except RPCFail, err:
+ msgs.append(str(err))
- return (result, "; ".join(msgs))
+ if msgs:
+ _Fail("; ".join(msgs))
def _RecursiveAssembleBD(disk, owner, as_primary):
C{True} for secondary nodes
"""
- status = True
- result = "no error information"
try:
result = _RecursiveAssembleBD(disk, owner, as_primary)
if isinstance(result, bdev.BlockDev):
+ # pylint: disable-msg=E1103
result = result.dev_path
except errors.BlockDeviceError, err:
- result = "Error while assembling disk: %s" % str(err)
- status = False
- return (status, result)
+ _Fail("Error while assembling disk: %s", err, exc=True)
+
+ return result
def BlockdevShutdown(disk):
"""Shut down a block device.
- First, if the device is assembled (Attach() is successfull), then
+ First, if the device is assembled (Attach() is successful), then
the device is shutdown. Then the children of the device are
shutdown.
@type disk: L{objects.Disk}
@param disk: the description of the disk we should
shutdown
- @rtype: boolean
- @return: the success of the operation
+ @rtype: None
"""
msgs = []
- result = True
r_dev = _RecursiveFindBD(disk)
if r_dev is not None:
r_path = r_dev.dev_path
DevCacheManager.RemoveCache(r_path)
except errors.BlockDeviceError, err:
msgs.append(str(err))
- result = False
if disk.children:
for child in disk.children:
- c_status, c_msg = BlockdevShutdown(child)
- result = result and c_status
- if c_msg: # not an empty message
- msgs.append(c_msg)
+ try:
+ BlockdevShutdown(child)
+ except RPCFail, err:
+ msgs.append(str(err))
- return (result, "; ".join(msgs))
+ if msgs:
+ _Fail("; ".join(msgs))
def BlockdevAddchildren(parent_cdev, new_cdevs):
@param parent_cdev: the disk to which we should add children
@type new_cdevs: list of L{objects.Disk}
@param new_cdevs: the list of children which we should add
- @rtype: boolean
- @return: the success of the operation
+ @rtype: None
"""
parent_bdev = _RecursiveFindBD(parent_cdev)
if parent_bdev is None:
- logging.error("Can't find parent device")
- return False
+ _Fail("Can't find parent device '%s' in add children", parent_cdev)
new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
if new_bdevs.count(None) > 0:
- logging.error("Can't find new device(s) to add: %s:%s",
- new_bdevs, new_cdevs)
- return False
+ _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs)
parent_bdev.AddChildren(new_bdevs)
- return True
def BlockdevRemovechildren(parent_cdev, new_cdevs):
@param parent_cdev: the disk from which we should remove children
@type new_cdevs: list of L{objects.Disk}
@param new_cdevs: the list of children which we should remove
- @rtype: boolean
- @return: the success of the operation
+ @rtype: None
"""
parent_bdev = _RecursiveFindBD(parent_cdev)
if parent_bdev is None:
- logging.error("Can't find parent in remove children: %s", parent_cdev)
- return False
+ _Fail("Can't find parent device '%s' in remove children", parent_cdev)
devs = []
for disk in new_cdevs:
rpath = disk.StaticDevPath()
if rpath is None:
bd = _RecursiveFindBD(disk)
if bd is None:
- logging.error("Can't find dynamic device %s while removing children",
- disk)
- return False
+ _Fail("Can't find device %s while removing children", disk)
else:
devs.append(bd.dev_path)
else:
+ if not utils.IsNormAbsPath(rpath):
+ _Fail("Strange path returned from StaticDevPath: '%s'", rpath)
devs.append(rpath)
parent_bdev.RemoveChildren(devs)
- return True
def BlockdevGetmirrorstatus(disks):
for dsk in disks:
rbd = _RecursiveFindBD(dsk)
if rbd is None:
- raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
+ _Fail("Can't find device %s", dsk)
+
stats.append(rbd.CombinedSyncStatus())
+
return stats
def _RecursiveFindBD(disk):
"""Check if a device is activated.
- If so, return informations about the real device.
+ If so, return information about the real device.
@type disk: L{objects.Disk}
@param disk: the disk object we need to find
def BlockdevFind(disk):
"""Check if a device is activated.
- If it is, return informations about the real device.
+ If it is, return information about the real device.
@type disk: L{objects.Disk}
@param disk: the disk to find
- @rtype: None or tuple
- @return: None if the disk cannot be found, otherwise a
- tuple (device_path, major, minor, sync_percent,
- estimated_time, is_degraded)
+ @rtype: None or objects.BlockDevStatus
+ @return: None if the disk cannot be found, otherwise the current
+ information
"""
try:
rbd = _RecursiveFindBD(disk)
except errors.BlockDeviceError, err:
- return (False, str(err))
+ _Fail("Failed to find device: %s", err, exc=True)
+
if rbd is None:
- return (True, None)
- return (True, (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus())
+ return None
+
+ return rbd.GetSyncStatus()
+
+
+def BlockdevGetsize(disks):
+ """Computes the size of the given disks.
+
+ If a disk is not found, returns None instead.
+
+ @type disks: list of L{objects.Disk}
+ @param disks: the list of disk to compute the size for
+ @rtype: list
+ @return: list with elements None if the disk cannot be found,
+ otherwise the size
+
+ """
+ result = []
+ for cf in disks:
+ try:
+ rbd = _RecursiveFindBD(cf)
+ except errors.BlockDeviceError:
+ result.append(None)
+ continue
+ if rbd is None:
+ result.append(None)
+ else:
+ result.append(rbd.GetActualSize())
+ return result
+
+
+def BlockdevExport(disk, dest_node, dest_path, cluster_name):
+ """Export a block device to a remote node.
+
+ @type disk: L{objects.Disk}
+ @param disk: the description of the disk to export
+ @type dest_node: str
+ @param dest_node: the destination node to export to
+ @type dest_path: str
+ @param dest_path: the destination path on the target node
+ @type cluster_name: str
+ @param cluster_name: the cluster name, needed for SSH hostalias
+ @rtype: None
+
+ """
+ real_disk = _RecursiveFindBD(disk)
+ if real_disk is None:
+ _Fail("Block device '%s' is not set up", disk)
+
+ real_disk.Open()
+
+ # the block size on the read dd is 1MiB to match our units
+ expcmd = utils.BuildShellCmd("set -e; set -o pipefail; "
+ "dd if=%s bs=1048576 count=%s",
+ real_disk.dev_path, str(disk.size))
+
+ # we set here a smaller block size as, due to ssh buffering, more
+ # than 64-128k will mostly be ignored; we use nocreat to fail if the
+ # device is not already there or we pass a wrong path; we use
+ # notrunc to not attempt to truncate an LV device; we use oflag=dsync
+ # to not buffer too much memory; this means that at best, we flush
+ # every 64k, which will not be very fast
+ destcmd = utils.BuildShellCmd("dd of=%s conv=nocreat,notrunc bs=65536"
+ " oflag=dsync", dest_path)
+
+ remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node,
+ constants.GANETI_RUNAS,
+ destcmd)
+
+ # all commands have been checked, so we're safe to combine them
+ command = '|'.join([expcmd, utils.ShellQuoteArgs(remotecmd)])
+
+ result = utils.RunCmd(["bash", "-c", command])
+
+ if result.failed:
+ _Fail("Disk copy command '%s' returned error: %s"
+ " output: %s", command, result.fail_reason, result.output)
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
@param atime: the atime to set on the file (can be None)
@type mtime: float
@param mtime: the mtime to set on the file (can be None)
- @rtype: boolean
- @return: the success of the operation; errors are logged
- in the node daemon log
+ @rtype: None
"""
if not os.path.isabs(file_name):
- logging.error("Filename passed to UploadFile is not absolute: '%s'",
- file_name)
- return False
+ _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name)
- allowed_files = [
- constants.CLUSTER_CONF_FILE,
- constants.ETC_HOSTS,
- constants.SSH_KNOWN_HOSTS_FILE,
- constants.VNC_PASSWORD_FILE,
- ]
-
- if file_name not in allowed_files:
- logging.error("Filename passed to UploadFile not in allowed"
- " upload targets: '%s'", file_name)
- return False
+ if file_name not in _ALLOWED_UPLOAD_FILES:
+ _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'",
+ file_name)
raw_data = _Decompress(data)
utils.WriteFile(file_name, data=raw_data, mode=mode, uid=uid, gid=gid,
atime=atime, mtime=mtime)
- return True
def WriteSsconfFiles(values):
return detail
-def _OSOndiskVersion(name, os_dir):
+def _OSOndiskAPIVersion(os_dir):
"""Compute and return the API version of a given OS.
- This function will try to read the API version of the OS given by
- the 'name' parameter and residing in the 'os_dir' directory.
+ This function will try to read the API version of the OS residing in
+ the 'os_dir' directory.
- @type name: str
- @param name: the OS name we should look for
@type os_dir: str
- @param os_dir: the directory inwhich we should look for the OS
- @rtype: int or None
- @return:
- Either an integer denoting the version or None in the
- case when this is not a valid OS name.
- @raise errors.InvalidOS: if the OS cannot be found
+ @param os_dir: the directory in which we should look for the OS
+ @rtype: tuple
+ @return: tuple (status, data) with status denoting the validity and
+ data holding either the valid versions or an error message
"""
- api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
+ api_file = utils.PathJoin(os_dir, constants.OS_API_FILE)
try:
st = os.stat(api_file)
except EnvironmentError, err:
- raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
- " found (%s)" % _ErrnoOrStr(err))
+ return False, ("Required file '%s' not found under path %s: %s" %
+ (constants.OS_API_FILE, os_dir, _ErrnoOrStr(err)))
if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
- raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
- " a regular file")
+ return False, ("File '%s' in %s is not a regular file" %
+ (constants.OS_API_FILE, os_dir))
try:
- f = open(api_file)
- try:
- api_versions = f.readlines()
- finally:
- f.close()
+ api_versions = utils.ReadFile(api_file).splitlines()
except EnvironmentError, err:
- raise errors.InvalidOS(name, os_dir, "error while reading the"
- " API version (%s)" % _ErrnoOrStr(err))
+ return False, ("Error while reading the API version file at %s: %s" %
+ (api_file, _ErrnoOrStr(err)))
- api_versions = [version.strip() for version in api_versions]
try:
- api_versions = [int(version) for version in api_versions]
+ api_versions = [int(version.strip()) for version in api_versions]
except (TypeError, ValueError), err:
- raise errors.InvalidOS(name, os_dir,
- "API version is not integer (%s)" % str(err))
+ return False, ("API version(s) can't be converted to integer: %s" %
+ str(err))
- return api_versions
+ return True, api_versions
def DiagnoseOS(top_dirs=None):
search (if not given defaults to
L{constants.OS_SEARCH_PATH})
@rtype: list of L{objects.OS}
- @return: an OS object for each name in all the given
- directories
+ @return: a list of tuples (name, path, status, diagnose, variants)
+ for all (potential) OSes under all search paths, where:
+ - name is the (potential) OS name
+ - path is the full path to the OS
+ - status True/False is the validity of the OS
+ - diagnose is the error message for an invalid OS, otherwise empty
+ - variants is a list of supported OS variants, if any
"""
if top_dirs is None:
try:
f_names = utils.ListVisibleFiles(dir_name)
except EnvironmentError, err:
- logging.exception("Can't list the OS directory %s", dir_name)
+ logging.exception("Can't list the OS directory %s: %s", dir_name, err)
break
for name in f_names:
- try:
- os_inst = OSFromDisk(name, base_dir=dir_name)
- result.append(os_inst)
- except errors.InvalidOS, err:
- result.append(objects.OS.FromInvalidOS(err))
+ os_path = utils.PathJoin(dir_name, name)
+ status, os_inst = _TryOSFromDisk(name, base_dir=dir_name)
+ if status:
+ diagnose = ""
+ variants = os_inst.supported_variants
+ else:
+ diagnose = os_inst
+ variants = []
+ result.append((name, os_path, status, diagnose, variants))
return result
-def OSFromDisk(name, base_dir=None):
+def _TryOSFromDisk(name, base_dir=None):
"""Create an OS instance from disk.
This function will return an OS instance if the given name is a
- valid OS name. Otherwise, it will raise an appropriate
- L{errors.InvalidOS} exception, detailing why this is not a valid OS.
+ valid OS name.
@type base_dir: string
@keyword base_dir: Base directory containing OS installations.
Defaults to a search in all the OS_SEARCH_PATH dirs.
- @rtype: L{objects.OS}
- @return: the OS instance if we find a valid one
- @raise errors.InvalidOS: if we don't find a valid OS
+ @rtype: tuple
+ @return: success and either the OS instance if we find a valid one,
+ or error message
"""
if base_dir is None:
os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
- if os_dir is None:
- raise errors.InvalidOS(name, None, "OS dir not found in search path")
else:
- os_dir = os.path.sep.join([base_dir, name])
+ os_dir = utils.FindFile(name, [base_dir], os.path.isdir)
- api_versions = _OSOndiskVersion(name, os_dir)
+ if os_dir is None:
+ return False, "Directory for OS %s not found in search path" % name
- if constants.OS_API_VERSION not in api_versions:
- raise errors.InvalidOS(name, os_dir, "API version mismatch"
- " (found %s want %s)"
- % (api_versions, constants.OS_API_VERSION))
+ status, api_versions = _OSOndiskAPIVersion(os_dir)
+ if not status:
+ # push the error up
+ return status, api_versions
- # OS Scripts dictionary, we will populate it with the actual script names
- os_scripts = dict.fromkeys(constants.OS_SCRIPTS)
+ if not constants.OS_API_VERSIONS.intersection(api_versions):
+ return False, ("API version mismatch for path '%s': found %s, want %s." %
+ (os_dir, api_versions, constants.OS_API_VERSIONS))
- for script in os_scripts:
- os_scripts[script] = os.path.sep.join([os_dir, script])
+ # OS Files dictionary, we will populate it with the absolute path names
+ os_files = dict.fromkeys(constants.OS_SCRIPTS)
+
+ if max(api_versions) >= constants.OS_API_V15:
+ os_files[constants.OS_VARIANTS_FILE] = ''
+
+ for filename in os_files:
+ os_files[filename] = utils.PathJoin(os_dir, filename)
try:
- st = os.stat(os_scripts[script])
+ st = os.stat(os_files[filename])
except EnvironmentError, err:
- raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
- (script, _ErrnoOrStr(err)))
-
- if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
- raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
- script)
+ return False, ("File '%s' under path '%s' is missing (%s)" %
+ (filename, os_dir, _ErrnoOrStr(err)))
if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
- raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
- script)
+ return False, ("File '%s' under path '%s' is not a regular file" %
+ (filename, os_dir))
+
+ if filename in constants.OS_SCRIPTS:
+ if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
+ return False, ("File '%s' under path '%s' is not executable" %
+ (filename, os_dir))
+
+ variants = None
+ if constants.OS_VARIANTS_FILE in os_files:
+ variants_file = os_files[constants.OS_VARIANTS_FILE]
+ try:
+ variants = utils.ReadFile(variants_file).splitlines()
+ except EnvironmentError, err:
+ return False, ("Error while reading the OS variants file at %s: %s" %
+ (variants_file, _ErrnoOrStr(err)))
+ if not variants:
+ return False, ("No supported os variant found")
+
+ os_obj = objects.OS(name=name, path=os_dir,
+ create_script=os_files[constants.OS_SCRIPT_CREATE],
+ export_script=os_files[constants.OS_SCRIPT_EXPORT],
+ import_script=os_files[constants.OS_SCRIPT_IMPORT],
+ rename_script=os_files[constants.OS_SCRIPT_RENAME],
+ supported_variants=variants,
+ api_versions=api_versions)
+ return True, os_obj
+
+
+def OSFromDisk(name, base_dir=None):
+ """Create an OS instance from disk.
+
+ This function will return an OS instance if the given name is a
+ valid OS name. Otherwise, it will raise an appropriate
+ L{RPCFail} exception, detailing why this is not a valid OS.
+
+ This is just a wrapper over L{_TryOSFromDisk}, which doesn't raise
+ an exception but returns true/false status data.
+
+ @type base_dir: string
+ @keyword base_dir: Base directory containing OS installations.
+ Defaults to a search in all the OS_SEARCH_PATH dirs.
+ @rtype: L{objects.OS}
+ @return: the OS instance if we find a valid one
+ @raise RPCFail: if we don't find a valid OS
+
+ """
+ name_only = name.split("+", 1)[0]
+ status, payload = _TryOSFromDisk(name_only, base_dir)
+ if not status:
+ _Fail(payload)
+
+ return payload
- return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
- create_script=os_scripts[constants.OS_SCRIPT_CREATE],
- export_script=os_scripts[constants.OS_SCRIPT_EXPORT],
- import_script=os_scripts[constants.OS_SCRIPT_IMPORT],
- rename_script=os_scripts[constants.OS_SCRIPT_RENAME],
- api_versions=api_versions)
-def OSEnvironment(instance, debug=0):
+def OSEnvironment(instance, inst_os, debug=0):
"""Calculate the environment for an os script.
@type instance: L{objects.Instance}
@param instance: target instance for the os script run
+ @type inst_os: L{objects.OS}
+ @param inst_os: operating system for which the environment is being built
@type debug: integer
@param debug: debug level (0 or 1, for OS Api 10)
@rtype: dict
"""
result = {}
- result['OS_API_VERSION'] = '%d' % constants.OS_API_VERSION
+ api_version = \
+ max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions))
+ result['OS_API_VERSION'] = '%d' % api_version
result['INSTANCE_NAME'] = instance.name
result['INSTANCE_OS'] = instance.os
result['HYPERVISOR'] = instance.hypervisor
result['DISK_COUNT'] = '%d' % len(instance.disks)
result['NIC_COUNT'] = '%d' % len(instance.nics)
result['DEBUG_LEVEL'] = '%d' % debug
+ if api_version >= constants.OS_API_V15:
+ try:
+ variant = instance.os.split('+', 1)[1]
+ except IndexError:
+ variant = inst_os.supported_variants[0]
+ result['OS_VARIANT'] = variant
for idx, disk in enumerate(instance.disks):
real_disk = _RecursiveFindBD(disk)
if real_disk is None:
result['NIC_%d_MAC' % idx] = nic.mac
if nic.ip:
result['NIC_%d_IP' % idx] = nic.ip
- result['NIC_%d_BRIDGE' % idx] = nic.bridge
+ result['NIC_%d_MODE' % idx] = nic.nicparams[constants.NIC_MODE]
+ if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
+ result['NIC_%d_BRIDGE' % idx] = nic.nicparams[constants.NIC_LINK]
+ if nic.nicparams[constants.NIC_LINK]:
+ result['NIC_%d_LINK' % idx] = nic.nicparams[constants.NIC_LINK]
if constants.HV_NIC_TYPE in instance.hvparams:
result['NIC_%d_FRONTEND_TYPE' % idx] = \
instance.hvparams[constants.HV_NIC_TYPE]
"""
r_dev = _RecursiveFindBD(disk)
if r_dev is None:
- return False, "Cannot find block device %s" % (disk,)
+ _Fail("Cannot find block device %s", disk)
try:
r_dev.Grow(amount)
except errors.BlockDeviceError, err:
- return False, str(err)
-
- return True, None
+ _Fail("Failed to grow block device: %s", err, exc=True)
def BlockdevSnapshot(disk):
@return: snapshot disk path
"""
- if disk.children:
- if len(disk.children) == 1:
- # only one child, let's recurse on it
- return BlockdevSnapshot(disk.children[0])
- else:
- # more than one child, choose one that matches
- for child in disk.children:
- if child.size == disk.size:
- # return implies breaking the loop
- return BlockdevSnapshot(child)
+ if disk.dev_type == constants.LD_DRBD8:
+ if not disk.children:
+ _Fail("DRBD device '%s' without backing storage cannot be snapshotted",
+ disk.unique_id)
+ return BlockdevSnapshot(disk.children[0])
elif disk.dev_type == constants.LD_LV:
r_dev = _RecursiveFindBD(disk)
if r_dev is not None:
+ # FIXME: choose a saner value for the snapshot size
# let's stay on the safe side and ask for the full size, for now
return r_dev.Snapshot(disk.size)
else:
- return None
+ _Fail("Cannot find block device %s", disk)
else:
- raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
- " '%s' of type '%s'" %
- (disk.unique_id, disk.dev_type))
+ _Fail("Cannot snapshot non-lvm block device '%s' of type '%s'",
+ disk.unique_id, disk.dev_type)
-def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
+def ExportSnapshot(disk, dest_node, instance, cluster_name, idx, debug):
"""Export a block device snapshot to a remote node.
@type disk: L{objects.Disk}
@type idx: int
@param idx: the index of the disk in the instance's disk list,
used to export to the OS scripts environment
- @rtype: boolean
- @return: the success of the operation
+ @type debug: integer
+ @param debug: debug level, passed to the OS scripts
+ @rtype: None
"""
- export_env = OSEnvironment(instance)
-
inst_os = OSFromDisk(instance.os)
+ export_env = OSEnvironment(instance, inst_os, debug)
+
export_script = inst_os.export_script
- logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
- instance.name, int(time.time()))
+ logfile = _InstanceLogName("export", inst_os.name, instance.name)
if not os.path.exists(constants.LOG_OS_DIR):
os.mkdir(constants.LOG_OS_DIR, 0750)
real_disk = _RecursiveFindBD(disk)
if real_disk is None:
- raise errors.BlockDeviceError("Block device '%s' is not set up" %
- str(disk))
+ _Fail("Block device '%s' is not set up", disk)
+
real_disk.Open()
export_env['EXPORT_DEVICE'] = real_disk.dev_path
export_env['EXPORT_INDEX'] = str(idx)
- destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
+ destdir = utils.PathJoin(constants.EXPORT_DIR, instance.name + ".new")
destfile = disk.physical_id[1]
# the target command is built out of three individual commands,
# which are joined by pipes; we check each individual command for
# valid parameters
- expcmd = utils.BuildShellCmd("cd %s; %s 2>%s", inst_os.path,
- export_script, logfile)
+ expcmd = utils.BuildShellCmd("set -e; set -o pipefail; cd %s; %s 2>%s",
+ inst_os.path, export_script, logfile)
comprcmd = "gzip"
- destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
- destdir, destdir, destfile)
+ destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s",
+ destdir, utils.PathJoin(destdir, destfile))
remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node,
constants.GANETI_RUNAS,
destcmd)
# all commands have been checked, so we're safe to combine them
command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
- result = utils.RunCmd(command, env=export_env)
+ result = utils.RunCmd(["bash", "-c", command], env=export_env)
if result.failed:
- logging.error("os snapshot export command '%s' returned error: %s"
- " output: %s", command, result.fail_reason, result.output)
- return False
-
- return True
+ _Fail("OS snapshot export command '%s' returned error: %s"
+ " output: %s", command, result.fail_reason, result.output)
def FinalizeExport(instance, snap_disks):
@param snap_disks: list of snapshot block devices, which
will be used to get the actual name of the dump file
- @rtype: boolean
- @return: the success of the operation
+ @rtype: None
"""
- destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
- finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
+ destdir = utils.PathJoin(constants.EXPORT_DIR, instance.name + ".new")
+ finaldestdir = utils.PathJoin(constants.EXPORT_DIR, instance.name)
config = objects.SerializableConfigParser()
config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_total)
- utils.WriteFile(os.path.join(destdir, constants.EXPORT_CONF_FILE),
+ utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE),
data=config.Dumps())
shutil.rmtree(finaldestdir, True)
shutil.move(destdir, finaldestdir)
- return True
-
def ExportInfo(dest):
"""Get export configuration information.
export info
"""
- cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
+ cff = utils.PathJoin(dest, constants.EXPORT_CONF_FILE)
config = objects.SerializableConfigParser()
config.read(cff)
if (not config.has_section(constants.INISECT_EXP) or
not config.has_section(constants.INISECT_INS)):
- return None
+ _Fail("Export info file doesn't have the required fields")
- return config
+ return config.Dumps()
-def ImportOSIntoInstance(instance, src_node, src_images, cluster_name):
+def ImportOSIntoInstance(instance, src_node, src_images, cluster_name, debug):
"""Import an os image into an instance.
@type instance: L{objects.Instance}
@param src_node: source node for the disk images
@type src_images: list of string
@param src_images: absolute paths of the disk images
+ @type debug: integer
+ @param debug: debug level, passed to the OS scripts
@rtype: list of boolean
@return: each boolean represent the success of importing the n-th disk
"""
- import_env = OSEnvironment(instance)
inst_os = OSFromDisk(instance.os)
+ import_env = OSEnvironment(instance, inst_os, debug)
import_script = inst_os.import_script
- logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
- instance.name, int(time.time()))
+ logfile = _InstanceLogName("import", instance.os, instance.name)
if not os.path.exists(constants.LOG_OS_DIR):
os.mkdir(constants.LOG_OS_DIR, 0750)
logging.error("Disk import command '%s' returned error: %s"
" output: %s", command, result.fail_reason,
result.output)
- final_result.append(False)
- else:
- final_result.append(True)
- else:
- final_result.append(True)
+ final_result.append("error importing disk %d: %s, %s" %
+ (idx, result.fail_reason, result.output[-100]))
- return final_result
+ if final_result:
+ _Fail("; ".join(final_result), log=False)
def ListExports():
if os.path.isdir(constants.EXPORT_DIR):
return utils.ListVisibleFiles(constants.EXPORT_DIR)
else:
- return []
+ _Fail("No exports directory")
def RemoveExport(export):
@type export: str
@param export: the name of the export to remove
- @rtype: boolean
- @return: the success of the operation
+ @rtype: None
"""
- target = os.path.join(constants.EXPORT_DIR, export)
+ target = utils.PathJoin(constants.EXPORT_DIR, export)
- shutil.rmtree(target)
- # TODO: catch some of the relevant exceptions and provide a pretty
- # error message if rmtree fails.
-
- return True
+ try:
+ shutil.rmtree(target)
+ except EnvironmentError, err:
+ _Fail("Error while removing the export: %s", err, exc=True)
def BlockdevRename(devlist):
@return: True if all renames succeeded, False otherwise
"""
+ msgs = []
result = True
for disk, unique_id in devlist:
dev = _RecursiveFindBD(disk)
if dev is None:
+ msgs.append("Can't find device %s in rename" % str(disk))
result = False
continue
try:
# cache? for now, we only lose lvm data when we rename, which
# is less critical than DRBD or MD
except errors.BlockDeviceError, err:
+ msgs.append("Can't rename device '%s' to '%s': %s" %
+ (dev, unique_id, err))
logging.exception("Can't rename device '%s' to '%s'", dev, unique_id)
result = False
- return result
+ if not result:
+ _Fail("; ".join(msgs))
def _TransformFileStorageDir(file_storage_dir):
@return: the normalized path if valid, None otherwise
"""
+ if not constants.ENABLE_FILE_STORAGE:
+ _Fail("File storage disabled at configure time")
cfg = _GetConfig()
file_storage_dir = os.path.normpath(file_storage_dir)
base_file_storage_dir = cfg.GetFileStorageDir()
if (not os.path.commonprefix([file_storage_dir, base_file_storage_dir]) ==
base_file_storage_dir):
- logging.error("file storage directory '%s' is not under base file"
- " storage directory '%s'",
- file_storage_dir, base_file_storage_dir)
- return None
+ _Fail("File storage directory '%s' is not under base file"
+ " storage directory '%s'", file_storage_dir, base_file_storage_dir)
return file_storage_dir
"""
file_storage_dir = _TransformFileStorageDir(file_storage_dir)
- result = True,
- if not file_storage_dir:
- result = False,
+ if os.path.exists(file_storage_dir):
+ if not os.path.isdir(file_storage_dir):
+ _Fail("Specified storage dir '%s' is not a directory",
+ file_storage_dir)
else:
- if os.path.exists(file_storage_dir):
- if not os.path.isdir(file_storage_dir):
- logging.error("'%s' is not a directory", file_storage_dir)
- result = False,
- else:
- try:
- os.makedirs(file_storage_dir, 0750)
- except OSError, err:
- logging.error("Cannot create file storage directory '%s': %s",
- file_storage_dir, err)
- result = False,
- return result
+ try:
+ os.makedirs(file_storage_dir, 0750)
+ except OSError, err:
+ _Fail("Cannot create file storage directory '%s': %s",
+ file_storage_dir, err, exc=True)
def RemoveFileStorageDir(file_storage_dir):
@param file_storage_dir: the directory we should cleanup
@rtype: tuple (success,)
@return: tuple of one element, C{success}, denoting
- whether the operation was successfull
+ whether the operation was successful
"""
file_storage_dir = _TransformFileStorageDir(file_storage_dir)
- result = True,
- if not file_storage_dir:
- result = False,
- else:
- if os.path.exists(file_storage_dir):
- if not os.path.isdir(file_storage_dir):
- logging.error("'%s' is not a directory", file_storage_dir)
- result = False,
- # deletes dir only if empty, otherwise we want to return False
- try:
- os.rmdir(file_storage_dir)
- except OSError, err:
- logging.exception("Cannot remove file storage directory '%s'",
- file_storage_dir)
- result = False,
- return result
+ if os.path.exists(file_storage_dir):
+ if not os.path.isdir(file_storage_dir):
+ _Fail("Specified Storage directory '%s' is not a directory",
+ file_storage_dir)
+ # deletes dir only if empty, otherwise we want to fail the rpc call
+ try:
+ os.rmdir(file_storage_dir)
+ except OSError, err:
+ _Fail("Cannot remove file storage directory '%s': %s",
+ file_storage_dir, err)
def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
"""
old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
- result = True,
- if not old_file_storage_dir or not new_file_storage_dir:
- result = False,
- else:
- if not os.path.exists(new_file_storage_dir):
- if os.path.isdir(old_file_storage_dir):
- try:
- os.rename(old_file_storage_dir, new_file_storage_dir)
- except OSError, err:
- logging.exception("Cannot rename '%s' to '%s'",
- old_file_storage_dir, new_file_storage_dir)
- result = False,
- else:
- logging.error("'%s' is not a directory", old_file_storage_dir)
- result = False,
+ if not os.path.exists(new_file_storage_dir):
+ if os.path.isdir(old_file_storage_dir):
+ try:
+ os.rename(old_file_storage_dir, new_file_storage_dir)
+ except OSError, err:
+ _Fail("Cannot rename '%s' to '%s': %s",
+ old_file_storage_dir, new_file_storage_dir, err)
else:
- if os.path.exists(old_file_storage_dir):
- logging.error("Cannot rename '%s' to '%s'. Both locations exist.",
- old_file_storage_dir, new_file_storage_dir)
- result = False,
- return result
+ _Fail("Specified storage dir '%s' is not a directory",
+ old_file_storage_dir)
+ else:
+ if os.path.exists(old_file_storage_dir):
+ _Fail("Cannot rename '%s' to '%s': both locations exist",
+ old_file_storage_dir, new_file_storage_dir)
-def _IsJobQueueFile(file_name):
+def _EnsureJobQueueFile(file_name):
"""Checks whether the given filename is in the queue directory.
@type file_name: str
@param file_name: the file name we should check
- @rtype: boolean
- @return: whether the file is under the queue directory
+ @rtype: None
+ @raises RPCFail: if the file is not valid
"""
queue_dir = os.path.normpath(constants.QUEUE_DIR)
result = (os.path.commonprefix([queue_dir, file_name]) == queue_dir)
if not result:
- logging.error("'%s' is not a file in the queue directory",
- file_name)
-
- return result
+ _Fail("Passed job queue file '%s' does not belong to"
+ " the queue directory '%s'", file_name, queue_dir)
def JobQueueUpdate(file_name, content):
@return: the success of the operation
"""
- if not _IsJobQueueFile(file_name):
- return False
+ _EnsureJobQueueFile(file_name)
# Write and replace the file atomically
utils.WriteFile(file_name, data=_Decompress(content))
- return True
-
def JobQueueRename(old, new):
"""Renames a job queue file.
@param old: the old (actual) file name
@type new: str
@param new: the desired file name
- @rtype: boolean
- @return: the success of the operation
+ @rtype: tuple
+ @return: the success of the operation and payload
"""
- if not (_IsJobQueueFile(old) and _IsJobQueueFile(new)):
- return False
+ _EnsureJobQueueFile(old)
+ _EnsureJobQueueFile(new)
utils.RenameFile(old, new, mkdir=True)
- return True
-
def JobQueueSetDrainFlag(drain_flag):
"""Set the drain flag for the queue.
@type drain_flag: boolean
@param drain_flag: if True, will set the drain flag, otherwise reset it.
- @rtype: boolean
- @return: always True
+ @rtype: tuple
+ @return: always True, None
@warning: the function always returns True
"""
else:
utils.RemoveFile(constants.JOB_QUEUE_DRAIN_FILE)
- return True
-
def BlockdevClose(instance_name, disks):
"""Closes the given block devices.
for cf in disks:
rd = _RecursiveFindBD(cf)
if rd is None:
- return (False, "Can't find device %s" % cf)
+ _Fail("Can't find device %s", cf)
bdevs.append(rd)
msg = []
except errors.BlockDeviceError, err:
msg.append(str(err))
if msg:
- return (False, "Can't make devices secondary: %s" % ",".join(msg))
+ _Fail("Can't make devices secondary: %s", ",".join(msg))
else:
if instance_name:
_RemoveBlockDevLinks(instance_name, disks)
- return (True, "All devices secondary")
def ValidateHVParams(hvname, hvparams):
@param hvname: the hypervisor name
@type hvparams: dict
@param hvparams: the hypervisor parameters to be validated
- @rtype: tuple (success, message)
- @return: a tuple of success and message, where success
- indicates the succes of the operation, and message
- which will contain the error details in case we
- failed
+ @rtype: None
"""
try:
hv_type = hypervisor.GetHypervisor(hvname)
hv_type.ValidateParameters(hvparams)
- return (True, "Validation passed")
except errors.HypervisorError, err:
- return (False, str(err))
+ _Fail(str(err), log=False)
def DemoteFromMC():
# try to ensure we're not the master by mistake
master, myself = ssconf.GetMasterAndMyself()
if master == myself:
- return (False, "ssconf status shows I'm the master node, will not demote")
- pid_file = utils.DaemonPidFileName(constants.MASTERD_PID)
- if utils.IsProcessAlive(utils.ReadPidFile(pid_file)):
- return (False, "The master daemon is running, will not demote")
+ _Fail("ssconf status shows I'm the master node, will not demote")
+
+ result = utils.RunCmd([constants.DAEMON_UTIL, "check", constants.MASTERD])
+ if not result.failed:
+ _Fail("The master daemon is running, will not demote")
+
try:
if os.path.isfile(constants.CLUSTER_CONF_FILE):
utils.CreateBackup(constants.CLUSTER_CONF_FILE)
except EnvironmentError, err:
if err.errno != errno.ENOENT:
- return (False, "Error while backing up cluster file: %s" % str(err))
+ _Fail("Error while backing up cluster file: %s", err, exc=True)
+
utils.RemoveFile(constants.CLUSTER_CONF_FILE)
- return (True, "Done")
def _FindDisks(nodes_ip, disks):
for cf in disks:
rd = _RecursiveFindBD(cf)
if rd is None:
- return (False, "Can't find device %s" % cf)
+ _Fail("Can't find device %s", cf)
bdevs.append(rd)
- return (True, bdevs)
+ return bdevs
def DrbdDisconnectNet(nodes_ip, disks):
"""Disconnects the network on a list of drbd devices.
"""
- status, bdevs = _FindDisks(nodes_ip, disks)
- if not status:
- return status, bdevs
+ bdevs = _FindDisks(nodes_ip, disks)
# disconnect disks
for rd in bdevs:
try:
rd.DisconnectNet()
except errors.BlockDeviceError, err:
- logging.exception("Failed to go into standalone mode")
- return (False, "Can't change network configuration: %s" % str(err))
- return (True, "All disks are now disconnected")
+ _Fail("Can't change network configuration to standalone mode: %s",
+ err, exc=True)
def DrbdAttachNet(nodes_ip, disks, instance_name, multimaster):
"""Attaches the network on a list of drbd devices.
"""
- status, bdevs = _FindDisks(nodes_ip, disks)
- if not status:
- return status, bdevs
+ bdevs = _FindDisks(nodes_ip, disks)
if multimaster:
for idx, rd in enumerate(bdevs):
try:
_SymlinkBlockDev(instance_name, rd.dev_path, idx)
except EnvironmentError, err:
- return (False, "Can't create symlink: %s" % str(err))
+ _Fail("Can't create symlink: %s", err)
# reconnect disks, switch to new master configuration and if
# needed primary mode
for rd in bdevs:
try:
rd.AttachNet(multimaster)
except errors.BlockDeviceError, err:
- return (False, "Can't change network configuration: %s" % str(err))
+ _Fail("Can't change network configuration: %s", err)
+
# wait until the disks are connected; we need to retry the re-attach
# if the device becomes standalone, as this might happen if the one
# node disconnects and reconnects in a different mode before the
# other node reconnects; in this case, one or both of the nodes will
# decide it has wrong configuration and switch to standalone
- RECONNECT_TIMEOUT = 2 * 60
- sleep_time = 0.100 # start with 100 miliseconds
- timeout_limit = time.time() + RECONNECT_TIMEOUT
- while time.time() < timeout_limit:
+
+ def _Attach():
all_connected = True
+
for rd in bdevs:
stats = rd.GetProcStatus()
- if not (stats.is_connected or stats.is_in_resync):
- all_connected = False
+
+ all_connected = (all_connected and
+ (stats.is_connected or stats.is_in_resync))
+
if stats.is_standalone:
# peer had different config info and this node became
# standalone, even though this should not happen with the
try:
rd.AttachNet(multimaster)
except errors.BlockDeviceError, err:
- return (False, "Can't change network configuration: %s" % str(err))
- if all_connected:
- break
- time.sleep(sleep_time)
- sleep_time = min(5, sleep_time * 1.5)
- if not all_connected:
- return (False, "Timeout in disk reconnecting")
+ _Fail("Can't change network configuration: %s", err)
+
+ if not all_connected:
+ raise utils.RetryAgain()
+
+ try:
+ # Start with a delay of 100 milliseconds and go up to 5 seconds
+ utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60)
+ except utils.RetryTimeout:
+ _Fail("Timeout in disk reconnecting")
+
if multimaster:
# change to primary mode
for rd in bdevs:
try:
rd.Open()
except errors.BlockDeviceError, err:
- return (False, "Can't change to primary mode: %s" % str(err))
- if multimaster:
- msg = "multi-master and primary"
- else:
- msg = "single-master"
- return (True, "Disks are now configured as %s" % msg)
+ _Fail("Can't change to primary mode: %s", err)
def DrbdWaitSync(nodes_ip, disks):
"""Wait until DRBDs have synchronized.
"""
- status, bdevs = _FindDisks(nodes_ip, disks)
- if not status:
- return status, bdevs
+ def _helper(rd):
+ stats = rd.GetProcStatus()
+ if not (stats.is_connected or stats.is_in_resync):
+ raise utils.RetryAgain()
+ return stats
+
+ bdevs = _FindDisks(nodes_ip, disks)
min_resync = 100
alldone = True
- failure = False
for rd in bdevs:
- stats = rd.GetProcStatus()
- if not (stats.is_connected or stats.is_in_resync):
- failure = True
- break
+ try:
+ # poll each second for 15 seconds
+ stats = utils.Retry(_helper, 1, 15, args=[rd])
+ except utils.RetryTimeout:
+ stats = rd.GetProcStatus()
+ # last check
+ if not (stats.is_connected or stats.is_in_resync):
+ _Fail("DRBD device %s is not in sync: stats=%s", rd, stats)
alldone = alldone and (not stats.is_in_resync)
if stats.sync_percent is not None:
min_resync = min(min_resync, stats.sync_percent)
- return (not failure, (alldone, min_resync))
+
+ return (alldone, min_resync)
+
+
+def PowercycleNode(hypervisor_type):
+ """Hard-powercycle the node.
+
+ Because we need to return first, and schedule the powercycle in the
+ background, we won't be able to report failures nicely.
+
+ """
+ hyper = hypervisor.GetHypervisor(hypervisor_type)
+ try:
+ pid = os.fork()
+ except OSError:
+ # if we can't fork, we'll pretend that we're in the child process
+ pid = 0
+ if pid > 0:
+ return "Reboot scheduled in 5 seconds"
+ time.sleep(5)
+ hyper.PowercycleNode()
class HooksRunner(object):
on the master side.
"""
- RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
-
def __init__(self, hooks_base_dir=None):
"""Constructor for hooks runner.
"""
if hooks_base_dir is None:
hooks_base_dir = constants.HOOKS_BASE_DIR
- self._BASE_DIR = hooks_base_dir
-
- @staticmethod
- def ExecHook(script, env):
- """Exec one hook script.
-
- @type script: str
- @param script: the full path to the script
- @type env: dict
- @param env: the environment with which to exec the script
- @rtype: tuple (success, message)
- @return: a tuple of success and message, where success
- indicates the succes of the operation, and message
- which will contain the error details in case we
- failed
-
- """
- # exec the process using subprocess and log the output
- fdstdin = None
- try:
- fdstdin = open("/dev/null", "r")
- child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT, close_fds=True,
- shell=False, cwd="/", env=env)
- output = ""
- try:
- output = child.stdout.read(4096)
- child.stdout.close()
- except EnvironmentError, err:
- output += "Hook script error: %s" % str(err)
-
- while True:
- try:
- result = child.wait()
- break
- except EnvironmentError, err:
- if err.errno == errno.EINTR:
- continue
- raise
- finally:
- # try not to leak fds
- for fd in (fdstdin, ):
- if fd is not None:
- try:
- fd.close()
- except EnvironmentError, err:
- # just log the error
- #logging.exception("Error while closing fd %s", fd)
- pass
-
- return result == 0, utils.SafeEncode(output.strip())
+ # _BASE_DIR does not follow the naming convention for attributes, but
+ # we use it like a constant, hence the pylint override below
+ self._BASE_DIR = hooks_base_dir # pylint: disable-msg=C0103
def RunHooks(self, hpath, phase, env):
"""Run the scripts in the hooks directory.
elif phase == constants.HOOKS_PHASE_POST:
suffix = "post"
else:
- raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
- rr = []
+ _Fail("Unknown hooks phase '%s'", phase)
+
subdir = "%s-%s.d" % (hpath, suffix)
- dir_name = "%s/%s" % (self._BASE_DIR, subdir)
- try:
- dir_contents = utils.ListVisibleFiles(dir_name)
- except OSError, err:
- # FIXME: must log output in case of failures
- return rr
-
- # we use the standard python sort order,
- # so 00name is the recommended naming scheme
- dir_contents.sort()
- for relname in dir_contents:
- fname = os.path.join(dir_name, relname)
- if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
- self.RE_MASK.match(relname) is not None):
+ dir_name = utils.PathJoin(self._BASE_DIR, subdir)
+
+ results = []
+
+ if not os.path.isdir(dir_name):
+ # for non-existing/non-dirs, we simply exit instead of logging a
+ # warning at every operation
+ return results
+
+ runparts_results = utils.RunParts(dir_name, env=env, reset_env=True)
+
+ for (relname, relstatus, runresult) in runparts_results:
+ if relstatus == constants.RUNPARTS_SKIP:
rrval = constants.HKR_SKIP
output = ""
- else:
- result, output = self.ExecHook(fname, env)
- if not result:
+ elif relstatus == constants.RUNPARTS_ERR:
+ rrval = constants.HKR_FAIL
+ output = "Hook script execution error: %s" % runresult
+ elif relstatus == constants.RUNPARTS_RUN:
+ if runresult.failed:
rrval = constants.HKR_FAIL
else:
rrval = constants.HKR_SUCCESS
- rr.append(("%s/%s" % (subdir, relname), rrval, output))
+ output = utils.SafeEncode(runresult.output.strip())
+ results.append(("%s/%s" % (subdir, relname), rrval, output))
- return rr
+ return results
class IAllocatorRunner(object):
the master side.
"""
- def Run(self, name, idata):
+ @staticmethod
+ def Run(name, idata):
"""Run an iallocator script.
@type name: str
@param idata: the allocator input data
@rtype: tuple
- @return: four element tuple of:
- - run status (one of the IARUN_ constants)
- - stdout
- - stderr
- - fail reason (as from L{utils.RunResult})
+ @return: two element tuple of:
+ - status
+ - either error message or stdout of allocator (for success)
"""
alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
os.path.isfile)
if alloc_script is None:
- return (constants.IARUN_NOTFOUND, None, None, None)
+ _Fail("iallocator module '%s' not found in the search path", name)
fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
try:
os.close(fd)
result = utils.RunCmd([alloc_script, fin_name])
if result.failed:
- return (constants.IARUN_FAILURE, result.stdout, result.stderr,
- result.fail_reason)
+ _Fail("iallocator module '%s' failed: %s, output '%s'",
+ name, result.fail_reason, result.output)
finally:
os.unlink(fin_name)
- return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
+ return result.stdout
class DevCacheManager(object):
if dev_path.startswith(cls._DEV_PREFIX):
dev_path = dev_path[len(cls._DEV_PREFIX):]
dev_path = dev_path.replace("/", "_")
- fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
+ fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path)
return fpath
@classmethod
try:
utils.WriteFile(fpath, data=fdata)
except EnvironmentError, err:
- logging.exception("Can't update bdev cache for %s", dev_path)
+ logging.exception("Can't update bdev cache for %s: %s", dev_path, err)
@classmethod
def RemoveCache(cls, dev_path):
try:
utils.RemoveFile(fpath)
except EnvironmentError, err:
- logging.exception("Can't update bdev cache for %s", dev_path)
+ logging.exception("Can't update bdev cache for %s: %s", dev_path, err)