# 02110-1301, USA.
-"""Functions used by the node daemon"""
+"""Functions used by the node daemon
+
+@var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
+ the L{UploadFile} function
+
+"""
import os
utils.RemoveFile(full_name)
+def _BuildUploadFileList():
+  """Build the list of allowed upload files.
+
+  This is abstracted so that it's built only once at module import time.
+
+  @rtype: frozenset
+  @return: the paths (taken from L{constants}) which L{UploadFile}
+      will accept as write targets
+
+  """
+  return frozenset([
+    constants.CLUSTER_CONF_FILE,
+    constants.ETC_HOSTS,
+    constants.SSH_KNOWN_HOSTS_FILE,
+    constants.VNC_PASSWORD_FILE,
+    ])
+
+
+# computed once at import; UploadFile checks candidate filenames against it
+_ALLOWED_UPLOAD_FILES = _BuildUploadFileList()
+
+
def JobQueuePurge():
"""Removes job queue files and archived jobs.
master_netdev = cfg.GetMasterNetdev()
master_ip = cfg.GetMasterIP()
master_node = cfg.GetMasterNode()
- except errors.ConfigurationError, err:
+ except errors.ConfigurationError:
logging.exception("Cluster configuration incomplete")
return (None, None, None)
return (master_netdev, master_ip, master_node)
-def StartMaster(start_daemons):
+def StartMaster(start_daemons, no_voting):
"""Activate local node as master node.
The function will always try activate the IP address of the master
@type start_daemons: boolean
@param start_daemons: whther to also start the master
daemons (ganeti-masterd and ganeti-rapi)
+ @type no_voting: boolean
+ @param no_voting: whether to start ganeti-masterd without a node vote
+ (if start_daemons is True), but still non-interactively
@rtype: None
"""
# and now start the master and rapi daemons
if start_daemons:
- for daemon in 'ganeti-masterd', 'ganeti-rapi':
- result = utils.RunCmd([daemon])
+ daemons_params = {
+ 'ganeti-masterd': [],
+ 'ganeti-rapi': [],
+ }
+ if no_voting:
+ daemons_params['ganeti-masterd'].append('--no-voting')
+ daemons_params['ganeti-masterd'].append('--yes-do-it')
+ for daemon in daemons_params:
+ cmd = [daemon]
+ cmd.extend(daemons_params[daemon])
+ result = utils.RunCmd(cmd)
if result.failed:
logging.error("Can't start daemon %s: %s", daemon, result.output)
ok = False
def GetNodeInfo(vgname, hypervisor_type):
- """Gives back a hash with different informations about the node.
+ """Gives back a hash with different information about the node.
@type vgname: C{string}
@param vgname: the name of the volume group to ask for disk space information
if constants.NV_DRBDLIST in what:
try:
used_minors = bdev.DRBD8.GetUsedDevs().keys()
- except errors.BlockDeviceError:
+ except errors.BlockDeviceError, err:
logging.warning("Can't get used minors list", exc_info=True)
- used_minors = []
+ used_minors = str(err)
result[constants.NV_DRBDLIST] = used_minors
return result
try:
names = hypervisor.GetHypervisor(hname).ListInstances()
results.extend(names)
- except errors.HypervisorError, err:
+ except errors.HypervisorError:
logging.exception("Error enumerating instances for hypevisor %s", hname)
- # FIXME: should we somehow not propagate this to the master?
raise
return results
def GetInstanceInfo(instance, hname):
- """Gives back the informations about an instance as a dictionary.
+ """Gives back the information about an instance as a dictionary.
@type instance: string
@param instance: the instance name
'state': state,
'time': times,
}
- if name in output and output[name] != value:
- raise errors.HypervisorError("Instance %s running duplicate"
- " with different parameters" % name)
+ if name in output:
+ # we only check static parameters, like memory and vcpus,
+ # and not state and time which can change between the
+ # invocations of the different hypervisors
+ for key in 'memory', 'vcpus':
+ if value[key] != output[name][key]:
+ raise errors.HypervisorError("Instance %s is running twice"
+ " with different parameters" % name)
output[name] = value
return output
def _GetVGInfo(vg_name):
- """Get informations about the volume group.
+ """Get information about the volume group.
@type vg_name: str
@param vg_name: the volume group which we query
return block_devices
-def StartInstance(instance, extra_args):
+def StartInstance(instance):
"""Start an instance.
@type instance: L{objects.Instance}
try:
block_devices = _GatherAndLinkBlockDevs(instance)
hyper = hypervisor.GetHypervisor(instance.hypervisor)
- hyper.StartInstance(instance, block_devices, extra_args)
+ hyper.StartInstance(instance, block_devices)
except errors.BlockDeviceError, err:
logging.exception("Failed to start instance")
return (False, "Block device error: %s" % str(err))
return (True, "Instance started successfully")
-def ShutdownInstance(instance):
+def InstanceShutdown(instance):
"""Shut an instance down.
@note: this functions uses polling with a hardcoded timeout.
running_instances = GetInstanceList([hv_name])
if instance.name not in running_instances:
- return True
+ return (True, "Instance already stopped")
hyper = hypervisor.GetHypervisor(hv_name)
try:
hyper.StopInstance(instance)
except errors.HypervisorError, err:
- logging.error("Failed to stop instance: %s" % err)
- return False
+ msg = "Failed to stop instance %s: %s" % (instance.name, err)
+ logging.error(msg)
+ return (False, msg)
# test every 10secs for 2min
time.sleep(1)
- for dummy in range(11):
+ for _ in range(11):
if instance.name not in GetInstanceList([hv_name]):
break
time.sleep(10)
try:
hyper.StopInstance(instance, force=True)
except errors.HypervisorError, err:
- logging.exception("Failed to stop instance: %s" % err)
- return False
+ msg = "Failed to force stop instance %s: %s" % (instance.name, err)
+ logging.error(msg)
+ return (False, msg)
time.sleep(1)
if instance.name in GetInstanceList([hv_name]):
- logging.error("Could not shutdown instance '%s' even by destroy",
- instance.name)
- return False
+ msg = ("Could not shutdown instance %s even by destroy" %
+ instance.name)
+ logging.error(msg)
+ return (False, msg)
_RemoveBlockDevLinks(instance.name, instance.disks)
- return True
+ return (True, "Instance has been shutdown successfully")
-def RebootInstance(instance, reboot_type, extra_args):
+def InstanceReboot(instance, reboot_type):
"""Reboot an instance.
@type instance: L{objects.Instance}
instance OS, do not recreate the VM
- L{constants.INSTANCE_REBOOT_HARD}: tear down and
restart the VM (at the hypervisor level)
- - the other reboot type (L{constants.INSTANCE_REBOOT_HARD})
- is not accepted here, since that mode is handled
- differently
+ - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is
+ not accepted here, since that mode is handled differently, in
+ cmdlib, and translates into full stop and start of the
+ instance (instead of a call_instance_reboot RPC)
@rtype: boolean
@return: the success of the operation
running_instances = GetInstanceList([instance.hypervisor])
if instance.name not in running_instances:
- logging.error("Cannot reboot instance that is not running")
- return False
+ msg = "Cannot reboot instance %s that is not running" % instance.name
+ logging.error(msg)
+ return (False, msg)
hyper = hypervisor.GetHypervisor(instance.hypervisor)
if reboot_type == constants.INSTANCE_REBOOT_SOFT:
try:
hyper.RebootInstance(instance)
except errors.HypervisorError, err:
- logging.exception("Failed to soft reboot instance")
- return False
+ msg = "Failed to soft reboot instance %s: %s" % (instance.name, err)
+ logging.error(msg)
+ return (False, msg)
elif reboot_type == constants.INSTANCE_REBOOT_HARD:
try:
- ShutdownInstance(instance)
- StartInstance(instance, extra_args)
+ stop_result = InstanceShutdown(instance)
+ if not stop_result[0]:
+ return stop_result
+ return StartInstance(instance)
except errors.HypervisorError, err:
- logging.exception("Failed to hard reboot instance")
- return False
+ msg = "Failed to hard reboot instance %s: %s" % (instance.name, err)
+ logging.error(msg)
+ return (False, msg)
else:
- raise errors.ParameterError("reboot_type invalid")
+ return (False, "Invalid reboot_type received: %s" % (reboot_type,))
- return True
+ return (True, "Reboot successful")
def MigrationInfo(instance):
msg = "Failed to accept instance"
logging.exception(msg)
return (False, '%s: %s' % (msg, err))
- return (True, "Accept successfull")
+ return (True, "Accept successful")
def FinalizeMigration(instance, info, success):
msg = "Failed to migrate instance"
logging.exception(msg)
return (False, "%s: %s" % (msg, err))
- return (True, "Migration successfull")
+ return (True, "Migration successful")
-def CreateBlockDevice(disk, size, owner, on_primary, info):
+def BlockdevCreate(disk, size, owner, on_primary, info):
"""Creates a block device for an instance.
@type disk: L{objects.Disk}
clist = []
if disk.children:
for child in disk.children:
- crdev = _RecursiveAssembleBD(child, owner, on_primary)
+ try:
+ crdev = _RecursiveAssembleBD(child, owner, on_primary)
+ except errors.BlockDeviceError, err:
+ errmsg = "Can't assemble device %s: %s" % (child, err)
+ logging.error(errmsg)
+ return False, errmsg
if on_primary or disk.AssembleOnSecondary():
# we need the children open in case the device itself has to
# be assembled
- crdev.Open()
+ try:
+ crdev.Open()
+ except errors.BlockDeviceError, err:
+ errmsg = "Can't make child '%s' read-write: %s" % (child, err)
+ logging.error(errmsg)
+ return False, errmsg
clist.append(crdev)
try:
- device = bdev.Create(disk.dev_type, disk.physical_id, clist, size)
- except errors.GenericError, err:
+ device = bdev.Create(disk.dev_type, disk.physical_id, clist, disk.size)
+ except errors.BlockDeviceError, err:
return False, "Can't create block device: %s" % str(err)
if on_primary or disk.AssembleOnSecondary():
- if not device.Assemble():
- errorstring = "Can't assemble device after creation, very unusual event"
- logging.error(errorstring)
- return False, errorstring
+ try:
+ device.Assemble()
+ except errors.BlockDeviceError, err:
+ errmsg = ("Can't assemble device after creation, very"
+ " unusual event: %s" % str(err))
+ logging.error(errmsg)
+ return False, errmsg
device.SetSyncSpeed(constants.SYNC_SPEED)
if on_primary or disk.OpenOnSecondary():
- device.Open(force=True)
+ try:
+ device.Open(force=True)
+ except errors.BlockDeviceError, err:
+ errmsg = ("Can't make device r/w after creation, very"
+ " unusual event: %s" % str(err))
+ logging.error(errmsg)
+ return False, errmsg
DevCacheManager.UpdateCache(device.dev_path, owner,
on_primary, disk.iv_name)
return True, physical_id
-def RemoveBlockDevice(disk):
+def BlockdevRemove(disk):
"""Remove a block device.
@note: This is intended to be called recursively.
@return: the success of the operation
"""
+ msgs = []
+ result = True
try:
rdev = _RecursiveFindBD(disk)
except errors.BlockDeviceError, err:
rdev = None
if rdev is not None:
r_path = rdev.dev_path
- result = rdev.Remove()
+ try:
+ rdev.Remove()
+ except errors.BlockDeviceError, err:
+ msgs.append(str(err))
+ result = False
if result:
DevCacheManager.RemoveCache(r_path)
- else:
- result = True
+
if disk.children:
for child in disk.children:
- result = result and RemoveBlockDevice(child)
- return result
+ c_status, c_msg = BlockdevRemove(child)
+ result = result and c_status
+ if c_msg: # not an empty message
+ msgs.append(c_msg)
+
+ return (result, "; ".join(msgs))
def _RecursiveAssembleBD(disk, owner, as_primary):
if children.count(None) >= mcn:
raise
cdev = None
- logging.debug("Error in child activation: %s", str(err))
+ logging.error("Error in child activation (but continuing): %s",
+ str(err))
children.append(cdev)
if as_primary or disk.AssembleOnSecondary():
- r_dev = bdev.Assemble(disk.dev_type, disk.physical_id, children)
+ r_dev = bdev.Assemble(disk.dev_type, disk.physical_id, children, disk.size)
r_dev.SetSyncSpeed(constants.SYNC_SPEED)
result = r_dev
if as_primary or disk.OpenOnSecondary():
return result
-def AssembleBlockDevice(disk, owner, as_primary):
+def BlockdevAssemble(disk, owner, as_primary):
"""Activate a block device for an instance.
This is a wrapper over _RecursiveAssembleBD.
C{True} for secondary nodes
"""
- result = _RecursiveAssembleBD(disk, owner, as_primary)
- if isinstance(result, bdev.BlockDev):
- result = result.dev_path
- return result
+ status = True
+ result = "no error information"
+ try:
+ result = _RecursiveAssembleBD(disk, owner, as_primary)
+ if isinstance(result, bdev.BlockDev):
+ result = result.dev_path
+ except errors.BlockDeviceError, err:
+ result = "Error while assembling disk: %s" % str(err)
+ status = False
+ return (status, result)
-def ShutdownBlockDevice(disk):
+def BlockdevShutdown(disk):
"""Shut down a block device.
- First, if the device is assembled (Attach() is successfull), then
+ First, if the device is assembled (Attach() is successful), then
the device is shutdown. Then the children of the device are
shutdown.
@return: the success of the operation
"""
+ msgs = []
+ result = True
r_dev = _RecursiveFindBD(disk)
if r_dev is not None:
r_path = r_dev.dev_path
- result = r_dev.Shutdown()
- if result:
+ try:
+ r_dev.Shutdown()
DevCacheManager.RemoveCache(r_path)
- else:
- result = True
+ except errors.BlockDeviceError, err:
+ msgs.append(str(err))
+ result = False
+
if disk.children:
for child in disk.children:
- result = result and ShutdownBlockDevice(child)
- return result
+ c_status, c_msg = BlockdevShutdown(child)
+ result = result and c_status
+ if c_msg: # not an empty message
+ msgs.append(c_msg)
+
+ return (result, "; ".join(msgs))
-def MirrorAddChildren(parent_cdev, new_cdevs):
+def BlockdevAddchildren(parent_cdev, new_cdevs):
"""Extend a mirrored block device.
@type parent_cdev: L{objects.Disk}
return True
-def MirrorRemoveChildren(parent_cdev, new_cdevs):
+def BlockdevRemovechildren(parent_cdev, new_cdevs):
"""Shrink a mirrored block device.
@type parent_cdev: L{objects.Disk}
return True
-def GetMirrorStatus(disks):
+def BlockdevGetmirrorstatus(disks):
"""Get the mirroring status of a list of devices.
@type disks: list of L{objects.Disk}
def _RecursiveFindBD(disk):
"""Check if a device is activated.
- If so, return informations about the real device.
+ If so, return information about the real device.
@type disk: L{objects.Disk}
@param disk: the disk object we need to find
for chdisk in disk.children:
children.append(_RecursiveFindBD(chdisk))
- return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
+ return bdev.FindDevice(disk.dev_type, disk.physical_id, children, disk.size)
-def CallBlockdevFind(disk):
+def BlockdevFind(disk):
"""Check if a device is activated.
- If it is, return informations about the real device.
+ If it is, return information about the real device.
@type disk: L{objects.Disk}
@param disk: the disk to find
return (True, (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus())
+def BlockdevGetsize(disks):
+  """Computes the size of the given disks.
+
+  If a disk is not found, returns None instead.
+
+  @type disks: list of L{objects.Disk}
+  @param disks: the list of disks to compute the size for
+  @rtype: list
+  @return: list with elements None if the disk cannot be found,
+      otherwise the size
+
+  """
+  result = []
+  for cf in disks:
+    try:
+      rbd = _RecursiveFindBD(cf)
+    # the exception value is unused, so don't bind it (keeps pylint quiet
+    # and matches the style used in the rest of this module)
+    except errors.BlockDeviceError:
+      result.append(None)
+      continue
+    if rbd is None:
+      result.append(None)
+    else:
+      result.append(rbd.GetActualSize())
+  return result
+
+
+
+
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
"""Write a file to the filesystem.
file_name)
return False
- allowed_files = [
- constants.CLUSTER_CONF_FILE,
- constants.ETC_HOSTS,
- constants.SSH_KNOWN_HOSTS_FILE,
- constants.VNC_PASSWORD_FILE,
- ]
-
- if file_name not in allowed_files:
+ if file_name not in _ALLOWED_UPLOAD_FILES:
logging.error("Filename passed to UploadFile not in allowed"
" upload targets: '%s'", file_name)
return False
str(disk))
real_disk.Open()
result['DISK_%d_PATH' % idx] = real_disk.dev_path
- # FIXME: When disks will have read-only mode, populate this
result['DISK_%d_ACCESS' % idx] = disk.mode
if constants.HV_DISK_TYPE in instance.hvparams:
result['DISK_%d_FRONTEND_TYPE' % idx] = \
result['NIC_%d_FRONTEND_TYPE' % idx] = \
instance.hvparams[constants.HV_NIC_TYPE]
+ for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]:
+ for key, value in source.items():
+ result["INSTANCE_%s_%s" % (kind, key)] = str(value)
+
return result
-def GrowBlockDevice(disk, amount):
+def BlockdevGrow(disk, amount):
"""Grow a stack of block devices.
This function is called recursively, with the childrens being the
return True, None
-def SnapshotBlockDevice(disk):
+def BlockdevSnapshot(disk):
"""Create a snapshot copy of a block device.
This function is called recursively, and the snapshot is actually created
if disk.children:
if len(disk.children) == 1:
# only one child, let's recurse on it
- return SnapshotBlockDevice(disk.children[0])
+ return BlockdevSnapshot(disk.children[0])
else:
# more than one child, choose one that matches
for child in disk.children:
if child.size == disk.size:
# return implies breaking the loop
- return SnapshotBlockDevice(child)
+ return BlockdevSnapshot(child)
elif disk.dev_type == constants.LD_LV:
r_dev = _RecursiveFindBD(disk)
if r_dev is not None:
# the target command is built out of three individual commands,
# which are joined by pipes; we check each individual command for
# valid parameters
- expcmd = utils.BuildShellCmd("cd %s; %s 2>%s", inst_os.path,
- export_script, logfile)
+ expcmd = utils.BuildShellCmd("set -e; set -o pipefail; cd %s; %s 2>%s",
+ inst_os.path, export_script, logfile)
comprcmd = "gzip"
# all commands have been checked, so we're safe to combine them
command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
- result = utils.RunCmd(command, env=export_env)
+ result = utils.RunCmd(["bash", "-c", command], env=export_env)
if result.failed:
logging.error("os snapshot export command '%s' returned error: %s"
return True
-def RenameBlockDevices(devlist):
+def BlockdevRename(devlist):
"""Rename a list of block devices.
@type devlist: list of tuples
# but we don't have the owner here - maybe parse from existing
# cache? for now, we only lose lvm data when we rename, which
# is less critical than DRBD or MD
- except errors.BlockDeviceError, err:
+ except errors.BlockDeviceError:
logging.exception("Can't rename device '%s' to '%s'", dev, unique_id)
result = False
return result
@param file_storage_dir: the directory we should cleanup
@rtype: tuple (success,)
@return: tuple of one element, C{success}, denoting
- whether the operation was successfull
+ whether the operation was successful
"""
file_storage_dir = _TransformFileStorageDir(file_storage_dir)
# deletes dir only if empty, otherwise we want to return False
try:
os.rmdir(file_storage_dir)
- except OSError, err:
+ except OSError:
logging.exception("Cannot remove file storage directory '%s'",
file_storage_dir)
result = False,
if os.path.isdir(old_file_storage_dir):
try:
os.rename(old_file_storage_dir, new_file_storage_dir)
- except OSError, err:
+ except OSError:
logging.exception("Cannot rename '%s' to '%s'",
old_file_storage_dir, new_file_storage_dir)
result = False,
return True
-def CloseBlockDevices(instance_name, disks):
+def BlockdevClose(instance_name, disks):
"""Closes the given block devices.
This means they will be switched to secondary mode (in case of
if utils.IsProcessAlive(utils.ReadPidFile(pid_file)):
return (False, "The master daemon is running, will not demote")
try:
- utils.CreateBackup(constants.CLUSTER_CONF_FILE)
+ if os.path.isfile(constants.CLUSTER_CONF_FILE):
+ utils.CreateBackup(constants.CLUSTER_CONF_FILE)
except EnvironmentError, err:
if err.errno != errno.ENOENT:
return (False, "Error while backing up cluster file: %s" % str(err))
# standalone, even though this should not happen with the
# new staged way of changing disk configs
try:
- rd.ReAttachNet(multimaster)
+ rd.AttachNet(multimaster)
except errors.BlockDeviceError, err:
return (False, "Can't change network configuration: %s" % str(err))
if all_connected:
on the master side.
"""
- RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
-
def __init__(self, hooks_base_dir=None):
"""Constructor for hooks runner.
dir_name = "%s/%s" % (self._BASE_DIR, subdir)
try:
dir_contents = utils.ListVisibleFiles(dir_name)
- except OSError, err:
+ except OSError:
# FIXME: must log output in case of failures
return rr
for relname in dir_contents:
fname = os.path.join(dir_name, relname)
if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
- self.RE_MASK.match(relname) is not None):
+ constants.EXT_PLUGIN_MASK.match(relname) is not None):
rrval = constants.HKR_SKIP
output = ""
else:
fdata = "%s %s %s\n" % (str(owner), state, iv_name)
try:
utils.WriteFile(fpath, data=fdata)
- except EnvironmentError, err:
+ except EnvironmentError:
logging.exception("Can't update bdev cache for %s", dev_path)
@classmethod
fpath = cls._ConvertPath(dev_path)
try:
utils.RemoveFile(fpath)
- except EnvironmentError, err:
+ except EnvironmentError:
logging.exception("Can't update bdev cache for %s", dev_path)