X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/cc208ed0b2925195c20f905c953b99a878db698e..95f84636f431ffaf777de917dabbb12bc548a678:/lib/backend.py

diff --git a/lib/backend.py b/lib/backend.py
index 0860e0f..81f82bc 100644
--- a/lib/backend.py
+++ b/lib/backend.py
@@ -19,7 +19,20 @@
 # 02110-1301, USA.
 
 
-"""Functions used by the node daemon"""
+"""Functions used by the node daemon
+
+@var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
+  the L{UploadFile} function
+@var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
+  in the L{_CleanDirectory} function
+
+"""
+
+# pylint: disable-msg=E1103
+
+# E1103: %s %r has no %r member (but some types could not be
+# inferred), because the _TryOSFromDisk returns either (True, os_obj)
+# or (False, "string") which confuses pylint
 
 
 import os
@@ -29,7 +42,6 @@ import time
 import stat
 import errno
 import re
-import subprocess
 import random
 import logging
 import tempfile
@@ -46,6 +58,14 @@ from ganeti import objects
 from ganeti import ssconf
 
 
+_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
+_ALLOWED_CLEAN_DIRS = frozenset([
+  constants.DATA_DIR,
+  constants.JOB_QUEUE_ARCHIVE_DIR,
+  constants.QUEUE_DIR,
+  ])
+
+
 class RPCFail(Exception):
   """Class denoting RPC failure.
 
@@ -53,6 +73,7 @@ class RPCFail(Exception):
 
   """
 
+
 def _Fail(msg, *args, **kwargs):
   """Log an error and the raise an RPCFail exception.
 
@@ -129,6 +150,10 @@ def _CleanDirectory(path, exclude=None):
       to the empty list
 
   """
+  if path not in _ALLOWED_CLEAN_DIRS:
+    _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
+          path)
+
   if not os.path.isdir(path):
     return
   if exclude is None:
@@ -138,13 +163,39 @@ def _CleanDirectory(path, exclude=None):
     exclude = [os.path.normpath(i) for i in exclude]
 
   for rel_name in utils.ListVisibleFiles(path):
-    full_name = os.path.normpath(os.path.join(path, rel_name))
+    full_name = utils.PathJoin(path, rel_name)
     if full_name in exclude:
       continue
     if os.path.isfile(full_name) and not os.path.islink(full_name):
       utils.RemoveFile(full_name)
 
 
+def _BuildUploadFileList():
+  """Build the list of allowed upload files.
+
+  This is abstracted so that it's built only once at module import time.
+
+  """
+  allowed_files = set([
+    constants.CLUSTER_CONF_FILE,
+    constants.ETC_HOSTS,
+    constants.SSH_KNOWN_HOSTS_FILE,
+    constants.VNC_PASSWORD_FILE,
+    constants.RAPI_CERT_FILE,
+    constants.RAPI_USERS_FILE,
+    constants.CONFD_HMAC_KEY,
+    ])
+
+  for hv_name in constants.HYPER_TYPES:
+    hv_class = hypervisor.GetHypervisorClass(hv_name)
+    allowed_files.update(hv_class.GetAncillaryFiles())
+
+  return frozenset(allowed_files)
+
+
+_ALLOWED_UPLOAD_FILES = _BuildUploadFileList()
+
+
 def JobQueuePurge():
   """Removes job queue files and archived jobs.
 
@@ -174,10 +225,10 @@ def GetMasterInfo():
     master_node = cfg.GetMasterNode()
   except errors.ConfigurationError, err:
     _Fail("Cluster configuration incomplete: %s", err, exc=True)
-  return master_netdev, master_ip, master_node
+  return (master_netdev, master_ip, master_node)
 
 
-def StartMaster(start_daemons):
+def StartMaster(start_daemons, no_voting):
   """Activate local node as master node.
 
   The function will always try activate the IP address of the master
@@ -187,6 +238,9 @@ def StartMaster(start_daemons):
   @type start_daemons: boolean
   @param start_daemons: whether to also start the master daemons
       (ganeti-masterd and ganeti-rapi)
+  @type no_voting: boolean
+  @param no_voting: whether to start ganeti-masterd without a node vote
+      (if start_daemons is True), but still non-interactively
   @rtype: None
 
   """
@@ -217,12 +271,20 @@ def StartMaster(start_daemons):
 
   # and now start the master and rapi daemons
   if start_daemons:
-    for daemon in 'ganeti-masterd', 'ganeti-rapi':
-      result = utils.RunCmd([daemon])
-      if result.failed:
-        msg = "Can't start daemon %s: %s" % (daemon, result.output)
-        logging.error(msg)
-        err_msgs.append(msg)
+    if no_voting:
+      masterd_args = "--no-voting --yes-do-it"
+    else:
+      masterd_args = ""
+
+    env = {
+      "EXTRA_MASTERD_ARGS": masterd_args,
+      }
+
+    result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"], env=env)
+    if result.failed:
+      msg = "Can't start Ganeti master: %s" % result.output
+      logging.error(msg)
+      err_msgs.append(msg)
 
   if err_msgs:
     _Fail("; ".join(err_msgs))
@@ -254,9 +316,11 @@ def StopMaster(stop_daemons):
   # but otherwise ignore the failure
 
   if stop_daemons:
-    # stop/kill the rapi and the master daemon
-    for daemon in constants.RAPI_PID, constants.MASTERD_PID:
-      utils.KillProcess(utils.ReadPidFile(utils.DaemonPidFileName(daemon)))
+    result = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"])
+    if result.failed:
+      logging.error("Could not stop Ganeti master, command %s had exitcode %s"
+                    " and error %s",
+                    result.cmd, result.exit_code, result.output)
 
 
 def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
@@ -301,10 +365,13 @@ def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
 
   utils.AddAuthorizedKey(auth_keys, sshpub)
 
-  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
+  result = utils.RunCmd([constants.DAEMON_UTIL, "reload-ssh-keys"])
+  if result.failed:
+    _Fail("Unable to reload SSH keys (command %r, exit code %s, output %r)",
+          result.cmd, result.exit_code, result.output)
 
 
-def LeaveCluster():
+def LeaveCluster(modify_ssh_setup):
   """Cleans up and remove the current node.
 
   This function cleans up and prepares the current node to be removed
@@ -314,30 +381,41 @@ def LeaveCluster():
   L{errors.QuitGanetiException} which is used as a special case to
   shutdown the node daemon.
 
+  @param modify_ssh_setup: boolean
+
   """
   _CleanDirectory(constants.DATA_DIR)
   JobQueuePurge()
 
-  try:
-    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
-
-    f = open(pub_key, 'r')
+  if modify_ssh_setup:
     try:
-      utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
-    finally:
-      f.close()
+      priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
 
-    utils.RemoveFile(priv_key)
-    utils.RemoveFile(pub_key)
-  except errors.OpExecError:
-    logging.exception("Error while processing ssh files")
+      utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))
+
+      utils.RemoveFile(priv_key)
+      utils.RemoveFile(pub_key)
+    except errors.OpExecError:
+      logging.exception("Error while processing ssh files")
+
+  try:
+    utils.RemoveFile(constants.CONFD_HMAC_KEY)
+    utils.RemoveFile(constants.RAPI_CERT_FILE)
+    utils.RemoveFile(constants.NODED_CERT_FILE)
+  except: # pylint: disable-msg=W0702
+    logging.exception("Error while removing cluster secrets")
+
+  result = utils.RunCmd([constants.DAEMON_UTIL, "stop", constants.CONFD])
+  if result.failed:
+    logging.error("Command %s failed with exitcode %s and error %s",
+                  result.cmd, result.exit_code, result.output)
 
   # Raise a custom exception (handled in ganeti-noded)
   raise errors.QuitGanetiException(True, 'Shutdown scheduled')
 
 
 def GetNodeInfo(vgname, hypervisor_type):
-  """Gives back a hash with different informations about the node.
+  """Gives back a hash with different information about the node.
 
   @type vgname: C{string}
   @param vgname: the name of the volume group to ask for disk space information
@@ -363,11 +441,7 @@ def GetNodeInfo(vgname, hypervisor_type):
   if hyp_info is not None:
     outputarray.update(hyp_info)
 
-  f = open("/proc/sys/kernel/random/boot_id", 'r')
-  try:
-    outputarray["bootid"] = f.read(128).rstrip("\n")
-  finally:
-    f.close()
+  outputarray["bootid"] = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
 
   return outputarray
 
@@ -406,7 +480,11 @@ def VerifyNode(what, cluster_name):
   if constants.NV_HYPERVISOR in what:
     result[constants.NV_HYPERVISOR] = tmp = {}
     for hv_name in what[constants.NV_HYPERVISOR]:
-      tmp[hv_name] = hypervisor.GetHypervisor(hv_name).Verify()
+      try:
+        val = hypervisor.GetHypervisor(hv_name).Verify()
+      except errors.HypervisorError, err:
+        val = "Error while checking hypervisor: %s" % str(err)
+      tmp[hv_name] = val
 
   if constants.NV_FILELIST in what:
     result[constants.NV_FILELIST] = utils.FingerprintFiles(
@@ -433,7 +511,7 @@ def VerifyNode(what, cluster_name):
         tmp[my_name] = ("Can't find my own primary/secondary IP"
                         " in the node list")
     else:
-      port = utils.GetNodeDaemonPort()
+      port = utils.GetDaemonPort(constants.NODED)
       for name, pip, sip in what[constants.NV_NODENETTEST]:
         fail = []
         if not utils.TcpPing(pip, port, source=my_pip):
@@ -446,15 +524,28 @@ def VerifyNode(what, cluster_name):
                           " and ".join(fail))
 
   if constants.NV_LVLIST in what:
-    result[constants.NV_LVLIST] = GetVolumeList(what[constants.NV_LVLIST])
+    try:
+      val = GetVolumeList(what[constants.NV_LVLIST])
+    except RPCFail, err:
+      val = str(err)
+    result[constants.NV_LVLIST] = val
 
   if constants.NV_INSTANCELIST in what:
-    result[constants.NV_INSTANCELIST] = GetInstanceList(
-      what[constants.NV_INSTANCELIST])
+    # GetInstanceList can fail
+    try:
+      val = GetInstanceList(what[constants.NV_INSTANCELIST])
+    except RPCFail, err:
+      val = str(err)
+    result[constants.NV_INSTANCELIST] = val
 
   if constants.NV_VGLIST in what:
     result[constants.NV_VGLIST] = utils.ListVolumeGroups()
 
+  if constants.NV_PVLIST in what:
+    result[constants.NV_PVLIST] = \
+      bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
+                                   filter_allocatable=False)
+
   if constants.NV_VERSION in what:
     result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
                                     constants.RELEASE_VERSION)
@@ -471,6 +562,21 @@ def VerifyNode(what, cluster_name):
       used_minors = str(err)
     result[constants.NV_DRBDLIST] = used_minors
 
+  if constants.NV_NODESETUP in what:
+    result[constants.NV_NODESETUP] = tmpr = []
+    if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
+      tmpr.append("The sysfs filesytem doesn't seem to be mounted"
+                  " under /sys, missing required directories /sys/block"
+                  " and /sys/class/net")
+    if (not os.path.isdir("/proc/sys") or
+        not os.path.isfile("/proc/sysrq-trigger")):
+      tmpr.append("The procfs filesystem doesn't seem to be mounted"
+                  " under /proc, missing required directory /proc/sys and"
+                  " the file /proc/sysrq-trigger")
+
+  if constants.NV_TIME in what:
+    result[constants.NV_TIME] = utils.SplitTime(time.time())
+
   return result
 
 
@@ -508,6 +614,11 @@ def GetVolumeList(vg_name):
       name, size, attr = match.groups()
       inactive = attr[4] == '-'
       online = attr[5] == 'o'
+      virtual = attr[0] == 'v'
+      if virtual:
+        # we don't want to report such volumes as existing, since they
+        # don't really hold data
+        continue
       lvs[name] = (size, inactive, online)
 
   return lvs
@@ -551,21 +662,23 @@ def NodeVolumes():
           result.output)
 
   def parse_dev(dev):
-    if '(' in dev:
-      return dev.split('(')[0]
-    else:
-      return dev
+    return dev.split('(')[0]
+
+  def handle_dev(dev):
+    return [parse_dev(x) for x in dev.split(",")]
 
   def map_line(line):
-    return {
-      'name': line[0].strip(),
-      'size': line[1].strip(),
-      'dev': parse_dev(line[2].strip()),
-      'vg': line[3].strip(),
-    }
+    line = [v.strip() for v in line]
+    return [{'name': line[0], 'size': line[1],
+             'dev': dev, 'vg': line[3]} for dev in handle_dev(line[2])]
 
-  return [map_line(line.split('|')) for line in result.stdout.splitlines()
-          if line.count('|') >= 3]
+  all_devs = []
+  for line in result.stdout.splitlines():
+    if line.count('|') >= 3:
+      all_devs.extend(map_line(line.split('|')))
+    else:
+      logging.warning("Strange line in the output from lvs: '%s'", line)
+  return all_devs
 
 
 def BridgesExist(bridges_list):
@@ -581,7 +694,7 @@ def BridgesExist(bridges_list):
       missing.append(bridge)
 
   if missing:
-    _Fail("Missing bridges %s", ", ".join(missing))
+    _Fail("Missing bridges %s", utils.CommaJoin(missing))
 
 
 def GetInstanceList(hypervisor_list):
@@ -609,7 +722,7 @@ def GetInstanceList(hypervisor_list):
 
 
 def GetInstanceInfo(instance, hname):
-  """Gives back the informations about an instance as a dictionary.
+  """Gives back the information about an instance as a dictionary.
 
   @type instance: string
   @param instance: the instance name
@@ -700,24 +813,44 @@ def GetAllInstancesInfo(hypervisor_list):
   return output
 
 
-def InstanceOsAdd(instance, reinstall):
+def _InstanceLogName(kind, os_name, instance):
+  """Compute the OS log filename for a given instance and operation.
+
+  The instance name and os name are passed in as strings since not all
+  operations have these as part of an instance object.
+
+  @type kind: string
+  @param kind: the operation type (e.g. add, import, etc.)
+  @type os_name: string
+  @param os_name: the os name
+  @type instance: string
+  @param instance: the name of the instance being imported/added/etc.
+
+  """
+  base = ("%s-%s-%s-%s.log" %
+          (kind, os_name, instance, utils.TimestampForFilename()))
+  return utils.PathJoin(constants.LOG_OS_DIR, base)
+
+
+def InstanceOsAdd(instance, reinstall, debug):
   """Add an OS to an instance.
 
   @type instance: L{objects.Instance}
   @param instance: Instance whose OS is to be installed
   @type reinstall: boolean
   @param reinstall: whether this is an instance reinstall
+  @type debug: integer
+  @param debug: debug level, passed to the OS scripts
   @rtype: None
 
   """
   inst_os = OSFromDisk(instance.os)
 
-  create_env = OSEnvironment(instance, inst_os)
+  create_env = OSEnvironment(instance, inst_os, debug)
   if reinstall:
     create_env['INSTANCE_REINSTALL'] = "1"
 
-  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
-                                     instance.name, int(time.time()))
+  logfile = _InstanceLogName("add", instance.os, instance.name)
 
   result = utils.RunCmd([inst_os.create_script], env=create_env,
                         cwd=inst_os.path, output=logfile,)
@@ -731,25 +864,26 @@ def InstanceOsAdd(instance, reinstall):
           " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
 
 
-def RunRenameInstance(instance, old_name):
+def RunRenameInstance(instance, old_name, debug):
   """Run the OS rename script for an instance.
 
   @type instance: L{objects.Instance}
   @param instance: Instance whose OS is to be installed
   @type old_name: string
   @param old_name: previous instance name
+  @type debug: integer
+  @param debug: debug level, passed to the OS scripts
   @rtype: boolean
   @return: the success of the operation
 
   """
   inst_os = OSFromDisk(instance.os)
 
-  rename_env = OSEnvironment(instance, inst_os)
+  rename_env = OSEnvironment(instance, inst_os, debug)
   rename_env['OLD_INSTANCE_NAME'] = old_name
 
-  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
-                                           old_name,
-                                           instance.name, int(time.time()))
+  logfile = _InstanceLogName("rename", instance.os,
+                             "%s-%s" % (old_name, instance.name))
 
   result = utils.RunCmd([inst_os.rename_script], env=rename_env,
                         cwd=inst_os.path, output=logfile)
@@ -764,7 +898,7 @@ def RunRenameInstance(instance, old_name):
 
 
 def _GetVGInfo(vg_name):
-  """Get informations about the volume group.
+  """Get information about the volume group.
 
   @type vg_name: str
   @param vg_name: the volume group which we query
@@ -795,7 +929,7 @@ def _GetVGInfo(vg_name):
         "vg_free": int(round(float(valarr[1]), 0)),
         "pv_count": int(valarr[2]),
         }
-    except ValueError, err:
+    except (TypeError, ValueError), err:
       logging.exception("Fail to parse vgs output: %s", err)
   else:
     logging.error("vgs output has the wrong number of fields (expected"
@@ -804,8 +938,8 @@ def _GetVGInfo(vg_name):
 
 
 def _GetBlockDevSymlinkPath(instance_name, idx):
-  return os.path.join(constants.DISK_LINKS_DIR,
-                      "%s:%d" % (instance_name, idx))
+  return utils.PathJoin(constants.DISK_LINKS_DIR,
+                        "%s:%d" % (instance_name, idx))
 
 
 def _SymlinkBlockDev(instance_name, device_path, idx):
@@ -904,54 +1038,76 @@ def StartInstance(instance):
     _Fail("Hypervisor error: %s", err, exc=True)
 
 
-def InstanceShutdown(instance):
+def InstanceShutdown(instance, timeout):
   """Shut an instance down.
 
   @note: this functions uses polling with a hardcoded timeout.
 
   @type instance: L{objects.Instance}
   @param instance: the instance object
+  @type timeout: integer
+  @param timeout: maximum timeout for soft shutdown
   @rtype: None
 
   """
   hv_name = instance.hypervisor
-  running_instances = GetInstanceList([hv_name])
+  hyper = hypervisor.GetHypervisor(hv_name)
   iname = instance.name
 
-  if iname not in running_instances:
+  if instance.name not in hyper.ListInstances():
     logging.info("Instance %s not running, doing nothing", iname)
     return
 
-  hyper = hypervisor.GetHypervisor(hv_name)
-  try:
-    hyper.StopInstance(instance)
-  except errors.HypervisorError, err:
-    _Fail("Failed to stop instance %s: %s", iname, err)
+  class _TryShutdown:
+    def __init__(self):
+      self.tried_once = False
 
-  # test every 10secs for 2min
+    def __call__(self):
+      if iname not in hyper.ListInstances():
+        return
 
-  time.sleep(1)
-  for dummy in range(11):
-    if instance.name not in GetInstanceList([hv_name]):
-      break
-    time.sleep(10)
-  else:
+      try:
+        hyper.StopInstance(instance, retry=self.tried_once)
+      except errors.HypervisorError, err:
+        if iname not in hyper.ListInstances():
+          # if the instance is no longer existing, consider this a
+          # success and go to cleanup
+          return
+
+        _Fail("Failed to stop instance %s: %s", iname, err)
+
+      self.tried_once = True
+
+      raise utils.RetryAgain()
+
+  try:
+    utils.Retry(_TryShutdown(), 5, timeout)
+  except utils.RetryTimeout:
     # the shutdown did not succeed
-    logging.error("Shutdown of '%s' unsuccessful, using destroy", iname)
+    logging.error("Shutdown of '%s' unsuccessful, forcing", iname)
 
     try:
       hyper.StopInstance(instance, force=True)
     except errors.HypervisorError, err:
-      _Fail("Failed to force stop instance %s: %s", iname, err)
+      if iname in hyper.ListInstances():
+        # only raise an error if the instance still exists, otherwise
+        # the error could simply be "instance ... unknown"!
+        _Fail("Failed to force stop instance %s: %s", iname, err)
 
     time.sleep(1)
-    if instance.name in GetInstanceList([hv_name]):
+
+    if iname in hyper.ListInstances():
       _Fail("Could not shutdown instance %s even by destroy", iname)
 
+  try:
+    hyper.CleanupInstance(instance.name)
+  except errors.HypervisorError, err:
+    logging.warning("Failed to execute post-shutdown cleanup step: %s", err)
+
   _RemoveBlockDevLinks(iname, instance.disks)
 
 
-def InstanceReboot(instance, reboot_type):
+def InstanceReboot(instance, reboot_type, shutdown_timeout):
   """Reboot an instance.
 
   @type instance: L{objects.Instance}
@@ -967,6 +1123,8 @@ def InstanceReboot(instance, reboot_type):
     not accepted here, since that mode is handled differently, in
     cmdlib, and translates into full stop and start of the
     instance (instead of a call_instance_reboot RPC)
+  @type shutdown_timeout: integer
+  @param shutdown_timeout: maximum timeout for soft shutdown
   @rtype: None
 
   """
@@ -983,7 +1141,7 @@ def InstanceReboot(instance, reboot_type):
       _Fail("Failed to soft reboot instance %s: %s", instance.name, err)
   elif reboot_type == constants.INSTANCE_REBOOT_HARD:
     try:
-      InstanceShutdown(instance)
+      InstanceShutdown(instance, shutdown_timeout)
       return StartInstance(instance)
     except errors.HypervisorError, err:
       _Fail("Failed to hard reboot instance %s: %s", instance.name, err)
@@ -1061,7 +1219,7 @@ def MigrateInstance(instance, target, live):
   hyper = hypervisor.GetHypervisor(instance.hypervisor)
 
   try:
-    hyper.MigrateInstance(instance.name, target, live)
+    hyper.MigrateInstance(instance, target, live)
   except errors.HypervisorError, err:
     _Fail("Failed to migrate instance: %s", err, exc=True)
 
@@ -1087,6 +1245,8 @@ def BlockdevCreate(disk, size, owner, on_primary, info):
     it's not required to return anything.
 
   """
+  # TODO: remove the obsolete 'size' argument
+  # pylint: disable-msg=W0613
   clist = []
   if disk.children:
     for child in disk.children:
@@ -1098,6 +1258,7 @@ def BlockdevCreate(disk, size, owner, on_primary, info):
         # we need the children open in case the device itself has to
         # be assembled
         try:
+          # pylint: disable-msg=E1103
          crdev.Open()
        except errors.BlockDeviceError, err:
          _Fail("Can't make child '%s' read-write: %s", child, err)
@@ -1232,6 +1393,7 @@ def BlockdevAssemble(disk, owner, as_primary):
   try:
     result = _RecursiveAssembleBD(disk, owner, as_primary)
     if isinstance(result, bdev.BlockDev):
+      # pylint: disable-msg=E1103
       result = result.dev_path
   except errors.BlockDeviceError, err:
     _Fail("Error while assembling disk: %s", err, exc=True)
@@ -1242,7 +1404,7 @@ def BlockdevAssemble(disk, owner, as_primary):
 def BlockdevShutdown(disk):
   """Shut down a block device.
 
-  First, if the device is assembled (Attach() is successfull), then
+  First, if the device is assembled (Attach() is successful), then
   the device is shutdown. Then the children of the device are
   shutdown.
 
@@ -1319,6 +1481,8 @@ def BlockdevRemovechildren(parent_cdev, new_cdevs):
       else:
         devs.append(bd.dev_path)
     else:
+      if not utils.IsNormAbsPath(rpath):
+        _Fail("Strange path returned from StaticDevPath: '%s'", rpath)
       devs.append(rpath)
   parent_bdev.RemoveChildren(devs)
 
@@ -1341,14 +1505,16 @@ def BlockdevGetmirrorstatus(disks):
     rbd = _RecursiveFindBD(dsk)
     if rbd is None:
       _Fail("Can't find device %s", dsk)
+
     stats.append(rbd.CombinedSyncStatus())
+
   return stats
 
 
 def _RecursiveFindBD(disk):
   """Check if a device is activated.
 
-  If so, return informations about the real device.
+  If so, return information about the real device.
 
   @type disk: L{objects.Disk}
   @param disk: the disk object we need to find
@@ -1365,26 +1531,113 @@ def _RecursiveFindBD(disk):
   return bdev.FindDevice(disk.dev_type, disk.physical_id, children, disk.size)
 
 
+def _OpenRealBD(disk):
+  """Opens the underlying block device of a disk.
+
+  @type disk: L{objects.Disk}
+  @param disk: the disk object we want to open
+
+  """
+  real_disk = _RecursiveFindBD(disk)
+  if real_disk is None:
+    _Fail("Block device '%s' is not set up", disk)
+
+  real_disk.Open()
+
+  return real_disk
+
+
 def BlockdevFind(disk):
   """Check if a device is activated.
 
-  If it is, return informations about the real device.
+  If it is, return information about the real device.
 
   @type disk: L{objects.Disk}
   @param disk: the disk to find
-  @rtype: None or tuple
-  @return: None if the disk cannot be found, otherwise a
-      tuple (device_path, major, minor, sync_percent,
-      estimated_time, is_degraded)
+  @rtype: None or objects.BlockDevStatus
+  @return: None if the disk cannot be found, otherwise a the current
+           information
 
   """
   try:
     rbd = _RecursiveFindBD(disk)
   except errors.BlockDeviceError, err:
     _Fail("Failed to find device: %s", err, exc=True)
+
   if rbd is None:
     return None
-  return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
+
+  return rbd.GetSyncStatus()
+
+
+def BlockdevGetsize(disks):
+  """Computes the size of the given disks.
+
+  If a disk is not found, returns None instead.
+
+  @type disks: list of L{objects.Disk}
+  @param disks: the list of disk to compute the size for
+  @rtype: list
+  @return: list with elements None if the disk cannot be found,
+      otherwise the size
+
+  """
+  result = []
+  for cf in disks:
+    try:
+      rbd = _RecursiveFindBD(cf)
+    except errors.BlockDeviceError:
+      result.append(None)
+      continue
+    if rbd is None:
+      result.append(None)
+    else:
+      result.append(rbd.GetActualSize())
+  return result
+
+
+def BlockdevExport(disk, dest_node, dest_path, cluster_name):
+  """Export a block device to a remote node.
+
+  @type disk: L{objects.Disk}
+  @param disk: the description of the disk to export
+  @type dest_node: str
+  @param dest_node: the destination node to export to
+  @type dest_path: str
+  @param dest_path: the destination path on the target node
+  @type cluster_name: str
+  @param cluster_name: the cluster name, needed for SSH hostalias
+  @rtype: None
+
+  """
+  real_disk = _OpenRealBD(disk)
+
+  # the block size on the read dd is 1MiB to match our units
+  expcmd = utils.BuildShellCmd("set -e; set -o pipefail; "
+                               "dd if=%s bs=1048576 count=%s",
+                               real_disk.dev_path, str(disk.size))
+
+  # we set here a smaller block size as, due to ssh buffering, more
+  # than 64-128k will mostly ignored; we use nocreat to fail if the
+  # device is not already there or we pass a wrong path; we use
+  # notrunc to no attempt truncate on an LV device; we use oflag=dsync
+  # to not buffer too much memory; this means that at best, we flush
+  # every 64k, which will not be very fast
+  destcmd = utils.BuildShellCmd("dd of=%s conv=nocreat,notrunc bs=65536"
+                                " oflag=dsync", dest_path)
+
+  remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node,
+                                                   constants.GANETI_RUNAS,
+                                                   destcmd)
+
+  # all commands have been checked, so we're safe to combine them
+  command = '|'.join([expcmd, utils.ShellQuoteArgs(remotecmd)])
+
+  result = utils.RunCmd(["bash", "-c", command])
+
+  if result.failed:
+    _Fail("Disk copy command '%s' returned error: %s"
+          " output: %s", command, result.fail_reason, result.output)
 
 
 def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
@@ -1413,20 +1666,7 @@
   if not os.path.isabs(file_name):
     _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name)
 
-  allowed_files = set([
-    constants.CLUSTER_CONF_FILE,
-    constants.ETC_HOSTS,
-    constants.SSH_KNOWN_HOSTS_FILE,
-    constants.VNC_PASSWORD_FILE,
-    constants.RAPI_CERT_FILE,
-    constants.RAPI_USERS_FILE,
-    ])
-
-  for hv_name in constants.HYPER_TYPES:
-    hv_class = hypervisor.GetHypervisor(hv_name)
-    allowed_files.update(hv_class.GetAncillaryFiles())
-
-  if file_name not in allowed_files:
+  if file_name not in _ALLOWED_UPLOAD_FILES:
    _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'",
          file_name)
 
@@ -1463,32 +1703,30 @@ def _ErrnoOrStr(err):
   return detail
 
 
-def _OSOndiskAPIVersion(name, os_dir):
+def _OSOndiskAPIVersion(os_dir):
   """Compute and return the API version of a given OS.
 
-  This function will try to read the API version of the OS given by
-  the 'name' parameter and residing in the 'os_dir' directory.
+  This function will try to read the API version of the OS residing in
+  the 'os_dir' directory.
 
-  @type name: str
-  @param name: the OS name we should look for
   @type os_dir: str
-  @param os_dir: the directory inwhich we should look for the OS
+  @param os_dir: the directory in which we should look for the OS
   @rtype: tuple
   @return: tuple (status, data) with status denoting the validity and
       data holding either the vaid versions or an error message
 
   """
-  api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
+  api_file = utils.PathJoin(os_dir, constants.OS_API_FILE)
 
   try:
     st = os.stat(api_file)
   except EnvironmentError, err:
-    return False, ("Required file 'ganeti_api_version' file not"
-                   " found under path %s: %s" % (os_dir, _ErrnoOrStr(err)))
+    return False, ("Required file '%s' not found under path %s: %s" %
                   (constants.OS_API_FILE, os_dir, _ErrnoOrStr(err)))
 
   if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
-    return False, ("File 'ganeti_api_version' file at %s is not"
-                   " a regular file" % os_dir)
+    return False, ("File '%s' in %s is not a regular file" %
+                   (constants.OS_API_FILE, os_dir))
 
   try:
     api_versions = utils.ReadFile(api_file).splitlines()
@@ -1513,12 +1751,13 @@ def DiagnoseOS(top_dirs=None):
       search (if not given defaults to
       L{constants.OS_SEARCH_PATH})
   @rtype: list of L{objects.OS}
-  @return: a list of tuples (name, path, status, diagnose)
+  @return: a list of tuples (name, path, status, diagnose, variants)
      for all (potential) OSes under all search paths, where:
          - name is the (potential) OS name
          - path is the full path to the OS
          - status True/False is the validity of the OS
          - diagnose is the error message for an invalid OS, otherwise empty
+          - variants is a list of supported OS variants, if any
 
   """
   if top_dirs is None:
@@ -1533,13 +1772,15 @@ def DiagnoseOS(top_dirs=None):
         logging.exception("Can't list the OS directory %s: %s", dir_name, err)
         break
       for name in f_names:
-        os_path = os.path.sep.join([dir_name, name])
+        os_path = utils.PathJoin(dir_name, name)
         status, os_inst = _TryOSFromDisk(name, base_dir=dir_name)
         if status:
           diagnose = ""
+          variants = os_inst.supported_variants
         else:
           diagnose = os_inst
-        result.append((name, os_path, status, diagnose))
+          variants = []
+        result.append((name, os_path, status, diagnose, variants))
 
   return result
 
@@ -1560,12 +1801,13 @@ def _TryOSFromDisk(name, base_dir=None):
   """
   if base_dir is None:
     os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
-    if os_dir is None:
-      return False, "Directory for OS %s not found in search path" % name
   else:
-    os_dir = os.path.sep.join([base_dir, name])
+    os_dir = utils.FindFile(name, [base_dir], os.path.isdir)
+
+  if os_dir is None:
+    return False, "Directory for OS %s not found in search path" % name
 
-  status, api_versions = _OSOndiskAPIVersion(name, os_dir)
+  status, api_versions = _OSOndiskAPIVersion(os_dir)
   if not status:
     # push the error up
    return status, api_versions
@@ -1574,31 +1816,47 @@ def _TryOSFromDisk(name, base_dir=None):
    return False, ("API version mismatch for path '%s': found %s, want %s."
                   % (os_dir, api_versions, constants.OS_API_VERSIONS))
 
-  # OS Scripts dictionary, we will populate it with the actual script names
-  os_scripts = dict.fromkeys(constants.OS_SCRIPTS)
+  # OS Files dictionary, we will populate it with the absolute path names
+  os_files = dict.fromkeys(constants.OS_SCRIPTS)
 
-  for script in os_scripts:
-    os_scripts[script] = os.path.sep.join([os_dir, script])
+  if max(api_versions) >= constants.OS_API_V15:
+    os_files[constants.OS_VARIANTS_FILE] = ''
+
+  for filename in os_files:
+    os_files[filename] = utils.PathJoin(os_dir, filename)
 
     try:
-      st = os.stat(os_scripts[script])
+      st = os.stat(os_files[filename])
     except EnvironmentError, err:
-      return False, ("Script '%s' under path '%s' is missing (%s)" %
-                     (script, os_dir, _ErrnoOrStr(err)))
-
-    if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
-      return False, ("Script '%s' under path '%s' is not executable" %
-                     (script, os_dir))
+      return False, ("File '%s' under path '%s' is missing (%s)" %
+                     (filename, os_dir, _ErrnoOrStr(err)))
 
     if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
-      return False, ("Script '%s' under path '%s' is not a regular file" %
-                     (script, os_dir))
+      return False, ("File '%s' under path '%s' is not a regular file" %
+                     (filename, os_dir))
+
+    if filename in constants.OS_SCRIPTS:
+      if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
+        return False, ("File '%s' under path '%s' is not executable" %
+                       (filename, os_dir))
+
+  variants = None
+  if constants.OS_VARIANTS_FILE in os_files:
+    variants_file = os_files[constants.OS_VARIANTS_FILE]
+    try:
+      variants = utils.ReadFile(variants_file).splitlines()
+    except EnvironmentError, err:
+      return False, ("Error while reading the OS variants file at %s: %s" %
+                     (variants_file, _ErrnoOrStr(err)))
+    if not variants:
+      return False, ("No supported os variant found")
 
   os_obj = objects.OS(name=name, path=os_dir,
-                      create_script=os_scripts[constants.OS_SCRIPT_CREATE],
-                      export_script=os_scripts[constants.OS_SCRIPT_EXPORT],
-                      import_script=os_scripts[constants.OS_SCRIPT_IMPORT],
-                      rename_script=os_scripts[constants.OS_SCRIPT_RENAME],
+                      create_script=os_files[constants.OS_SCRIPT_CREATE],
+                      export_script=os_files[constants.OS_SCRIPT_EXPORT],
+                      import_script=os_files[constants.OS_SCRIPT_IMPORT],
+                      rename_script=os_files[constants.OS_SCRIPT_RENAME],
+                      supported_variants=variants,
                       api_versions=api_versions)
   return True, os_obj
 
@@ -1621,7 +1879,8 @@ def OSFromDisk(name, base_dir=None):
   @raise RPCFail: if we don't find a valid OS
 
   """
-  status, payload = _TryOSFromDisk(name, base_dir)
+  name_only = name.split("+", 1)[0]
+  status, payload = _TryOSFromDisk(name_only, base_dir)
 
   if not status:
     _Fail(payload)
@@ -1629,13 +1888,13 @@ def OSFromDisk(name, base_dir=None):
   return payload
 
 
-def OSEnvironment(instance, os, debug=0):
+def OSEnvironment(instance, inst_os, debug=0):
   """Calculate the environment for an os script.
 
   @type instance: L{objects.Instance}
   @param instance: target instance for the os script run
-  @type os: L{objects.OS}
-  @param os: operating system for which the environment is being built
+  @type inst_os: L{objects.OS}
+  @param inst_os: operating system for which the environment is being built
   @type debug: integer
   @param debug: debug level (0 or 1, for OS Api 10)
   @rtype: dict
@@ -1645,7 +1904,8 @@ def OSEnvironment(instance, os, debug=0):
 
   """
   result = {}
-  api_version = max(constants.OS_API_VERSIONS.intersection(os.api_versions))
+  api_version = \
+    max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions))
   result['OS_API_VERSION'] = '%d' % api_version
   result['INSTANCE_NAME'] = instance.name
   result['INSTANCE_OS'] = instance.os
@@ -1653,12 +1913,14 @@ def OSEnvironment(instance, os, debug=0):
   result['DISK_COUNT'] = '%d' % len(instance.disks)
   result['NIC_COUNT'] = '%d' % len(instance.nics)
   result['DEBUG_LEVEL'] = '%d' % debug
+  if api_version >= constants.OS_API_V15:
+    try:
+      variant = instance.os.split('+', 1)[1]
+    except IndexError:
+      variant = inst_os.supported_variants[0]
+    result['OS_VARIANT'] = variant
   for idx, disk in enumerate(instance.disks):
-    real_disk = _RecursiveFindBD(disk)
-    if real_disk is None:
-      raise errors.BlockDeviceError("Block device '%s' is not set up" %
-                                    str(disk))
-    real_disk.Open()
+    real_disk = _OpenRealBD(disk)
     result['DISK_%d_PATH' % idx] = real_disk.dev_path
     result['DISK_%d_ACCESS' % idx] = disk.mode
     if constants.HV_DISK_TYPE in instance.hvparams:
@@ -1688,6 +1950,7 @@ def OSEnvironment(instance, os, debug=0):
 
   return result
 
+
 def BlockdevGrow(disk, amount):
   """Grow a stack of block devices.
 
@@ -1724,19 +1987,15 @@ def BlockdevSnapshot(disk):
   @return: snapshot disk path
 
   """
-  if disk.children:
-    if len(disk.children) == 1:
-      # only one child, let's recurse on it
-      return BlockdevSnapshot(disk.children[0])
-    else:
-      # more than one child, choose one that matches
-      for child in disk.children:
-        if child.size == disk.size:
-          # return implies breaking the loop
-          return BlockdevSnapshot(child)
+  if disk.dev_type == constants.LD_DRBD8:
+    if not disk.children:
+      _Fail("DRBD device '%s' without backing storage cannot be snapshotted",
+            disk.unique_id)
+    return BlockdevSnapshot(disk.children[0])
   elif disk.dev_type == constants.LD_LV:
     r_dev = _RecursiveFindBD(disk)
     if r_dev is not None:
+      # FIXME: choose a saner value for the snapshot size
       # let's stay on the safe side and ask for the full size, for now
       return r_dev.Snapshot(disk.size)
     else:
@@ -1746,7 +2005,7 @@ def BlockdevSnapshot(disk):
           disk.unique_id, disk.dev_type)
 
 
-def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
+def ExportSnapshot(disk, dest_node, instance, cluster_name, idx, debug):
   """Export a block device snapshot to a remote node.
 
   @type disk: L{objects.Disk}
@@ -1760,40 +2019,36 @@ def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
   @type idx: int
   @param idx: the index of the disk in the instance's disk list,
       used to export to the OS scripts environment
+  @type debug: integer
+  @param debug: debug level, passed to the OS scripts
   @rtype: None
 
   """
   inst_os = OSFromDisk(instance.os)
-  export_env = OSEnvironment(instance, inst_os)
+  export_env = OSEnvironment(instance, inst_os, debug)
 
   export_script = inst_os.export_script
 
-  logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
-                                     instance.name, int(time.time()))
-  if not os.path.exists(constants.LOG_OS_DIR):
-    os.mkdir(constants.LOG_OS_DIR, 0750)
-  real_disk = _RecursiveFindBD(disk)
-  if real_disk is None:
-    _Fail("Block device '%s' is not set up", disk)
+  logfile = _InstanceLogName("export", inst_os.name, instance.name)
 
-  real_disk.Open()
+  real_disk = _OpenRealBD(disk)
 
   export_env['EXPORT_DEVICE'] = real_disk.dev_path
   export_env['EXPORT_INDEX'] = str(idx)
 
-  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
+  destdir = utils.PathJoin(constants.EXPORT_DIR, instance.name + ".new")
   destfile = disk.physical_id[1]
 
   # the target command is built out of three individual commands,
   # which are joined by pipes; we check each individual command for
   # valid parameters
-  expcmd = utils.BuildShellCmd("cd %s; %s 2>%s", inst_os.path,
-                               export_script, logfile)
+  expcmd = utils.BuildShellCmd("set -e; set -o pipefail; cd %s; %s 2>%s",
+                               inst_os.path, export_script, logfile)
 
   comprcmd = "gzip"
 
-  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
-                                destdir, destdir, destfile)
+  destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s",
                                destdir, utils.PathJoin(destdir, destfile))
   remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node,
                                                    constants.GANETI_RUNAS,
                                                    destcmd)
@@ -1801,7 +2056,7 @@ def ExportSnapshot(disk, dest_node, instance, cluster_name, idx):
   # all commands have been checked, so we're safe to combine them
   command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
 
-  result = utils.RunCmd(command, env=export_env)
+  result = utils.RunCmd(["bash", "-c", command], env=export_env)
 
   if result.failed:
     _Fail("OS snapshot export command '%s' returned error: %s"
@@ -1821,8 +2076,8 @@ def FinalizeExport(instance, snap_disks):
   @rtype: None
 
   """
-  destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
-  finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
+  destdir = utils.PathJoin(constants.EXPORT_DIR, instance.name + ".new")
+  finaldestdir = utils.PathJoin(constants.EXPORT_DIR, instance.name)
 
   config = objects.SerializableConfigParser()
 
@@ -1840,6 +2095,7 @@ def FinalizeExport(instance, snap_disks):
   config.set(constants.INISECT_INS, 'vcpus', '%d' %
              instance.beparams[constants.BE_VCPUS])
   config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
+  config.set(constants.INISECT_INS, 'hypervisor', instance.hypervisor)
 
   nic_total = 0
   for nic_count, nic in enumerate(instance.nics):
@@ -1847,8 +2103,9 @@ def FinalizeExport(instance, snap_disks):
     config.set(constants.INISECT_INS, 'nic%d_mac' %
                nic_count, '%s' % nic.mac)
     config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
-    config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count,
-               '%s' % nic.bridge)
+    for param in constants.NICS_PARAMETER_TYPES:
+      config.set(constants.INISECT_INS, 'nic%d_%s' % (nic_count, param),
+                 '%s' % nic.nicparams.get(param, None))
   # TODO: redundant: on load can read nics until it doesn't exist
   config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_total)
 
@@ -1865,9 +2122,20 @@ def FinalizeExport(instance, snap_disks):
 
   config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_total)
 
-  utils.WriteFile(os.path.join(destdir, constants.EXPORT_CONF_FILE),
+  # New-style hypervisor/backend parameters
+
+  config.add_section(constants.INISECT_HYP)
+  for name, value in instance.hvparams.items():
+    if name not in constants.HVC_GLOBALS:
+      config.set(constants.INISECT_HYP, name, str(value))
+
+  config.add_section(constants.INISECT_BEP)
+  for name, value in instance.beparams.items():
+    config.set(constants.INISECT_BEP, name, str(value))
+
+  utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE),
                   data=config.Dumps())
-  shutil.rmtree(finaldestdir, True)
+  shutil.rmtree(finaldestdir, ignore_errors=True)
   shutil.move(destdir, finaldestdir)
 
 
@@ -1882,7 +2150,7 @@ def ExportInfo(dest):
       export info
 
   """
-  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)
+  cff = utils.PathJoin(dest, constants.EXPORT_CONF_FILE)
 
   config = objects.SerializableConfigParser()
   config.read(cff)
@@ -1894,7 +2162,7 @@ def ExportInfo(dest):
   return config.Dumps()
 
 
-def ImportOSIntoInstance(instance, src_node, src_images, cluster_name):
+def ImportOSIntoInstance(instance, src_node, src_images, cluster_name, debug):
   """Import an os image into an instance.
 
   @type instance: L{objects.Instance}
@@ -1903,18 +2171,17 @@ def ImportOSIntoInstance(instance, src_node, src_images, cluster_name):
   @param src_node: source node for the disk images
   @type src_images: list of string
   @param src_images: absolute paths of the disk images
+  @type debug: integer
+  @param debug: debug level, passed to the OS scripts
   @rtype: list of boolean
   @return: each boolean represent the success of importing the n-th disk
 
   """
   inst_os = OSFromDisk(instance.os)
-  import_env = OSEnvironment(instance, inst_os)
+  import_env = OSEnvironment(instance, inst_os, debug)
   import_script = inst_os.import_script
 
-  logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
-                                        instance.name, int(time.time()))
-  if not os.path.exists(constants.LOG_OS_DIR):
-    os.mkdir(constants.LOG_OS_DIR, 0750)
+  logfile = _InstanceLogName("import", instance.os, instance.name)
 
   comprcmd = "gunzip"
   impcmd = utils.BuildShellCmd("(cd %s; %s >%s 2>&1)", inst_os.path,
@@ -1963,7 +2230,7 @@ def RemoveExport(export):
   @rtype: None
 
   """
-  target = os.path.join(constants.EXPORT_DIR, export)
+  target = utils.PathJoin(constants.EXPORT_DIR, export)
 
   try:
     shutil.rmtree(target)
@@ -2025,10 +2292,12 @@ def _TransformFileStorageDir(file_storage_dir):
   @return: the normalized path if valid, None otherwise
 
   """
+  if not constants.ENABLE_FILE_STORAGE:
+    _Fail("File storage disabled at configure time")
   cfg = _GetConfig()
   file_storage_dir = os.path.normpath(file_storage_dir)
   base_file_storage_dir = cfg.GetFileStorageDir()
-  if (not os.path.commonprefix([file_storage_dir, base_file_storage_dir]) ==
+  if (os.path.commonprefix([file_storage_dir, base_file_storage_dir]) !=
      base_file_storage_dir):
    _Fail("File storage directory '%s' is not under base file"
          " storage directory '%s'", file_storage_dir, base_file_storage_dir)
@@ -2068,7 +2337,7 @@ def RemoveFileStorageDir(file_storage_dir):
   @param file_storage_dir: the directory we should cleanup
   @rtype: tuple (success,)
   @return: tuple of one element, C{success}, denoting
-      whether the operation was successfull
+      whether the operation was successful
 
   """
   file_storage_dir = _TransformFileStorageDir(file_storage_dir)
@@ -2250,14 +2519,18 @@ def DemoteFromMC():
   master, myself = ssconf.GetMasterAndMyself()
   if master == myself:
     _Fail("ssconf status shows I'm the master node, will not demote")
-  pid_file = utils.DaemonPidFileName(constants.MASTERD_PID)
-  if utils.IsProcessAlive(utils.ReadPidFile(pid_file)):
+
+  result = utils.RunCmd([constants.DAEMON_UTIL, "check", constants.MASTERD])
+  if not result.failed:
     _Fail("The master daemon is running, will not demote")
+
   try:
-    utils.CreateBackup(constants.CLUSTER_CONF_FILE)
+    if os.path.isfile(constants.CLUSTER_CONF_FILE):
+      utils.CreateBackup(constants.CLUSTER_CONF_FILE)
   except EnvironmentError, err:
     if err.errno != errno.ENOENT:
       _Fail("Error while backing up cluster file: %s", err, exc=True)
+
   utils.RemoveFile(constants.CLUSTER_CONF_FILE)
 
 
@@ -2314,20 +2587,22 @@ def DrbdAttachNet(nodes_ip, disks, instance_name, multimaster):
       rd.AttachNet(multimaster)
     except errors.BlockDeviceError, err:
       _Fail("Can't change network configuration: %s", err)
+
   # wait until the disks are connected; we need to retry the re-attach
   # if the device becomes standalone, as this might happen if the one
   # node disconnects and reconnects in a different mode before the
   # other node reconnects; in this case, one or both of the nodes will
   # decide it has wrong configuration and switch to standalone
-  RECONNECT_TIMEOUT = 2 * 60
-  sleep_time = 0.100 # start with 100 miliseconds
-  timeout_limit = time.time() + RECONNECT_TIMEOUT
-  while time.time() < timeout_limit:
+
+  def _Attach():
     all_connected = True
+
     for rd in bdevs:
       stats = rd.GetProcStatus()
-      if not (stats.is_connected or stats.is_in_resync):
-        all_connected = False
+
+      all_connected = (all_connected and
+                       (stats.is_connected or stats.is_in_resync))
+
       if stats.is_standalone:
         # peer had different config info and this node became
        # standalone, even though this should not happen with the
@@ -2336,12 +2611,16 @@ def DrbdAttachNet(nodes_ip, disks, instance_name, multimaster):
          rd.AttachNet(multimaster)
        except errors.BlockDeviceError, err:
          _Fail("Can't change network configuration: %s", err)
-    if all_connected:
-      break
-    time.sleep(sleep_time)
-    sleep_time = min(5, sleep_time * 1.5)
-  if not all_connected:
+
+    if not all_connected:
+      raise utils.RetryAgain()
+
+  try:
+    # Start with a delay of 100 miliseconds and go up to 5 seconds
+    utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60)
+  except utils.RetryTimeout:
     _Fail("Timeout in disk reconnecting")
+
   if multimaster:
     # change to primary mode
     for rd in bdevs:
@@ -2355,14 +2634,25 @@ def DrbdWaitSync(nodes_ip, disks):
   """Wait until DRBDs have synchronized.
 
   """
+  def _helper(rd):
+    stats = rd.GetProcStatus()
+    if not (stats.is_connected or stats.is_in_resync):
+      raise utils.RetryAgain()
+    return stats
+
   bdevs = _FindDisks(nodes_ip, disks)
 
   min_resync = 100
   alldone = True
   for rd in bdevs:
-    stats = rd.GetProcStatus()
-    if not (stats.is_connected or stats.is_in_resync):
-      _Fail("DRBD device %s is not in sync: stats=%s", rd, stats)
+    try:
+      # poll each second for 15 seconds
+      stats = utils.Retry(_helper, 1, 15, args=[rd])
+    except utils.RetryTimeout:
+      stats = rd.GetProcStatus()
+      # last check
+      if not (stats.is_connected or stats.is_in_resync):
+        _Fail("DRBD device %s is not in sync: stats=%s", rd, stats)
     alldone = alldone and (not stats.is_in_resync)
     if stats.sync_percent is not None:
       min_resync = min(min_resync, stats.sync_percent)
@@ -2396,8 +2686,6 @@ class HooksRunner(object):
   on the master side.
 
   """
-  RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
-
   def __init__(self, hooks_base_dir=None):
     """Constructor for hooks runner.
 
@@ -2408,57 +2696,9 @@ class HooksRunner(object):
     """
     if hooks_base_dir is None:
       hooks_base_dir = constants.HOOKS_BASE_DIR
-    self._BASE_DIR = hooks_base_dir
-
-  @staticmethod
-  def ExecHook(script, env):
-    """Exec one hook script.
-
-    @type script: str
-    @param script: the full path to the script
-    @type env: dict
-    @param env: the environment with which to exec the script
-    @rtype: tuple (success, message)
-    @return: a tuple of success and message, where success
-        indicates the succes of the operation, and message
-        which will contain the error details in case we
-        failed
-
-    """
-    # exec the process using subprocess and log the output
-    fdstdin = None
-    try:
-      fdstdin = open("/dev/null", "r")
-      child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
-                               stderr=subprocess.STDOUT, close_fds=True,
-                               shell=False, cwd="/", env=env)
-      output = ""
-      try:
-        output = child.stdout.read(4096)
-        child.stdout.close()
-      except EnvironmentError, err:
-        output += "Hook script error: %s" % str(err)
-
-      while True:
-        try:
-          result = child.wait()
-          break
-        except EnvironmentError, err:
-          if err.errno == errno.EINTR:
-            continue
-          raise
-    finally:
-      # try not to leak fds
-      for fd in (fdstdin, ):
-        if fd is not None:
-          try:
-            fd.close()
-          except EnvironmentError, err:
-            # just log the error
-            #logging.exception("Error while closing fd %s", fd)
-            pass
-
-    return result == 0, utils.SafeEncode(output.strip())
+    # yeah, _BASE_DIR is not valid for attributes, we use it like a
+    # constant
+    self._BASE_DIR = hooks_base_dir # pylint: disable-msg=C0103
 
   def RunHooks(self, hpath, phase, env):
     """Run the scripts in the hooks directory.
@@ -2489,34 +2729,35 @@ class HooksRunner(object):
     else:
       _Fail("Unknown hooks phase '%s'", phase)
 
-    rr = []
 
     subdir = "%s-%s.d" % (hpath, suffix)
-    dir_name = "%s/%s" % (self._BASE_DIR, subdir)
-    try:
-      dir_contents = utils.ListVisibleFiles(dir_name)
-    except OSError:
-      # FIXME: must log output in case of failures
-      return rr
-
-    # we use the standard python sort order,
-    # so 00name is the recommended naming scheme
-    dir_contents.sort()
-    for relname in dir_contents:
-      fname = os.path.join(dir_name, relname)
-      if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
-              self.RE_MASK.match(relname) is not None):
+    dir_name = utils.PathJoin(self._BASE_DIR, subdir)
+
+    results = []
+
+    if not os.path.isdir(dir_name):
+      # for non-existing/non-dirs, we simply exit instead of logging a
+      # warning at every operation
+      return results
+
+    runparts_results = utils.RunParts(dir_name, env=env, reset_env=True)
+
+    for (relname, relstatus, runresult) in runparts_results:
+      if relstatus == constants.RUNPARTS_SKIP:
         rrval = constants.HKR_SKIP
         output = ""
-      else:
-        result, output = self.ExecHook(fname, env)
-        if not result:
+      elif relstatus == constants.RUNPARTS_ERR:
+        rrval = constants.HKR_FAIL
+        output = "Hook script execution error: %s" % runresult
+      elif relstatus == constants.RUNPARTS_RUN:
+        if runresult.failed:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
-      rr.append(("%s/%s" % (subdir, relname), rrval, output))
+        output = utils.SafeEncode(runresult.output.strip())
+      results.append(("%s/%s" % (subdir, relname), rrval, output))
 
-    return rr
+    return results
 
 
 class IAllocatorRunner(object):
@@ -2526,7 +2767,8 @@ class IAllocatorRunner(object):
   the master side.
 
   """
-  def Run(self, name, idata):
+  @staticmethod
+  def Run(name, idata):
     """Run an iallocator script.
 
     @type name: str
@@ -2582,7 +2824,7 @@ class DevCacheManager(object):
     if dev_path.startswith(cls._DEV_PREFIX):
       dev_path = dev_path[len(cls._DEV_PREFIX):]
     dev_path = dev_path.replace("/", "_")
-    fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
+    fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path)
     return fpath
 
   @classmethod
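
For readers following the new InstanceShutdown logic in this patch: below is a minimal, standalone sketch of the "poll with a timeout, then escalate to a forced stop" pattern it implements. The helper names here are illustrative only; the actual code relies on utils.Retry, utils.RetryAgain and utils.RetryTimeout from Ganeti's utils module and on the hypervisor abstraction.

import time


class RetryTimeout(Exception):
  """Raised when the retried operation does not finish in time."""


def Retry(check, interval, timeout):
  """Call check() every `interval` seconds until it returns True.

  Raises RetryTimeout if `timeout` seconds pass without success.

  """
  deadline = time.time() + timeout
  while True:
    if check():
      return
    if time.time() >= deadline:
      raise RetryTimeout()
    time.sleep(interval)


def ShutdownWithFallback(soft_stop, force_stop, is_running, timeout):
  """Try a soft shutdown, escalating to a forced stop on timeout.

  The three callables stand in for hypervisor operations (illustrative
  placeholders, not Ganeti APIs).

  """
  if not is_running():
    # nothing to do, mirrors the early return in InstanceShutdown
    return
  soft_stop()
  try:
    Retry(lambda: not is_running(), 5, timeout)
  except RetryTimeout:
    # soft shutdown did not finish in time; force it, as the patch does
    force_stop()
    time.sleep(1)
    if is_running():
      raise RuntimeError("instance still running after forced stop")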