X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/63cef6c3fdb1e006fad3c1f4bf18f2e26f2debba..410945f11aee035aabc8cc4389f72098341e075c:/lib/hypervisor/hv_lxc.py diff --git a/lib/hypervisor/hv_lxc.py b/lib/hypervisor/hv_lxc.py index 9176ad7..e3f3d1e 100644 --- a/lib/hypervisor/hv_lxc.py +++ b/lib/hypervisor/hv_lxc.py @@ -1,7 +1,7 @@ # # -# Copyright (C) 2010 Google Inc. +# Copyright (C) 2010, 2013 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -29,8 +29,10 @@ import time import logging from ganeti import constants -from ganeti import errors # pylint: disable-msg=W0611 +from ganeti import errors # pylint: disable=W0611 from ganeti import utils +from ganeti import objects +from ganeti import pathutils from ganeti.hypervisor import hv_base from ganeti.errors import HypervisorError @@ -38,19 +40,9 @@ from ganeti.errors import HypervisorError class LXCHypervisor(hv_base.BaseHypervisor): """LXC-based virtualization. - Since current (Spring 2010) distributions are not yet ready for - running under a container, the following changes must be done - manually: - - remove udev - - disable the kernel log component of sysklogd/rsyslog/etc., - otherwise they will fail to read the log, and at least rsyslog - will fill the filesystem with error messages - TODO: - move hardcoded parameters into hypervisor parameters, once we have the container-parameter support - - implement memory limits, but only optionally, depending on host - kernel support Problems/issues: - LXC is very temperamental; in daemon mode, it succeeds or fails @@ -58,14 +50,9 @@ class LXCHypervisor(hv_base.BaseHypervisor): indication, and when failing it can leave network interfaces around, and future successful startups will list the instance twice - - shutdown sequence of containers leaves the init 'dead', and the - container effectively stopped, but LXC still believes the - container to be running; need to investigate using the - notify_on_release and release_agent feature of cgroups """ - _ROOT_DIR = constants.RUN_GANETI_DIR + "/lxc" - _LOG_FILE = constants.LOG_DIR + "hv_lxc.log" + _ROOT_DIR = pathutils.RUN_DIR + "/lxc" _DEVS = [ "c 1:3", # /dev/null "c 1:5", # /dev/zero @@ -89,6 +76,7 @@ class LXCHypervisor(hv_base.BaseHypervisor): _DIR_MODE = 0755 PARAMETERS = { + constants.HV_CPU_MASK: hv_base.OPT_CPU_MASK_CHECK, } def __init__(self): @@ -124,6 +112,13 @@ class LXCHypervisor(hv_base.BaseHypervisor): return utils.PathJoin(cls._ROOT_DIR, instance_name + ".conf") @classmethod + def _InstanceLogFile(cls, instance_name): + """Return the log file for an instance. + + """ + return utils.PathJoin(cls._ROOT_DIR, instance_name + ".log") + + @classmethod def _GetCgroupMountPoint(cls): for _, mountpoint, fstype, _ in utils.GetMounts(): if fstype == "cgroup": @@ -137,71 +132,87 @@ class LXCHypervisor(hv_base.BaseHypervisor): """ cgroup = cls._GetCgroupMountPoint() try: - cpus = utils.ReadFile(utils.PathJoin(cgroup, + cpus = utils.ReadFile(utils.PathJoin(cgroup, 'lxc', instance_name, "cpuset.cpus")) except EnvironmentError, err: raise errors.HypervisorError("Getting CPU list for instance" " %s failed: %s" % (instance_name, err)) - # cpuset.cpus format: comma-separated list of CPU ids - # or dash-separated id ranges - # Example: "0-1,3" - cpu_list = [] - for range_def in cpus.split(","): - boundaries = range_def.split("-") - n_elements = len(boundaries) - lower = int(boundaries[0]) - higher = int(boundaries[n_elements - 1]) - cpu_list.extend(range(lower, higher + 1)) - return cpu_list - - def ListInstances(self): + + return utils.ParseCpuMask(cpus) + + @classmethod + def _GetCgroupMemoryLimit(cls, instance_name): + """Return the memory limit for an instance + + """ + cgroup = cls._GetCgroupMountPoint() + try: + memory = int(utils.ReadFile(utils.PathJoin(cgroup, 'lxc', + instance_name, + "memory.limit_in_bytes"))) + except EnvironmentError: + # memory resource controller may be disabled, ignore + memory = 0 + + return memory + + def ListInstances(self, hvparams=None): """Get the list of running instances. """ - result = utils.RunCmd(["lxc-ls"]) - if result.failed: - raise errors.HypervisorError("Can't run lxc-ls: %s" % result.output) - return result.stdout.splitlines() + return [iinfo[0] for iinfo in self.GetAllInstancesInfo()] - def GetInstanceInfo(self, instance_name): + def GetInstanceInfo(self, instance_name, hvparams=None): """Get instance properties. @type instance_name: string @param instance_name: the instance name - + @type hvparams: dict of strings + @param hvparams: hvparams to be used with this instance + @rtype: tuple of strings @return: (name, id, memory, vcpus, stat, times) """ # TODO: read container info from the cgroup mountpoint - result = utils.RunCmd(["lxc-info", "-n", instance_name]) + result = utils.RunCmd(["lxc-info", "-s", "-n", instance_name]) if result.failed: - raise errors.HypervisorError("Can't run lxc-info: %s" % result.output) + raise errors.HypervisorError("Running lxc-info failed: %s" % + result.output) # lxc-info output examples: - # 'ganeti-lxc-test1' is STOPPED - # 'ganeti-lxc-test1' is RUNNING + # 'state: STOPPED + # 'state: RUNNING _, state = result.stdout.rsplit(None, 1) + if state != "RUNNING": + return None cpu_list = self._GetCgroupCpuList(instance_name) + memory = self._GetCgroupMemoryLimit(instance_name) / (1024 ** 2) + return (instance_name, 0, memory, len(cpu_list), 0, 0) - if state == "RUNNING": - return (instance_name, 0, 0, len(cpu_list), 0, 0) - return None - - def GetAllInstancesInfo(self): + def GetAllInstancesInfo(self, hvparams=None): """Get properties of all instances. + @type hvparams: dict of strings + @param hvparams: hypervisor parameter @return: [(name, id, memory, vcpus, stat, times),...] """ data = [] - for name in self.ListInstances(): - data.append(self.GetInstanceInfo(name)) + for name in os.listdir(self._ROOT_DIR): + try: + info = self.GetInstanceInfo(name) + except errors.HypervisorError: + continue + if info: + data.append(info) return data def _CreateConfigFile(self, instance, root_dir): - """Create an lxc.conf file for an instance""" + """Create an lxc.conf file for an instance. + + """ out = [] # hostname out.append("lxc.utsname = %s" % instance.name) @@ -225,6 +236,30 @@ class LXCHypervisor(hv_base.BaseHypervisor): # TODO: additional mounts, if we disable CAP_SYS_ADMIN + # CPUs + if instance.hvparams[constants.HV_CPU_MASK]: + cpu_list = utils.ParseCpuMask(instance.hvparams[constants.HV_CPU_MASK]) + cpus_in_mask = len(cpu_list) + if cpus_in_mask != instance.beparams["vcpus"]: + raise errors.HypervisorError("Number of VCPUs (%d) doesn't match" + " the number of CPUs in the" + " cpu_mask (%d)" % + (instance.beparams["vcpus"], + cpus_in_mask)) + out.append("lxc.cgroup.cpuset.cpus = %s" % + instance.hvparams[constants.HV_CPU_MASK]) + + # Memory + # Conditionally enable, memory resource controller might be disabled + cgroup = self._GetCgroupMountPoint() + if os.path.exists(utils.PathJoin(cgroup, 'memory.limit_in_bytes')): + out.append("lxc.cgroup.memory.limit_in_bytes = %dM" % + instance.beparams[constants.BE_MAXMEM]) + + if os.path.exists(utils.PathJoin(cgroup, 'memory.memsw.limit_in_bytes')): + out.append("lxc.cgroup.memory.memsw.limit_in_bytes = %dM" % + instance.beparams[constants.BE_MAXMEM]) + # Device control # deny direct device access out.append("lxc.cgroup.devices.deny = a") @@ -252,22 +287,31 @@ class LXCHypervisor(hv_base.BaseHypervisor): return "\n".join(out) + "\n" - def StartInstance(self, instance, block_devices): + def StartInstance(self, instance, block_devices, startup_paused): """Start an instance. - For LCX, we try to mount the block device and execute 'lxc-start - start' (we use volatile containers). + For LXC, we try to mount the block device and execute 'lxc-start'. + We use volatile containers. """ root_dir = self._InstanceDir(instance.name) try: utils.EnsureDirs([(root_dir, self._DIR_MODE)]) except errors.GenericError, err: - raise HypervisorError("Cannot create instance directory: %s", str(err)) + raise HypervisorError("Creating instance directory failed: %s", str(err)) conf_file = self._InstanceConfFile(instance.name) utils.WriteFile(conf_file, data=self._CreateConfigFile(instance, root_dir)) + log_file = self._InstanceLogFile(instance.name) + if not os.path.exists(log_file): + try: + utils.WriteFile(log_file, data="", mode=constants.SECURE_FILE_MODE) + except EnvironmentError, err: + raise errors.HypervisorError("Creating hypervisor log file %s for" + " instance %s failed: %s" % + (log_file, instance.name, err)) + if not os.path.ismount(root_dir): if not block_devices: raise HypervisorError("LXC needs at least one disk") @@ -275,10 +319,11 @@ class LXCHypervisor(hv_base.BaseHypervisor): sda_dev_path = block_devices[0][1] result = utils.RunCmd(["mount", sda_dev_path, root_dir]) if result.failed: - raise HypervisorError("Can't mount the chroot dir: %s" % result.output) - # TODO: replace the global log file with a per-instance log file + raise HypervisorError("Mounting the root dir of LXC instance %s" + " failed: %s" % (instance.name, result.output)) result = utils.RunCmd(["lxc-start", "-n", instance.name, - "-o", self._LOG_FILE, "-l", "DEBUG", + "-o", log_file, + "-l", "DEBUG", "-f", conf_file, "-d"]) if result.failed: raise HypervisorError("Running the lxc-start script failed: %s" % @@ -305,14 +350,17 @@ class LXCHypervisor(hv_base.BaseHypervisor): if not retry and not force: result = utils.RunCmd(["chroot", root_dir, "poweroff"]) if result.failed: - raise HypervisorError("Can't run 'poweroff' for the instance: %s" % - result.output) + raise HypervisorError("Running 'poweroff' on the instance" + " failed: %s" % result.output) time.sleep(2) result = utils.RunCmd(["lxc-stop", "-n", name]) if result.failed: logging.warning("Error while doing lxc-stop for %s: %s", name, result.output) + if not os.path.ismount(root_dir): + return + for mpath in self._GetMountSubdirs(root_dir): result = utils.RunCmd(["umount", mpath]) if result.failed: @@ -324,7 +372,7 @@ class LXCHypervisor(hv_base.BaseHypervisor): msg = ("Processes still alive in the chroot: %s" % utils.RunCmd("fuser -vm %s" % root_dir).output) logging.error(msg) - raise HypervisorError("Can't umount the chroot dir: %s (%s)" % + raise HypervisorError("Unmounting the chroot dir failed: %s (%s)" % (result.output, msg)) def RebootInstance(self, instance): @@ -337,45 +385,76 @@ class LXCHypervisor(hv_base.BaseHypervisor): raise HypervisorError("The LXC hypervisor doesn't implement the" " reboot functionality") - def GetNodeInfo(self): - """Return information about the node. + def BalloonInstanceMemory(self, instance, mem): + """Balloon an instance memory to a certain value. - This is just a wrapper over the base GetLinuxNodeInfo method. + @type instance: L{objects.Instance} + @param instance: instance to be accepted + @type mem: int + @param mem: actual memory size to use for instance runtime - @return: a dict with the following keys (values in MiB): - - memory_total: the total memory size on the node - - memory_free: the available memory on the node for instances - - memory_dom0: the memory used by the node itself, if available + """ + # Currently lxc instances don't have memory limits + pass + + def GetNodeInfo(self, hvparams=None): + """Return information about the node. + + See L{BaseHypervisor.GetLinuxNodeInfo}. """ return self.GetLinuxNodeInfo() @classmethod - def GetShellCommandForConsole(cls, instance, hvparams, beparams): + def GetInstanceConsole(cls, instance, primary_node, hvparams, beparams): """Return a command for connecting to the console of an instance. """ - return "lxc-console -n %s" % instance.name + return objects.InstanceConsole(instance=instance.name, + kind=constants.CONS_SSH, + host=primary_node.name, + user=constants.SSH_CONSOLE_USER, + command=["lxc-console", "-n", instance.name]) - def Verify(self): + def Verify(self, hvparams=None): """Verify the hypervisor. - For the chroot manager, it just checks the existence of the base dir. + For the LXC manager, it just checks the existence of the base dir. + + @type hvparams: dict of strings + @param hvparams: hypervisor parameters to be verified against; not used here + + @return: Problem description if something is wrong, C{None} otherwise """ + msgs = [] + if not os.path.exists(self._ROOT_DIR): - return "The required directory '%s' does not exist." % self._ROOT_DIR + msgs.append("The required directory '%s' does not exist" % + self._ROOT_DIR) + + try: + self._GetCgroupMountPoint() + except errors.HypervisorError, err: + msgs.append(str(err)) + + return self._FormatVerifyResults(msgs) @classmethod - def PowercycleNode(cls): + def PowercycleNode(cls, hvparams=None): """LXC powercycle, just a wrapper over Linux powercycle. + @type hvparams: dict of strings + @param hvparams: hypervisor params to be used on this node + """ cls.LinuxPowercycle() - def MigrateInstance(self, instance, target, live): + def MigrateInstance(self, cluster_name, instance, target, live): """Migrate an instance. + @type cluster_name: string + @param cluster_name: name of the cluster @type instance: L{objects.Instance} @param instance: the instance to be migrated @type target: string @@ -384,4 +463,17 @@ class LXCHypervisor(hv_base.BaseHypervisor): @param live: whether to do a live or non-live migration """ - raise HypervisorError("Migration not supported by the LXC hypervisor") + raise HypervisorError("Migration is not supported by the LXC hypervisor") + + def GetMigrationStatus(self, instance): + """Get the migration status + + @type instance: L{objects.Instance} + @param instance: the instance that is being migrated + @rtype: L{objects.MigrationStatus} + @return: the status of the current migration (one of + L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional + progress info that can be retrieved from the hypervisor + + """ + raise HypervisorError("Migration is not supported by the LXC hypervisor")