From 263b8de67f923b3484f42b937410a13c9cc85e86 Mon Sep 17 00:00:00 2001 From: Guido Trotter Date: Mon, 22 Mar 2010 11:08:50 +0000 Subject: [PATCH] KVM: Check instances for actual liveness Currently if we find a live process with the pid we saved we assume kvm is alive. What could happen, though, is that the pidfile has been reused. In order to avoid that we change the check to make sure, everywhere, that the process we see is our actual kvm process. In order to do so we open its cmdline, and check that it contains the correct instance name in the -name argument passed to kvm. Signed-off-by: Guido Trotter Reviewed-by: Iustin Pop --- lib/hypervisor/hv_kvm.py | 98 +++++++++++++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 31 deletions(-) diff --git a/lib/hypervisor/hv_kvm.py b/lib/hypervisor/hv_kvm.py index 17fd286..2035e2d 100644 --- a/lib/hypervisor/hv_kvm.py +++ b/lib/hypervisor/hv_kvm.py @@ -107,13 +107,69 @@ class KVMHypervisor(hv_base.BaseHypervisor): """ return utils.PathJoin(cls._PIDS_DIR, instance_name) + @classmethod + def _InstancePidInfo(cls, pid): + """Check pid file for instance information. + + Check that a pid file is associated with an instance, and retrieve + information from its command line. + + @type pid: string or int + @param pid: process id of the instance to check + @rtype: tuple + @return: (instance_name, memory, vcpus) + @raise errors.HypervisorError: when an instance cannot be found + + """ + alive = utils.IsProcessAlive(pid) + if not alive: + raise errors.HypervisorError("Cannot get info for pid %s" % pid) + + cmdline_file = utils.PathJoin("/proc", str(pid), "cmdline") + try: + cmdline = utils.ReadFile(cmdline_file) + except EnvironmentError, err: + raise errors.HypervisorError("Can't open cmdline file for pid %s: %s" % + (pid, err)) + + instance = None + memory = 0 + vcpus = 0 + + arg_list = cmdline.split('\x00') + while arg_list: + arg = arg_list.pop(0) + if arg == "-name": + instance = arg_list.pop(0) + elif arg == "-m": + memory = int(arg_list.pop(0)) + elif arg == "-smp": + vcpus = int(arg_list.pop(0)) + + if instance is None: + raise errors.HypervisorError("Pid %s doesn't contain a ganeti kvm" + " instance" % pid) + + return (instance, memory, vcpus) + def _InstancePidAlive(self, instance_name): - """Returns the instance pid and pidfile + """Returns the instance pidfile, pid, and liveness. + + @type instance_name: string + @param instance_name: instance name + @rtype: tuple + @return: (pid file name, pid, liveness) """ pidfile = self._InstancePidFile(instance_name) pid = utils.ReadPidFile(pidfile) - alive = utils.IsProcessAlive(pid) + + alive = False + try: + cmd_instance = self._InstancePidInfo(pid)[0] + alive = (cmd_instance == instance_name) + except errors.HypervisorError: + pass return (pidfile, pid, alive) @@ -250,8 +306,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): """ result = [] for name in os.listdir(self._PIDS_DIR): - filename = utils.PathJoin(self._PIDS_DIR, name) - if utils.IsProcessAlive(utils.ReadPidFile(filename)): + if self._InstancePidAlive(name)[2]: result.append(name) return result @@ -268,26 +323,10 @@ class KVMHypervisor(hv_base.BaseHypervisor): if not alive: return None - cmdline_file = utils.PathJoin("/proc", str(pid), "cmdline") - try: - cmdline = utils.ReadFile(cmdline_file) - except EnvironmentError, err: - raise errors.HypervisorError("Failed to list instance %s: %s" % - (instance_name, err)) - - memory = 0 - vcpus = 0 + _, memory, vcpus = self._InstancePidInfo(pid) stat = "---b-" times = "0" - arg_list = cmdline.split('\x00') - while arg_list: - arg = arg_list.pop(0) - if arg == '-m': - memory = int(arg_list.pop(0)) - elif arg == '-smp': - vcpus = int(arg_list.pop(0)) - return (instance_name, pid, memory, vcpus, stat, times) def GetAllInstancesInfo(self): @@ -298,15 +337,12 @@ class KVMHypervisor(hv_base.BaseHypervisor): """ data = [] for name in os.listdir(self._PIDS_DIR): - filename = utils.PathJoin(self._PIDS_DIR, name) - if utils.IsProcessAlive(utils.ReadPidFile(filename)): - try: - info = self.GetInstanceInfo(name) - except errors.HypervisorError: - continue - if info: - data.append(info) - + try: + info = self.GetInstanceInfo(name) + except errors.HypervisorError: + continue + if info: + data.append(info) return data def _GenerateKVMRuntime(self, instance, block_devices): @@ -588,7 +624,7 @@ class KVMHypervisor(hv_base.BaseHypervisor): else: self._CallMonitorCommand(instance.name, 'system_powerdown') - if not utils.IsProcessAlive(pid): + if not self._InstancePidAlive(instance.name)[2]: self._RemoveInstanceRuntimeFiles(pidfile, instance.name) return True else: -- 1.7.10.4