ListVisibleFiles: require normalized path names
[ganeti-local] / lib / hypervisor / hv_kvm.py
index 6d8cdf5..03b2448 100644 (file)
@@ -80,6 +80,8 @@ class KVMHypervisor(hv_base.BaseHypervisor):
 
   _MIGRATION_STATUS_RE = re.compile('Migration\s+status:\s+(\w+)',
                                     re.M | re.I)
+  _MIGRATION_INFO_MAX_BAD_ANSWERS = 5
+  _MIGRATION_INFO_RETRY_DELAY = 2
 
   _KVM_NETWORK_SCRIPT = constants.SYSCONFDIR + "/ganeti/kvm-vif-bridge"
 
@@ -94,12 +96,12 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     dirs = [(dname, constants.RUN_DIRS_MODE) for dname in self._DIRS]
     utils.EnsureDirs(dirs)
 
-  def _InstancePidFile(self, instance_name):
+  @classmethod
+  def _InstancePidFile(cls, instance_name):
     """Returns the instance pidfile.
 
     """
-    pidfile = "%s/%s" % (self._PIDS_DIR, instance_name)
-    return pidfile
+    return utils.PathJoin(cls._PIDS_DIR, instance_name)
 
   def _InstancePidAlive(self, instance_name):
     """Returns the instance pid and pidfile
@@ -111,19 +113,28 @@ class KVMHypervisor(hv_base.BaseHypervisor):
 
     return (pidfile, pid, alive)
 
+  def _CheckDown(self, instance_name):
+    """Raises an error unless the given instance is down.
+
+    """
+    alive = self._InstancePidAlive(instance_name)[2]
+    if alive:
+      raise errors.HypervisorError("Failed to start instance %s: %s" %
+                                   (instance_name, "already running"))
+
   @classmethod
   def _InstanceMonitor(cls, instance_name):
     """Returns the instance monitor socket name
 
     """
-    return '%s/%s.monitor' % (cls._CTRL_DIR, instance_name)
+    return utils.PathJoin(cls._CTRL_DIR, "%s.monitor" % instance_name)
 
   @classmethod
   def _InstanceSerial(cls, instance_name):
     """Returns the instance serial socket name
 
     """
-    return '%s/%s.serial' % (cls._CTRL_DIR, instance_name)
+    return utils.PathJoin(cls._CTRL_DIR, "%s.serial" % instance_name)
 
   @staticmethod
   def _SocatUnixConsoleParams():
@@ -142,7 +153,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     """Returns the instance KVM runtime filename
 
     """
-    return '%s/%s.runtime' % (cls._CONF_DIR, instance_name)
+    return utils.PathJoin(cls._CONF_DIR, "%s.runtime" % instance_name)
 
   @classmethod
   def _RemoveInstanceRuntimeFiles(cls, pidfile, instance_name):
@@ -172,6 +183,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     script = StringIO()
     script.write("#!/bin/sh\n")
     script.write("# this is autogenerated by Ganeti, please do not edit\n#\n")
+    script.write("PATH=$PATH:/sbin:/usr/sbin\n")
     script.write("export INSTANCE=%s\n" % instance.name)
     script.write("export MAC=%s\n" % nic.mac)
     if nic.ip:
@@ -187,21 +199,21 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     script.write("  # Execute the user-specific vif file\n")
     script.write("  %s\n" % self._KVM_NETWORK_SCRIPT)
     script.write("else\n")
-    script.write("  /sbin/ifconfig $INTERFACE 0.0.0.0 up\n")
+    script.write("  ifconfig $INTERFACE 0.0.0.0 up\n")
     if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
       script.write("  # Connect the interface to the bridge\n")
-      script.write("  /usr/sbin/brctl addif $BRIDGE $INTERFACE\n")
+      script.write("  brctl addif $BRIDGE $INTERFACE\n")
     elif nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_ROUTED:
       if not nic.ip:
         raise errors.HypervisorError("nic/%d is routed, but has no ip." % seq)
       script.write("  # Route traffic targeted at the IP to the interface\n")
       if nic.nicparams[constants.NIC_LINK]:
-        script.write("  while /sbin/ip rule del dev $INTERFACE; do :; done\n")
-        script.write("  /sbin/ip rule add dev $INTERFACE table $LINK\n")
-        script.write("  /sbin/ip route replace $IP table $LINK proto static"
+        script.write("  while ip rule del dev $INTERFACE; do :; done\n")
+        script.write("  ip rule add dev $INTERFACE table $LINK\n")
+        script.write("  ip route replace $IP table $LINK proto static"
                      " dev $INTERFACE\n")
       else:
-        script.write("  /sbin/ip route replace $IP proto static"
+        script.write("  ip route replace $IP proto static"
                      " dev $INTERFACE\n")
       interface_v4_conf = "/proc/sys/net/ipv4/conf/$INTERFACE"
       interface_v6_conf = "/proc/sys/net/ipv6/conf/$INTERFACE"
@@ -234,7 +246,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     """
     result = []
     for name in os.listdir(self._PIDS_DIR):
-      filename = "%s/%s" % (self._PIDS_DIR, name)
+      filename = utils.PathJoin(self._PIDS_DIR, name)
       if utils.IsProcessAlive(utils.ReadPidFile(filename)):
         result.append(name)
     return result
@@ -251,7 +263,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     if not alive:
       return None
 
-    cmdline_file = "/proc/%s/cmdline" % pid
+    cmdline_file = utils.PathJoin("/proc", str(pid), "cmdline")
     try:
       cmdline = utils.ReadFile(cmdline_file)
     except EnvironmentError, err:
@@ -281,7 +293,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     """
     data = []
     for name in os.listdir(self._PIDS_DIR):
-      filename = "%s/%s" % (self._PIDS_DIR, name)
+      filename = utils.PathJoin(self._PIDS_DIR, name)
       if utils.IsProcessAlive(utils.ReadPidFile(filename)):
         try:
           info = self.GetInstanceInfo(name)
@@ -477,11 +489,9 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     @param incoming: (target_host_ip, port)
 
     """
-    pidfile, _, alive = self._InstancePidAlive(instance.name)
     hvp = instance.hvparams
-    if alive:
-      raise errors.HypervisorError("Failed to start instance %s: %s" %
-                                   (instance.name, "already running"))
+    name = instance.name
+    self._CheckDown(name)
 
     temp_files = []
 
@@ -519,12 +529,10 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     result = utils.RunCmd(kvm_cmd)
     if result.failed:
       raise errors.HypervisorError("Failed to start instance %s: %s (%s)" %
-                                   (instance.name, result.fail_reason,
-                                    result.output))
+                                   (name, result.fail_reason, result.output))
 
-    if not utils.IsProcessAlive(utils.ReadPidFile(pidfile)):
-      raise errors.HypervisorError("Failed to start instance %s" %
-                                   (instance.name))
+    if not self._InstancePidAlive(name)[2]:
+      raise errors.HypervisorError("Failed to start instance %s" % name)
 
     if vnc_pwd:
       change_cmd = 'change vnc password %s' % vnc_pwd
@@ -537,11 +545,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     """Start an instance.
 
     """
-    pidfile, pid, alive = self._InstancePidAlive(instance.name)
-    if alive:
-      raise errors.HypervisorError("Failed to start instance %s: %s" %
-                                   (instance.name, "already running"))
-
+    self._CheckDown(instance.name)
     kvm_runtime = self._GenerateKVMRuntime(instance, block_devices)
     self._SaveKVMRuntime(instance, kvm_runtime)
     self._ExecuteKVMRuntime(instance, kvm_runtime)
@@ -674,26 +678,37 @@ class KVMHypervisor(hv_base.BaseHypervisor):
 
     info_command = 'info migrate'
     done = False
+    broken_answers = 0
     while not done:
       result = self._CallMonitorCommand(instance_name, info_command)
       match = self._MIGRATION_STATUS_RE.search(result.stdout)
       if not match:
-        raise errors.HypervisorError("Unknown 'info migrate' result: %s" %
-                                     result.stdout)
+        broken_answers += 1
+        if not result.stdout:
+          logging.info("KVM: empty 'info migrate' result")
+        else:
+          logging.warning("KVM: unknown 'info migrate' result: %s",
+                          result.stdout)
+        time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
       else:
         status = match.group(1)
         if status == 'completed':
           done = True
         elif status == 'active':
-          time.sleep(2)
+          # reset the broken answers count
+          broken_answers = 0
+          time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
         elif status == 'failed' or status == 'cancelled':
           if not live:
             self._CallMonitorCommand(instance_name, 'cont')
           raise errors.HypervisorError("Migration %s at the kvm level" %
                                        status)
         else:
-          logging.info("KVM: unknown migration status '%s'", status)
-          time.sleep(2)
+          logging.warning("KVM: unknown migration status '%s'", status)
+          broken_answers += 1
+          time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
+      if broken_answers >= self._MIGRATION_INFO_MAX_BAD_ANSWERS:
+        raise errors.HypervisorError("Too many 'info migrate' broken answers")
 
     utils.KillProcess(pid)
     self._RemoveInstanceRuntimeFiles(pidfile, instance_name)