Fix migration on new KVMs
[ganeti-local] / lib / hypervisor / hv_kvm.py
1 #
2 #
3
4 # Copyright (C) 2008, 2009, 2010 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """KVM hypervisor
23
24 """
25
26 import errno
27 import os
28 import os.path
29 import re
30 import tempfile
31 import time
32 import logging
33 import pwd
34 from cStringIO import StringIO
35
36 from ganeti import utils
37 from ganeti import constants
38 from ganeti import errors
39 from ganeti import serializer
40 from ganeti import objects
41 from ganeti import uidpool
42 from ganeti import ssconf
43 from ganeti.hypervisor import hv_base
44 from ganeti import netutils
45
46
# Site-specific hook for NIC setup: the script generated by _WriteNetScript
# runs this file instead of its built-in bridge/route setup whenever the file
# exists and is executable.
_KVM_NETWORK_SCRIPT = constants.SYSCONFDIR + "/ganeti/kvm-vif-bridge"
48
49
def _WriteNetScript(instance, nic, index):
  """Write a script to connect a net interface to the proper bridge.

  This can be used by any qemu-type hypervisor.

  The generated shell script exports the instance and NIC details as
  environment variables. It then hands over to the site-specific script
  (C{_KVM_NETWORK_SCRIPT}) if that is executable, and otherwise configures
  the interface itself according to the NIC mode.

  @type instance: L{objects.Instance}
  @param instance: Instance object
  @type nic: L{objects.NIC}
  @param nic: NIC object
  @type index: int
  @param index: NIC index
  @return: Script
  @rtype: string
  @raise errors.HypervisorError: if the NIC is in routed mode but has no IP
      address

  """
  nic_mode = nic.nicparams[constants.NIC_MODE]
  nic_link = nic.nicparams[constants.NIC_LINK]
  # Quoted once and used both for the "-x" test and for the invocation, so
  # that the two always refer to the same path
  custom_script = utils.ShellQuote(_KVM_NETWORK_SCRIPT)

  if instance.tags:
    tags = " ".join(instance.tags)
  else:
    tags = ""

  buf = StringIO()
  sw = utils.ShellWriter(buf)
  sw.Write("#!/bin/sh")
  sw.Write("# this is autogenerated by Ganeti, please do not edit")
  sw.Write("export PATH=$PATH:/sbin:/usr/sbin")
  sw.Write("export INSTANCE=%s", utils.ShellQuote(instance.name))
  sw.Write("export MAC=%s", utils.ShellQuote(nic.mac))
  sw.Write("export MODE=%s", utils.ShellQuote(nic_mode))
  # The interface name is passed as first argument to the script
  sw.Write("export INTERFACE=\"$1\"")
  sw.Write("export TAGS=%s", utils.ShellQuote(tags))

  if nic.ip:
    sw.Write("export IP=%s", utils.ShellQuote(nic.ip))

  if nic_link:
    sw.Write("export LINK=%s", utils.ShellQuote(nic_link))

  if nic_mode == constants.NIC_MODE_BRIDGED:
    sw.Write("export BRIDGE=%s", utils.ShellQuote(nic_link))

  # TODO: make this configurable at ./configure time
  sw.Write("if [ -x %s ]; then", custom_script)
  sw.IncIndent()
  try:
    sw.Write("# Execute the user-specific vif file")
    sw.Write("%s", custom_script)
  finally:
    sw.DecIndent()
  sw.Write("else")
  sw.IncIndent()
  try:
    sw.Write("ifconfig $INTERFACE 0.0.0.0 up")

    if nic_mode == constants.NIC_MODE_BRIDGED:
      sw.Write("# Connect the interface to the bridge")
      sw.Write("brctl addif $BRIDGE $INTERFACE")

    elif nic_mode == constants.NIC_MODE_ROUTED:
      if not nic.ip:
        raise errors.HypervisorError("nic/%d is routed, but has no IP"
                                     " address" % index)

      sw.Write("# Route traffic targeted at the IP to the interface")
      if nic_link:
        # A link on a routed NIC is used as the routing table
        sw.Write("while ip rule del dev $INTERFACE; do :; done")
        sw.Write("ip rule add dev $INTERFACE table $LINK")
        sw.Write("ip route replace $IP table $LINK proto static"
                 " dev $INTERFACE")
      else:
        sw.Write("ip route replace $IP proto static dev $INTERFACE")

      # Enable proxying and forwarding for each address family whose
      # per-interface settings directory exists
      for (conf_dir, proxy_knob) in [
        ("/proc/sys/net/ipv4/conf/$INTERFACE", "proxy_arp"),
        ("/proc/sys/net/ipv6/conf/$INTERFACE", "proxy_ndp"),
        ]:
        sw.Write("if [ -d %s ]; then", conf_dir)
        sw.IncIndent()
        try:
          sw.Write("echo 1 > %s/%s", conf_dir, proxy_knob)
          sw.Write("echo 1 > %s/forwarding", conf_dir)
        finally:
          sw.DecIndent()
        sw.Write("fi")
  finally:
    sw.DecIndent()
  sw.Write("fi")

  return buf.getvalue()
148
149
class KVMHypervisor(hv_base.BaseHypervisor):
  """KVM hypervisor interface

  Per-instance state (pidfiles, reserved uids, control sockets, saved
  startup data and chroot directories) is kept in subdirectories of
  C{_ROOT_DIR}.

  """
  CAN_MIGRATE = True

  _ROOT_DIR = constants.RUN_GANETI_DIR + "/kvm-hypervisor"
  _PIDS_DIR = _ROOT_DIR + "/pid" # contains live instances pids
  _UIDS_DIR = _ROOT_DIR + "/uid" # contains instances reserved uids
  _CTRL_DIR = _ROOT_DIR + "/ctrl" # contains instances control sockets
  _CONF_DIR = _ROOT_DIR + "/conf" # contains instances startup data
  # KVM instances with chroot enabled are started in empty chroot directories.
  _CHROOT_DIR = _ROOT_DIR + "/chroot" # for empty chroot directories
  # After an instance is stopped, its chroot directory is removed.
  # If the chroot directory is not empty, it can't be removed.
  # A non-empty chroot directory indicates a possible security incident.
  # To support forensics, the non-empty chroot directory is quarantined in
  # a separate directory, called 'chroot-quarantine'.
  _CHROOT_QUARANTINE_DIR = _ROOT_DIR + "/chroot-quarantine"
  # All directories which __init__ makes sure exist
  _DIRS = [_ROOT_DIR, _PIDS_DIR, _UIDS_DIR, _CTRL_DIR, _CONF_DIR,
           _CHROOT_DIR, _CHROOT_QUARANTINE_DIR]

  # Hypervisor parameters accepted by this hypervisor, each mapped to the
  # hv_base check descriptor used to validate it
  PARAMETERS = {
    constants.HV_KERNEL_PATH: hv_base.OPT_FILE_CHECK,
    constants.HV_INITRD_PATH: hv_base.OPT_FILE_CHECK,
    constants.HV_ROOT_PATH: hv_base.NO_CHECK,
    constants.HV_KERNEL_ARGS: hv_base.NO_CHECK,
    constants.HV_ACPI: hv_base.NO_CHECK,
    constants.HV_SERIAL_CONSOLE: hv_base.NO_CHECK,
    # Either an IPv4 address to listen on or a directory in which the VNC
    # unix socket is created (see _GenerateKVMRuntime)
    # NOTE(review): the meaning of the remaining tuple fields is defined by
    # hv_base and not visible here
    constants.HV_VNC_BIND_ADDRESS:
      (False, lambda x: (netutils.IsValidIP4(x) or utils.IsNormAbsPath(x)),
       "the VNC bind address must be either a valid IP address or an absolute"
       " pathname", None, None),
    constants.HV_VNC_TLS: hv_base.NO_CHECK,
    constants.HV_VNC_X509: hv_base.OPT_DIR_CHECK,
    constants.HV_VNC_X509_VERIFY: hv_base.NO_CHECK,
    constants.HV_VNC_PASSWORD_FILE: hv_base.OPT_FILE_CHECK,
    constants.HV_CDROM_IMAGE_PATH: hv_base.OPT_FILE_CHECK,
    constants.HV_BOOT_ORDER:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_BO_TYPES),
    constants.HV_NIC_TYPE:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_NIC_TYPES),
    constants.HV_DISK_TYPE:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_DISK_TYPES),
    constants.HV_USB_MOUSE:
      hv_base.ParamInSet(False, constants.HT_KVM_VALID_MOUSE_TYPES),
    constants.HV_MIGRATION_PORT: hv_base.NET_PORT_CHECK,
    constants.HV_MIGRATION_BANDWIDTH: hv_base.NO_CHECK,
    constants.HV_MIGRATION_DOWNTIME: hv_base.NO_CHECK,
    constants.HV_MIGRATION_MODE: hv_base.MIGRATION_MODE_CHECK,
    constants.HV_USE_LOCALTIME: hv_base.NO_CHECK,
    constants.HV_DISK_CACHE:
      hv_base.ParamInSet(True, constants.HT_VALID_CACHE_TYPES),
    constants.HV_SECURITY_MODEL:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_SM_TYPES),
    constants.HV_SECURITY_DOMAIN: hv_base.NO_CHECK,
    constants.HV_KVM_FLAG:
      hv_base.ParamInSet(False, constants.HT_KVM_FLAG_VALUES),
    constants.HV_VHOST_NET: hv_base.NO_CHECK,
    constants.HV_KVM_USE_CHROOT: hv_base.NO_CHECK,
    }

  # Extracts the status word from text containing "Migration status: <word>"
  # (case-insensitive)
  # NOTE(review): these three are not used in the visible part of the file;
  # presumably they drive the polling of the migration state further down
  _MIGRATION_STATUS_RE = re.compile('Migration\s+status:\s+(\w+)',
                                    re.M | re.I)
  _MIGRATION_INFO_MAX_BAD_ANSWERS = 5
  _MIGRATION_INFO_RETRY_DELAY = 2

  # NOTE(review): presumably the extra files this hypervisor wants kept in
  # sync across nodes; the consumer is not visible here
  ANCILLARY_FILES = [
    _KVM_NETWORK_SCRIPT,
    ]
218
219   def __init__(self):
220     hv_base.BaseHypervisor.__init__(self)
221     # Let's make sure the directories we need exist, even if the RUN_DIR lives
222     # in a tmpfs filesystem or has been otherwise wiped out.
223     dirs = [(dname, constants.RUN_DIRS_MODE) for dname in self._DIRS]
224     utils.EnsureDirs(dirs)
225
226   @classmethod
227   def _InstancePidFile(cls, instance_name):
228     """Returns the instance pidfile.
229
230     """
231     return utils.PathJoin(cls._PIDS_DIR, instance_name)
232
233   @classmethod
234   def _InstanceUidFile(cls, instance_name):
235     """Returns the instance uidfile.
236
237     """
238     return utils.PathJoin(cls._UIDS_DIR, instance_name)
239
240   @classmethod
241   def _InstancePidInfo(cls, pid):
242     """Check pid file for instance information.
243
244     Check that a pid file is associated with an instance, and retrieve
245     information from its command line.
246
247     @type pid: string or int
248     @param pid: process id of the instance to check
249     @rtype: tuple
250     @return: (instance_name, memory, vcpus)
251     @raise errors.HypervisorError: when an instance cannot be found
252
253     """
254     alive = utils.IsProcessAlive(pid)
255     if not alive:
256       raise errors.HypervisorError("Cannot get info for pid %s" % pid)
257
258     cmdline_file = utils.PathJoin("/proc", str(pid), "cmdline")
259     try:
260       cmdline = utils.ReadFile(cmdline_file)
261     except EnvironmentError, err:
262       raise errors.HypervisorError("Can't open cmdline file for pid %s: %s" %
263                                    (pid, err))
264
265     instance = None
266     memory = 0
267     vcpus = 0
268
269     arg_list = cmdline.split('\x00')
270     while arg_list:
271       arg =  arg_list.pop(0)
272       if arg == "-name":
273         instance = arg_list.pop(0)
274       elif arg == "-m":
275         memory = int(arg_list.pop(0))
276       elif arg == "-smp":
277         vcpus = int(arg_list.pop(0))
278
279     if instance is None:
280       raise errors.HypervisorError("Pid %s doesn't contain a ganeti kvm"
281                                    " instance" % pid)
282
283     return (instance, memory, vcpus)
284
285   def _InstancePidAlive(self, instance_name):
286     """Returns the instance pidfile, pid, and liveness.
287
288     @type instance_name: string
289     @param instance_name: instance name
290     @rtype: tuple
291     @return: (pid file name, pid, liveness)
292
293     """
294     pidfile = self._InstancePidFile(instance_name)
295     pid = utils.ReadPidFile(pidfile)
296
297     alive = False
298     try:
299       cmd_instance = self._InstancePidInfo(pid)[0]
300       alive = (cmd_instance == instance_name)
301     except errors.HypervisorError:
302       pass
303
304     return (pidfile, pid, alive)
305
306   def _CheckDown(self, instance_name):
307     """Raises an error unless the given instance is down.
308
309     """
310     alive = self._InstancePidAlive(instance_name)[2]
311     if alive:
312       raise errors.HypervisorError("Failed to start instance %s: %s" %
313                                    (instance_name, "already running"))
314
315   @classmethod
316   def _InstanceMonitor(cls, instance_name):
317     """Returns the instance monitor socket name
318
319     """
320     return utils.PathJoin(cls._CTRL_DIR, "%s.monitor" % instance_name)
321
322   @classmethod
323   def _InstanceSerial(cls, instance_name):
324     """Returns the instance serial socket name
325
326     """
327     return utils.PathJoin(cls._CTRL_DIR, "%s.serial" % instance_name)
328
329   @staticmethod
330   def _SocatUnixConsoleParams():
331     """Returns the correct parameters for socat
332
333     If we have a new-enough socat we can use raw mode with an escape character.
334
335     """
336     if constants.SOCAT_USE_ESCAPE:
337       return "raw,echo=0,escape=%s" % constants.SOCAT_ESCAPE_CODE
338     else:
339       return "echo=0,icanon=0"
340
341   @classmethod
342   def _InstanceKVMRuntime(cls, instance_name):
343     """Returns the instance KVM runtime filename
344
345     """
346     return utils.PathJoin(cls._CONF_DIR, "%s.runtime" % instance_name)
347
348   @classmethod
349   def _InstanceChrootDir(cls, instance_name):
350     """Returns the name of the KVM chroot dir of the instance
351
352     """
353     return utils.PathJoin(cls._CHROOT_DIR, instance_name)
354
355   @classmethod
356   def _TryReadUidFile(cls, uid_file):
357     """Try to read a uid file
358
359     """
360     if os.path.exists(uid_file):
361       try:
362         uid = int(utils.ReadOneLineFile(uid_file))
363         return uid
364       except EnvironmentError:
365         logging.warning("Can't read uid file", exc_info=True)
366       except (TypeError, ValueError):
367         logging.warning("Can't parse uid file contents", exc_info=True)
368     return None
369
370   @classmethod
371   def _RemoveInstanceRuntimeFiles(cls, pidfile, instance_name):
372     """Removes an instance's rutime sockets/files/dirs.
373
374     """
375     utils.RemoveFile(pidfile)
376     utils.RemoveFile(cls._InstanceMonitor(instance_name))
377     utils.RemoveFile(cls._InstanceSerial(instance_name))
378     utils.RemoveFile(cls._InstanceKVMRuntime(instance_name))
379     uid_file = cls._InstanceUidFile(instance_name)
380     uid = cls._TryReadUidFile(uid_file)
381     utils.RemoveFile(uid_file)
382     if uid is not None:
383       uidpool.ReleaseUid(uid)
384     try:
385       chroot_dir = cls._InstanceChrootDir(instance_name)
386       utils.RemoveDir(chroot_dir)
387     except OSError, err:
388       if err.errno == errno.ENOTEMPTY:
389         # The chroot directory is expected to be empty, but it isn't.
390         new_chroot_dir = tempfile.mkdtemp(dir=cls._CHROOT_QUARANTINE_DIR,
391                                           prefix="%s-%s-" %
392                                           (instance_name,
393                                            utils.TimestampForFilename()))
394         logging.warning("The chroot directory of instance %s can not be"
395                         " removed as it is not empty. Moving it to the"
396                         " quarantine instead. Please investigate the"
397                         " contents (%s) and clean up manually",
398                         instance_name, new_chroot_dir)
399         utils.RenameFile(chroot_dir, new_chroot_dir)
400       else:
401         raise
402
403   @staticmethod
404   def _WriteNetScriptFile(instance, seq, nic):
405     """Write a script to connect a net interface to the proper bridge.
406
407     This can be used by any qemu-type hypervisor.
408
409     @param instance: instance we're acting on
410     @type instance: instance object
411     @param seq: nic sequence number
412     @type seq: int
413     @param nic: nic we're acting on
414     @type nic: nic object
415     @return: netscript file name
416     @rtype: string
417
418     """
419     script = _WriteNetScript(instance, nic, seq)
420
421     # As much as we'd like to put this in our _ROOT_DIR, that will happen to be
422     # mounted noexec sometimes, so we'll have to find another place.
423     (tmpfd, tmpfile_name) = tempfile.mkstemp()
424     tmpfile = os.fdopen(tmpfd, 'w')
425     try:
426       tmpfile.write(script)
427     finally:
428       tmpfile.close()
429     os.chmod(tmpfile_name, 0755)
430     return tmpfile_name
431
432   def ListInstances(self):
433     """Get the list of running instances.
434
435     We can do this by listing our live instances directory and
436     checking whether the associated kvm process is still alive.
437
438     """
439     result = []
440     for name in os.listdir(self._PIDS_DIR):
441       if self._InstancePidAlive(name)[2]:
442         result.append(name)
443     return result
444
445   def GetInstanceInfo(self, instance_name):
446     """Get instance properties.
447
448     @type instance_name: string
449     @param instance_name: the instance name
450     @rtype: tuple of strings
451     @return: (name, id, memory, vcpus, stat, times)
452
453     """
454     _, pid, alive = self._InstancePidAlive(instance_name)
455     if not alive:
456       return None
457
458     _, memory, vcpus = self._InstancePidInfo(pid)
459     stat = "---b-"
460     times = "0"
461
462     return (instance_name, pid, memory, vcpus, stat, times)
463
464   def GetAllInstancesInfo(self):
465     """Get properties of all instances.
466
467     @return: list of tuples (name, id, memory, vcpus, stat, times)
468
469     """
470     data = []
471     for name in os.listdir(self._PIDS_DIR):
472       try:
473         info = self.GetInstanceInfo(name)
474       except errors.HypervisorError:
475         continue
476       if info:
477         data.append(info)
478     return data
479
  def _GenerateKVMRuntime(self, instance, block_devices):
    """Generate KVM information to start an instance.

    Only the part of the command line which doesn't need last-minute data is
    built here; the NICs are returned unexpanded and turned into arguments
    by L{_ExecuteKVMRuntime}.

    @type instance: L{objects.Instance}
    @param instance: instance to generate the runtime for
    @type block_devices: list of tuples
    @param block_devices: (disk object, device path) pairs, in disk order
    @rtype: tuple
    @return: (kvm_cmd, kvm_nics, hvparams), where kvm_cmd is the argument
        list, kvm_nics the instance's NIC objects and hvparams the
        instance's hypervisor parameters
    @raise errors.HypervisorError: if one of the disks is not read-write

    """
    pidfile  = self._InstancePidFile(instance.name)
    kvm = constants.KVM_PATH
    kvm_cmd = [kvm]
    # -name, -m and -smp are also what _InstancePidInfo reads back from the
    # process command line to identify a running instance
    kvm_cmd.extend(['-name', instance.name])
    kvm_cmd.extend(['-m', instance.beparams[constants.BE_MEMORY]])
    kvm_cmd.extend(['-smp', instance.beparams[constants.BE_VCPUS]])
    kvm_cmd.extend(['-pidfile', pidfile])
    kvm_cmd.extend(['-daemonize'])
    if not instance.hvparams[constants.HV_ACPI]:
      kvm_cmd.extend(['-no-acpi'])

    hvp = instance.hvparams
    boot_disk = hvp[constants.HV_BOOT_ORDER] == constants.HT_BO_DISK
    boot_cdrom = hvp[constants.HV_BOOT_ORDER] == constants.HT_BO_CDROM
    boot_network = hvp[constants.HV_BOOT_ORDER] == constants.HT_BO_NETWORK

    # Any other value of the KVM flag leaves the choice to the kvm binary
    if hvp[constants.HV_KVM_FLAG] == constants.HT_KVM_ENABLED:
      kvm_cmd.extend(["-enable-kvm"])
    elif hvp[constants.HV_KVM_FLAG] == constants.HT_KVM_DISABLED:
      kvm_cmd.extend(["-disable-kvm"])

    if boot_network:
      kvm_cmd.extend(['-boot', 'n'])

    # Interface type shared by all the disks
    disk_type = hvp[constants.HV_DISK_TYPE]
    if disk_type == constants.HT_DISK_PARAVIRTUAL:
      if_val = ',if=virtio'
    else:
      if_val = ',if=%s' % disk_type
    # Cache mode
    disk_cache = hvp[constants.HV_DISK_CACHE]
    if disk_cache != constants.HT_CACHE_DEFAULT:
      cache_val = ",cache=%s" % disk_cache
    else:
      cache_val = ""
    for cfdev, dev_path in block_devices:
      if cfdev.mode != constants.DISK_RDWR:
        raise errors.HypervisorError("Instance has read-only disks which"
                                     " are not supported by KVM")
      # TODO: handle FD_LOOP and FD_BLKTAP (?)
      if boot_disk:
        kvm_cmd.extend(['-boot', 'c'])
        # Non-IDE boot disks are additionally flagged with boot=on
        if disk_type != constants.HT_DISK_IDE:
          boot_val = ',boot=on'
        else:
          boot_val = ''
        # We only boot from the first disk
        boot_disk = False
      else:
        boot_val = ''

      drive_val = 'file=%s,format=raw%s%s%s' % (dev_path, if_val, boot_val,
                                                cache_val)
      kvm_cmd.extend(['-drive', drive_val])

    iso_image = hvp[constants.HV_CDROM_IMAGE_PATH]
    if iso_image:
      options = ',format=raw,media=cdrom'
      if boot_cdrom:
        # When booting from the CD-ROM no interface type is passed for it
        kvm_cmd.extend(['-boot', 'd'])
        if disk_type != constants.HT_DISK_IDE:
          options = '%s,boot=on' % options
      else:
        if disk_type == constants.HT_DISK_PARAVIRTUAL:
          if_val = ',if=virtio'
        else:
          if_val = ',if=%s' % disk_type
        options = '%s%s' % (options, if_val)
      drive_val = 'file=%s%s' % (iso_image, options)
      kvm_cmd.extend(['-drive', drive_val])

    # Initrd and kernel arguments are only passed along with a kernel
    kernel_path = hvp[constants.HV_KERNEL_PATH]
    if kernel_path:
      kvm_cmd.extend(['-kernel', kernel_path])
      initrd_path = hvp[constants.HV_INITRD_PATH]
      if initrd_path:
        kvm_cmd.extend(['-initrd', initrd_path])
      root_append = ['root=%s' % hvp[constants.HV_ROOT_PATH],
                     hvp[constants.HV_KERNEL_ARGS]]
      if hvp[constants.HV_SERIAL_CONSOLE]:
        root_append.append('console=ttyS0,38400')
      kvm_cmd.extend(['-append', ' '.join(root_append)])

    mouse_type = hvp[constants.HV_USB_MOUSE]
    vnc_bind_address = hvp[constants.HV_VNC_BIND_ADDRESS]

    # Without an explicit mouse type, a tablet is added whenever VNC is used
    if mouse_type:
      kvm_cmd.extend(['-usb'])
      kvm_cmd.extend(['-usbdevice', mouse_type])
    elif vnc_bind_address:
      kvm_cmd.extend(['-usbdevice', constants.HT_MOUSE_TABLET])

    if vnc_bind_address:
      if netutils.IsValidIP4(vnc_bind_address):
        # The VNC display number is the network port relative to the VNC
        # base port
        if instance.network_port > constants.VNC_BASE_PORT:
          display = instance.network_port - constants.VNC_BASE_PORT
          if vnc_bind_address == constants.IP4_ADDRESS_ANY:
            vnc_arg = ':%d' % (display)
          else:
            vnc_arg = '%s:%d' % (vnc_bind_address, display)
        else:
          logging.error("Network port is not a valid VNC display (%d < %d)."
                        " Not starting VNC", instance.network_port,
                        constants.VNC_BASE_PORT)
          vnc_arg = 'none'

        # Only allow tls and other option when not binding to a file, for now.
        # kvm/qemu gets confused otherwise about the filename to use.
        vnc_append = ''
        if hvp[constants.HV_VNC_TLS]:
          vnc_append = '%s,tls' % vnc_append
          if hvp[constants.HV_VNC_X509_VERIFY]:
            vnc_append = '%s,x509verify=%s' % (vnc_append,
                                               hvp[constants.HV_VNC_X509])
          elif hvp[constants.HV_VNC_X509]:
            vnc_append = '%s,x509=%s' % (vnc_append,
                                         hvp[constants.HV_VNC_X509])
        # The password itself is set through the monitor once the instance
        # is running (see _ExecuteKVMRuntime)
        if hvp[constants.HV_VNC_PASSWORD_FILE]:
          vnc_append = '%s,password' % vnc_append

        vnc_arg = '%s%s' % (vnc_arg, vnc_append)

      else:
        # Not an IPv4 address: the value is used as the directory holding
        # the instance's VNC unix socket
        vnc_arg = 'unix:%s/%s.vnc' % (vnc_bind_address, instance.name)

      kvm_cmd.extend(['-vnc', vnc_arg])
    else:
      kvm_cmd.extend(['-nographic'])

    # The monitor socket is what _CallMonitorCommand connects to
    monitor_dev = ("unix:%s,server,nowait" %
                   self._InstanceMonitor(instance.name))
    kvm_cmd.extend(['-monitor', monitor_dev])
    if hvp[constants.HV_SERIAL_CONSOLE]:
      serial_dev = ('unix:%s,server,nowait' %
                    self._InstanceSerial(instance.name))
      kvm_cmd.extend(['-serial', serial_dev])
    else:
      kvm_cmd.extend(['-serial', 'none'])

    if hvp[constants.HV_USE_LOCALTIME]:
      kvm_cmd.extend(['-localtime'])

    if hvp[constants.HV_KVM_USE_CHROOT]:
      kvm_cmd.extend(['-chroot', self._InstanceChrootDir(instance.name)])

    # Save the current instance nics, but defer their expansion as parameters,
    # as we'll need to generate executable temp files for them.
    kvm_nics = instance.nics
    hvparams = hvp

    return (kvm_cmd, kvm_nics, hvparams)
636
637   def _WriteKVMRuntime(self, instance_name, data):
638     """Write an instance's KVM runtime
639
640     """
641     try:
642       utils.WriteFile(self._InstanceKVMRuntime(instance_name),
643                       data=data)
644     except EnvironmentError, err:
645       raise errors.HypervisorError("Failed to save KVM runtime file: %s" % err)
646
647   def _ReadKVMRuntime(self, instance_name):
648     """Read an instance's KVM runtime
649
650     """
651     try:
652       file_content = utils.ReadFile(self._InstanceKVMRuntime(instance_name))
653     except EnvironmentError, err:
654       raise errors.HypervisorError("Failed to load KVM runtime file: %s" % err)
655     return file_content
656
657   def _SaveKVMRuntime(self, instance, kvm_runtime):
658     """Save an instance's KVM runtime
659
660     """
661     kvm_cmd, kvm_nics, hvparams = kvm_runtime
662     serialized_nics = [nic.ToDict() for nic in kvm_nics]
663     serialized_form = serializer.Dump((kvm_cmd, serialized_nics, hvparams))
664     self._WriteKVMRuntime(instance.name, serialized_form)
665
666   def _LoadKVMRuntime(self, instance, serialized_runtime=None):
667     """Load an instance's KVM runtime
668
669     """
670     if not serialized_runtime:
671       serialized_runtime = self._ReadKVMRuntime(instance.name)
672     loaded_runtime = serializer.Load(serialized_runtime)
673     kvm_cmd, serialized_nics, hvparams = loaded_runtime
674     kvm_nics = [objects.NIC.FromDict(snic) for snic in serialized_nics]
675     return (kvm_cmd, kvm_nics, hvparams)
676
677   def _RunKVMCmd(self, name, kvm_cmd):
678     """Run the KVM cmd and check for errors
679
680     @type name: string
681     @param name: instance name
682     @type kvm_cmd: list of strings
683     @param kvm_cmd: runcmd input for kvm
684
685     """
686     result = utils.RunCmd(kvm_cmd)
687     if result.failed:
688       raise errors.HypervisorError("Failed to start instance %s: %s (%s)" %
689                                    (name, result.fail_reason, result.output))
690     if not self._InstancePidAlive(name)[2]:
691       raise errors.HypervisorError("Failed to start instance %s" % name)
692
693   def _ExecuteKVMRuntime(self, instance, kvm_runtime, incoming=None):
694     """Execute a KVM cmd, after completing it with some last minute data
695
696     @type incoming: tuple of strings
697     @param incoming: (target_host_ip, port)
698
699     """
700     # Small _ExecuteKVMRuntime hv parameters programming howto:
701     #  - conf_hvp contains the parameters as configured on ganeti. they might
702     #    have changed since the instance started; only use them if the change
703     #    won't affect the inside of the instance (which hasn't been rebooted).
704     #  - up_hvp contains the parameters as they were when the instance was
705     #    started, plus any new parameter which has been added between ganeti
706     #    versions: it is paramount that those default to a value which won't
707     #    affect the inside of the instance as well.
708     conf_hvp = instance.hvparams
709     name = instance.name
710     self._CheckDown(name)
711
712     temp_files = []
713
714     kvm_cmd, kvm_nics, up_hvp = kvm_runtime
715     up_hvp = objects.FillDict(conf_hvp, up_hvp)
716
717     # We know it's safe to run as a different user upon migration, so we'll use
718     # the latest conf, from conf_hvp.
719     security_model = conf_hvp[constants.HV_SECURITY_MODEL]
720     if security_model == constants.HT_SM_USER:
721       kvm_cmd.extend(["-runas", conf_hvp[constants.HV_SECURITY_DOMAIN]])
722
723     # We have reasons to believe changing something like the nic driver/type
724     # upon migration won't exactly fly with the instance kernel, so for nic
725     # related parameters we'll use up_hvp
726     if not kvm_nics:
727       kvm_cmd.extend(["-net", "none"])
728     else:
729       tap_extra = ""
730       nic_type = up_hvp[constants.HV_NIC_TYPE]
731       if nic_type == constants.HT_NIC_PARAVIRTUAL:
732         nic_model = "model=virtio"
733         if up_hvp[constants.HV_VHOST_NET]:
734           tap_extra = ",vhost=on"
735       else:
736         nic_model = "model=%s" % nic_type
737
738       for nic_seq, nic in enumerate(kvm_nics):
739         nic_val = "nic,vlan=%s,macaddr=%s,%s" % (nic_seq, nic.mac, nic_model)
740         script = self._WriteNetScriptFile(instance, nic_seq, nic)
741         tap_val = "tap,vlan=%s,script=%s%s" % (nic_seq, script, tap_extra)
742         kvm_cmd.extend(["-net", nic_val])
743         kvm_cmd.extend(["-net", tap_val])
744         temp_files.append(script)
745
746     if incoming:
747       target, port = incoming
748       kvm_cmd.extend(['-incoming', 'tcp:%s:%s' % (target, port)])
749
750     # Changing the vnc password doesn't bother the guest that much. At most it
751     # will surprise people who connect to it. Whether positively or negatively
752     # it's debatable.
753     vnc_pwd_file = conf_hvp[constants.HV_VNC_PASSWORD_FILE]
754     vnc_pwd = None
755     if vnc_pwd_file:
756       try:
757         vnc_pwd = utils.ReadOneLineFile(vnc_pwd_file, strict=True)
758       except EnvironmentError, err:
759         raise errors.HypervisorError("Failed to open VNC password file %s: %s"
760                                      % (vnc_pwd_file, err))
761
762     if conf_hvp[constants.HV_KVM_USE_CHROOT]:
763       utils.EnsureDirs([(self._InstanceChrootDir(name),
764                          constants.SECURE_DIR_MODE)])
765
766     if security_model == constants.HT_SM_POOL:
767       ss = ssconf.SimpleStore()
768       uid_pool = uidpool.ParseUidPool(ss.GetUidPool(), separator="\n")
769       all_uids = set(uidpool.ExpandUidPool(uid_pool))
770       uid = uidpool.RequestUnusedUid(all_uids)
771       try:
772         username = pwd.getpwuid(uid.GetUid()).pw_name
773         kvm_cmd.extend(["-runas", username])
774         self._RunKVMCmd(name, kvm_cmd)
775       except:
776         uidpool.ReleaseUid(uid)
777         raise
778       else:
779         uid.Unlock()
780         utils.WriteFile(self._InstanceUidFile(name), data=str(uid))
781     else:
782       self._RunKVMCmd(name, kvm_cmd)
783
784     if vnc_pwd:
785       change_cmd = 'change vnc password %s' % vnc_pwd
786       self._CallMonitorCommand(instance.name, change_cmd)
787
788     for filename in temp_files:
789       utils.RemoveFile(filename)
790
791   def StartInstance(self, instance, block_devices):
792     """Start an instance.
793
794     """
795     self._CheckDown(instance.name)
796     kvm_runtime = self._GenerateKVMRuntime(instance, block_devices)
797     self._SaveKVMRuntime(instance, kvm_runtime)
798     self._ExecuteKVMRuntime(instance, kvm_runtime)
799
800   def _CallMonitorCommand(self, instance_name, command):
801     """Invoke a command on the instance monitor.
802
803     """
804     socat = ("echo %s | %s STDIO UNIX-CONNECT:%s" %
805              (utils.ShellQuote(command),
806               constants.SOCAT_PATH,
807               utils.ShellQuote(self._InstanceMonitor(instance_name))))
808     result = utils.RunCmd(socat)
809     if result.failed:
810       msg = ("Failed to send command '%s' to instance %s."
811              " output: %s, error: %s, fail_reason: %s" %
812              (command, instance_name,
813               result.stdout, result.stderr, result.fail_reason))
814       raise errors.HypervisorError(msg)
815
816     return result
817
818   def StopInstance(self, instance, force=False, retry=False, name=None):
819     """Stop an instance.
820
821     """
822     if name is not None and not force:
823       raise errors.HypervisorError("Cannot shutdown cleanly by name only")
824     if name is None:
825       name = instance.name
826       acpi = instance.hvparams[constants.HV_ACPI]
827     else:
828       acpi = False
829     _, pid, alive = self._InstancePidAlive(name)
830     if pid > 0 and alive:
831       if force or not acpi:
832         utils.KillProcess(pid)
833       else:
834         self._CallMonitorCommand(name, 'system_powerdown')
835
836   def CleanupInstance(self, instance_name):
837     """Cleanup after a stopped instance
838
839     """
840     pidfile, pid, alive = self._InstancePidAlive(instance_name)
841     if pid > 0 and alive:
842       raise errors.HypervisorError("Cannot cleanup a live instance")
843     self._RemoveInstanceRuntimeFiles(pidfile, instance_name)
844
845   def RebootInstance(self, instance):
846     """Reboot an instance.
847
848     """
849     # For some reason if we do a 'send-key ctrl-alt-delete' to the control
850     # socket the instance will stop, but now power up again. So we'll resort
851     # to shutdown and restart.
852     _, _, alive = self._InstancePidAlive(instance.name)
853     if not alive:
854       raise errors.HypervisorError("Failed to reboot instance %s:"
855                                    " not running" % instance.name)
856     # StopInstance will delete the saved KVM runtime so:
857     # ...first load it...
858     kvm_runtime = self._LoadKVMRuntime(instance)
859     # ...now we can safely call StopInstance...
860     if not self.StopInstance(instance):
861       self.StopInstance(instance, force=True)
862     # ...and finally we can save it again, and execute it...
863     self._SaveKVMRuntime(instance, kvm_runtime)
864     self._ExecuteKVMRuntime(instance, kvm_runtime)
865
866   def MigrationInfo(self, instance):
867     """Get instance information to perform a migration.
868
869     @type instance: L{objects.Instance}
870     @param instance: instance to be migrated
871     @rtype: string
872     @return: content of the KVM runtime file
873
874     """
875     return self._ReadKVMRuntime(instance.name)
876
877   def AcceptInstance(self, instance, info, target):
878     """Prepare to accept an instance.
879
880     @type instance: L{objects.Instance}
881     @param instance: instance to be accepted
882     @type info: string
883     @param info: content of the KVM runtime file on the source node
884     @type target: string
885     @param target: target host (usually ip), on this node
886
887     """
888     kvm_runtime = self._LoadKVMRuntime(instance, serialized_runtime=info)
889     incoming_address = (target, instance.hvparams[constants.HV_MIGRATION_PORT])
890     self._ExecuteKVMRuntime(instance, kvm_runtime, incoming=incoming_address)
891
892   def FinalizeMigration(self, instance, info, success):
893     """Finalize an instance migration.
894
895     Stop the incoming mode KVM.
896
897     @type instance: L{objects.Instance}
898     @param instance: instance whose migration is being finalized
899
900     """
901     if success:
902       self._WriteKVMRuntime(instance.name, info)
903     else:
904       self.StopInstance(instance, force=True)
905
906   def MigrateInstance(self, instance, target, live):
907     """Migrate an instance to a target node.
908
909     The migration will not be attempted if the instance is not
910     currently running.
911
912     @type instance: L{objects.Instance}
913     @param instance: the instance to be migrated
914     @type target: string
915     @param target: ip address of the target node
916     @type live: boolean
917     @param live: perform a live migration
918
919     """
920     instance_name = instance.name
921     port = instance.hvparams[constants.HV_MIGRATION_PORT]
922     pidfile, pid, alive = self._InstancePidAlive(instance_name)
923     if not alive:
924       raise errors.HypervisorError("Instance not running, cannot migrate")
925
926     if not live:
927       self._CallMonitorCommand(instance_name, 'stop')
928
929     migrate_command = ('migrate_set_speed %dm' %
930         instance.hvparams[constants.HV_MIGRATION_BANDWIDTH])
931     self._CallMonitorCommand(instance_name, migrate_command)
932
933     migrate_command = ('migrate_set_downtime %dms' %
934         instance.hvparams[constants.HV_MIGRATION_DOWNTIME])
935     self._CallMonitorCommand(instance_name, migrate_command)
936
937     migrate_command = 'migrate -d tcp:%s:%s' % (target, port)
938     self._CallMonitorCommand(instance_name, migrate_command)
939
940     info_command = 'info migrate'
941     done = False
942     broken_answers = 0
943     while not done:
944       result = self._CallMonitorCommand(instance_name, info_command)
945       match = self._MIGRATION_STATUS_RE.search(result.stdout)
946       if not match:
947         broken_answers += 1
948         if not result.stdout:
949           logging.info("KVM: empty 'info migrate' result")
950         else:
951           logging.warning("KVM: unknown 'info migrate' result: %s",
952                           result.stdout)
953         time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
954       else:
955         status = match.group(1)
956         if status == 'completed':
957           done = True
958         elif status == 'active':
959           # reset the broken answers count
960           broken_answers = 0
961           time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
962         elif status == 'failed' or status == 'cancelled':
963           if not live:
964             self._CallMonitorCommand(instance_name, 'cont')
965           raise errors.HypervisorError("Migration %s at the kvm level" %
966                                        status)
967         else:
968           logging.warning("KVM: unknown migration status '%s'", status)
969           broken_answers += 1
970           time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
971       if broken_answers >= self._MIGRATION_INFO_MAX_BAD_ANSWERS:
972         raise errors.HypervisorError("Too many 'info migrate' broken answers")
973
974     utils.KillProcess(pid)
975     self._RemoveInstanceRuntimeFiles(pidfile, instance_name)
976
977   def GetNodeInfo(self):
978     """Return information about the node.
979
980     This is just a wrapper over the base GetLinuxNodeInfo method.
981
982     @return: a dict with the following keys (values in MiB):
983           - memory_total: the total memory size on the node
984           - memory_free: the available memory on the node for instances
985           - memory_dom0: the memory used by the node itself, if available
986
987     """
988     return self.GetLinuxNodeInfo()
989
990   @classmethod
991   def GetShellCommandForConsole(cls, instance, hvparams, beparams):
992     """Return a command for connecting to the console of an instance.
993
994     """
995     if hvparams[constants.HV_SERIAL_CONSOLE]:
996       shell_command = ("%s STDIO,%s UNIX-CONNECT:%s" %
997                        (constants.SOCAT_PATH, cls._SocatUnixConsoleParams(),
998                         utils.ShellQuote(cls._InstanceSerial(instance.name))))
999     else:
1000       shell_command = "echo 'No serial shell for instance %s'" % instance.name
1001
1002     vnc_bind_address = hvparams[constants.HV_VNC_BIND_ADDRESS]
1003     if vnc_bind_address:
1004       if instance.network_port > constants.VNC_BASE_PORT:
1005         display = instance.network_port - constants.VNC_BASE_PORT
1006         vnc_command = ("echo 'Instance has VNC listening on %s:%d"
1007                        " (display: %d)'" % (vnc_bind_address,
1008                                             instance.network_port,
1009                                             display))
1010         shell_command = "%s; %s" % (vnc_command, shell_command)
1011
1012     return shell_command
1013
1014   def Verify(self):
1015     """Verify the hypervisor.
1016
1017     Check that the binary exists.
1018
1019     """
1020     if not os.path.exists(constants.KVM_PATH):
1021       return "The kvm binary ('%s') does not exist." % constants.KVM_PATH
1022     if not os.path.exists(constants.SOCAT_PATH):
1023       return "The socat binary ('%s') does not exist." % constants.SOCAT_PATH
1024
1025
1026   @classmethod
1027   def CheckParameterSyntax(cls, hvparams):
1028     """Check the given parameters for validity.
1029
1030     @type hvparams:  dict
1031     @param hvparams: dictionary with parameter names/value
1032     @raise errors.HypervisorError: when a parameter is not valid
1033
1034     """
1035     super(KVMHypervisor, cls).CheckParameterSyntax(hvparams)
1036
1037     kernel_path = hvparams[constants.HV_KERNEL_PATH]
1038     if kernel_path:
1039       if not hvparams[constants.HV_ROOT_PATH]:
1040         raise errors.HypervisorError("Need a root partition for the instance,"
1041                                      " if a kernel is defined")
1042
1043     if (hvparams[constants.HV_VNC_X509_VERIFY] and
1044         not hvparams[constants.HV_VNC_X509]):
1045       raise errors.HypervisorError("%s must be defined, if %s is" %
1046                                    (constants.HV_VNC_X509,
1047                                     constants.HV_VNC_X509_VERIFY))
1048
1049     boot_order = hvparams[constants.HV_BOOT_ORDER]
1050     if (boot_order == constants.HT_BO_CDROM and
1051         not hvparams[constants.HV_CDROM_IMAGE_PATH]):
1052       raise errors.HypervisorError("Cannot boot from cdrom without an"
1053                                    " ISO path")
1054
1055     security_model = hvparams[constants.HV_SECURITY_MODEL]
1056     if security_model == constants.HT_SM_USER:
1057       if not hvparams[constants.HV_SECURITY_DOMAIN]:
1058         raise errors.HypervisorError("A security domain (user to run kvm as)"
1059                                      " must be specified")
1060     elif (security_model == constants.HT_SM_NONE or
1061           security_model == constants.HT_SM_POOL):
1062       if hvparams[constants.HV_SECURITY_DOMAIN]:
1063         raise errors.HypervisorError("Cannot have a security domain when the"
1064                                      " security model is 'none' or 'pool'")
1065
1066   @classmethod
1067   def ValidateParameters(cls, hvparams):
1068     """Check the given parameters for validity.
1069
1070     @type hvparams:  dict
1071     @param hvparams: dictionary with parameter names/value
1072     @raise errors.HypervisorError: when a parameter is not valid
1073
1074     """
1075     super(KVMHypervisor, cls).ValidateParameters(hvparams)
1076
1077     security_model = hvparams[constants.HV_SECURITY_MODEL]
1078     if security_model == constants.HT_SM_USER:
1079       username = hvparams[constants.HV_SECURITY_DOMAIN]
1080       try:
1081         pwd.getpwnam(username)
1082       except KeyError:
1083         raise errors.HypervisorError("Unknown security domain user %s"
1084                                      % username)
1085
1086   @classmethod
1087   def PowercycleNode(cls):
1088     """KVM powercycle, just a wrapper over Linux powercycle.
1089
1090     """
1091     cls.LinuxPowercycle()