Merge branch 'devel-2.1' into devel-2.2
[ganeti-local] / lib / hypervisor / hv_kvm.py
1 #
2 #
3
4 # Copyright (C) 2008, 2009, 2010 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """KVM hypervisor
23
24 """
25
26 import errno
27 import os
28 import os.path
29 import re
30 import tempfile
31 import time
32 import logging
33 import pwd
34 from cStringIO import StringIO
35
36 from ganeti import utils
37 from ganeti import constants
38 from ganeti import errors
39 from ganeti import serializer
40 from ganeti import objects
41 from ganeti import uidpool
42 from ganeti import ssconf
43 from ganeti.hypervisor import hv_base
44 from ganeti import netutils
45
46
# Optional site-provided network script. The per-NIC script generated by
# _WriteNetScript runs this file instead of its built-in interface setup
# whenever it exists and is executable.
_KVM_NETWORK_SCRIPT = constants.SYSCONFDIR + "/ganeti/kvm-vif-bridge"
48
49
def _WriteNetScript(instance, nic, index):
  """Write a script to connect a net interface to the proper bridge.

  This can be used by any qemu-type hypervisor.

  The generated script exports the instance/NIC data (INSTANCE, MAC, MODE,
  INTERFACE, TAGS and, when available, IP, LINK and BRIDGE) and then either
  runs the site-specific script (L{_KVM_NETWORK_SCRIPT}), if it is
  executable, or falls back to a built-in bridged/routed setup.

  @type instance: L{objects.Instance}
  @param instance: Instance object
  @type nic: L{objects.NIC}
  @param nic: NIC object
  @type index: int
  @param index: NIC index
  @return: Script
  @rtype: string
  @raise errors.HypervisorError: if the NIC is routed but has no IP address

  """
  if instance.tags:
    tags = " ".join(instance.tags)
  else:
    tags = ""

  # Looked up once; both values are needed several times below
  mode = nic.nicparams[constants.NIC_MODE]
  link = nic.nicparams[constants.NIC_LINK]

  buf = StringIO()
  sw = utils.ShellWriter(buf)
  sw.Write("#!/bin/sh")
  sw.Write("# this is autogenerated by Ganeti, please do not edit")
  sw.Write("export PATH=$PATH:/sbin:/usr/sbin")
  sw.Write("export INSTANCE=%s", utils.ShellQuote(instance.name))
  sw.Write("export MAC=%s", utils.ShellQuote(nic.mac))
  sw.Write("export MODE=%s", utils.ShellQuote(mode))
  sw.Write("export INTERFACE=\"$1\"")
  sw.Write("export TAGS=%s", utils.ShellQuote(tags))

  if nic.ip:
    sw.Write("export IP=%s", utils.ShellQuote(nic.ip))

  if link:
    sw.Write("export LINK=%s", utils.ShellQuote(link))

  if mode == constants.NIC_MODE_BRIDGED:
    sw.Write("export BRIDGE=%s", utils.ShellQuote(link))

  # TODO: make this configurable at ./configure time
  sw.Write("if [ -x %s ]; then", utils.ShellQuote(_KVM_NETWORK_SCRIPT))
  sw.IncIndent()
  try:
    sw.Write("# Execute the user-specific vif file")
    # Quote the path exactly like in the "-x" test above, so that both lines
    # always refer to the same file
    sw.Write("%s", utils.ShellQuote(_KVM_NETWORK_SCRIPT))
  finally:
    sw.DecIndent()
  sw.Write("else")
  sw.IncIndent()
  try:
    sw.Write("ifconfig $INTERFACE 0.0.0.0 up")

    if mode == constants.NIC_MODE_BRIDGED:
      sw.Write("# Connect the interface to the bridge")
      sw.Write("brctl addif $BRIDGE $INTERFACE")

    elif mode == constants.NIC_MODE_ROUTED:
      if not nic.ip:
        raise errors.HypervisorError("nic/%d is routed, but has no IP"
                                     " address" % index)

      sw.Write("# Route traffic targeted at the IP to the interface")
      if link:
        # The link names the routing table to use for this interface
        sw.Write("while ip rule del dev $INTERFACE; do :; done")
        sw.Write("ip rule add dev $INTERFACE table $LINK")
        sw.Write("ip route replace $IP table $LINK proto static"
                 " dev $INTERFACE")
      else:
        sw.Write("ip route replace $IP proto static dev $INTERFACE")

      interface_v4_conf = "/proc/sys/net/ipv4/conf/$INTERFACE"
      sw.Write("if [ -d %s ]; then", interface_v4_conf)
      sw.IncIndent()
      try:
        sw.Write("echo 1 > %s/proxy_arp", interface_v4_conf)
        sw.Write("echo 1 > %s/forwarding", interface_v4_conf)
      finally:
        sw.DecIndent()
      sw.Write("fi")

      interface_v6_conf = "/proc/sys/net/ipv6/conf/$INTERFACE"
      sw.Write("if [ -d %s ]; then", interface_v6_conf)
      sw.IncIndent()
      try:
        sw.Write("echo 1 > %s/proxy_ndp", interface_v6_conf)
        sw.Write("echo 1 > %s/forwarding", interface_v6_conf)
      finally:
        sw.DecIndent()
      sw.Write("fi")
  finally:
    sw.DecIndent()
  sw.Write("fi")

  return buf.getvalue()
148
149
class KVMHypervisor(hv_base.BaseHypervisor):
  """KVM hypervisor interface

  Per-instance state (pid files, reserved uids, control sockets, saved
  startup data and chroot directories) is kept below L{_ROOT_DIR}.

  """
  CAN_MIGRATE = True

  # Directory layout used for per-instance runtime state
  _ROOT_DIR = constants.RUN_GANETI_DIR + "/kvm-hypervisor"
  _PIDS_DIR = _ROOT_DIR + "/pid" # contains live instances pids
  _UIDS_DIR = _ROOT_DIR + "/uid" # contains instances reserved uids
  _CTRL_DIR = _ROOT_DIR + "/ctrl" # contains instances control sockets
  _CONF_DIR = _ROOT_DIR + "/conf" # contains instances startup data
  # KVM instances with chroot enabled are started in empty chroot directories.
  _CHROOT_DIR = _ROOT_DIR + "/chroot" # for empty chroot directories
  # After an instance is stopped, its chroot directory is removed.
  # If the chroot directory is not empty, it can't be removed.
  # A non-empty chroot directory indicates a possible security incident.
  # To support forensics, the non-empty chroot directory is quarantined in
  # a separate directory, called 'chroot-quarantine'.
  _CHROOT_QUARANTINE_DIR = _ROOT_DIR + "/chroot-quarantine"
  # All of these directories are (re)created by __init__
  _DIRS = [_ROOT_DIR, _PIDS_DIR, _UIDS_DIR, _CTRL_DIR, _CONF_DIR,
           _CHROOT_DIR, _CHROOT_QUARANTINE_DIR]

  # Hypervisor parameters known to this hypervisor, each mapped to the
  # hv_base check definition used for it
  PARAMETERS = {
    constants.HV_KERNEL_PATH: hv_base.OPT_FILE_CHECK,
    constants.HV_INITRD_PATH: hv_base.OPT_FILE_CHECK,
    constants.HV_ROOT_PATH: hv_base.NO_CHECK,
    constants.HV_KERNEL_ARGS: hv_base.NO_CHECK,
    constants.HV_ACPI: hv_base.NO_CHECK,
    constants.HV_SERIAL_CONSOLE: hv_base.NO_CHECK,
    # Either an IPv4 address or an absolute path; in the latter case
    # _GenerateKVMRuntime builds a "unix:<path>/<instance>.vnc" socket
    constants.HV_VNC_BIND_ADDRESS:
      (False, lambda x: (netutils.IsValidIP4(x) or utils.IsNormAbsPath(x)),
       "the VNC bind address must be either a valid IP address or an absolute"
       " pathname", None, None),
    constants.HV_VNC_TLS: hv_base.NO_CHECK,
    constants.HV_VNC_X509: hv_base.OPT_DIR_CHECK,
    constants.HV_VNC_X509_VERIFY: hv_base.NO_CHECK,
    constants.HV_VNC_PASSWORD_FILE: hv_base.OPT_FILE_CHECK,
    constants.HV_CDROM_IMAGE_PATH: hv_base.OPT_FILE_CHECK,
    constants.HV_BOOT_ORDER:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_BO_TYPES),
    constants.HV_NIC_TYPE:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_NIC_TYPES),
    constants.HV_DISK_TYPE:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_DISK_TYPES),
    constants.HV_USB_MOUSE:
      hv_base.ParamInSet(False, constants.HT_KVM_VALID_MOUSE_TYPES),
    constants.HV_MIGRATION_PORT: hv_base.NET_PORT_CHECK,
    constants.HV_MIGRATION_BANDWIDTH: hv_base.NO_CHECK,
    constants.HV_MIGRATION_DOWNTIME: hv_base.NO_CHECK,
    constants.HV_MIGRATION_MODE: hv_base.MIGRATION_MODE_CHECK,
    constants.HV_USE_LOCALTIME: hv_base.NO_CHECK,
    constants.HV_DISK_CACHE:
      hv_base.ParamInSet(True, constants.HT_VALID_CACHE_TYPES),
    constants.HV_SECURITY_MODEL:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_SM_TYPES),
    constants.HV_SECURITY_DOMAIN: hv_base.NO_CHECK,
    constants.HV_KVM_FLAG:
      hv_base.ParamInSet(False, constants.HT_KVM_FLAG_VALUES),
    constants.HV_VHOST_NET: hv_base.NO_CHECK,
    constants.HV_KVM_USE_CHROOT: hv_base.NO_CHECK,
    }

  # Matches a "Migration status: <word>" line, case-insensitively, anywhere
  # in a multi-line text; the status word is captured as group 1
  _MIGRATION_STATUS_RE = re.compile('Migration\s+status:\s+(\w+)',
                                    re.M | re.I)
  # NOTE(review): the two values below are not used in the visible part of
  # the file; presumably they tune the polling of the migration status
  _MIGRATION_INFO_MAX_BAD_ANSWERS = 5
  _MIGRATION_INFO_RETRY_DELAY = 2

  ANCILLARY_FILES = [
    _KVM_NETWORK_SCRIPT,
    ]
218
219   def __init__(self):
220     hv_base.BaseHypervisor.__init__(self)
221     # Let's make sure the directories we need exist, even if the RUN_DIR lives
222     # in a tmpfs filesystem or has been otherwise wiped out.
223     dirs = [(dname, constants.RUN_DIRS_MODE) for dname in self._DIRS]
224     utils.EnsureDirs(dirs)
225
226   @classmethod
227   def _InstancePidFile(cls, instance_name):
228     """Returns the instance pidfile.
229
230     """
231     return utils.PathJoin(cls._PIDS_DIR, instance_name)
232
233   @classmethod
234   def _InstanceUidFile(cls, instance_name):
235     """Returns the instance uidfile.
236
237     """
238     return utils.PathJoin(cls._UIDS_DIR, instance_name)
239
240   @classmethod
241   def _InstancePidInfo(cls, pid):
242     """Check pid file for instance information.
243
244     Check that a pid file is associated with an instance, and retrieve
245     information from its command line.
246
247     @type pid: string or int
248     @param pid: process id of the instance to check
249     @rtype: tuple
250     @return: (instance_name, memory, vcpus)
251     @raise errors.HypervisorError: when an instance cannot be found
252
253     """
254     alive = utils.IsProcessAlive(pid)
255     if not alive:
256       raise errors.HypervisorError("Cannot get info for pid %s" % pid)
257
258     cmdline_file = utils.PathJoin("/proc", str(pid), "cmdline")
259     try:
260       cmdline = utils.ReadFile(cmdline_file)
261     except EnvironmentError, err:
262       raise errors.HypervisorError("Can't open cmdline file for pid %s: %s" %
263                                    (pid, err))
264
265     instance = None
266     memory = 0
267     vcpus = 0
268
269     arg_list = cmdline.split('\x00')
270     while arg_list:
271       arg =  arg_list.pop(0)
272       if arg == "-name":
273         instance = arg_list.pop(0)
274       elif arg == "-m":
275         memory = int(arg_list.pop(0))
276       elif arg == "-smp":
277         vcpus = int(arg_list.pop(0))
278
279     if instance is None:
280       raise errors.HypervisorError("Pid %s doesn't contain a ganeti kvm"
281                                    " instance" % pid)
282
283     return (instance, memory, vcpus)
284
285   def _InstancePidAlive(self, instance_name):
286     """Returns the instance pidfile, pid, and liveness.
287
288     @type instance_name: string
289     @param instance_name: instance name
290     @rtype: tuple
291     @return: (pid file name, pid, liveness)
292
293     """
294     pidfile = self._InstancePidFile(instance_name)
295     pid = utils.ReadPidFile(pidfile)
296
297     alive = False
298     try:
299       cmd_instance = self._InstancePidInfo(pid)[0]
300       alive = (cmd_instance == instance_name)
301     except errors.HypervisorError:
302       pass
303
304     return (pidfile, pid, alive)
305
306   def _CheckDown(self, instance_name):
307     """Raises an error unless the given instance is down.
308
309     """
310     alive = self._InstancePidAlive(instance_name)[2]
311     if alive:
312       raise errors.HypervisorError("Failed to start instance %s: %s" %
313                                    (instance_name, "already running"))
314
315   @classmethod
316   def _InstanceMonitor(cls, instance_name):
317     """Returns the instance monitor socket name
318
319     """
320     return utils.PathJoin(cls._CTRL_DIR, "%s.monitor" % instance_name)
321
322   @classmethod
323   def _InstanceSerial(cls, instance_name):
324     """Returns the instance serial socket name
325
326     """
327     return utils.PathJoin(cls._CTRL_DIR, "%s.serial" % instance_name)
328
329   @staticmethod
330   def _SocatUnixConsoleParams():
331     """Returns the correct parameters for socat
332
333     If we have a new-enough socat we can use raw mode with an escape character.
334
335     """
336     if constants.SOCAT_USE_ESCAPE:
337       return "raw,echo=0,escape=%s" % constants.SOCAT_ESCAPE_CODE
338     else:
339       return "echo=0,icanon=0"
340
341   @classmethod
342   def _InstanceKVMRuntime(cls, instance_name):
343     """Returns the instance KVM runtime filename
344
345     """
346     return utils.PathJoin(cls._CONF_DIR, "%s.runtime" % instance_name)
347
348   @classmethod
349   def _InstanceChrootDir(cls, instance_name):
350     """Returns the name of the KVM chroot dir of the instance
351
352     """
353     return utils.PathJoin(cls._CHROOT_DIR, instance_name)
354
355   @classmethod
356   def _TryReadUidFile(cls, uid_file):
357     """Try to read a uid file
358
359     """
360     if os.path.exists(uid_file):
361       try:
362         uid = int(utils.ReadOneLineFile(uid_file))
363         return uid
364       except EnvironmentError:
365         logging.warning("Can't read uid file", exc_info=True)
366       except (TypeError, ValueError):
367         logging.warning("Can't parse uid file contents", exc_info=True)
368     return None
369
370   @classmethod
371   def _RemoveInstanceRuntimeFiles(cls, pidfile, instance_name):
372     """Removes an instance's rutime sockets/files/dirs.
373
374     """
375     utils.RemoveFile(pidfile)
376     utils.RemoveFile(cls._InstanceMonitor(instance_name))
377     utils.RemoveFile(cls._InstanceSerial(instance_name))
378     utils.RemoveFile(cls._InstanceKVMRuntime(instance_name))
379     uid_file = cls._InstanceUidFile(instance_name)
380     uid = cls._TryReadUidFile(uid_file)
381     utils.RemoveFile(uid_file)
382     if uid is not None:
383       uidpool.ReleaseUid(uid)
384     try:
385       chroot_dir = cls._InstanceChrootDir(instance_name)
386       utils.RemoveDir(chroot_dir)
387     except OSError, err:
388       if err.errno == errno.ENOTEMPTY:
389         # The chroot directory is expected to be empty, but it isn't.
390         new_chroot_dir = tempfile.mkdtemp(dir=cls._CHROOT_QUARANTINE_DIR,
391                                           prefix="%s-%s-" %
392                                           (instance_name,
393                                            utils.TimestampForFilename()))
394         logging.warning("The chroot directory of instance %s can not be"
395                         " removed as it is not empty. Moving it to the"
396                         " quarantine instead. Please investigate the"
397                         " contents (%s) and clean up manually",
398                         instance_name, new_chroot_dir)
399         utils.RenameFile(chroot_dir, new_chroot_dir)
400       else:
401         raise
402
403   @staticmethod
404   def _WriteNetScriptFile(instance, seq, nic):
405     """Write a script to connect a net interface to the proper bridge.
406
407     This can be used by any qemu-type hypervisor.
408
409     @param instance: instance we're acting on
410     @type instance: instance object
411     @param seq: nic sequence number
412     @type seq: int
413     @param nic: nic we're acting on
414     @type nic: nic object
415     @return: netscript file name
416     @rtype: string
417
418     """
419     script = _WriteNetScript(instance, nic, seq)
420
421     # As much as we'd like to put this in our _ROOT_DIR, that will happen to be
422     # mounted noexec sometimes, so we'll have to find another place.
423     (tmpfd, tmpfile_name) = tempfile.mkstemp()
424     tmpfile = os.fdopen(tmpfd, 'w')
425     try:
426       tmpfile.write(script)
427     finally:
428       tmpfile.close()
429     os.chmod(tmpfile_name, 0755)
430     return tmpfile_name
431
432   def ListInstances(self):
433     """Get the list of running instances.
434
435     We can do this by listing our live instances directory and
436     checking whether the associated kvm process is still alive.
437
438     """
439     result = []
440     for name in os.listdir(self._PIDS_DIR):
441       if self._InstancePidAlive(name)[2]:
442         result.append(name)
443     return result
444
445   def GetInstanceInfo(self, instance_name):
446     """Get instance properties.
447
448     @type instance_name: string
449     @param instance_name: the instance name
450     @rtype: tuple of strings
451     @return: (name, id, memory, vcpus, stat, times)
452
453     """
454     _, pid, alive = self._InstancePidAlive(instance_name)
455     if not alive:
456       return None
457
458     _, memory, vcpus = self._InstancePidInfo(pid)
459     stat = "---b-"
460     times = "0"
461
462     return (instance_name, pid, memory, vcpus, stat, times)
463
464   def GetAllInstancesInfo(self):
465     """Get properties of all instances.
466
467     @return: list of tuples (name, id, memory, vcpus, stat, times)
468
469     """
470     data = []
471     for name in os.listdir(self._PIDS_DIR):
472       try:
473         info = self.GetInstanceInfo(name)
474       except errors.HypervisorError:
475         continue
476       if info:
477         data.append(info)
478     return data
479
  def _GenerateKVMRuntime(self, instance, block_devices):
    """Generate KVM information to start an instance.

    Builds the part of the kvm command line which depends only on the
    instance configuration (memory, cpus, disks, cdrom, kernel, mouse, VNC,
    monitor/serial sockets, localtime, chroot). The network related
    arguments are not added here; the NICs are returned untouched so that
    they can be expanded when the command is actually executed.

    @type instance: L{objects.Instance}
    @param instance: instance to generate the runtime for
    @type block_devices: list of tuples
    @param block_devices: (disk object, device path) pairs
    @rtype: tuple
    @return: (kvm_cmd, kvm_nics, hvparams)
    @raise errors.HypervisorError: if the instance has a disk which is not
        read-write

    """
    pidfile  = self._InstancePidFile(instance.name)
    kvm = constants.KVM_PATH
    kvm_cmd = [kvm]
    # -name, -m and -smp are also what _InstancePidInfo parses back out of
    # the command line of the running process
    kvm_cmd.extend(['-name', instance.name])
    kvm_cmd.extend(['-m', instance.beparams[constants.BE_MEMORY]])
    kvm_cmd.extend(['-smp', instance.beparams[constants.BE_VCPUS]])
    kvm_cmd.extend(['-pidfile', pidfile])
    kvm_cmd.extend(['-daemonize'])
    if not instance.hvparams[constants.HV_ACPI]:
      kvm_cmd.extend(['-no-acpi'])

    hvp = instance.hvparams
    # At most one of these is true, depending on the configured boot order
    boot_disk = hvp[constants.HV_BOOT_ORDER] == constants.HT_BO_DISK
    boot_cdrom = hvp[constants.HV_BOOT_ORDER] == constants.HT_BO_CDROM
    boot_network = hvp[constants.HV_BOOT_ORDER] == constants.HT_BO_NETWORK

    # Any other flag value adds neither option
    if hvp[constants.HV_KVM_FLAG] == constants.HT_KVM_ENABLED:
      kvm_cmd.extend(["-enable-kvm"])
    elif hvp[constants.HV_KVM_FLAG] == constants.HT_KVM_DISABLED:
      kvm_cmd.extend(["-disable-kvm"])

    if boot_network:
      kvm_cmd.extend(['-boot', 'n'])

    # Disk interface type
    disk_type = hvp[constants.HV_DISK_TYPE]
    if disk_type == constants.HT_DISK_PARAVIRTUAL:
      if_val = ',if=virtio'
    else:
      if_val = ',if=%s' % disk_type
    # Cache mode
    disk_cache = hvp[constants.HV_DISK_CACHE]
    if disk_cache != constants.HT_CACHE_DEFAULT:
      cache_val = ",cache=%s" % disk_cache
    else:
      cache_val = ""
    for cfdev, dev_path in block_devices:
      if cfdev.mode != constants.DISK_RDWR:
        raise errors.HypervisorError("Instance has read-only disks which"
                                     " are not supported by KVM")
      # TODO: handle FD_LOOP and FD_BLKTAP (?)
      if boot_disk:
        kvm_cmd.extend(['-boot', 'c'])
        # Non-IDE boot disks additionally get "boot=on" on their drive
        if disk_type != constants.HT_DISK_IDE:
          boot_val = ',boot=on'
        else:
          boot_val = ''
        # We only boot from the first disk
        boot_disk = False
      else:
        boot_val = ''

      drive_val = 'file=%s,format=raw%s%s%s' % (dev_path, if_val, boot_val,
                                                cache_val)
      kvm_cmd.extend(['-drive', drive_val])

    # Optional CD-ROM image
    iso_image = hvp[constants.HV_CDROM_IMAGE_PATH]
    if iso_image:
      options = ',format=raw,media=cdrom'
      if boot_cdrom:
        kvm_cmd.extend(['-boot', 'd'])
        if disk_type != constants.HT_DISK_IDE:
          options = '%s,boot=on' % options
      else:
        # The interface type is only passed when not booting from the cdrom
        if disk_type == constants.HT_DISK_PARAVIRTUAL:
          if_val = ',if=virtio'
        else:
          if_val = ',if=%s' % disk_type
        options = '%s%s' % (options, if_val)
      drive_val = 'file=%s%s' % (iso_image, options)
      kvm_cmd.extend(['-drive', drive_val])

    # Direct kernel boot; initrd and kernel arguments are only used when a
    # kernel path is set
    kernel_path = hvp[constants.HV_KERNEL_PATH]
    if kernel_path:
      kvm_cmd.extend(['-kernel', kernel_path])
      initrd_path = hvp[constants.HV_INITRD_PATH]
      if initrd_path:
        kvm_cmd.extend(['-initrd', initrd_path])
      root_append = ['root=%s' % hvp[constants.HV_ROOT_PATH],
                     hvp[constants.HV_KERNEL_ARGS]]
      if hvp[constants.HV_SERIAL_CONSOLE]:
        root_append.append('console=ttyS0,38400')
      kvm_cmd.extend(['-append', ' '.join(root_append)])

    mouse_type = hvp[constants.HV_USB_MOUSE]
    vnc_bind_address = hvp[constants.HV_VNC_BIND_ADDRESS]

    # An explicitly configured mouse wins; otherwise a tablet is added
    # whenever VNC is in use
    if mouse_type:
      kvm_cmd.extend(['-usb'])
      kvm_cmd.extend(['-usbdevice', mouse_type])
    elif vnc_bind_address:
      kvm_cmd.extend(['-usbdevice', constants.HT_MOUSE_TABLET])

    if vnc_bind_address:
      if netutils.IsValidIP4(vnc_bind_address):
        # The VNC display number is the network port relative to the VNC
        # base port
        if instance.network_port > constants.VNC_BASE_PORT:
          display = instance.network_port - constants.VNC_BASE_PORT
          if vnc_bind_address == constants.IP4_ADDRESS_ANY:
            vnc_arg = ':%d' % (display)
          else:
            vnc_arg = '%s:%d' % (vnc_bind_address, display)
        else:
          logging.error("Network port is not a valid VNC display (%d < %d)."
                        " Not starting VNC", instance.network_port,
                        constants.VNC_BASE_PORT)
          vnc_arg = 'none'

        # Only allow tls and other option when not binding to a file, for now.
        # kvm/qemu gets confused otherwise about the filename to use.
        vnc_append = ''
        if hvp[constants.HV_VNC_TLS]:
          vnc_append = '%s,tls' % vnc_append
          if hvp[constants.HV_VNC_X509_VERIFY]:
            vnc_append = '%s,x509verify=%s' % (vnc_append,
                                               hvp[constants.HV_VNC_X509])
          elif hvp[constants.HV_VNC_X509]:
            vnc_append = '%s,x509=%s' % (vnc_append,
                                         hvp[constants.HV_VNC_X509])
        # Only the "password" flag goes on the command line; the password
        # itself is set through the monitor in _ExecuteKVMRuntime
        if hvp[constants.HV_VNC_PASSWORD_FILE]:
          vnc_append = '%s,password' % vnc_append

        vnc_arg = '%s%s' % (vnc_arg, vnc_append)

      else:
        # Not an IP address: the bind address is a directory in which a
        # per-instance unix socket is created
        vnc_arg = 'unix:%s/%s.vnc' % (vnc_bind_address, instance.name)

      kvm_cmd.extend(['-vnc', vnc_arg])
    else:
      kvm_cmd.extend(['-nographic'])

    # The monitor socket is always created; the serial socket only when the
    # serial console is enabled
    monitor_dev = ("unix:%s,server,nowait" %
                   self._InstanceMonitor(instance.name))
    kvm_cmd.extend(['-monitor', monitor_dev])
    if hvp[constants.HV_SERIAL_CONSOLE]:
      serial_dev = ('unix:%s,server,nowait' %
                    self._InstanceSerial(instance.name))
      kvm_cmd.extend(['-serial', serial_dev])
    else:
      kvm_cmd.extend(['-serial', 'none'])

    if hvp[constants.HV_USE_LOCALTIME]:
      kvm_cmd.extend(['-localtime'])

    if hvp[constants.HV_KVM_USE_CHROOT]:
      kvm_cmd.extend(['-chroot', self._InstanceChrootDir(instance.name)])

    # Save the current instance nics, but defer their expansion as parameters,
    # as we'll need to generate executable temp files for them.
    kvm_nics = instance.nics
    hvparams = hvp

    return (kvm_cmd, kvm_nics, hvparams)
636
637   def _WriteKVMRuntime(self, instance_name, data):
638     """Write an instance's KVM runtime
639
640     """
641     try:
642       utils.WriteFile(self._InstanceKVMRuntime(instance_name),
643                       data=data)
644     except EnvironmentError, err:
645       raise errors.HypervisorError("Failed to save KVM runtime file: %s" % err)
646
647   def _ReadKVMRuntime(self, instance_name):
648     """Read an instance's KVM runtime
649
650     """
651     try:
652       file_content = utils.ReadFile(self._InstanceKVMRuntime(instance_name))
653     except EnvironmentError, err:
654       raise errors.HypervisorError("Failed to load KVM runtime file: %s" % err)
655     return file_content
656
657   def _SaveKVMRuntime(self, instance, kvm_runtime):
658     """Save an instance's KVM runtime
659
660     """
661     kvm_cmd, kvm_nics, hvparams = kvm_runtime
662     serialized_nics = [nic.ToDict() for nic in kvm_nics]
663     serialized_form = serializer.Dump((kvm_cmd, serialized_nics, hvparams))
664     self._WriteKVMRuntime(instance.name, serialized_form)
665
666   def _LoadKVMRuntime(self, instance, serialized_runtime=None):
667     """Load an instance's KVM runtime
668
669     """
670     if not serialized_runtime:
671       serialized_runtime = self._ReadKVMRuntime(instance.name)
672     loaded_runtime = serializer.Load(serialized_runtime)
673     kvm_cmd, serialized_nics, hvparams = loaded_runtime
674     kvm_nics = [objects.NIC.FromDict(snic) for snic in serialized_nics]
675     return (kvm_cmd, kvm_nics, hvparams)
676
677   def _RunKVMCmd(self, name, kvm_cmd):
678     """Run the KVM cmd and check for errors
679
680     @type name: string
681     @param name: instance name
682     @type kvm_cmd: list of strings
683     @param kvm_cmd: runcmd input for kvm
684
685     """
686     result = utils.RunCmd(kvm_cmd)
687     if result.failed:
688       raise errors.HypervisorError("Failed to start instance %s: %s (%s)" %
689                                    (name, result.fail_reason, result.output))
690     if not self._InstancePidAlive(name)[2]:
691       raise errors.HypervisorError("Failed to start instance %s" % name)
692
693   def _ExecuteKVMRuntime(self, instance, kvm_runtime, incoming=None):
694     """Execute a KVM cmd, after completing it with some last minute data
695
696     @type incoming: tuple of strings
697     @param incoming: (target_host_ip, port)
698
699     """
700     # Small _ExecuteKVMRuntime hv parameters programming howto:
701     #  - conf_hvp contains the parameters as configured on ganeti. they might
702     #    have changed since the instance started; only use them if the change
703     #    won't affect the inside of the instance (which hasn't been rebooted).
704     #  - up_hvp contains the parameters as they were when the instance was
705     #    started, plus any new parameter which has been added between ganeti
706     #    versions: it is paramount that those default to a value which won't
707     #    affect the inside of the instance as well.
708     conf_hvp = instance.hvparams
709     name = instance.name
710     self._CheckDown(name)
711
712     temp_files = []
713
714     kvm_cmd, kvm_nics, up_hvp = kvm_runtime
715     up_hvp = objects.FillDict(conf_hvp, up_hvp)
716
717     # We know it's safe to run as a different user upon migration, so we'll use
718     # the latest conf, from conf_hvp.
719     security_model = conf_hvp[constants.HV_SECURITY_MODEL]
720     if security_model == constants.HT_SM_USER:
721       kvm_cmd.extend(["-runas", conf_hvp[constants.HV_SECURITY_DOMAIN]])
722
723     # We have reasons to believe changing something like the nic driver/type
724     # upon migration won't exactly fly with the instance kernel, so for nic
725     # related parameters we'll use up_hvp
726     if not kvm_nics:
727       kvm_cmd.extend(["-net", "none"])
728     else:
729       tap_extra = ""
730       nic_type = up_hvp[constants.HV_NIC_TYPE]
731       if nic_type == constants.HT_NIC_PARAVIRTUAL:
732         nic_model = "model=virtio"
733         if up_hvp[constants.HV_VHOST_NET]:
734           tap_extra = ",vhost=on"
735       else:
736         nic_model = "model=%s" % nic_type
737
738       for nic_seq, nic in enumerate(kvm_nics):
739         nic_val = "nic,vlan=%s,macaddr=%s,%s" % (nic_seq, nic.mac, nic_model)
740         script = self._WriteNetScriptFile(instance, nic_seq, nic)
741         tap_val = "tap,vlan=%s,script=%s%s" % (nic_seq, script, tap_extra)
742         kvm_cmd.extend(["-net", nic_val])
743         kvm_cmd.extend(["-net", tap_val])
744         temp_files.append(script)
745
746     if incoming:
747       target, port = incoming
748       kvm_cmd.extend(['-incoming', 'tcp:%s:%s' % (target, port)])
749
750     # Changing the vnc password doesn't bother the guest that much. At most it
751     # will surprise people who connect to it. Whether positively or negatively
752     # it's debatable.
753     vnc_pwd_file = conf_hvp[constants.HV_VNC_PASSWORD_FILE]
754     vnc_pwd = None
755     if vnc_pwd_file:
756       try:
757         vnc_pwd = utils.ReadOneLineFile(vnc_pwd_file, strict=True)
758       except EnvironmentError, err:
759         raise errors.HypervisorError("Failed to open VNC password file %s: %s"
760                                      % (vnc_pwd_file, err))
761
762     if conf_hvp[constants.HV_KVM_USE_CHROOT]:
763       utils.EnsureDirs([(self._InstanceChrootDir(name),
764                          constants.SECURE_DIR_MODE)])
765
766     if security_model == constants.HT_SM_POOL:
767       ss = ssconf.SimpleStore()
768       uid_pool = uidpool.ParseUidPool(ss.GetUidPool(), separator="\n")
769       all_uids = set(uidpool.ExpandUidPool(uid_pool))
770       uid = uidpool.RequestUnusedUid(all_uids)
771       try:
772         username = pwd.getpwuid(uid.GetUid()).pw_name
773         kvm_cmd.extend(["-runas", username])
774         self._RunKVMCmd(name, kvm_cmd)
775       except:
776         uidpool.ReleaseUid(uid)
777         raise
778       else:
779         uid.Unlock()
780         utils.WriteFile(self._InstanceUidFile(name), data=str(uid))
781     else:
782       self._RunKVMCmd(name, kvm_cmd)
783
784     if vnc_pwd:
785       change_cmd = 'change vnc password %s' % vnc_pwd
786       self._CallMonitorCommand(instance.name, change_cmd)
787
788     for filename in temp_files:
789       utils.RemoveFile(filename)
790
791   def StartInstance(self, instance, block_devices):
792     """Start an instance.
793
794     """
795     self._CheckDown(instance.name)
796     kvm_runtime = self._GenerateKVMRuntime(instance, block_devices)
797     self._SaveKVMRuntime(instance, kvm_runtime)
798     self._ExecuteKVMRuntime(instance, kvm_runtime)
799
800   def _CallMonitorCommand(self, instance_name, command):
801     """Invoke a command on the instance monitor.
802
803     """
804     socat = ("echo %s | %s STDIO UNIX-CONNECT:%s" %
805              (utils.ShellQuote(command),
806               constants.SOCAT_PATH,
807               utils.ShellQuote(self._InstanceMonitor(instance_name))))
808     result = utils.RunCmd(socat)
809     if result.failed:
810       msg = ("Failed to send command '%s' to instance %s."
811              " output: %s, error: %s, fail_reason: %s" %
812              (command, instance_name,
813               result.stdout, result.stderr, result.fail_reason))
814       raise errors.HypervisorError(msg)
815
816     return result
817
818   def StopInstance(self, instance, force=False, retry=False, name=None):
819     """Stop an instance.
820
821     """
822     if name is not None and not force:
823       raise errors.HypervisorError("Cannot shutdown cleanly by name only")
824     if name is None:
825       name = instance.name
826       acpi = instance.hvparams[constants.HV_ACPI]
827     else:
828       acpi = False
829     _, pid, alive = self._InstancePidAlive(name)
830     if pid > 0 and alive:
831       if force or not acpi:
832         utils.KillProcess(pid)
833       else:
834         self._CallMonitorCommand(name, 'system_powerdown')
835
836   def CleanupInstance(self, instance_name):
837     """Cleanup after a stopped instance
838
839     """
840     pidfile, pid, alive = self._InstancePidAlive(instance_name)
841     if pid > 0 and alive:
842       raise errors.HypervisorError("Cannot cleanup a live instance")
843     self._RemoveInstanceRuntimeFiles(pidfile, instance_name)
844
845   def RebootInstance(self, instance):
846     """Reboot an instance.
847
848     """
849     # For some reason if we do a 'send-key ctrl-alt-delete' to the control
850     # socket the instance will stop, but now power up again. So we'll resort
851     # to shutdown and restart.
852     _, _, alive = self._InstancePidAlive(instance.name)
853     if not alive:
854       raise errors.HypervisorError("Failed to reboot instance %s:"
855                                    " not running" % instance.name)
856     # StopInstance will delete the saved KVM runtime so:
857     # ...first load it...
858     kvm_runtime = self._LoadKVMRuntime(instance)
859     # ...now we can safely call StopInstance...
860     if not self.StopInstance(instance):
861       self.StopInstance(instance, force=True)
862     # ...and finally we can save it again, and execute it...
863     self._SaveKVMRuntime(instance, kvm_runtime)
864     self._ExecuteKVMRuntime(instance, kvm_runtime)
865
866   def MigrationInfo(self, instance):
867     """Get instance information to perform a migration.
868
869     @type instance: L{objects.Instance}
870     @param instance: instance to be migrated
871     @rtype: string
872     @return: content of the KVM runtime file
873
874     """
875     return self._ReadKVMRuntime(instance.name)
876
877   def AcceptInstance(self, instance, info, target):
878     """Prepare to accept an instance.
879
880     @type instance: L{objects.Instance}
881     @param instance: instance to be accepted
882     @type info: string
883     @param info: content of the KVM runtime file on the source node
884     @type target: string
885     @param target: target host (usually ip), on this node
886
887     """
888     kvm_runtime = self._LoadKVMRuntime(instance, serialized_runtime=info)
889     incoming_address = (target, instance.hvparams[constants.HV_MIGRATION_PORT])
890     self._ExecuteKVMRuntime(instance, kvm_runtime, incoming=incoming_address)
891
892   def FinalizeMigration(self, instance, info, success):
893     """Finalize an instance migration.
894
895     Stop the incoming mode KVM.
896
897     @type instance: L{objects.Instance}
898     @param instance: instance whose migration is being finalized
899
900     """
901     if success:
902       self._WriteKVMRuntime(instance.name, info)
903     else:
904       self.StopInstance(instance, force=True)
905
906   def MigrateInstance(self, instance, target, live):
907     """Migrate an instance to a target node.
908
909     The migration will not be attempted if the instance is not
910     currently running.
911
912     @type instance: L{objects.Instance}
913     @param instance: the instance to be migrated
914     @type target: string
915     @param target: ip address of the target node
916     @type live: boolean
917     @param live: perform a live migration
918
919     """
920     instance_name = instance.name
921     port = instance.hvparams[constants.HV_MIGRATION_PORT]
922     pidfile, pid, alive = self._InstancePidAlive(instance_name)
923     if not alive:
924       raise errors.HypervisorError("Instance not running, cannot migrate")
925
926     if not netutils.TcpPing(target, port, live_port_needed=True):
927       raise errors.HypervisorError("Remote host %s not listening on port"
928                                    " %s, cannot migrate" % (target, port))
929
930     if not live:
931       self._CallMonitorCommand(instance_name, 'stop')
932
933     migrate_command = ('migrate_set_speed %dm' %
934         instance.hvparams[constants.HV_MIGRATION_BANDWIDTH])
935     self._CallMonitorCommand(instance_name, migrate_command)
936
937     migrate_command = ('migrate_set_downtime %dms' %
938         instance.hvparams[constants.HV_MIGRATION_DOWNTIME])
939     self._CallMonitorCommand(instance_name, migrate_command)
940
941     migrate_command = 'migrate -d tcp:%s:%s' % (target, port)
942     self._CallMonitorCommand(instance_name, migrate_command)
943
944     info_command = 'info migrate'
945     done = False
946     broken_answers = 0
947     while not done:
948       result = self._CallMonitorCommand(instance_name, info_command)
949       match = self._MIGRATION_STATUS_RE.search(result.stdout)
950       if not match:
951         broken_answers += 1
952         if not result.stdout:
953           logging.info("KVM: empty 'info migrate' result")
954         else:
955           logging.warning("KVM: unknown 'info migrate' result: %s",
956                           result.stdout)
957         time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
958       else:
959         status = match.group(1)
960         if status == 'completed':
961           done = True
962         elif status == 'active':
963           # reset the broken answers count
964           broken_answers = 0
965           time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
966         elif status == 'failed' or status == 'cancelled':
967           if not live:
968             self._CallMonitorCommand(instance_name, 'cont')
969           raise errors.HypervisorError("Migration %s at the kvm level" %
970                                        status)
971         else:
972           logging.warning("KVM: unknown migration status '%s'", status)
973           broken_answers += 1
974           time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
975       if broken_answers >= self._MIGRATION_INFO_MAX_BAD_ANSWERS:
976         raise errors.HypervisorError("Too many 'info migrate' broken answers")
977
978     utils.KillProcess(pid)
979     self._RemoveInstanceRuntimeFiles(pidfile, instance_name)
980
981   def GetNodeInfo(self):
982     """Return information about the node.
983
984     This is just a wrapper over the base GetLinuxNodeInfo method.
985
986     @return: a dict with the following keys (values in MiB):
987           - memory_total: the total memory size on the node
988           - memory_free: the available memory on the node for instances
989           - memory_dom0: the memory used by the node itself, if available
990
991     """
992     return self.GetLinuxNodeInfo()
993
994   @classmethod
995   def GetShellCommandForConsole(cls, instance, hvparams, beparams):
996     """Return a command for connecting to the console of an instance.
997
998     """
999     if hvparams[constants.HV_SERIAL_CONSOLE]:
1000       shell_command = ("%s STDIO,%s UNIX-CONNECT:%s" %
1001                        (constants.SOCAT_PATH, cls._SocatUnixConsoleParams(),
1002                         utils.ShellQuote(cls._InstanceSerial(instance.name))))
1003     else:
1004       shell_command = "echo 'No serial shell for instance %s'" % instance.name
1005
1006     vnc_bind_address = hvparams[constants.HV_VNC_BIND_ADDRESS]
1007     if vnc_bind_address:
1008       if instance.network_port > constants.VNC_BASE_PORT:
1009         display = instance.network_port - constants.VNC_BASE_PORT
1010         vnc_command = ("echo 'Instance has VNC listening on %s:%d"
1011                        " (display: %d)'" % (vnc_bind_address,
1012                                             instance.network_port,
1013                                             display))
1014         shell_command = "%s; %s" % (vnc_command, shell_command)
1015
1016     return shell_command
1017
1018   def Verify(self):
1019     """Verify the hypervisor.
1020
1021     Check that the binary exists.
1022
1023     """
1024     if not os.path.exists(constants.KVM_PATH):
1025       return "The kvm binary ('%s') does not exist." % constants.KVM_PATH
1026     if not os.path.exists(constants.SOCAT_PATH):
1027       return "The socat binary ('%s') does not exist." % constants.SOCAT_PATH
1028
1029
1030   @classmethod
1031   def CheckParameterSyntax(cls, hvparams):
1032     """Check the given parameters for validity.
1033
1034     @type hvparams:  dict
1035     @param hvparams: dictionary with parameter names/value
1036     @raise errors.HypervisorError: when a parameter is not valid
1037
1038     """
1039     super(KVMHypervisor, cls).CheckParameterSyntax(hvparams)
1040
1041     kernel_path = hvparams[constants.HV_KERNEL_PATH]
1042     if kernel_path:
1043       if not hvparams[constants.HV_ROOT_PATH]:
1044         raise errors.HypervisorError("Need a root partition for the instance,"
1045                                      " if a kernel is defined")
1046
1047     if (hvparams[constants.HV_VNC_X509_VERIFY] and
1048         not hvparams[constants.HV_VNC_X509]):
1049       raise errors.HypervisorError("%s must be defined, if %s is" %
1050                                    (constants.HV_VNC_X509,
1051                                     constants.HV_VNC_X509_VERIFY))
1052
1053     boot_order = hvparams[constants.HV_BOOT_ORDER]
1054     if (boot_order == constants.HT_BO_CDROM and
1055         not hvparams[constants.HV_CDROM_IMAGE_PATH]):
1056       raise errors.HypervisorError("Cannot boot from cdrom without an"
1057                                    " ISO path")
1058
1059     security_model = hvparams[constants.HV_SECURITY_MODEL]
1060     if security_model == constants.HT_SM_USER:
1061       if not hvparams[constants.HV_SECURITY_DOMAIN]:
1062         raise errors.HypervisorError("A security domain (user to run kvm as)"
1063                                      " must be specified")
1064     elif (security_model == constants.HT_SM_NONE or
1065           security_model == constants.HT_SM_POOL):
1066       if hvparams[constants.HV_SECURITY_DOMAIN]:
1067         raise errors.HypervisorError("Cannot have a security domain when the"
1068                                      " security model is 'none' or 'pool'")
1069
1070   @classmethod
1071   def ValidateParameters(cls, hvparams):
1072     """Check the given parameters for validity.
1073
1074     @type hvparams:  dict
1075     @param hvparams: dictionary with parameter names/value
1076     @raise errors.HypervisorError: when a parameter is not valid
1077
1078     """
1079     super(KVMHypervisor, cls).ValidateParameters(hvparams)
1080
1081     security_model = hvparams[constants.HV_SECURITY_MODEL]
1082     if security_model == constants.HT_SM_USER:
1083       username = hvparams[constants.HV_SECURITY_DOMAIN]
1084       try:
1085         pwd.getpwnam(username)
1086       except KeyError:
1087         raise errors.HypervisorError("Unknown security domain user %s"
1088                                      % username)
1089
1090   @classmethod
1091   def PowercycleNode(cls):
1092     """KVM powercycle, just a wrapper over Linux powercycle.
1093
1094     """
1095     cls.LinuxPowercycle()