LUVerifyCluster: update _ValidateNode description
[ganeti-local] / lib / hypervisor / hv_kvm.py
1 #
2 #
3
4 # Copyright (C) 2008 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """KVM hypervisor
23
24 """
25
26 import errno
27 import os
28 import os.path
29 import re
30 import tempfile
31 import time
32 import logging
33 import pwd
34 from cStringIO import StringIO
35
36 from ganeti import utils
37 from ganeti import constants
38 from ganeti import errors
39 from ganeti import serializer
40 from ganeti import objects
41 from ganeti import uidpool
42 from ganeti import ssconf
43 from ganeti.hypervisor import hv_base
44 from ganeti import netutils
45
46
# Site-specific network setup script. The per-NIC scripts generated by
# _WriteNetScript run it when it exists and is executable, and only fall back
# to their built-in bridged/routed setup otherwise.
_KVM_NETWORK_SCRIPT = constants.SYSCONFDIR + "/ganeti/kvm-vif-bridge"
48
49
def _WriteNetScript(instance, nic, index):
  """Write a script to connect a net interface to the proper bridge.

  The generated shell script exports the instance and NIC settings as
  environment variables (INSTANCE, MAC, MODE, INTERFACE, TAGS and, when
  available, IP, LINK and BRIDGE). It then runs the site-specific script
  C{_KVM_NETWORK_SCRIPT} if that is executable; otherwise it brings the
  interface up itself and either adds it to the bridge (bridged mode) or
  sets up routing and proxy ARP/NDP for the NIC's IP (routed mode).

  This can be used by any qemu-type hypervisor.

  @type instance: L{objects.Instance}
  @param instance: Instance object
  @type nic: L{objects.NIC}
  @param nic: NIC object
  @type index: int
  @param index: NIC index (only used in error messages)
  @return: Script
  @rtype: string
  @raise errors.HypervisorError: if the NIC is routed but has no IP address

  """
  mode = nic.nicparams[constants.NIC_MODE]
  link = nic.nicparams[constants.NIC_LINK]
  bridged = (mode == constants.NIC_MODE_BRIDGED)
  routed = (mode == constants.NIC_MODE_ROUTED)

  # A routed NIC without an address can never be set up; fail before any
  # part of the script is generated.
  if routed and not nic.ip:
    raise errors.HypervisorError("nic/%d is routed, but has no IP"
                                 " address" % index)

  if instance.tags:
    tags = " ".join(instance.tags)
  else:
    tags = ""

  buf = StringIO()
  sw = utils.ShellWriter(buf)
  sw.Write("#!/bin/sh")
  sw.Write("# this is autogenerated by Ganeti, please do not edit")
  sw.Write("export PATH=$PATH:/sbin:/usr/sbin")
  sw.Write("export INSTANCE=%s", utils.ShellQuote(instance.name))
  sw.Write("export MAC=%s", utils.ShellQuote(nic.mac))
  sw.Write("export MODE=%s", utils.ShellQuote(mode))
  # The interface name is passed as first argument to the script
  sw.Write("export INTERFACE=\"$1\"")
  sw.Write("export TAGS=%s", utils.ShellQuote(tags))

  if nic.ip:
    sw.Write("export IP=%s", utils.ShellQuote(nic.ip))

  if link:
    sw.Write("export LINK=%s", utils.ShellQuote(link))

  if bridged:
    # In bridged mode the NIC link is the name of the bridge
    sw.Write("export BRIDGE=%s", utils.ShellQuote(link))

  # TODO: make this configurable at ./configure time
  # Quote the path both where it is tested and where it is executed, so the
  # two uses always agree.
  user_script = utils.ShellQuote(_KVM_NETWORK_SCRIPT)
  sw.Write("if [ -x %s ]; then", user_script)
  sw.IncIndent()
  try:
    sw.Write("# Execute the user-specific vif file")
    sw.Write("%s", user_script)
  finally:
    sw.DecIndent()
  sw.Write("else")
  sw.IncIndent()
  try:
    sw.Write("ifconfig $INTERFACE 0.0.0.0 up")

    if bridged:
      sw.Write("# Connect the interface to the bridge")
      sw.Write("brctl addif $BRIDGE $INTERFACE")

    elif routed:
      sw.Write("# Route traffic targeted at the IP to the interface")
      if link:
        # With a link set, routes go into the routing table named by $LINK
        sw.Write("while ip rule del dev $INTERFACE; do :; done")
        sw.Write("ip rule add dev $INTERFACE table $LINK")
        sw.Write("ip route replace $IP table $LINK proto static"
                 " dev $INTERFACE")
      else:
        sw.Write("ip route replace $IP proto static dev $INTERFACE")

      # Enable proxying and forwarding for each address family whose
      # per-interface sysctl directory exists.
      for (family, proxy_knob) in (("ipv4", "proxy_arp"),
                                   ("ipv6", "proxy_ndp")):
        conf_dir = "/proc/sys/net/%s/conf/$INTERFACE" % family
        sw.Write("if [ -d %s ]; then", conf_dir)
        sw.IncIndent()
        try:
          sw.Write("echo 1 > %s/%s", conf_dir, proxy_knob)
          sw.Write("echo 1 > %s/forwarding", conf_dir)
        finally:
          sw.DecIndent()
        sw.Write("fi")
  finally:
    sw.DecIndent()
  sw.Write("fi")

  return buf.getvalue()
148
149
150 class KVMHypervisor(hv_base.BaseHypervisor):
151   """KVM hypervisor interface"""
  CAN_MIGRATE = True

  # All runtime state of the hypervisor lives below _ROOT_DIR; the
  # directories listed in _DIRS are (re)created by __init__.
  _ROOT_DIR = constants.RUN_GANETI_DIR + "/kvm-hypervisor"
  _PIDS_DIR = _ROOT_DIR + "/pid" # contains live instances pids
  _UIDS_DIR = _ROOT_DIR + "/uid" # contains instances reserved uids
  _CTRL_DIR = _ROOT_DIR + "/ctrl" # contains instances control sockets
  _CONF_DIR = _ROOT_DIR + "/conf" # contains instances startup data
  # KVM instances with chroot enabled are started in empty chroot directories.
  _CHROOT_DIR = _ROOT_DIR + "/chroot" # for empty chroot directories
  # After an instance is stopped, its chroot directory is removed.
  # If the chroot directory is not empty, it can't be removed.
  # A non-empty chroot directory indicates a possible security incident.
  # To support forensics, the non-empty chroot directory is quarantined in
  # a separate directory, called 'chroot-quarantine'.
  _CHROOT_QUARANTINE_DIR = _ROOT_DIR + "/chroot-quarantine"
  _DIRS = [_ROOT_DIR, _PIDS_DIR, _UIDS_DIR, _CTRL_DIR, _CONF_DIR,
           _CHROOT_DIR, _CHROOT_QUARANTINE_DIR]

  # Hypervisor parameters accepted by this hypervisor, keyed by parameter
  # name. The values are check descriptors from hv_base (or an explicit
  # tuple, as for the VNC bind address); their exact layout is defined in
  # hv_base -- NOTE(review): confirm there before adding entries.
  PARAMETERS = {
    constants.HV_KERNEL_PATH: hv_base.OPT_FILE_CHECK,
    constants.HV_INITRD_PATH: hv_base.OPT_FILE_CHECK,
    constants.HV_ROOT_PATH: hv_base.NO_CHECK,
    constants.HV_KERNEL_ARGS: hv_base.NO_CHECK,
    constants.HV_ACPI: hv_base.NO_CHECK,
    constants.HV_SERIAL_CONSOLE: hv_base.NO_CHECK,
    # The VNC bind address is either an IPv4 address or the absolute path of
    # a directory in which the unix socket is created
    constants.HV_VNC_BIND_ADDRESS:
      (False, lambda x: (netutils.IsValidIP4(x) or utils.IsNormAbsPath(x)),
       "the VNC bind address must be either a valid IP address or an absolute"
       " pathname", None, None),
    constants.HV_VNC_TLS: hv_base.NO_CHECK,
    constants.HV_VNC_X509: hv_base.OPT_DIR_CHECK,
    constants.HV_VNC_X509_VERIFY: hv_base.NO_CHECK,
    constants.HV_VNC_PASSWORD_FILE: hv_base.OPT_FILE_CHECK,
    constants.HV_CDROM_IMAGE_PATH: hv_base.OPT_FILE_CHECK,
    constants.HV_BOOT_ORDER:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_BO_TYPES),
    constants.HV_NIC_TYPE:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_NIC_TYPES),
    constants.HV_DISK_TYPE:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_DISK_TYPES),
    constants.HV_USB_MOUSE:
      hv_base.ParamInSet(False, constants.HT_KVM_VALID_MOUSE_TYPES),
    constants.HV_MIGRATION_PORT: hv_base.NET_PORT_CHECK,
    constants.HV_MIGRATION_BANDWIDTH: hv_base.NO_CHECK,
    constants.HV_MIGRATION_DOWNTIME: hv_base.NO_CHECK,
    constants.HV_USE_LOCALTIME: hv_base.NO_CHECK,
    constants.HV_DISK_CACHE:
      hv_base.ParamInSet(True, constants.HT_VALID_CACHE_TYPES),
    constants.HV_SECURITY_MODEL:
      hv_base.ParamInSet(True, constants.HT_KVM_VALID_SM_TYPES),
    constants.HV_SECURITY_DOMAIN: hv_base.NO_CHECK,
    constants.HV_KVM_FLAG:
      hv_base.ParamInSet(False, constants.HT_KVM_FLAG_VALUES),
    constants.HV_VHOST_NET: hv_base.NO_CHECK,
    constants.HV_KVM_USE_CHROOT: hv_base.NO_CHECK,
    }

  # Extracts the status word from a "Migration status: <word>" line
  # (case-insensitive, matched per line).
  _MIGRATION_STATUS_RE = re.compile('Migration\s+status:\s+(\w+)',
                                    re.M | re.I)
  # Presumably the number of unparseable status answers tolerated and the
  # delay between status polls -- TODO confirm against the migration code,
  # which uses these constants.
  _MIGRATION_INFO_MAX_BAD_ANSWERS = 5
  _MIGRATION_INFO_RETRY_DELAY = 2

  ANCILLARY_FILES = [
    _KVM_NETWORK_SCRIPT,
    ]
217
218   def __init__(self):
219     hv_base.BaseHypervisor.__init__(self)
220     # Let's make sure the directories we need exist, even if the RUN_DIR lives
221     # in a tmpfs filesystem or has been otherwise wiped out.
222     dirs = [(dname, constants.RUN_DIRS_MODE) for dname in self._DIRS]
223     utils.EnsureDirs(dirs)
224
225   @classmethod
226   def _InstancePidFile(cls, instance_name):
227     """Returns the instance pidfile.
228
229     """
230     return utils.PathJoin(cls._PIDS_DIR, instance_name)
231
232   @classmethod
233   def _InstanceUidFile(cls, instance_name):
234     """Returns the instance uidfile.
235
236     """
237     return utils.PathJoin(cls._UIDS_DIR, instance_name)
238
239   @classmethod
240   def _InstancePidInfo(cls, pid):
241     """Check pid file for instance information.
242
243     Check that a pid file is associated with an instance, and retrieve
244     information from its command line.
245
246     @type pid: string or int
247     @param pid: process id of the instance to check
248     @rtype: tuple
249     @return: (instance_name, memory, vcpus)
250     @raise errors.HypervisorError: when an instance cannot be found
251
252     """
253     alive = utils.IsProcessAlive(pid)
254     if not alive:
255       raise errors.HypervisorError("Cannot get info for pid %s" % pid)
256
257     cmdline_file = utils.PathJoin("/proc", str(pid), "cmdline")
258     try:
259       cmdline = utils.ReadFile(cmdline_file)
260     except EnvironmentError, err:
261       raise errors.HypervisorError("Can't open cmdline file for pid %s: %s" %
262                                    (pid, err))
263
264     instance = None
265     memory = 0
266     vcpus = 0
267
268     arg_list = cmdline.split('\x00')
269     while arg_list:
270       arg =  arg_list.pop(0)
271       if arg == "-name":
272         instance = arg_list.pop(0)
273       elif arg == "-m":
274         memory = int(arg_list.pop(0))
275       elif arg == "-smp":
276         vcpus = int(arg_list.pop(0))
277
278     if instance is None:
279       raise errors.HypervisorError("Pid %s doesn't contain a ganeti kvm"
280                                    " instance" % pid)
281
282     return (instance, memory, vcpus)
283
284   def _InstancePidAlive(self, instance_name):
285     """Returns the instance pidfile, pid, and liveness.
286
287     @type instance_name: string
288     @param instance_name: instance name
289     @rtype: tuple
290     @return: (pid file name, pid, liveness)
291
292     """
293     pidfile = self._InstancePidFile(instance_name)
294     pid = utils.ReadPidFile(pidfile)
295
296     alive = False
297     try:
298       cmd_instance = self._InstancePidInfo(pid)[0]
299       alive = (cmd_instance == instance_name)
300     except errors.HypervisorError:
301       pass
302
303     return (pidfile, pid, alive)
304
305   def _CheckDown(self, instance_name):
306     """Raises an error unless the given instance is down.
307
308     """
309     alive = self._InstancePidAlive(instance_name)[2]
310     if alive:
311       raise errors.HypervisorError("Failed to start instance %s: %s" %
312                                    (instance_name, "already running"))
313
314   @classmethod
315   def _InstanceMonitor(cls, instance_name):
316     """Returns the instance monitor socket name
317
318     """
319     return utils.PathJoin(cls._CTRL_DIR, "%s.monitor" % instance_name)
320
321   @classmethod
322   def _InstanceSerial(cls, instance_name):
323     """Returns the instance serial socket name
324
325     """
326     return utils.PathJoin(cls._CTRL_DIR, "%s.serial" % instance_name)
327
328   @staticmethod
329   def _SocatUnixConsoleParams():
330     """Returns the correct parameters for socat
331
332     If we have a new-enough socat we can use raw mode with an escape character.
333
334     """
335     if constants.SOCAT_USE_ESCAPE:
336       return "raw,echo=0,escape=%s" % constants.SOCAT_ESCAPE_CODE
337     else:
338       return "echo=0,icanon=0"
339
340   @classmethod
341   def _InstanceKVMRuntime(cls, instance_name):
342     """Returns the instance KVM runtime filename
343
344     """
345     return utils.PathJoin(cls._CONF_DIR, "%s.runtime" % instance_name)
346
347   @classmethod
348   def _InstanceChrootDir(cls, instance_name):
349     """Returns the name of the KVM chroot dir of the instance
350
351     """
352     return utils.PathJoin(cls._CHROOT_DIR, instance_name)
353
354   @classmethod
355   def _TryReadUidFile(cls, uid_file):
356     """Try to read a uid file
357
358     """
359     if os.path.exists(uid_file):
360       try:
361         uid = int(utils.ReadOneLineFile(uid_file))
362         return uid
363       except EnvironmentError:
364         logging.warning("Can't read uid file", exc_info=True)
365       except (TypeError, ValueError):
366         logging.warning("Can't parse uid file contents", exc_info=True)
367     return None
368
369   @classmethod
370   def _RemoveInstanceRuntimeFiles(cls, pidfile, instance_name):
371     """Removes an instance's rutime sockets/files/dirs.
372
373     """
374     utils.RemoveFile(pidfile)
375     utils.RemoveFile(cls._InstanceMonitor(instance_name))
376     utils.RemoveFile(cls._InstanceSerial(instance_name))
377     utils.RemoveFile(cls._InstanceKVMRuntime(instance_name))
378     uid_file = cls._InstanceUidFile(instance_name)
379     uid = cls._TryReadUidFile(uid_file)
380     utils.RemoveFile(uid_file)
381     if uid is not None:
382       uidpool.ReleaseUid(uid)
383     try:
384       chroot_dir = cls._InstanceChrootDir(instance_name)
385       utils.RemoveDir(chroot_dir)
386     except OSError, err:
387       if err.errno == errno.ENOTEMPTY:
388         # The chroot directory is expected to be empty, but it isn't.
389         new_chroot_dir = tempfile.mkdtemp(dir=cls._CHROOT_QUARANTINE_DIR,
390                                           prefix="%s-%s-" %
391                                           (instance_name,
392                                            utils.TimestampForFilename()))
393         logging.warning("The chroot directory of instance %s can not be"
394                         " removed as it is not empty. Moving it to the"
395                         " quarantine instead. Please investigate the"
396                         " contents (%s) and clean up manually",
397                         instance_name, new_chroot_dir)
398         utils.RenameFile(chroot_dir, new_chroot_dir)
399       else:
400         raise
401
402   @staticmethod
403   def _WriteNetScriptFile(instance, seq, nic):
404     """Write a script to connect a net interface to the proper bridge.
405
406     This can be used by any qemu-type hypervisor.
407
408     @param instance: instance we're acting on
409     @type instance: instance object
410     @param seq: nic sequence number
411     @type seq: int
412     @param nic: nic we're acting on
413     @type nic: nic object
414     @return: netscript file name
415     @rtype: string
416
417     """
418     script = _WriteNetScript(instance, nic, seq)
419
420     # As much as we'd like to put this in our _ROOT_DIR, that will happen to be
421     # mounted noexec sometimes, so we'll have to find another place.
422     (tmpfd, tmpfile_name) = tempfile.mkstemp()
423     tmpfile = os.fdopen(tmpfd, 'w')
424     try:
425       tmpfile.write(script)
426     finally:
427       tmpfile.close()
428     os.chmod(tmpfile_name, 0755)
429     return tmpfile_name
430
431   def ListInstances(self):
432     """Get the list of running instances.
433
434     We can do this by listing our live instances directory and
435     checking whether the associated kvm process is still alive.
436
437     """
438     result = []
439     for name in os.listdir(self._PIDS_DIR):
440       if self._InstancePidAlive(name)[2]:
441         result.append(name)
442     return result
443
444   def GetInstanceInfo(self, instance_name):
445     """Get instance properties.
446
447     @type instance_name: string
448     @param instance_name: the instance name
449     @rtype: tuple of strings
450     @return: (name, id, memory, vcpus, stat, times)
451
452     """
453     _, pid, alive = self._InstancePidAlive(instance_name)
454     if not alive:
455       return None
456
457     _, memory, vcpus = self._InstancePidInfo(pid)
458     stat = "---b-"
459     times = "0"
460
461     return (instance_name, pid, memory, vcpus, stat, times)
462
463   def GetAllInstancesInfo(self):
464     """Get properties of all instances.
465
466     @return: list of tuples (name, id, memory, vcpus, stat, times)
467
468     """
469     data = []
470     for name in os.listdir(self._PIDS_DIR):
471       try:
472         info = self.GetInstanceInfo(name)
473       except errors.HypervisorError:
474         continue
475       if info:
476         data.append(info)
477     return data
478
  def _GenerateKVMRuntime(self, instance, block_devices):
    """Generate KVM information to start an instance.

    Builds the kvm command line from the instance's backend and hypervisor
    parameters: identity, memory/cpus, boot order, disks, cdrom, direct
    kernel boot, mouse, VNC, monitor/serial sockets, clock and chroot. NIC
    options are not part of the generated command; the NICs are returned
    separately so that they can be expanded when the command is executed.

    @type instance: L{objects.Instance}
    @param instance: instance to generate the runtime for
    @type block_devices: list of tuples
    @param block_devices: (disk object, device path) pairs of the instance's
        disks
    @rtype: tuple
    @return: (kvm_cmd, kvm_nics, hvparams)
    @raise errors.HypervisorError: if one of the disks is not read-write

    """
    pidfile  = self._InstancePidFile(instance.name)
    kvm = constants.KVM_PATH
    kvm_cmd = [kvm]
    # the name is used by the vnc server, if enabled, and is read back from
    # the process command line by _InstancePidInfo to identify the instance
    kvm_cmd.extend(['-name', instance.name])
    kvm_cmd.extend(['-m', instance.beparams[constants.BE_MEMORY]])
    kvm_cmd.extend(['-smp', instance.beparams[constants.BE_VCPUS]])
    kvm_cmd.extend(['-pidfile', pidfile])
    kvm_cmd.extend(['-daemonize'])
    if not instance.hvparams[constants.HV_ACPI]:
      kvm_cmd.extend(['-no-acpi'])

    hvp = instance.hvparams
    boot_disk = hvp[constants.HV_BOOT_ORDER] == constants.HT_BO_DISK
    boot_cdrom = hvp[constants.HV_BOOT_ORDER] == constants.HT_BO_CDROM
    boot_network = hvp[constants.HV_BOOT_ORDER] == constants.HT_BO_NETWORK

    # Any other value of the kvm flag leaves the choice to the kvm binary
    if hvp[constants.HV_KVM_FLAG] == constants.HT_KVM_ENABLED:
      kvm_cmd.extend(["-enable-kvm"])
    elif hvp[constants.HV_KVM_FLAG] == constants.HT_KVM_DISABLED:
      kvm_cmd.extend(["-disable-kvm"])

    if boot_network:
      kvm_cmd.extend(['-boot', 'n'])

    # Disk interface: "paravirtual" is spelled "virtio" on the command line,
    # all other types are passed through as they are
    disk_type = hvp[constants.HV_DISK_TYPE]
    if disk_type == constants.HT_DISK_PARAVIRTUAL:
      if_val = ',if=virtio'
    else:
      if_val = ',if=%s' % disk_type
    # Cache mode
    disk_cache = hvp[constants.HV_DISK_CACHE]
    if disk_cache != constants.HT_CACHE_DEFAULT:
      cache_val = ",cache=%s" % disk_cache
    else:
      cache_val = ""
    for cfdev, dev_path in block_devices:
      if cfdev.mode != constants.DISK_RDWR:
        raise errors.HypervisorError("Instance has read-only disks which"
                                     " are not supported by KVM")
      # TODO: handle FD_LOOP and FD_BLKTAP (?)
      if boot_disk:
        kvm_cmd.extend(['-boot', 'c'])
        # Non-IDE disks are additionally flagged as bootable on the drive
        if disk_type != constants.HT_DISK_IDE:
          boot_val = ',boot=on'
        else:
          boot_val = ''
        # We only boot from the first disk
        boot_disk = False
      else:
        boot_val = ''

      drive_val = 'file=%s,format=raw%s%s%s' % (dev_path, if_val, boot_val,
                                                cache_val)
      kvm_cmd.extend(['-drive', drive_val])

    iso_image = hvp[constants.HV_CDROM_IMAGE_PATH]
    if iso_image:
      options = ',format=raw,media=cdrom'
      if boot_cdrom:
        kvm_cmd.extend(['-boot', 'd'])
        if disk_type != constants.HT_DISK_IDE:
          options = '%s,boot=on' % options
      else:
        # The disk interface is only given for a cdrom we don't boot from.
        # (This recomputes the same if_val as for the disks above.)
        if disk_type == constants.HT_DISK_PARAVIRTUAL:
          if_val = ',if=virtio'
        else:
          if_val = ',if=%s' % disk_type
        options = '%s%s' % (options, if_val)
      drive_val = 'file=%s%s' % (iso_image, options)
      kvm_cmd.extend(['-drive', drive_val])

    # Direct kernel boot; initrd and kernel arguments only make sense
    # together with a kernel
    kernel_path = hvp[constants.HV_KERNEL_PATH]
    if kernel_path:
      kvm_cmd.extend(['-kernel', kernel_path])
      initrd_path = hvp[constants.HV_INITRD_PATH]
      if initrd_path:
        kvm_cmd.extend(['-initrd', initrd_path])
      root_append = ['root=%s' % hvp[constants.HV_ROOT_PATH],
                     hvp[constants.HV_KERNEL_ARGS]]
      if hvp[constants.HV_SERIAL_CONSOLE]:
        root_append.append('console=ttyS0,38400')
      kvm_cmd.extend(['-append', ' '.join(root_append)])

    mouse_type = hvp[constants.HV_USB_MOUSE]
    if mouse_type:
      kvm_cmd.extend(['-usb'])
      kvm_cmd.extend(['-usbdevice', mouse_type])

    # VNC: an IPv4 bind address selects a TCP display derived from the
    # instance's network port, anything else is taken as the directory in
    # which a "<instance name>.vnc" unix socket is created
    vnc_bind_address = hvp[constants.HV_VNC_BIND_ADDRESS]
    if vnc_bind_address:
      if netutils.IsValidIP4(vnc_bind_address):
        if instance.network_port > constants.VNC_BASE_PORT:
          display = instance.network_port - constants.VNC_BASE_PORT
          if vnc_bind_address == constants.IP4_ADDRESS_ANY:
            vnc_arg = ':%d' % (display)
          else:
            vnc_arg = '%s:%d' % (vnc_bind_address, display)
        else:
          logging.error("Network port is not a valid VNC display (%d < %d)."
                        " Not starting VNC", instance.network_port,
                        constants.VNC_BASE_PORT)
          vnc_arg = 'none'

        # Only allow tls and other option when not binding to a file, for now.
        # kvm/qemu gets confused otherwise about the filename to use.
        # NOTE(review): the options below are also appended when vnc_arg is
        # 'none' (invalid port), yielding e.g. 'none,tls' -- confirm that kvm
        # accepts this.
        vnc_append = ''
        if hvp[constants.HV_VNC_TLS]:
          vnc_append = '%s,tls' % vnc_append
          if hvp[constants.HV_VNC_X509_VERIFY]:
            vnc_append = '%s,x509verify=%s' % (vnc_append,
                                               hvp[constants.HV_VNC_X509])
          elif hvp[constants.HV_VNC_X509]:
            vnc_append = '%s,x509=%s' % (vnc_append,
                                         hvp[constants.HV_VNC_X509])
        if hvp[constants.HV_VNC_PASSWORD_FILE]:
          vnc_append = '%s,password' % vnc_append

        vnc_arg = '%s%s' % (vnc_arg, vnc_append)

      else:
        vnc_arg = 'unix:%s/%s.vnc' % (vnc_bind_address, instance.name)

      kvm_cmd.extend(['-vnc', vnc_arg])

      # Also add a tablet USB device to act as a mouse
      # This solves various mouse alignment issues
      kvm_cmd.extend(['-usbdevice', 'tablet'])
    else:
      kvm_cmd.extend(['-nographic'])

    # The monitor socket is always created; the serial console socket only
    # if the serial console is enabled
    monitor_dev = ("unix:%s,server,nowait" %
                   self._InstanceMonitor(instance.name))
    kvm_cmd.extend(['-monitor', monitor_dev])
    if hvp[constants.HV_SERIAL_CONSOLE]:
      serial_dev = ('unix:%s,server,nowait' %
                    self._InstanceSerial(instance.name))
      kvm_cmd.extend(['-serial', serial_dev])
    else:
      kvm_cmd.extend(['-serial', 'none'])

    if hvp[constants.HV_USE_LOCALTIME]:
      kvm_cmd.extend(['-localtime'])

    if hvp[constants.HV_KVM_USE_CHROOT]:
      kvm_cmd.extend(['-chroot', self._InstanceChrootDir(instance.name)])

    # Save the current instance nics, but defer their expansion as parameters,
    # as we'll need to generate executable temp files for them.
    kvm_nics = instance.nics
    hvparams = hvp

    return (kvm_cmd, kvm_nics, hvparams)
636
637   def _WriteKVMRuntime(self, instance_name, data):
638     """Write an instance's KVM runtime
639
640     """
641     try:
642       utils.WriteFile(self._InstanceKVMRuntime(instance_name),
643                       data=data)
644     except EnvironmentError, err:
645       raise errors.HypervisorError("Failed to save KVM runtime file: %s" % err)
646
647   def _ReadKVMRuntime(self, instance_name):
648     """Read an instance's KVM runtime
649
650     """
651     try:
652       file_content = utils.ReadFile(self._InstanceKVMRuntime(instance_name))
653     except EnvironmentError, err:
654       raise errors.HypervisorError("Failed to load KVM runtime file: %s" % err)
655     return file_content
656
657   def _SaveKVMRuntime(self, instance, kvm_runtime):
658     """Save an instance's KVM runtime
659
660     """
661     kvm_cmd, kvm_nics, hvparams = kvm_runtime
662     serialized_nics = [nic.ToDict() for nic in kvm_nics]
663     serialized_form = serializer.Dump((kvm_cmd, serialized_nics, hvparams))
664     self._WriteKVMRuntime(instance.name, serialized_form)
665
666   def _LoadKVMRuntime(self, instance, serialized_runtime=None):
667     """Load an instance's KVM runtime
668
669     """
670     if not serialized_runtime:
671       serialized_runtime = self._ReadKVMRuntime(instance.name)
672     loaded_runtime = serializer.Load(serialized_runtime)
673     kvm_cmd, serialized_nics, hvparams = loaded_runtime
674     kvm_nics = [objects.NIC.FromDict(snic) for snic in serialized_nics]
675     return (kvm_cmd, kvm_nics, hvparams)
676
677   def _RunKVMCmd(self, name, kvm_cmd):
678     """Run the KVM cmd and check for errors
679
680     @type name: string
681     @param name: instance name
682     @type kvm_cmd: list of strings
683     @param kvm_cmd: runcmd input for kvm
684
685     """
686     result = utils.RunCmd(kvm_cmd)
687     if result.failed:
688       raise errors.HypervisorError("Failed to start instance %s: %s (%s)" %
689                                    (name, result.fail_reason, result.output))
690     if not self._InstancePidAlive(name)[2]:
691       raise errors.HypervisorError("Failed to start instance %s" % name)
692
693   def _ExecuteKVMRuntime(self, instance, kvm_runtime, incoming=None):
694     """Execute a KVM cmd, after completing it with some last minute data
695
696     @type incoming: tuple of strings
697     @param incoming: (target_host_ip, port)
698
699     """
700     # Small _ExecuteKVMRuntime hv parameters programming howto:
701     #  - conf_hvp contains the parameters as configured on ganeti. they might
702     #    have changed since the instance started; only use them if the change
703     #    won't affect the inside of the instance (which hasn't been rebooted).
704     #  - up_hvp contains the parameters as they were when the instance was
705     #    started, plus any new parameter which has been added between ganeti
706     #    versions: it is paramount that those default to a value which won't
707     #    affect the inside of the instance as well.
708     conf_hvp = instance.hvparams
709     name = instance.name
710     self._CheckDown(name)
711
712     temp_files = []
713
714     kvm_cmd, kvm_nics, up_hvp = kvm_runtime
715     up_hvp = objects.FillDict(conf_hvp, up_hvp)
716
717     # We know it's safe to run as a different user upon migration, so we'll use
718     # the latest conf, from conf_hvp.
719     security_model = conf_hvp[constants.HV_SECURITY_MODEL]
720     if security_model == constants.HT_SM_USER:
721       kvm_cmd.extend(["-runas", conf_hvp[constants.HV_SECURITY_DOMAIN]])
722
723     # We have reasons to believe changing something like the nic driver/type
724     # upon migration won't exactly fly with the instance kernel, so for nic
725     # related parameters we'll use up_hvp
726     if not kvm_nics:
727       kvm_cmd.extend(["-net", "none"])
728     else:
729       tap_extra = ""
730       nic_type = up_hvp[constants.HV_NIC_TYPE]
731       if nic_type == constants.HT_NIC_PARAVIRTUAL:
732         nic_model = "model=virtio"
733         if up_hvp[constants.HV_VHOST_NET]:
734           tap_extra = ",vhost=on"
735       else:
736         nic_model = "model=%s" % nic_type
737
738       for nic_seq, nic in enumerate(kvm_nics):
739         nic_val = "nic,vlan=%s,macaddr=%s,%s" % (nic_seq, nic.mac, nic_model)
740         script = self._WriteNetScriptFile(instance, nic_seq, nic)
741         tap_val = "tap,vlan=%s,script=%s%s" % (nic_seq, script, tap_extra)
742         kvm_cmd.extend(["-net", nic_val])
743         kvm_cmd.extend(["-net", tap_val])
744         temp_files.append(script)
745
746     if incoming:
747       target, port = incoming
748       kvm_cmd.extend(['-incoming', 'tcp:%s:%s' % (target, port)])
749
750     # Changing the vnc password doesn't bother the guest that much. At most it
751     # will surprise people who connect to it. Whether positively or negatively
752     # it's debatable.
753     vnc_pwd_file = conf_hvp[constants.HV_VNC_PASSWORD_FILE]
754     vnc_pwd = None
755     if vnc_pwd_file:
756       try:
757         vnc_pwd = utils.ReadOneLineFile(vnc_pwd_file, strict=True)
758       except EnvironmentError, err:
759         raise errors.HypervisorError("Failed to open VNC password file %s: %s"
760                                      % (vnc_pwd_file, err))
761
762     if conf_hvp[constants.HV_KVM_USE_CHROOT]:
763       utils.EnsureDirs([(self._InstanceChrootDir(name),
764                          constants.SECURE_DIR_MODE)])
765
766     if security_model == constants.HT_SM_POOL:
767       ss = ssconf.SimpleStore()
768       uid_pool = uidpool.ParseUidPool(ss.GetUidPool(), separator="\n")
769       all_uids = set(uidpool.ExpandUidPool(uid_pool))
770       uid = uidpool.RequestUnusedUid(all_uids)
771       try:
772         username = pwd.getpwuid(uid.GetUid()).pw_name
773         kvm_cmd.extend(["-runas", username])
774         self._RunKVMCmd(name, kvm_cmd)
775       except:
776         uidpool.ReleaseUid(uid)
777         raise
778       else:
779         uid.Unlock()
780         utils.WriteFile(self._InstanceUidFile(name), data=str(uid))
781     else:
782       self._RunKVMCmd(name, kvm_cmd)
783
784     if vnc_pwd:
785       change_cmd = 'change vnc password %s' % vnc_pwd
786       self._CallMonitorCommand(instance.name, change_cmd)
787
788     for filename in temp_files:
789       utils.RemoveFile(filename)
790
791   def StartInstance(self, instance, block_devices):
792     """Start an instance.
793
794     """
795     self._CheckDown(instance.name)
796     kvm_runtime = self._GenerateKVMRuntime(instance, block_devices)
797     self._SaveKVMRuntime(instance, kvm_runtime)
798     self._ExecuteKVMRuntime(instance, kvm_runtime)
799
800   def _CallMonitorCommand(self, instance_name, command):
801     """Invoke a command on the instance monitor.
802
803     """
804     socat = ("echo %s | %s STDIO UNIX-CONNECT:%s" %
805              (utils.ShellQuote(command),
806               constants.SOCAT_PATH,
807               utils.ShellQuote(self._InstanceMonitor(instance_name))))
808     result = utils.RunCmd(socat)
809     if result.failed:
810       msg = ("Failed to send command '%s' to instance %s."
811              " output: %s, error: %s, fail_reason: %s" %
812              (command, instance_name,
813               result.stdout, result.stderr, result.fail_reason))
814       raise errors.HypervisorError(msg)
815
816     return result
817
818   def StopInstance(self, instance, force=False, retry=False, name=None):
819     """Stop an instance.
820
821     """
822     if name is not None and not force:
823       raise errors.HypervisorError("Cannot shutdown cleanly by name only")
824     if name is None:
825       name = instance.name
826       acpi = instance.hvparams[constants.HV_ACPI]
827     else:
828       acpi = False
829     _, pid, alive = self._InstancePidAlive(name)
830     if pid > 0 and alive:
831       if force or not acpi:
832         utils.KillProcess(pid)
833       else:
834         self._CallMonitorCommand(name, 'system_powerdown')
835
836   def CleanupInstance(self, instance_name):
837     """Cleanup after a stopped instance
838
839     """
840     pidfile, pid, alive = self._InstancePidAlive(instance_name)
841     if pid > 0 and alive:
842       raise errors.HypervisorError("Cannot cleanup a live instance")
843     self._RemoveInstanceRuntimeFiles(pidfile, instance_name)
844
845   def RebootInstance(self, instance):
846     """Reboot an instance.
847
848     """
849     # For some reason if we do a 'send-key ctrl-alt-delete' to the control
850     # socket the instance will stop, but now power up again. So we'll resort
851     # to shutdown and restart.
852     _, _, alive = self._InstancePidAlive(instance.name)
853     if not alive:
854       raise errors.HypervisorError("Failed to reboot instance %s:"
855                                    " not running" % instance.name)
856     # StopInstance will delete the saved KVM runtime so:
857     # ...first load it...
858     kvm_runtime = self._LoadKVMRuntime(instance)
859     # ...now we can safely call StopInstance...
860     if not self.StopInstance(instance):
861       self.StopInstance(instance, force=True)
862     # ...and finally we can save it again, and execute it...
863     self._SaveKVMRuntime(instance, kvm_runtime)
864     self._ExecuteKVMRuntime(instance, kvm_runtime)
865
866   def MigrationInfo(self, instance):
867     """Get instance information to perform a migration.
868
869     @type instance: L{objects.Instance}
870     @param instance: instance to be migrated
871     @rtype: string
872     @return: content of the KVM runtime file
873
874     """
875     return self._ReadKVMRuntime(instance.name)
876
877   def AcceptInstance(self, instance, info, target):
878     """Prepare to accept an instance.
879
880     @type instance: L{objects.Instance}
881     @param instance: instance to be accepted
882     @type info: string
883     @param info: content of the KVM runtime file on the source node
884     @type target: string
885     @param target: target host (usually ip), on this node
886
887     """
888     kvm_runtime = self._LoadKVMRuntime(instance, serialized_runtime=info)
889     incoming_address = (target, instance.hvparams[constants.HV_MIGRATION_PORT])
890     self._ExecuteKVMRuntime(instance, kvm_runtime, incoming=incoming_address)
891
892   def FinalizeMigration(self, instance, info, success):
893     """Finalize an instance migration.
894
895     Stop the incoming mode KVM.
896
897     @type instance: L{objects.Instance}
898     @param instance: instance whose migration is being finalized
899
900     """
901     if success:
902       self._WriteKVMRuntime(instance.name, info)
903     else:
904       self.StopInstance(instance, force=True)
905
906   def MigrateInstance(self, instance, target, live):
907     """Migrate an instance to a target node.
908
909     The migration will not be attempted if the instance is not
910     currently running.
911
912     @type instance: L{objects.Instance}
913     @param instance: the instance to be migrated
914     @type target: string
915     @param target: ip address of the target node
916     @type live: boolean
917     @param live: perform a live migration
918
919     """
920     instance_name = instance.name
921     port = instance.hvparams[constants.HV_MIGRATION_PORT]
922     pidfile, pid, alive = self._InstancePidAlive(instance_name)
923     if not alive:
924       raise errors.HypervisorError("Instance not running, cannot migrate")
925
926     if not netutils.TcpPing(target, port, live_port_needed=True):
927       raise errors.HypervisorError("Remote host %s not listening on port"
928                                    " %s, cannot migrate" % (target, port))
929
930     if not live:
931       self._CallMonitorCommand(instance_name, 'stop')
932
933     migrate_command = ('migrate_set_speed %dm' %
934         instance.hvparams[constants.HV_MIGRATION_BANDWIDTH])
935     self._CallMonitorCommand(instance_name, migrate_command)
936
937     migrate_command = ('migrate_set_downtime %dms' %
938         instance.hvparams[constants.HV_MIGRATION_DOWNTIME])
939     self._CallMonitorCommand(instance_name, migrate_command)
940
941     migrate_command = 'migrate -d tcp:%s:%s' % (target, port)
942     self._CallMonitorCommand(instance_name, migrate_command)
943
944     info_command = 'info migrate'
945     done = False
946     broken_answers = 0
947     while not done:
948       result = self._CallMonitorCommand(instance_name, info_command)
949       match = self._MIGRATION_STATUS_RE.search(result.stdout)
950       if not match:
951         broken_answers += 1
952         if not result.stdout:
953           logging.info("KVM: empty 'info migrate' result")
954         else:
955           logging.warning("KVM: unknown 'info migrate' result: %s",
956                           result.stdout)
957         time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
958       else:
959         status = match.group(1)
960         if status == 'completed':
961           done = True
962         elif status == 'active':
963           # reset the broken answers count
964           broken_answers = 0
965           time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
966         elif status == 'failed' or status == 'cancelled':
967           if not live:
968             self._CallMonitorCommand(instance_name, 'cont')
969           raise errors.HypervisorError("Migration %s at the kvm level" %
970                                        status)
971         else:
972           logging.warning("KVM: unknown migration status '%s'", status)
973           broken_answers += 1
974           time.sleep(self._MIGRATION_INFO_RETRY_DELAY)
975       if broken_answers >= self._MIGRATION_INFO_MAX_BAD_ANSWERS:
976         raise errors.HypervisorError("Too many 'info migrate' broken answers")
977
978     utils.KillProcess(pid)
979     self._RemoveInstanceRuntimeFiles(pidfile, instance_name)
980
981   def GetNodeInfo(self):
982     """Return information about the node.
983
984     This is just a wrapper over the base GetLinuxNodeInfo method.
985
986     @return: a dict with the following keys (values in MiB):
987           - memory_total: the total memory size on the node
988           - memory_free: the available memory on the node for instances
989           - memory_dom0: the memory used by the node itself, if available
990
991     """
992     return self.GetLinuxNodeInfo()
993
994   @classmethod
995   def GetShellCommandForConsole(cls, instance, hvparams, beparams):
996     """Return a command for connecting to the console of an instance.
997
998     """
999     if hvparams[constants.HV_SERIAL_CONSOLE]:
1000       shell_command = ("%s STDIO,%s UNIX-CONNECT:%s" %
1001                        (constants.SOCAT_PATH, cls._SocatUnixConsoleParams(),
1002                         utils.ShellQuote(cls._InstanceSerial(instance.name))))
1003     else:
1004       shell_command = "echo 'No serial shell for instance %s'" % instance.name
1005
1006     vnc_bind_address = hvparams[constants.HV_VNC_BIND_ADDRESS]
1007     if vnc_bind_address:
1008       if instance.network_port > constants.VNC_BASE_PORT:
1009         display = instance.network_port - constants.VNC_BASE_PORT
1010         vnc_command = ("echo 'Instance has VNC listening on %s:%d"
1011                        " (display: %d)'" % (vnc_bind_address,
1012                                             instance.network_port,
1013                                             display))
1014         shell_command = "%s; %s" % (vnc_command, shell_command)
1015
1016     return shell_command
1017
1018   def Verify(self):
1019     """Verify the hypervisor.
1020
1021     Check that the binary exists.
1022
1023     """
1024     if not os.path.exists(constants.KVM_PATH):
1025       return "The kvm binary ('%s') does not exist." % constants.KVM_PATH
1026     if not os.path.exists(constants.SOCAT_PATH):
1027       return "The socat binary ('%s') does not exist." % constants.SOCAT_PATH
1028
1029
1030   @classmethod
1031   def CheckParameterSyntax(cls, hvparams):
1032     """Check the given parameters for validity.
1033
1034     @type hvparams:  dict
1035     @param hvparams: dictionary with parameter names/value
1036     @raise errors.HypervisorError: when a parameter is not valid
1037
1038     """
1039     super(KVMHypervisor, cls).CheckParameterSyntax(hvparams)
1040
1041     kernel_path = hvparams[constants.HV_KERNEL_PATH]
1042     if kernel_path:
1043       if not hvparams[constants.HV_ROOT_PATH]:
1044         raise errors.HypervisorError("Need a root partition for the instance,"
1045                                      " if a kernel is defined")
1046
1047     if (hvparams[constants.HV_VNC_X509_VERIFY] and
1048         not hvparams[constants.HV_VNC_X509]):
1049       raise errors.HypervisorError("%s must be defined, if %s is" %
1050                                    (constants.HV_VNC_X509,
1051                                     constants.HV_VNC_X509_VERIFY))
1052
1053     boot_order = hvparams[constants.HV_BOOT_ORDER]
1054     if (boot_order == constants.HT_BO_CDROM and
1055         not hvparams[constants.HV_CDROM_IMAGE_PATH]):
1056       raise errors.HypervisorError("Cannot boot from cdrom without an"
1057                                    " ISO path")
1058
1059     security_model = hvparams[constants.HV_SECURITY_MODEL]
1060     if security_model == constants.HT_SM_USER:
1061       if not hvparams[constants.HV_SECURITY_DOMAIN]:
1062         raise errors.HypervisorError("A security domain (user to run kvm as)"
1063                                      " must be specified")
1064     elif (security_model == constants.HT_SM_NONE or
1065           security_model == constants.HT_SM_POOL):
1066       if hvparams[constants.HV_SECURITY_DOMAIN]:
1067         raise errors.HypervisorError("Cannot have a security domain when the"
1068                                      " security model is 'none' or 'pool'")
1069
1070   @classmethod
1071   def ValidateParameters(cls, hvparams):
1072     """Check the given parameters for validity.
1073
1074     @type hvparams:  dict
1075     @param hvparams: dictionary with parameter names/value
1076     @raise errors.HypervisorError: when a parameter is not valid
1077
1078     """
1079     super(KVMHypervisor, cls).ValidateParameters(hvparams)
1080
1081     security_model = hvparams[constants.HV_SECURITY_MODEL]
1082     if security_model == constants.HT_SM_USER:
1083       username = hvparams[constants.HV_SECURITY_DOMAIN]
1084       try:
1085         pwd.getpwnam(username)
1086       except KeyError:
1087         raise errors.HypervisorError("Unknown security domain user %s"
1088                                      % username)
1089
1090   @classmethod
1091   def PowercycleNode(cls):
1092     """KVM powercycle, just a wrapper over Linux powercycle.
1093
1094     """
1095     cls.LinuxPowercycle()