Statistics
| Branch: | Tag: | Revision:

root / lib / hypervisor.py @ 4a72cc75

History | View | Annotate | Download (14.2 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module that abstracts the virtualisation interface
23

24
"""
25

    
26
import time
27
import os
28
from cStringIO import StringIO
29

    
30
from ganeti import utils
31
from ganeti import logger
32
from ganeti import ssconf
33
from ganeti.errors import HypervisorError
34

    
35
# Names of the hypervisor backends implemented in this module; the
# cluster-wide hypervisor type is compared against these values.
_HT_XEN30 = "xen-3.0"
_HT_FAKE = "fake"

# Every hypervisor type name this module knows about.
VALID_HTYPES = (_HT_XEN30, _HT_FAKE)
39

    
40
def GetHypervisor():
  """Build the hypervisor object configured for this cluster.

  The hypervisor type is read through ssconf.SimpleStore and mapped to
  the matching class of this module.

  Returns:
    a new XenHypervisor or FakeHypervisor instance

  Raises:
    HypervisorError: if the configured type is none of the known ones

  """
  kind = ssconf.SimpleStore().GetHypervisorType()
  if kind == _HT_XEN30:
    return XenHypervisor()
  if kind == _HT_FAKE:
    return FakeHypervisor()
  raise HypervisorError("Unknown hypervisor type '%s'" % kind)
55

    
56

    
57
class BaseHypervisor(object):
  """Abstract interface to a virtualisation technology.

  Everything specific to a given virtualisation technology is meant to
  be hidden behind this interface. Every operation below raises
  NotImplementedError here and has to be provided by a subclass.

  """
  def __init__(self):
    """Nothing to set up in the abstract class."""

  def StartInstance(self, instance, block_devices, extra_args):
    """Start an instance."""
    raise NotImplementedError()

  def StopInstance(self, instance, force=False):
    """Stop an instance."""
    raise NotImplementedError()

  def ListInstances(self):
    """Get the list of running instances."""
    raise NotImplementedError()

  def GetInstanceInfo(self, instance_name):
    """Get the properties of a single instance.

    Args:
      instance_name: the instance name

    Returns:
      (name, id, memory, vcpus, state, times)

    """
    raise NotImplementedError()

  def GetAllInstancesInfo(self):
    """Get the properties of all instances.

    Returns:
      [(name, id, memory, vcpus, state, times),...]

    """
    raise NotImplementedError()

  def GetNodeInfo(self):
    """Return information about the node.

    The result is a dict which has to contain the following items
    (all values in MiB):
      - memory_total: the total memory size on the node
      - memory_free: the available memory on the node for instances
      - memory_dom0: the memory used by the node itself, if available

    """
    raise NotImplementedError()

  @staticmethod
  def GetShellCommandForConsole(instance_name):
    """Return a command for connecting to the console of an instance."""
    raise NotImplementedError()

  def Verify(self):
    """Verify the hypervisor."""
    raise NotImplementedError()
123

    
124

    
125
class XenHypervisor(BaseHypervisor):
  """Xen hypervisor interface, driven through the 'xm' command."""

  @staticmethod
  def _WriteConfigFile(instance, block_devices, extra_args):
    """Create a Xen 3.0 config file.

    The file is written to /etc/xen/<instance name>; a file of the same
    name under /etc/xen/auto is removed first.

    Args:
      instance: object providing the memory, vcpus, name and nics
        attributes; each nic provides mac and bridge and, optionally, ip
      block_devices: iterable of (cfdev, rldev) pairs; rldev.dev_path is
        exported to the instance under the name cfdev.iv_name
      extra_args: if true, the value written as the 'extra' setting

    Returns:
      True

    """
    config = StringIO()
    config.write("# this is autogenerated by Ganeti, please do not edit\n#\n")
    config.write("kernel = '/boot/vmlinuz-2.6-xenU'\n")
    if os.path.exists("/boot/initrd-2.6-xenU"):
      config.write("ramdisk = '/boot/initrd-2.6-xenU'\n")
    config.write("memory = %d\n" % instance.memory)
    config.write("vcpus = %d\n" % instance.vcpus)
    config.write("name = '%s'\n" % instance.name)

    vif_data = []
    for nic in instance.nics:
      nic_str = "mac=%s, bridge=%s" % (nic.mac, nic.bridge)
      # the ip attribute is optional on a nic
      ip = getattr(nic, "ip", None)
      if ip is not None:
        nic_str += ", ip=%s" % ip
      vif_data.append("'%s'" % nic_str)

    config.write("vif = [%s]\n" % ",".join(vif_data))

    disk_data = ["'phy:%s,%s,w'" % (rldev.dev_path, cfdev.iv_name)
                 for cfdev, rldev in block_devices]
    config.write("disk = [%s]\n" % ",".join(disk_data))

    config.write("root = '/dev/sda ro'\n")
    config.write("on_poweroff = 'destroy'\n")
    config.write("on_reboot = 'restart'\n")
    config.write("on_crash = 'restart'\n")
    if extra_args:
      config.write("extra = '%s'\n" % extra_args)
    # just in case it exists
    utils.RemoveFile("/etc/xen/auto/%s" % instance.name)
    f = open("/etc/xen/%s" % instance.name, "w")
    try:
      f.write(config.getvalue())
    finally:
      # close the file even if the write fails
      f.close()
    return True

  @staticmethod
  def _RemoveConfigFile(instance):
    """Remove the xen configuration file of an instance.

    """
    utils.RemoveFile("/etc/xen/%s" % instance.name)

  @staticmethod
  def _GetXMList(include_node):
    """Return the list of running instances.

    'xm list' is run up to five times, waiting one second between
    attempts, until it succeeds.

    If the `include_node` argument is True, then we return information
    for dom0 also, otherwise we filter that from the return value.

    Returns:
      a list of [name, id, memory, vcpus, state, time spent] lists, in
      which id, memory and vcpus are integers and time spent is a float

    Raises:
      HypervisorError: if all attempts to run 'xm list' fail or if its
        output cannot be parsed

    """
    for attempt in range(5):
      if attempt:
        # only wait between attempts, not after the last one
        time.sleep(1)
      result = utils.RunCmd(["xm", "list"])
      if not result.failed:
        break
      logger.Error("xm list failed (%s): %s" % (result.fail_reason,
                                                result.output))

    if result.failed:
      raise HypervisorError("xm list failed, retries exceeded (%s): %s" %
                            (result.fail_reason, result.stderr))

    # skip over the heading and the domain 0 line (optional)
    if include_node:
      to_skip = 1
    else:
      to_skip = 2
    lines = result.stdout.splitlines()[to_skip:]
    instances = []
    for line in lines:
      # The format of lines is:
      # Name      ID Mem(MiB) VCPUs State  Time(s)
      # Domain-0   0  3418     4 r-----    266.2
      data = line.split()
      if len(data) != 6:
        raise HypervisorError("Can't parse output of xm list, line: %s" % line)
      try:
        data[1] = int(data[1])
        data[2] = int(data[2])
        data[3] = int(data[3])
        data[5] = float(data[5])
      except ValueError as err:
        raise HypervisorError("Can't parse output of xm list,"
                              " line: %s, error: %s" % (line, err))
      instances.append(data)
    return instances

  def ListInstances(self):
    """Get the list of running instances.

    Returns:
      the list of instance names, without dom0

    """
    return [info[0] for info in self._GetXMList(False)]

  def GetInstanceInfo(self, instance_name):
    """Get instance properties.

    Args:
      instance_name: the instance name

    Returns:
      (name, id, memory, vcpus, stat, times), or None if no instance of
      that name is listed

    """
    # dom0 is only part of the list when it is the one being asked for
    xm_list = self._GetXMList(instance_name == "Domain-0")
    for data in xm_list:
      if data[0] == instance_name:
        return data
    return None

  def GetAllInstancesInfo(self):
    """Get properties of all instances.

    Returns:
      [(name, id, memory, vcpus, stat, times),...], without dom0

    """
    return self._GetXMList(False)

  def StartInstance(self, instance, block_devices, extra_args):
    """Start an instance.

    The instance config file is (re)written and then passed to
    'xm create'.

    Raises:
      HypervisorError: if 'xm create' fails

    """
    self._WriteConfigFile(instance, block_devices, extra_args)
    result = utils.RunCmd(["xm", "create", instance.name])

    if result.failed:
      raise HypervisorError("Failed to start instance %s: %s" %
                            (instance.name, result.fail_reason))

  def StopInstance(self, instance, force=False):
    """Stop an instance.

    The config file of the instance is removed before the command is
    run.

    Args:
      instance: the instance object; its name attribute is used
      force: if true, run 'xm destroy' instead of 'xm shutdown'

    Raises:
      HypervisorError: if the xm command fails

    """
    self._RemoveConfigFile(instance)
    if force:
      command = ["xm", "destroy", instance.name]
    else:
      command = ["xm", "shutdown", instance.name]
    result = utils.RunCmd(command)

    if result.failed:
      raise HypervisorError("Failed to stop instance %s: %s" %
                            (instance.name, result.fail_reason))

  def GetNodeInfo(self):
    """Return information about the node.

    The values are parsed from the output of 'xm info'. If that command
    fails, the error is logged and None is returned.

    The return value is a dict, which has to have the following items:
      (all values in MiB)
      - memory_total: the total memory size on the node
      - memory_free: the available memory on the node for instances
      - memory_dom0: the memory used by the node itself, if available

    """
    # note: in xen 3, memory has changed to total_memory
    result = utils.RunCmd(["xm", "info"])
    if result.failed:
      logger.Error("Can't run 'xm info': %s" % result.fail_reason)
      return None

    info = {}
    for line in result.stdout.splitlines():
      splitfields = line.split(":", 1)

      if len(splitfields) > 1:
        key = splitfields[0].strip()
        val = splitfields[1].strip()
        if key in ('memory', 'total_memory'):
          info['memory_total'] = int(val)
        elif key == 'free_memory':
          info['memory_free'] = int(val)
    dom0_info = self.GetInstanceInfo("Domain-0")
    if dom0_info is not None:
      # third field of the xm list line is the memory size
      info['memory_dom0'] = dom0_info[2]

    return info

  @staticmethod
  def GetShellCommandForConsole(instance_name):
    """Return a command for connecting to the console of an instance.

    """
    return "xm console %s" % instance_name

  def Verify(self):
    """Verify the hypervisor.

    For Xen, this verifies that the xend process is running.

    Returns:
      an error message if the check fails, None otherwise

    """
    if not utils.CheckDaemonAlive('/var/run/xend.pid', 'xend'):
      return "xend daemon is not running"
    return None
329

    
330

    
331
class FakeHypervisor(BaseHypervisor):
  """Fake hypervisor interface.

  This can be used for testing the ganeti code without having to have
  a real virtualisation software installed.

  Each "running" instance is represented by a file named after the
  instance in _ROOT_DIR; the file holds three lines: id, memory and
  vcpus.

  """
  _ROOT_DIR = "/var/run/ganeti-fake-hypervisor"

  def __init__(self):
    """Create the base directory if it does not exist yet."""
    BaseHypervisor.__init__(self)
    if not os.path.exists(self._ROOT_DIR):
      os.mkdir(self._ROOT_DIR)

  @staticmethod
  def _ReadInstanceFile(file_name):
    """Read the id, memory and vcpus lines of an instance file.

    Args:
      file_name: the path of the instance file

    Returns:
      (id, memory, vcpus), each as the stripped string read from the file

    Raises:
      IOError: left to the caller, which knows what error to report

    """
    fh = open(file_name, "r")
    try:
      inst_id = fh.readline().strip()
      memory = fh.readline().strip()
      vcpus = fh.readline().strip()
    finally:
      fh.close()
    return (inst_id, memory, vcpus)

  def ListInstances(self):
    """Get the list of running instances.

    Returns:
      the names of the entries in the base directory

    """
    return os.listdir(self._ROOT_DIR)

  def GetInstanceInfo(self, instance_name):
    """Get instance properties.

    Args:
      instance_name: the instance name

    Returns:
      (name, id, memory, vcpus, stat, times), or None if the instance
      file does not exist; id, memory and vcpus are the strings read
      from the file, stat is always "---b-" and times is always "0"

    Raises:
      HypervisorError: if the instance file cannot be read

    """
    file_name = "%s/%s" % (self._ROOT_DIR, instance_name)
    if not os.path.exists(file_name):
      return None
    try:
      inst_id, memory, vcpus = self._ReadInstanceFile(file_name)
    except IOError as err:
      raise HypervisorError("Failed to list instance %s: %s" %
                            (instance_name, err))
    return (instance_name, inst_id, memory, vcpus, "---b-", "0")

  def GetAllInstancesInfo(self):
    """Get properties of all instances.

    Returns:
      [(name, id, memory, vcpus, stat, times),...], with the same field
      values as returned by GetInstanceInfo

    Raises:
      HypervisorError: if an instance file cannot be read

    """
    data = []
    for file_name in os.listdir(self._ROOT_DIR):
      try:
        inst_id, memory, vcpus = \
          self._ReadInstanceFile(self._ROOT_DIR + "/" + file_name)
      except IOError as err:
        raise HypervisorError("Failed to list instances: %s" % err)
      data.append((file_name, inst_id, memory, vcpus, "---b-", "0"))
    return data

  def StartInstance(self, instance, force, extra_args):
    """Start an instance.

    For the fake hypervisor, it just creates a file in the base dir,
    creating an exception if it already exists. We don't actually
    handle race conditions properly, since these are *FAKE* instances.

    Args:
      instance: the instance object; name, memory and vcpus are used
      force: not used; it occupies the position in which
        BaseHypervisor.StartInstance takes block_devices
      extra_args: not used

    Raises:
      HypervisorError: if the instance file already exists or cannot be
        written

    """
    file_name = "%s/%s" % (self._ROOT_DIR, instance.name)
    if os.path.exists(file_name):
      raise HypervisorError("Failed to start instance %s: %s" %
                            (instance.name, "already running"))
    try:
      fh = open(file_name, "w")
      try:
        # the id is always written as 0
        fh.write("0\n%d\n%d\n" % (instance.memory, instance.vcpus))
      finally:
        fh.close()
    except IOError as err:
      raise HypervisorError("Failed to start instance %s: %s" %
                            (instance.name, err))

  def StopInstance(self, instance, force=False):
    """Stop an instance.

    For the fake hypervisor, this just removes the file in the base
    dir, if it exists, otherwise we raise an exception.

    Args:
      instance: the instance object; its name attribute is used
      force: not used

    Raises:
      HypervisorError: if the instance file does not exist

    """
    file_name = "%s/%s" % (self._ROOT_DIR, instance.name)
    if not os.path.exists(file_name):
      raise HypervisorError("Failed to stop instance %s: %s" %
                            (instance.name, "not running"))
    utils.RemoveFile(file_name)

  def GetNodeInfo(self):
    """Return information about the node.

    The values are computed from /proc/meminfo.

    The return value is a dict, which has to have the following items:
      (all values in MiB)
      - memory_total: the total memory size on the node
      - memory_free: the available memory on the node for instances
      - memory_dom0: the memory used by the node itself, if available

    Raises:
      HypervisorError: if /proc/meminfo cannot be read

    """
    try:
      fh = open("/proc/meminfo")
      try:
        data = fh.readlines()
      finally:
        fh.close()
    except IOError as err:
      raise HypervisorError("Failed to list node info: %s" % err)

    result = {}
    sum_free = 0
    for line in data:
      splitfields = line.split(":", 1)

      if len(splitfields) > 1:
        key = splitfields[0].strip()
        val = splitfields[1].strip()
        # the first word of the value is the amount; it is divided by
        # 1024 with floor division so that the results stay integers
        if key == 'MemTotal':
          result['memory_total'] = int(val.split()[0]) // 1024
        elif key in ('MemFree', 'Buffers', 'Cached'):
          sum_free += int(val.split()[0]) // 1024
        elif key == 'Active':
          result['memory_dom0'] = int(val.split()[0]) // 1024

    result['memory_free'] = sum_free
    return result

  @staticmethod
  def GetShellCommandForConsole(instance_name):
    """Return a command for connecting to the console of an instance.

    For the fake hypervisor, the command only prints a notice.

    """
    return "echo Console not available for fake hypervisor"

  def Verify(self):
    """Verify the hypervisor.

    For the fake hypervisor, it just checks the existence of the base
    dir.

    Returns:
      an error message if the check fails, None otherwise

    """
    if not os.path.exists(self._ROOT_DIR):
      return "The required directory '%s' does not exist." % self._ROOT_DIR
    return None