Statistics
| Branch: | Tag: | Revision:

root / lib / hypervisor.py @ a8083063

History | View | Annotate | Download (14.1 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module that abstracts the virtualisation interface
23

24
"""
25

    
26
import time
27
import os
28
from cStringIO import StringIO
29

    
30
from ganeti import utils
31
from ganeti import logger
32
from ganeti import ssconf
33
from ganeti.errors import HypervisorError
34

    
35
# Identifiers of the hypervisor types this module can instantiate; the
# configured value is compared against them in GetHypervisor()
_HT_XEN30 = "xen-3.0"
_HT_FAKE = "fake"

# All hypervisor type identifiers known to this module
VALID_HTYPES = (_HT_XEN30, _HT_FAKE)
39

    
40
def GetHypervisor():
  """Return a Hypervisor instance.

  The hypervisor type is obtained from
  ssconf.SimpleStore().GetHypervisorType() and the class implementing
  that type is instantiated.

  Returns:
    a new XenHypervisor or FakeHypervisor instance

  Raises:
    HypervisorError: if the configured type is neither _HT_XEN30 nor
      _HT_FAKE

  """
  ht_kind = ssconf.SimpleStore().GetHypervisorType()
  if ht_kind == _HT_XEN30:
    cls = XenHypervisor
  elif ht_kind == _HT_FAKE:
    cls = FakeHypervisor
  else:
    # Use the call form of raise, like the rest of this module; the
    # "raise Class, value" statement form is not accepted by newer
    # interpreters.
    raise HypervisorError("Unknown hypervisor type '%s'" % ht_kind)
  return cls()
55

    
56

    
57
class BaseHypervisor(object):
  """Abstract virtualisation technology interface.

  Every aspect of the virtualisation technology is meant to be hidden
  behind this interface, so that the rest of the code does not depend
  on it. All operations raise NotImplementedError here and have to be
  provided by the concrete subclasses.

  """

  def __init__(self):
    """Initialize the hypervisor object (nothing to do in the base)."""

  def StartInstance(self, instance, block_devices, extra_args):
    """Start the given instance."""
    raise NotImplementedError()

  def StopInstance(self, instance, force=False):
    """Stop the given instance."""
    raise NotImplementedError()

  def ListInstances(self):
    """Return the list of running instances."""
    raise NotImplementedError()

  def GetInstanceInfo(self, instance_name):
    """Return the properties of a single instance.

    Args:
      instance_name: the instance name

    Returns:
      (name, id, memory, vcpus, state, times)

    """
    raise NotImplementedError()

  def GetAllInstancesInfo(self):
    """Return the properties of all instances.

    Returns:
      [(name, id, memory, vcpus, stat, times),...]

    """
    raise NotImplementedError()

  def GetNodeInfo(self):
    """Return information about the node.

    The result is a dict which has to contain the following items
    (all values in MiB):
      - memory_total: the total memory size on the node
      - memory_free: the available memory on the node for instances
      - memory_dom0: the memory used by the node itself, if available

    """
    raise NotImplementedError()

  @staticmethod
  def GetShellCommandForConsole(instance_name):
    """Return a command for connecting to the console of an instance."""
    raise NotImplementedError()

  def Verify(self):
    """Verify the hypervisor."""
    raise NotImplementedError()
123

    
124

    
125
class XenHypervisor(BaseHypervisor):
  """Xen hypervisor interface.

  Instances are driven through the "xm" command line tool, using one
  configuration file per instance written to /etc/xen/<instance name>.

  """

  @staticmethod
  def _WriteConfigFile(instance, block_devices, extra_args):
    """Create a Xen 3.0 config file.

    Args:
      instance: object whose name, memory, vcpus and nics attributes
        are used to fill in the configuration
      block_devices: iterable of (cfdev, rldev) pairs; rldev.dev_path
        is exported to the instance as cfdev.iv_name, writable
      extra_args: value of the 'extra' setting; the setting is omitted
        when this is empty or None

    Returns:
      True

    """
    config = StringIO()
    config.write("# this is autogenerated by Ganeti, please do not edit\n#\n")
    config.write("kernel = '/boot/vmlinuz-2.6-xenU'\n")
    config.write("memory = %d\n" % instance.memory)
    config.write("vcpus = %d\n" % instance.vcpus)
    config.write("name = '%s'\n" % instance.name)

    vif_data = []
    for nic in instance.nics:
      nic_str = "mac=%s, bridge=%s" % (nic.mac, nic.bridge)
      # the ip attribute is optional on nic objects
      ip = getattr(nic, "ip", None)
      if ip is not None:
        nic_str += ", ip=%s" % ip
      vif_data.append("'%s'" % nic_str)

    config.write("vif = [%s]\n" % ",".join(vif_data))

    disk_data = ["'phy:%s,%s,w'" % (rldev.dev_path, cfdev.iv_name)
                 for cfdev, rldev in block_devices]
    config.write("disk = [%s]\n" % ",".join(disk_data))

    config.write("root = '/dev/sda ro'\n")
    config.write("on_poweroff = 'destroy'\n")
    config.write("on_reboot = 'restart'\n")
    config.write("on_crash = 'restart'\n")
    if extra_args:
      config.write("extra = '%s'\n" % extra_args)
    # just in case it exists
    utils.RemoveFile("/etc/xen/auto/%s" % instance.name)
    f = open("/etc/xen/%s" % instance.name, "w")
    try:
      f.write(config.getvalue())
    finally:
      # release the file handle even if the write fails
      f.close()
    return True

  @staticmethod
  def _RemoveConfigFile(instance):
    """Remove the xen configuration file of the given instance.

    """
    utils.RemoveFile("/etc/xen/%s" % instance.name)

  @staticmethod
  def _GetXMList(include_node):
    """Return the list of running instances.

    If the `include_node` argument is True, then we return information
    for dom0 also, otherwise we filter that from the return value.

    "xm list" is tried up to five times, one second apart, before
    giving up.

    Returns:
      a list of [name, id, memory, vcpus, state, time spent] lists,
      where id, memory and vcpus are integers and the time is a float

    Raises:
      HypervisorError: if "xm list" keeps failing or its output cannot
        be parsed

    """
    for dummy in range(5):
      result = utils.RunCmd(["xm", "list"])
      if not result.failed:
        break
      logger.Error("xm list failed (%s): %s" % (result.fail_reason,
                                                result.output))
      time.sleep(1)

    if result.failed:
      raise HypervisorError("xm list failed, retries exceeded (%s): %s" %
                            (result.fail_reason, result.stderr))

    # skip over the heading and the domain 0 line (optional)
    if include_node:
      to_skip = 1
    else:
      to_skip = 2
    lines = result.stdout.splitlines()[to_skip:]
    instances = []
    for line in lines:
      # The format of lines is:
      # Name      ID Mem(MiB) VCPUs State  Time(s)
      # Domain-0   0  3418     4 r-----    266.2
      data = line.split()
      if len(data) != 6:
        raise HypervisorError("Can't parse output of xm list, line: %s" % line)
      try:
        data[1] = int(data[1])
        data[2] = int(data[2])
        data[3] = int(data[3])
        data[5] = float(data[5])
      except ValueError as err:
        raise HypervisorError("Can't parse output of xm list,"
                              " line: %s, error: %s" % (line, err))
      instances.append(data)
    return instances

  def ListInstances(self):
    """Get the list of running instances.

    Returns:
      the names of all instances reported by "xm list", without dom0

    """
    xm_list = self._GetXMList(False)
    names = [info[0] for info in xm_list]
    return names

  def GetInstanceInfo(self, instance_name):
    """Get instance properties.

    Args:
      instance_name: the instance name

    Returns:
      [name, id, memory, vcpus, stat, times], or None if no instance
      of that name is listed

    """
    # dom0 is only part of the listing when explicitly asked for
    xm_list = self._GetXMList(instance_name == "Domain-0")
    for data in xm_list:
      if data[0] == instance_name:
        return data
    return None

  def GetAllInstancesInfo(self):
    """Get properties of all instances.

    Returns:
      [[name, id, memory, vcpus, stat, times],...], without dom0

    """
    return self._GetXMList(False)

  def StartInstance(self, instance, block_devices, extra_args):
    """Start an instance.

    The configuration file is (re)written and then handed to
    "xm create".

    Raises:
      HypervisorError: if "xm create" fails

    """
    self._WriteConfigFile(instance, block_devices, extra_args)
    result = utils.RunCmd(["xm", "create", instance.name])

    if result.failed:
      raise HypervisorError("Failed to start instance %s: %s" %
                            (instance.name, result.fail_reason))

  def StopInstance(self, instance, force=False):
    """Stop an instance.

    The configuration file is removed first; then "xm destroy" is run
    if `force` is true, "xm shutdown" otherwise.

    Raises:
      HypervisorError: if the xm command fails

    """
    self._RemoveConfigFile(instance)
    if force:
      command = ["xm", "destroy", instance.name]
    else:
      command = ["xm", "shutdown", instance.name]
    result = utils.RunCmd(command)

    if result.failed:
      raise HypervisorError("Failed to stop instance %s: %s" %
                            (instance.name, result.fail_reason))

  def GetNodeInfo(self):
    """Return information about the node.

    The return value is a dict, which has to have the following items:
      (all values in MiB)
      - memory_total: the total memory size on the node
      - memory_free: the available memory on the node for instances
      - memory_dom0: the memory used by the node itself, if available

    If "xm info" fails, the error is logged and None is returned.

    """
    # note: in xen 3, memory has changed to total_memory
    result = utils.RunCmd(["xm", "info"])
    if result.failed:
      logger.Error("Can't run 'xm info': %s" % result.fail_reason)
      return None

    node_info = {}
    for line in result.stdout.splitlines():
      splitfields = line.split(":", 1)

      # lines without a "key : value" shape are ignored
      if len(splitfields) > 1:
        key = splitfields[0].strip()
        val = splitfields[1].strip()
        if key == 'memory' or key == 'total_memory':
          node_info['memory_total'] = int(val)
        elif key == 'free_memory':
          node_info['memory_free'] = int(val)
    dom0_info = self.GetInstanceInfo("Domain-0")
    if dom0_info is not None:
      # index 2 is the memory column of the "xm list" output
      node_info['memory_dom0'] = dom0_info[2]

    return node_info

  @staticmethod
  def GetShellCommandForConsole(instance_name):
    """Return a command for connecting to the console of an instance.

    NOTE(review): the name is inserted into the command unquoted; this
    assumes callers only pass validated instance names - confirm.

    """
    return "xm console %s" % instance_name

  def Verify(self):
    """Verify the hypervisor.

    For Xen, this verifies that the xend process is running.

    Returns:
      an error message if the xend daemon is not alive, None otherwise

    """
    if not utils.CheckDaemonAlive('/var/run/xend.pid', 'xend'):
      return "xend daemon is not running"
    return None
328

    
329

    
330
class FakeHypervisor(BaseHypervisor):
  """Fake hypervisor interface.

  This can be used for testing the ganeti code without having to have
  a real virtualisation software installed. Each "running" instance is
  represented by a file named after it in _ROOT_DIR, holding its id,
  memory and vcpus on three separate lines.

  """

  _ROOT_DIR = "/var/run/ganeti-fake-hypervisor"

  def __init__(self):
    """Create the base directory if it does not exist yet."""
    BaseHypervisor.__init__(self)
    if not os.path.exists(self._ROOT_DIR):
      os.mkdir(self._ROOT_DIR)

  def ListInstances(self):
    """Get the list of running instances.

    Returns:
      the names of the entries found in the base directory

    """
    return os.listdir(self._ROOT_DIR)

  def GetInstanceInfo(self, instance_name):
    """Get instance properties.

    Args:
      instance_name: the instance name

    Returns:
      (name, id, memory, vcpus, stat, times), with id, memory and vcpus
      given as the strings read from the instance file, or None if the
      instance has no file

    Raises:
      HypervisorError: if the instance file cannot be read

    """
    file_name = "%s/%s" % (self._ROOT_DIR, instance_name)
    if not os.path.exists(file_name):
      return None
    try:
      fh = open(file_name, "r")
      try:
        inst_id = fh.readline().strip()
        memory = fh.readline().strip()
        vcpus = fh.readline().strip()
      finally:
        fh.close()
    except IOError as err:
      raise HypervisorError("Failed to list instance %s: %s" %
                            (instance_name, err))
    # state and time are fixed values for fake instances
    return (instance_name, inst_id, memory, vcpus, "---b-", "0")

  def GetAllInstancesInfo(self):
    """Get properties of all instances.

    Returns:
      [(name, id, memory, vcpus, stat, times),...], with id, memory
      and vcpus given as the strings read from the instance files

    Raises:
      HypervisorError: if an instance file cannot be read

    """
    data = []
    for file_name in os.listdir(self._ROOT_DIR):
      try:
        fh = open(self._ROOT_DIR + "/" + file_name, "r")
        try:
          inst_id = fh.readline().strip()
          memory = fh.readline().strip()
          vcpus = fh.readline().strip()
        finally:
          fh.close()
      except IOError as err:
        raise HypervisorError("Failed to list instances: %s" % err)
      # state and time are fixed values for fake instances
      data.append((file_name, inst_id, memory, vcpus, "---b-", "0"))
    return data

  def StartInstance(self, instance, force, extra_args):
    """Start an instance.

    For the fake hypervisor, it just creates a file in the base dir,
    creating an exception if it already exists. We don't actually
    handle race conditions properly, since these are *FAKE* instances.

    The second parameter takes the position of `block_devices` in
    BaseHypervisor.StartInstance; neither it nor `extra_args` is used
    here.

    Raises:
      HypervisorError: if the instance file already exists or cannot
        be written

    """
    file_name = self._ROOT_DIR + "/%s" % instance.name
    if os.path.exists(file_name):
      raise HypervisorError("Failed to start instance %s: %s" %
                            (instance.name, "already running"))
    try:
      fh = open(file_name, "w")
      try:
        # id, memory and vcpus, one per line; the id is always 0
        fh.write("0\n%d\n%d\n" % (instance.memory, instance.vcpus))
      finally:
        fh.close()
    except IOError as err:
      raise HypervisorError("Failed to start instance %s: %s" %
                            (instance.name, err))

  def StopInstance(self, instance, force=False):
    """Stop an instance.

    For the fake hypervisor, this just removes the file in the base
    dir, if it exists, otherwise we raise an exception. The `force`
    argument is not used.

    Raises:
      HypervisorError: if the instance file does not exist

    """
    file_name = self._ROOT_DIR + "/%s" % instance.name
    if not os.path.exists(file_name):
      raise HypervisorError("Failed to stop instance %s: %s" %
                            (instance.name, "not running"))
    utils.RemoveFile(file_name)

  def GetNodeInfo(self):
    """Return information about the node.

    The return value is a dict, which has to have the following items:
      (all values in MiB)
      - memory_total: the total memory size on the node
      - memory_free: the available memory on the node for instances
      - memory_dom0: the memory used by the node itself, if available

    Raises:
      HypervisorError: if /proc/meminfo cannot be read

    """
    # The values are taken from /proc/meminfo, whose lines have a
    # "Key:   value unit" shape; only the numeric value is used
    # NOTE(review): the division by 1024 assumes values in kB - confirm
    try:
      fh = open("/proc/meminfo")
      try:
        data = fh.readlines()
      finally:
        fh.close()
    except IOError as err:
      raise HypervisorError("Failed to list node info: %s" % err)

    result = {}
    sum_free = 0
    for line in data:
      splitfields = line.split(":", 1)

      if len(splitfields) > 1:
        key = splitfields[0].strip()
        val = splitfields[1].strip()
        # explicit floor division, so that the results stay integers
        # regardless of the interpreter's "/" semantics
        if key == 'MemTotal':
          result['memory_total'] = int(val.split()[0]) // 1024
        elif key in ('MemFree', 'Buffers', 'Cached'):
          sum_free += int(val.split()[0]) // 1024
        elif key == 'Active':
          result['memory_dom0'] = int(val.split()[0]) // 1024

    result['memory_free'] = sum_free
    return result

  @staticmethod
  def GetShellCommandForConsole(instance_name):
    """Return a command for connecting to the console of an instance.

    The fake hypervisor has no consoles, so the command only prints a
    message saying so.

    """
    return "echo Console not available for fake hypervisor"

  def Verify(self):
    """Verify the hypervisor.

    For the fake hypervisor, it just checks the existence of the base
    dir.

    Returns:
      an error message if the base directory is missing, None otherwise

    """
    if not os.path.exists(self._ROOT_DIR):
      return "The required directory '%s' does not exist." % self._ROOT_DIR
    return None