Statistics
| Branch: | Tag: | Revision:

root / lib / hypervisor.py @ 098c0958

History | View | Annotate | Download (14.1 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module that abstracts the virtualisation interface
23

24
"""
25

    
26
import time
27
import os
28
from cStringIO import StringIO
29

    
30
from ganeti import utils
31
from ganeti import logger
32
from ganeti import ssconf
33
from ganeti.errors import HypervisorError
34

    
35
# Names of the hypervisor types known to this module; GetHypervisor()
# compares the configured type against these values.
_HT_XEN30 = "xen-3.0"
_HT_FAKE = "fake"

# Every hypervisor type for which this module has an implementation.
VALID_HTYPES = (_HT_XEN30, _HT_FAKE)
39

    
40
def GetHypervisor():
  """Return a Hypervisor instance.

  This function asks the cluster simple store (ssconf) for the
  configured hypervisor type and instantiates the class implementing
  that type.

  Returns:
    a new XenHypervisor or FakeHypervisor instance

  Raises:
    HypervisorError: if the configured type is not one this function
      knows how to instantiate

  """
  ht_kind = ssconf.SimpleStore().GetHypervisorType()
  if ht_kind == _HT_XEN30:
    cls = XenHypervisor
  elif ht_kind == _HT_FAKE:
    cls = FakeHypervisor
  else:
    # call form of raise, as used by every other raise in this module
    raise HypervisorError("Unknown hypervisor type '%s'" % ht_kind)
  return cls()
55

    
56

    
57
class BaseHypervisor(object):
  """Abstract virtualisation technology interface.

  Every aspect of the virtualisation technology has to be hidden behind
  this interface, so that the rest of the code does not depend on it.
  All operations raise NotImplementedError here; subclasses provide the
  real implementations.

  """
  def __init__(self):
    """Initialise the hypervisor object; the base class keeps no state."""

  def StartInstance(self, instance, block_devices, extra_args):
    """Start the given instance."""
    raise NotImplementedError()

  def StopInstance(self, instance, force=False):
    """Stop the given instance."""
    raise NotImplementedError()

  def ListInstances(self):
    """Return the list of running instances."""
    raise NotImplementedError()

  def GetInstanceInfo(self, instance_name):
    """Return the properties of a single instance.

    Args:
      instance_name: the instance name

    Returns:
      (name, id, memory, vcpus, state, times)

    """
    raise NotImplementedError()

  def GetAllInstancesInfo(self):
    """Return the properties of all instances.

    Returns:
      [(name, id, memory, vcpus, state, times),...]

    """
    raise NotImplementedError()

  def GetNodeInfo(self):
    """Return information about the node.

    Implementations return a dict that has to contain the following
    items (all values in MiB):
      - memory_total: the total memory size on the node
      - memory_free: the available memory on the node for instances
      - memory_dom0: the memory used by the node itself, if available

    """
    raise NotImplementedError()

  @staticmethod
  def GetShellCommandForConsole(instance_name):
    """Return a command for connecting to the console of an instance."""
    raise NotImplementedError()

  def Verify(self):
    """Verify the hypervisor."""
    raise NotImplementedError()
123

    
124

    
125
class XenHypervisor(BaseHypervisor):
  """Xen hypervisor interface.

  All operations are carried out by running the "xm" command line tool
  and by managing per-instance configuration files under /etc/xen.

  """

  @staticmethod
  def _WriteConfigFile(instance, block_devices, extra_args):
    """Create a Xen 3.0 config file.

    The file is written to /etc/xen/<instance name>; a file of the same
    name under /etc/xen/auto is removed first.

    Args:
      instance: object whose name, memory, vcpus and nics attributes
        describe the instance; each nic provides mac and bridge and
        optionally ip
      block_devices: iterable of (cfdev, rldev) pairs; rldev.dev_path
        and cfdev.iv_name are written as the first two fields of each
        'phy:' disk entry
      extra_args: value of the "extra" setting, omitted if empty

    Returns:
      True

    """
    config = StringIO()
    config.write("# this is autogenerated by Ganeti, please do not edit\n#\n")
    config.write("kernel = '/boot/vmlinuz-2.6-xenU'\n")
    config.write("memory = %d\n" % instance.memory)
    config.write("vcpus = %d\n" % instance.vcpus)
    config.write("name = '%s'\n" % instance.name)

    vif_data = []
    for nic in instance.nics:
      nic_str = "mac=%s, bridge=%s" % (nic.mac, nic.bridge)
      # the ip attribute is optional on nic objects
      ip = getattr(nic, "ip", None)
      if ip is not None:
        nic_str += ", ip=%s" % ip
      vif_data.append("'%s'" % nic_str)

    config.write("vif = [%s]\n" % ",".join(vif_data))

    disk_data = ["'phy:%s,%s,w'" % (rldev.dev_path, cfdev.iv_name)
                 for cfdev, rldev in block_devices]
    config.write("disk = [%s]\n" % ",".join(disk_data))

    config.write("root = '/dev/sda ro'\n")
    config.write("on_poweroff = 'destroy'\n")
    config.write("on_reboot = 'restart'\n")
    config.write("on_crash = 'restart'\n")
    if extra_args:
      config.write("extra = '%s'\n" % extra_args)
    # just in case it exists
    utils.RemoveFile("/etc/xen/auto/%s" % instance.name)
    f = open("/etc/xen/%s" % instance.name, "w")
    try:
      f.write(config.getvalue())
    finally:
      # close the file even if the write fails
      f.close()
    return True

  @staticmethod
  def _RemoveConfigFile(instance):
    """Remove the xen configuration file of the given instance.

    """
    utils.RemoveFile("/etc/xen/%s" % instance.name)

  @staticmethod
  def _GetXMList(include_node):
    """Return the list of running instances.

    If the `include_node` argument is True, then we return information
    for dom0 also, otherwise we filter that from the return value.

    "xm list" is tried up to five times, sleeping one second after each
    failed attempt.

    Args:
      include_node: whether to keep the first data line (domain 0)

    Returns:
      a list of [name, id, memory, vcpus, state, time spent] lists, with
      id, memory and vcpus converted to int and the time to float

    Raises:
      HypervisorError: if all attempts to run "xm list" failed, or if a
        line of its output can't be parsed

    """
    for dummy in range(5):
      result = utils.RunCmd(["xm", "list"])
      if not result.failed:
        break
      logger.Error("xm list failed (%s): %s" % (result.fail_reason,
                                                result.output))
      time.sleep(1)

    if result.failed:
      raise HypervisorError("xm list failed, retries exceeded (%s): %s" %
                            (result.fail_reason, result.stderr))

    # skip over the heading and the domain 0 line (optional)
    if include_node:
      to_skip = 1
    else:
      to_skip = 2
    lines = result.stdout.splitlines()[to_skip:]
    instances = []
    for line in lines:
      # The format of lines is:
      # Name      ID Mem(MiB) VCPUs State  Time(s)
      # Domain-0   0  3418     4 r-----    266.2
      data = line.split()
      if len(data) != 6:
        raise HypervisorError("Can't parse output of xm list, line: %s" % line)
      try:
        data[1] = int(data[1])
        data[2] = int(data[2])
        data[3] = int(data[3])
        data[5] = float(data[5])
      except ValueError as err:
        raise HypervisorError("Can't parse output of xm list,"
                              " line: %s, error: %s" % (line, err))
      instances.append(data)
    return instances

  def ListInstances(self):
    """Get the list of running instances.

    Returns:
      the list of instance names, without domain 0

    """
    return [info[0] for info in self._GetXMList(False)]

  def GetInstanceInfo(self, instance_name):
    """Get instance properties.

    Args:
      instance_name: the instance name

    Returns:
      [name, id, memory, vcpus, state, times], or None if the instance
      is not in the output of "xm list"

    """
    # domain 0 is only part of the list if we explicitly ask for it
    xm_list = self._GetXMList(instance_name == "Domain-0")
    for data in xm_list:
      if data[0] == instance_name:
        return data
    return None

  def GetAllInstancesInfo(self):
    """Get properties of all instances.

    Returns:
      [[name, id, memory, vcpus, state, times],...], without domain 0

    """
    return self._GetXMList(False)

  def StartInstance(self, instance, block_devices, extra_args):
    """Start an instance.

    The configuration file is (re)written and then passed to
    "xm create".

    Raises:
      HypervisorError: if "xm create" failed

    """
    self._WriteConfigFile(instance, block_devices, extra_args)
    result = utils.RunCmd(["xm", "create", instance.name])

    if result.failed:
      raise HypervisorError("Failed to start instance %s: %s" %
                            (instance.name, result.fail_reason))

  def StopInstance(self, instance, force=False):
    """Stop an instance.

    The configuration file is removed before the command is run.

    Args:
      instance: the instance to stop
      force: if true run "xm destroy", otherwise "xm shutdown"

    Raises:
      HypervisorError: if the xm command failed

    """
    self._RemoveConfigFile(instance)
    if force:
      command = ["xm", "destroy", instance.name]
    else:
      command = ["xm", "shutdown", instance.name]
    result = utils.RunCmd(command)

    if result.failed:
      raise HypervisorError("Failed to stop instance %s: %s" %
                            (instance.name, result.fail_reason))

  def GetNodeInfo(self):
    """Return information about the node.

    The return value is a dict, which has to have the following items:
      (all values in MiB)
      - memory_total: the total memory size on the node
      - memory_free: the available memory on the node for instances
      - memory_dom0: the memory used by the node itself, if available

    The first two are taken from the output of "xm info", the last one
    from the "Domain-0" entry of "xm list". None is returned if
    "xm info" can't be run.

    """
    # note: in xen 3, memory has changed to total_memory
    result = utils.RunCmd(["xm", "info"])
    if result.failed:
      logger.Error("Can't run 'xm info': %s" % result.fail_reason)
      return None

    info = {}
    for line in result.stdout.splitlines():
      splitfields = line.split(":", 1)

      # lines without a colon carry no key/value pair
      if len(splitfields) > 1:
        key = splitfields[0].strip()
        val = splitfields[1].strip()
        if key == 'memory' or key == 'total_memory':
          info['memory_total'] = int(val)
        elif key == 'free_memory':
          info['memory_free'] = int(val)
    dom0_info = self.GetInstanceInfo("Domain-0")
    if dom0_info is not None:
      info['memory_dom0'] = dom0_info[2]

    return info

  @staticmethod
  def GetShellCommandForConsole(instance_name):
    """Return a command for connecting to the console of an instance.

    """
    return "xm console %s" % instance_name

  def Verify(self):
    """Verify the hypervisor.

    For Xen, this verifies that the xend process is running.

    Returns:
      an error message if the check failed, None otherwise

    """
    if not utils.CheckDaemonAlive('/var/run/xend.pid', 'xend'):
      return "xend daemon is not running"
    return None
327

    
328

    
329
class FakeHypervisor(BaseHypervisor):
  """Fake hypervisor interface.

  This can be used for testing the ganeti code without having to have
  a real virtualisation software installed.

  Every "running" instance is represented by a file named after the
  instance in _ROOT_DIR; the file holds the instance id, memory and
  vcpus, one per line.

  """
  _ROOT_DIR = "/var/run/ganeti-fake-hypervisor"

  def __init__(self):
    """Create the base directory if it doesn't exist yet."""
    BaseHypervisor.__init__(self)
    if not os.path.exists(self._ROOT_DIR):
      os.mkdir(self._ROOT_DIR)

  def _InstanceFile(self, instance_name):
    """Return the path of the file representing the given instance."""
    return "%s/%s" % (self._ROOT_DIR, instance_name)

  @staticmethod
  def _ReadInstanceFile(file_name):
    """Read (id, memory, vcpus) from an instance file, as strings.

    IOError is passed on to the caller, which knows what error message
    to report.

    """
    fh = open(file_name, "r")
    try:
      inst_id = fh.readline().strip()
      memory = fh.readline().strip()
      vcpus = fh.readline().strip()
    finally:
      fh.close()
    return (inst_id, memory, vcpus)

  def ListInstances(self):
    """Get the list of running instances.

    Returns:
      the names of the files found in the base directory

    """
    return os.listdir(self._ROOT_DIR)

  def GetInstanceInfo(self, instance_name):
    """Get instance properties.

    Args:
      instance_name: the instance name

    Returns:
      (name, id, memory, vcpus, stat, times), or None if there is no
      file for the instance; id, memory and vcpus are the strings read
      from the file, stat is always "---b-" and times always "0"

    Raises:
      HypervisorError: if the instance file can't be read

    """
    file_name = self._InstanceFile(instance_name)
    if not os.path.exists(file_name):
      return None
    try:
      inst_id, memory, vcpus = self._ReadInstanceFile(file_name)
    except IOError as err:
      raise HypervisorError("Failed to list instance %s: %s" %
                            (instance_name, err))
    return (instance_name, inst_id, memory, vcpus, "---b-", "0")

  def GetAllInstancesInfo(self):
    """Get properties of all instances.

    Returns:
      [(name, id, memory, vcpus, stat, times),...], with the same
      values as returned by GetInstanceInfo for each instance

    Raises:
      HypervisorError: if one of the instance files can't be read

    """
    data = []
    for file_name in os.listdir(self._ROOT_DIR):
      try:
        inst_id, memory, vcpus = \
          self._ReadInstanceFile(self._InstanceFile(file_name))
      except IOError as err:
        raise HypervisorError("Failed to list instances: %s" % err)
      data.append((file_name, inst_id, memory, vcpus, "---b-", "0"))
    return data

  def StartInstance(self, instance, force, extra_args):
    """Start an instance.

    For the fake hypervisor, it just creates a file in the base dir,
    creating an exception if it already exists. We don't actually
    handle race conditions properly, since these are *FAKE* instances.

    Args:
      instance: object providing the name, memory and vcpus attributes
      force: not used
      extra_args: not used

    Raises:
      HypervisorError: if the instance file already exists or can't be
        written

    """
    # NOTE(review): the second parameter is called block_devices in
    # BaseHypervisor and XenHypervisor; the name is kept here so that
    # existing keyword callers don't break -- confirm before renaming.
    file_name = self._InstanceFile(instance.name)
    if os.path.exists(file_name):
      raise HypervisorError("Failed to start instance %s: %s" %
                            (instance.name, "already running"))
    try:
      fh = open(file_name, "w")
      try:
        # the id is always written as 0
        fh.write("0\n%d\n%d\n" % (instance.memory, instance.vcpus))
      finally:
        fh.close()
    except IOError as err:
      raise HypervisorError("Failed to start instance %s: %s" %
                            (instance.name, err))

  def StopInstance(self, instance, force=False):
    """Stop an instance.

    For the fake hypervisor, this just removes the file in the base
    dir, if it exists, otherwise we raise an exception.

    Args:
      instance: object providing the name attribute
      force: not used

    Raises:
      HypervisorError: if there is no file for the instance

    """
    file_name = self._InstanceFile(instance.name)
    if not os.path.exists(file_name):
      raise HypervisorError("Failed to stop instance %s: %s" %
                            (instance.name, "not running"))
    utils.RemoveFile(file_name)

  def GetNodeInfo(self):
    """Return information about the node.

    The return value is a dict, which has to have the following items:
      (all values in MiB)
      - memory_total: the total memory size on the node
      - memory_free: the available memory on the node for instances
      - memory_dom0: the memory used by the node itself, if available

    The values are computed from /proc/meminfo: memory_total from
    MemTotal, memory_free from the sum of MemFree, Buffers and Cached,
    and memory_dom0 from Active.

    Raises:
      HypervisorError: if /proc/meminfo can't be read

    """
    try:
      fh = open("/proc/meminfo")
      try:
        data = fh.readlines()
      finally:
        fh.close()
    except IOError as err:
      raise HypervisorError("Failed to list node info: %s" % err)

    result = {}
    sum_free = 0
    for line in data:
      splitfields = line.split(":", 1)

      if len(splitfields) > 1:
        key = splitfields[0].strip()
        # the value looks like "3583000 kB": take the number and convert
        # it to MiB; "//" keeps this an integer division everywhere
        val = splitfields[1].strip()
        if key == 'MemTotal':
          result['memory_total'] = int(val.split()[0]) // 1024
        elif key in ('MemFree', 'Buffers', 'Cached'):
          sum_free += int(val.split()[0]) // 1024
        elif key == 'Active':
          result['memory_dom0'] = int(val.split()[0]) // 1024

    result['memory_free'] = sum_free
    return result

  @staticmethod
  def GetShellCommandForConsole(instance_name):
    """Return a command for connecting to the console of an instance.

    The fake hypervisor has no consoles, so the command only prints a
    message saying so.

    """
    return "echo Console not available for fake hypervisor"

  def Verify(self):
    """Verify the hypervisor.

    For the fake hypervisor, it just checks the existence of the base
    dir.

    Returns:
      an error message if the base dir doesn't exist, None otherwise

    """
    if not os.path.exists(self._ROOT_DIR):
      return "The required directory '%s' does not exist." % self._ROOT_DIR
    return None