Statistics
| Branch: | Tag: | Revision:

root / lib / hypervisor / hv_lxc.py @ 55cc0a44

History | View | Annotate | Download (13.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""LXC hypervisor
23

24
"""
25

    
26
import os
27
import os.path
28
import time
29
import logging
30

    
31
from ganeti import constants
32
from ganeti import errors # pylint: disable-msg=W0611
33
from ganeti import utils
34
from ganeti import objects
35
from ganeti.hypervisor import hv_base
36
from ganeti.errors import HypervisorError
37

    
38

    
39
class LXCHypervisor(hv_base.BaseHypervisor):
  """LXC-based virtualization.

  Instances are run as containers driven through the C{lxc-*} command
  line tools (lxc-ls, lxc-info, lxc-start, lxc-stop, lxc-console); the
  first disk of an instance is mounted below L{_ROOT_DIR} and used as the
  root filesystem of the container.

  Since current (Spring 2010) distributions are not yet ready for
  running under a container, the following changes must be done
  manually:
    - remove udev
    - disable the kernel log component of sysklogd/rsyslog/etc.,
      otherwise they will fail to read the log, and at least rsyslog
      will fill the filesystem with error messages

  TODO:
    - move hardcoded parameters into hypervisor parameters, once we
      have the container-parameter support
    - implement memory limits, but only optionally, depending on host
      kernel support

  Problems/issues:
    - LXC is very temperamental; in daemon mode, it succeeds or fails
      in launching the instance silently, without any error
      indication, and when failing it can leave network interfaces
      around, and future successful startups will list the instance
      twice
    - shutdown sequence of containers leaves the init 'dead', and the
      container effectively stopped, but LXC still believes the
      container to be running; need to investigate using the
      notify_on_release and release_agent feature of cgroups

  """
  # Base directory holding, per instance, the root filesystem mountpoint
  # and the ".conf", ".log" and ".console" files
  _ROOT_DIR = constants.RUN_GANETI_DIR + "/lxc"
  # Devices (type major:minor) a container is allowed to access; each entry
  # is written to the container configuration as an "rw" devices.allow rule
  _DEVS = [
    "c 1:3",   # /dev/null
    "c 1:5",   # /dev/zero
    "c 1:7",   # /dev/full
    "c 1:8",   # /dev/random
    "c 1:9",   # /dev/urandom
    "c 1:10",  # /dev/aio
    "c 5:0",   # /dev/tty
    "c 5:1",   # /dev/console
    "c 5:2",   # /dev/ptmx
    "c 136:*", # first block of Unix98 PTY slaves
    ]
  # Capabilities dropped from every container (one "lxc.cap.drop" line
  # each); the trailing comments describe what the capability would permit
  _DENIED_CAPABILITIES = [
    "mac_override",    # Allow MAC configuration or state changes
    # TODO: remove sys_admin too, for safety
    #"sys_admin",       # Perform  a range of system administration operations
    "sys_boot",        # Use reboot(2) and kexec_load(2)
    "sys_module",      # Load  and  unload kernel modules
    "sys_time",        # Set  system  clock, set real-time (hardware) clock
    ]
  # Mode passed to utils.EnsureDirs for the root and the instance directories
  _DIR_MODE = 0755

  # Hypervisor parameters understood by this hypervisor, with their checks
  PARAMETERS = {
    constants.HV_CPU_MASK: hv_base.OPT_CPU_MASK_CHECK,
    }
94

    
95
  def __init__(self):
    """Initialize the hypervisor object.

    Runs the base class initializer and then hands the LXC root
    directory, together with its mode, to C{utils.EnsureDirs}.

    """
    hv_base.BaseHypervisor.__init__(self)
    required_dirs = [(self._ROOT_DIR, self._DIR_MODE)]
    utils.EnsureDirs(required_dirs)
98

    
99
  @staticmethod
  def _GetMountSubdirs(path):
    """Return the list of mountpoints located below a given path.

    Only mountpoints strictly inside C{path} are returned; C{path} itself
    and sibling directories which merely share its name as a prefix
    (e.g. C{/a/inst10} for C{/a/inst1}) are excluded.

    @type path: string
    @param path: the directory whose sub-mountpoints are wanted
    @rtype: list of strings
    @return: the mountpoints, sorted by decreasing number of path
        separators, i.e. deepest first

    """
    # Match against the path followed by a separator; a plain prefix match
    # would also select unrelated directories such as "<path>0" or
    # "<path>.old", which must never be touched on behalf of this path
    prefix = path.rstrip("/") + "/"
    result = [mountpoint for (_, mountpoint, _, _) in utils.GetMounts()
              if mountpoint.startswith(prefix) and mountpoint != path]

    # Deepest entries first, so that nested mounts precede their parents
    result.sort(key=lambda x: x.count("/"), reverse=True)
    return result
112

    
113
  @classmethod
  def _InstanceDir(cls, instance_name):
    """Compute the per-instance directory.

    @type instance_name: string
    @param instance_name: the instance name
    @return: the instance name joined onto the LXC root directory

    """
    base_dir = cls._ROOT_DIR
    return utils.PathJoin(base_dir, instance_name)
119

    
120
  @classmethod
  def _InstanceConfFile(cls, instance_name):
    """Compute the path of the configuration file of an instance.

    @type instance_name: string
    @param instance_name: the instance name
    @return: the path of "<instance_name>.conf" in the LXC root directory

    """
    conf_name = instance_name + ".conf"
    return utils.PathJoin(cls._ROOT_DIR, conf_name)
126

    
127
  @classmethod
  def _InstanceLogFile(cls, instance_name):
    """Compute the path of the log file of an instance.

    @type instance_name: string
    @param instance_name: the instance name
    @return: the path of "<instance_name>.log" in the LXC root directory

    """
    log_name = instance_name + ".log"
    return utils.PathJoin(cls._ROOT_DIR, log_name)
133

    
134
  @classmethod
  def _GetCgroupMountPoint(cls):
    """Find where the cgroup filesystem is mounted.

    @return: the mountpoint of the first mount entry whose filesystem
        type is "cgroup"
    @raise errors.HypervisorError: if no such mount entry exists

    """
    for mount_entry in utils.GetMounts():
      (_, mount_dir, fs_kind, _) = mount_entry
      if fs_kind != "cgroup":
        continue
      return mount_dir

    raise errors.HypervisorError("The cgroup filesystem is not mounted")
140

    
141
  @classmethod
142
  def _GetCgroupCpuList(cls, instance_name):
143
    """Return the list of CPU ids for an instance.
144

145
    """
146
    cgroup = cls._GetCgroupMountPoint()
147
    try:
148
      cpus = utils.ReadFile(utils.PathJoin(cgroup,
149
                                           instance_name,
150
                                           "cpuset.cpus"))
151
    except EnvironmentError, err:
152
      raise errors.HypervisorError("Getting CPU list for instance"
153
                                   " %s failed: %s" % (instance_name, err))
154

    
155
    return utils.ParseCpuMask(cpus)
156

    
157
  def ListInstances(self):
    """Get the list of instances reported by lxc-ls.

    @rtype: list of strings
    @return: the lines of the standard output of C{lxc-ls}
    @raise errors.HypervisorError: if running lxc-ls fails

    """
    lxc_ls = utils.RunCmd(["lxc-ls"])
    if not lxc_ls.failed:
      return lxc_ls.stdout.splitlines()

    raise errors.HypervisorError("Running lxc-ls failed: %s" % lxc_ls.output)
165

    
166
  def GetInstanceInfo(self, instance_name):
    """Get instance properties.

    @type instance_name: string
    @param instance_name: the instance name

    @rtype: tuple or None
    @return: (name, id, memory, vcpus, stat, times) if lxc-info reports
        the instance as RUNNING, None otherwise; only the name and the
        number of VCPUs are filled in, the other fields are always zero
    @raise errors.HypervisorError: if lxc-info fails, if its output
        cannot be parsed or if the CPU list of the instance cannot be read

    """
    # TODO: read container info from the cgroup mountpoint

    result = utils.RunCmd(["lxc-info", "-n", instance_name])
    if result.failed:
      raise errors.HypervisorError("Running lxc-info failed: %s" %
                                   result.output)
    # lxc-info output examples:
    # 'ganeti-lxc-test1' is STOPPED
    # 'ganeti-lxc-test1' is RUNNING
    fields = result.stdout.rsplit(None, 1)
    if len(fields) != 2:
      # Empty or one-word output; report it as a hypervisor error instead of
      # failing with a ValueError while unpacking
      raise errors.HypervisorError("Can't parse the output of lxc-info for"
                                   " instance %s: %r" %
                                   (instance_name, result.stdout))
    state = fields[1]
    if state != "RUNNING":
      return None

    cpu_list = self._GetCgroupCpuList(instance_name)
    return (instance_name, 0, 0, len(cpu_list), 0, 0)
190

    
191
  def GetAllInstancesInfo(self):
    """Get properties of all running instances.

    Instances listed by L{ListInstances} for which L{GetInstanceInfo}
    returns no data (i.e. which are not running) are left out.

    @rtype: list of tuples
    @return: [(name, id, memory, vcpus, stat, times),...]
    @raise errors.HypervisorError: if listing or querying the instances
        fails

    """
    data = []
    for name in self.ListInstances():
      info = self.GetInstanceInfo(name)
      # GetInstanceInfo returns None for containers which are not running;
      # the documented result is a list of tuples, so skip those entries
      if info is not None:
        data.append(info)
    return data
201

    
202
  def _CreateConfigFile(self, instance, root_dir):
203
    """Create an lxc.conf file for an instance.
204

205
    """
206
    out = []
207
    # hostname
208
    out.append("lxc.utsname = %s" % instance.name)
209

    
210
    # separate pseudo-TTY instances
211
    out.append("lxc.pts = 255")
212
    # standard TTYs
213
    out.append("lxc.tty = 6")
214
    # console log file
215
    console_log = utils.PathJoin(self._ROOT_DIR, instance.name + ".console")
216
    try:
217
      utils.WriteFile(console_log, data="", mode=constants.SECURE_FILE_MODE)
218
    except EnvironmentError, err:
219
      raise errors.HypervisorError("Creating console log file %s for"
220
                                   " instance %s failed: %s" %
221
                                   (console_log, instance.name, err))
222
    out.append("lxc.console = %s" % console_log)
223

    
224
    # root FS
225
    out.append("lxc.rootfs = %s" % root_dir)
226

    
227
    # TODO: additional mounts, if we disable CAP_SYS_ADMIN
228

    
229
    # CPUs
230
    if instance.hvparams[constants.HV_CPU_MASK]:
231
      cpu_list = utils.ParseCpuMask(instance.hvparams[constants.HV_CPU_MASK])
232
      cpus_in_mask = len(cpu_list)
233
      if cpus_in_mask != instance.beparams["vcpus"]:
234
        raise errors.HypervisorError("Number of VCPUs (%d) doesn't match"
235
                                     " the number of CPUs in the"
236
                                     " cpu_mask (%d)" %
237
                                     (instance.beparams["vcpus"],
238
                                      cpus_in_mask))
239
      out.append("lxc.cgroup.cpuset.cpus = %s" %
240
                 instance.hvparams[constants.HV_CPU_MASK])
241

    
242
    # Device control
243
    # deny direct device access
244
    out.append("lxc.cgroup.devices.deny = a")
245
    for devinfo in self._DEVS:
246
      out.append("lxc.cgroup.devices.allow = %s rw" % devinfo)
247

    
248
    # Networking
249
    for idx, nic in enumerate(instance.nics):
250
      out.append("# NIC %d" % idx)
251
      mode = nic.nicparams[constants.NIC_MODE]
252
      link = nic.nicparams[constants.NIC_LINK]
253
      if mode == constants.NIC_MODE_BRIDGED:
254
        out.append("lxc.network.type = veth")
255
        out.append("lxc.network.link = %s" % link)
256
      else:
257
        raise errors.HypervisorError("LXC hypervisor only supports"
258
                                     " bridged mode (NIC %d has mode %s)" %
259
                                     (idx, mode))
260
      out.append("lxc.network.hwaddr = %s" % nic.mac)
261
      out.append("lxc.network.flags = up")
262

    
263
    # Capabilities
264
    for cap in self._DENIED_CAPABILITIES:
265
      out.append("lxc.cap.drop = %s" % cap)
266

    
267
    return "\n".join(out) + "\n"
268

    
269
  def StartInstance(self, instance, block_devices):
270
    """Start an instance.
271

272
    For LCX, we try to mount the block device and execute 'lxc-start'.
273
    We use volatile containers.
274

275
    """
276
    root_dir = self._InstanceDir(instance.name)
277
    try:
278
      utils.EnsureDirs([(root_dir, self._DIR_MODE)])
279
    except errors.GenericError, err:
280
      raise HypervisorError("Creating instance directory failed: %s", str(err))
281

    
282
    conf_file = self._InstanceConfFile(instance.name)
283
    utils.WriteFile(conf_file, data=self._CreateConfigFile(instance, root_dir))
284

    
285
    log_file = self._InstanceLogFile(instance.name)
286
    if not os.path.exists(log_file):
287
      try:
288
        utils.WriteFile(log_file, data="", mode=constants.SECURE_FILE_MODE)
289
      except EnvironmentError, err:
290
        raise errors.HypervisorError("Creating hypervisor log file %s for"
291
                                     " instance %s failed: %s" %
292
                                     (log_file, instance.name, err))
293

    
294
    if not os.path.ismount(root_dir):
295
      if not block_devices:
296
        raise HypervisorError("LXC needs at least one disk")
297

    
298
      sda_dev_path = block_devices[0][1]
299
      result = utils.RunCmd(["mount", sda_dev_path, root_dir])
300
      if result.failed:
301
        raise HypervisorError("Mounting the root dir of LXC instance %s"
302
                              " failed: %s" % (instance.name, result.output))
303
    result = utils.RunCmd(["lxc-start", "-n", instance.name,
304
                           "-o", log_file,
305
                           "-l", "DEBUG",
306
                           "-f", conf_file, "-d"])
307
    if result.failed:
308
      raise HypervisorError("Running the lxc-start script failed: %s" %
309
                            result.output)
310

    
311
  def StopInstance(self, instance, force=False, retry=False, name=None):
    """Stop an instance.

    This method has complicated cleanup tests, as we must:
      - try to kill all leftover processes
      - try to unmount any additional sub-mountpoints
      - finally unmount the instance dir

    @param instance: the instance to stop; only its name is used, and
        only if C{name} is not given
    @type force: boolean
    @param force: if set, 'poweroff' is not run inside the container and
        a failure to unmount the instance directory raises an error
    @type retry: boolean
    @param retry: if set, 'poweroff' is not run inside the container
    @type name: string or None
    @param name: if not None, the name to use instead of the instance's
    @raise HypervisorError: if 'poweroff' fails, or if unmounting the
        instance directory fails while C{force} is set

    """
    if name is None:
      name = instance.name

    root_dir = self._InstanceDir(name)
    if not os.path.exists(root_dir):
      return

    if name in self.ListInstances():
      # Signal init to shutdown; this is a hack
      if not retry and not force:
        result = utils.RunCmd(["chroot", root_dir, "poweroff"])
        if result.failed:
          raise HypervisorError("Running 'poweroff' on the instance"
                                " failed: %s" % result.output)
      time.sleep(2)
      result = utils.RunCmd(["lxc-stop", "-n", name])
      if result.failed:
        logging.warning("Error while doing lxc-stop for %s: %s", name,
                        result.output)

    # Sub-mountpoints are unmounted on a best-effort basis
    for mpath in self._GetMountSubdirs(root_dir):
      result = utils.RunCmd(["umount", mpath])
      if result.failed:
        logging.warning("Error while umounting subpath %s for instance %s: %s",
                        mpath, name, result.output)

    result = utils.RunCmd(["umount", root_dir])
    if result.failed and force:
      # Use an argument list, like every other command run by this class,
      # instead of interpolating the path into a command string
      fuser = utils.RunCmd(["fuser", "-vm", root_dir])
      msg = "Processes still alive in the chroot: %s" % fuser.output
      logging.error(msg)
      raise HypervisorError("Unmounting the chroot dir failed: %s (%s)" %
                            (result.output, msg))
353

    
354
  def RebootInstance(self, instance):
    """Reboot an instance.

    Rebooting is not (yet) implemented (in Ganeti) for the LXC
    hypervisor, so this method always fails.

    @param instance: the instance to reboot (unused)
    @raise HypervisorError: always

    """
    # TODO: implement reboot
    msg = "The LXC hypervisor doesn't implement the reboot functionality"
    raise HypervisorError(msg)
363

    
364
  def GetNodeInfo(self):
    """Return information about the node.

    The data is obtained, unmodified, from the GetLinuxNodeInfo method.

    @return: a dict with the following keys (values in MiB):
          - memory_total: the total memory size on the node
          - memory_free: the available memory on the node for instances
          - memory_dom0: the memory used by the node itself, if available

    """
    node_info = self.GetLinuxNodeInfo()
    return node_info
376

    
377
  @classmethod
  def GetInstanceConsole(cls, instance, hvparams, beparams):
    """Describe how to connect to the console of an instance.

    @param instance: the instance; its name and primary node are used
    @param hvparams: unused
    @param beparams: unused
    @return: an L{objects.InstanceConsole} of kind SSH, running
        C{lxc-console} for the instance on its primary node

    """
    console_cmd = ["lxc-console", "-n", instance.name]
    return objects.InstanceConsole(instance=instance.name,
                                   kind=constants.CONS_SSH,
                                   host=instance.primary_node,
                                   user=constants.GANETI_RUNAS,
                                   command=console_cmd)
387

    
388
  def Verify(self):
    """Verify the hypervisor.

    For LXC, this only checks the existence of the base directory.

    @return: an error message if the base directory is missing, None
        otherwise

    """
    if os.path.exists(self._ROOT_DIR):
      return None

    return "The required directory '%s' does not exist." % self._ROOT_DIR
396

    
397
  @classmethod
  def PowercycleNode(cls):
    """Powercycle the node.

    Nothing LXC-specific is done here; the call is passed on to the
    LinuxPowercycle method.

    """
    cls.LinuxPowercycle()
403

    
404
  def MigrateInstance(self, instance, target, live):
    """Migrate an instance.

    Migration is not available for LXC, so this method always fails.

    @type instance: L{objects.Instance}
    @param instance: the instance to be migrated
    @type target: string
    @param target: hostname (usually ip) of the target node
    @type live: boolean
    @param live: whether to do a live or non-live migration
    @raise HypervisorError: always

    """
    msg = "Migration is not supported by the LXC hypervisor"
    raise HypervisorError(msg)