Statistics
| Branch: | Tag: | Revision:

root / lib / hypervisor / hv_lxc.py @ e3ed5316

History | View | Annotate | Download (13.5 kB)

1
#
2
#
3

    
4
# Copyright (C) 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""LXC hypervisor
23

24
"""
25

    
26
import os
27
import os.path
28
import time
29
import logging
30

    
31
from ganeti import constants
32
from ganeti import errors # pylint: disable-msg=W0611
33
from ganeti import utils
34
from ganeti.hypervisor import hv_base
35
from ganeti.errors import HypervisorError
36

    
37

    
38
class LXCHypervisor(hv_base.BaseHypervisor):
39
  """LXC-based virtualization.
40

41
  Since current (Spring 2010) distributions are not yet ready for
42
  running under a container, the following changes must be done
43
  manually:
44
    - remove udev
45
    - disable the kernel log component of sysklogd/rsyslog/etc.,
46
      otherwise they will fail to read the log, and at least rsyslog
47
      will fill the filesystem with error messages
48

49
  TODO:
50
    - move hardcoded parameters into hypervisor parameters, once we
51
      have the container-parameter support
52
    - implement memory limits, but only optionally, depending on host
53
      kernel support
54

55
  Problems/issues:
56
    - LXC is very temperamental; in daemon mode, it succeeds or fails
57
      in launching the instance silently, without any error
58
      indication, and when failing it can leave network interfaces
59
      around, and future successful startups will list the instance
60
      twice
61
    - shutdown sequence of containers leaves the init 'dead', and the
62
      container effectively stopped, but LXC still believes the
63
      container to be running; need to investigate using the
64
      notify_on_release and release_agent feature of cgroups
65

66
  """
67
  _ROOT_DIR = constants.RUN_GANETI_DIR + "/lxc"
68
  _DEVS = [
69
    "c 1:3",   # /dev/null
70
    "c 1:5",   # /dev/zero
71
    "c 1:7",   # /dev/full
72
    "c 1:8",   # /dev/random
73
    "c 1:9",   # /dev/urandom
74
    "c 1:10",  # /dev/aio
75
    "c 5:0",   # /dev/tty
76
    "c 5:1",   # /dev/console
77
    "c 5:2",   # /dev/ptmx
78
    "c 136:*", # first block of Unix98 PTY slaves
79
    ]
80
  _DENIED_CAPABILITIES = [
81
    "mac_override",    # Allow MAC configuration or state changes
82
    # TODO: remove sys_admin too, for safety
83
    #"sys_admin",       # Perform  a range of system administration operations
84
    "sys_boot",        # Use reboot(2) and kexec_load(2)
85
    "sys_module",      # Load  and  unload kernel modules
86
    "sys_time",        # Set  system  clock, set real-time (hardware) clock
87
    ]
88
  _DIR_MODE = 0755
89

    
90
  PARAMETERS = {
91
    constants.HV_CPU_MASK: hv_base.OPT_CPU_MASK_CHECK,
92
    }
93

    
94
  def __init__(self):
95
    hv_base.BaseHypervisor.__init__(self)
96
    utils.EnsureDirs([(self._ROOT_DIR, self._DIR_MODE)])
97

    
98
  @staticmethod
99
  def _GetMountSubdirs(path):
100
    """Return the list of mountpoints under a given path.
101

102
    """
103
    result = []
104
    for _, mountpoint, _, _ in utils.GetMounts():
105
      if (mountpoint.startswith(path) and
106
          mountpoint != path):
107
        result.append(mountpoint)
108

    
109
    result.sort(key=lambda x: x.count("/"), reverse=True)
110
    return result
111

    
112
  @classmethod
113
  def _InstanceDir(cls, instance_name):
114
    """Return the root directory for an instance.
115

116
    """
117
    return utils.PathJoin(cls._ROOT_DIR, instance_name)
118

    
119
  @classmethod
120
  def _InstanceConfFile(cls, instance_name):
121
    """Return the configuration file for an instance.
122

123
    """
124
    return utils.PathJoin(cls._ROOT_DIR, instance_name + ".conf")
125

    
126
  @classmethod
127
  def _InstanceLogFile(cls, instance_name):
128
    """Return the log file for an instance.
129

130
    """
131
    return utils.PathJoin(cls._ROOT_DIR, instance_name + ".log")
132

    
133
  @classmethod
134
  def _GetCgroupMountPoint(cls):
135
    for _, mountpoint, fstype, _ in utils.GetMounts():
136
      if fstype == "cgroup":
137
        return mountpoint
138
    raise errors.HypervisorError("The cgroup filesystem is not mounted")
139

    
140
  @classmethod
141
  def _GetCgroupCpuList(cls, instance_name):
142
    """Return the list of CPU ids for an instance.
143

144
    """
145
    cgroup = cls._GetCgroupMountPoint()
146
    try:
147
      cpus = utils.ReadFile(utils.PathJoin(cgroup,
148
                                           instance_name,
149
                                           "cpuset.cpus"))
150
    except EnvironmentError, err:
151
      raise errors.HypervisorError("Getting CPU list for instance"
152
                                   " %s failed: %s" % (instance_name, err))
153

    
154
    return utils.ParseCpuMask(cpus)
155

    
156
  def ListInstances(self):
157
    """Get the list of running instances.
158

159
    """
160
    result = utils.RunCmd(["lxc-ls"])
161
    if result.failed:
162
      raise errors.HypervisorError("Running lxc-ls failed: %s" % result.output)
163
    return result.stdout.splitlines()
164

    
165
  def GetInstanceInfo(self, instance_name):
166
    """Get instance properties.
167

168
    @type instance_name: string
169
    @param instance_name: the instance name
170

171
    @return: (name, id, memory, vcpus, stat, times)
172

173
    """
174
    # TODO: read container info from the cgroup mountpoint
175

    
176
    result = utils.RunCmd(["lxc-info", "-n", instance_name])
177
    if result.failed:
178
      raise errors.HypervisorError("Running lxc-info failed: %s" %
179
                                   result.output)
180
    # lxc-info output examples:
181
    # 'ganeti-lxc-test1' is STOPPED
182
    # 'ganeti-lxc-test1' is RUNNING
183
    _, state = result.stdout.rsplit(None, 1)
184
    if state != "RUNNING":
185
      return None
186

    
187
    cpu_list = self._GetCgroupCpuList(instance_name)
188
    return (instance_name, 0, 0, len(cpu_list), 0, 0)
189

    
190
  def GetAllInstancesInfo(self):
191
    """Get properties of all instances.
192

193
    @return: [(name, id, memory, vcpus, stat, times),...]
194

195
    """
196
    data = []
197
    for name in self.ListInstances():
198
      data.append(self.GetInstanceInfo(name))
199
    return data
200

    
201
  def _CreateConfigFile(self, instance, root_dir):
202
    """Create an lxc.conf file for an instance.
203

204
    """
205
    out = []
206
    # hostname
207
    out.append("lxc.utsname = %s" % instance.name)
208

    
209
    # separate pseudo-TTY instances
210
    out.append("lxc.pts = 255")
211
    # standard TTYs
212
    out.append("lxc.tty = 6")
213
    # console log file
214
    console_log = utils.PathJoin(self._ROOT_DIR, instance.name + ".console")
215
    try:
216
      utils.WriteFile(console_log, data="", mode=constants.SECURE_FILE_MODE)
217
    except EnvironmentError, err:
218
      raise errors.HypervisorError("Creating console log file %s for"
219
                                   " instance %s failed: %s" %
220
                                   (console_log, instance.name, err))
221
    out.append("lxc.console = %s" % console_log)
222

    
223
    # root FS
224
    out.append("lxc.rootfs = %s" % root_dir)
225

    
226
    # TODO: additional mounts, if we disable CAP_SYS_ADMIN
227

    
228
    # CPUs
229
    if instance.hvparams[constants.HV_CPU_MASK]:
230
      cpu_list = utils.ParseCpuMask(instance.hvparams[constants.HV_CPU_MASK])
231
      cpus_in_mask = len(cpu_list)
232
      if cpus_in_mask != instance.beparams["vcpus"]:
233
        raise errors.HypervisorError("Number of VCPUs (%d) doesn't match"
234
                                     " the number of CPUs in the"
235
                                     " cpu_mask (%d)" %
236
                                     (instance.beparams["vcpus"],
237
                                      cpus_in_mask))
238
      out.append("lxc.cgroup.cpuset.cpus = %s" %
239
                 instance.hvparams[constants.HV_CPU_MASK])
240

    
241
    # Device control
242
    # deny direct device access
243
    out.append("lxc.cgroup.devices.deny = a")
244
    for devinfo in self._DEVS:
245
      out.append("lxc.cgroup.devices.allow = %s rw" % devinfo)
246

    
247
    # Networking
248
    for idx, nic in enumerate(instance.nics):
249
      out.append("# NIC %d" % idx)
250
      mode = nic.nicparams[constants.NIC_MODE]
251
      link = nic.nicparams[constants.NIC_LINK]
252
      if mode == constants.NIC_MODE_BRIDGED:
253
        out.append("lxc.network.type = veth")
254
        out.append("lxc.network.link = %s" % link)
255
      else:
256
        raise errors.HypervisorError("LXC hypervisor only supports"
257
                                     " bridged mode (NIC %d has mode %s)" %
258
                                     (idx, mode))
259
      out.append("lxc.network.hwaddr = %s" % nic.mac)
260
      out.append("lxc.network.flags = up")
261

    
262
    # Capabilities
263
    for cap in self._DENIED_CAPABILITIES:
264
      out.append("lxc.cap.drop = %s" % cap)
265

    
266
    return "\n".join(out) + "\n"
267

    
268
  def StartInstance(self, instance, block_devices):
269
    """Start an instance.
270

271
    For LCX, we try to mount the block device and execute 'lxc-start'.
272
    We use volatile containers.
273

274
    """
275
    root_dir = self._InstanceDir(instance.name)
276
    try:
277
      utils.EnsureDirs([(root_dir, self._DIR_MODE)])
278
    except errors.GenericError, err:
279
      raise HypervisorError("Creating instance directory failed: %s", str(err))
280

    
281
    conf_file = self._InstanceConfFile(instance.name)
282
    utils.WriteFile(conf_file, data=self._CreateConfigFile(instance, root_dir))
283

    
284
    log_file = self._InstanceLogFile(instance.name)
285
    if not os.path.exists(log_file):
286
      try:
287
        utils.WriteFile(log_file, data="", mode=constants.SECURE_FILE_MODE)
288
      except EnvironmentError, err:
289
        raise errors.HypervisorError("Creating hypervisor log file %s for"
290
                                     " instance %s failed: %s" %
291
                                     (log_file, instance.name, err))
292

    
293
    if not os.path.ismount(root_dir):
294
      if not block_devices:
295
        raise HypervisorError("LXC needs at least one disk")
296

    
297
      sda_dev_path = block_devices[0][1]
298
      result = utils.RunCmd(["mount", sda_dev_path, root_dir])
299
      if result.failed:
300
        raise HypervisorError("Mounting the root dir of LXC instance %s"
301
                              " failed: %s" % (instance.name, result.output))
302
    result = utils.RunCmd(["lxc-start", "-n", instance.name,
303
                           "-o", log_file,
304
                           "-l", "DEBUG",
305
                           "-f", conf_file, "-d"])
306
    if result.failed:
307
      raise HypervisorError("Running the lxc-start script failed: %s" %
308
                            result.output)
309

    
310
  def StopInstance(self, instance, force=False, retry=False, name=None):
311
    """Stop an instance.
312

313
    This method has complicated cleanup tests, as we must:
314
      - try to kill all leftover processes
315
      - try to unmount any additional sub-mountpoints
316
      - finally unmount the instance dir
317

318
    """
319
    if name is None:
320
      name = instance.name
321

    
322
    root_dir = self._InstanceDir(name)
323
    if not os.path.exists(root_dir):
324
      return
325

    
326
    if name in self.ListInstances():
327
      # Signal init to shutdown; this is a hack
328
      if not retry and not force:
329
        result = utils.RunCmd(["chroot", root_dir, "poweroff"])
330
        if result.failed:
331
          raise HypervisorError("Running 'poweroff' on the instance"
332
                                " failed: %s" % result.output)
333
      time.sleep(2)
334
      result = utils.RunCmd(["lxc-stop", "-n", name])
335
      if result.failed:
336
        logging.warning("Error while doing lxc-stop for %s: %s", name,
337
                        result.output)
338

    
339
    for mpath in self._GetMountSubdirs(root_dir):
340
      result = utils.RunCmd(["umount", mpath])
341
      if result.failed:
342
        logging.warning("Error while umounting subpath %s for instance %s: %s",
343
                        mpath, name, result.output)
344

    
345
    result = utils.RunCmd(["umount", root_dir])
346
    if result.failed and force:
347
      msg = ("Processes still alive in the chroot: %s" %
348
             utils.RunCmd("fuser -vm %s" % root_dir).output)
349
      logging.error(msg)
350
      raise HypervisorError("Unmounting the chroot dir failed: %s (%s)" %
351
                            (result.output, msg))
352

    
353
  def RebootInstance(self, instance):
354
    """Reboot an instance.
355

356
    This is not (yet) implemented (in Ganeti) for the LXC hypervisor.
357

358
    """
359
    # TODO: implement reboot
360
    raise HypervisorError("The LXC hypervisor doesn't implement the"
361
                          " reboot functionality")
362

    
363
  def GetNodeInfo(self):
364
    """Return information about the node.
365

366
    This is just a wrapper over the base GetLinuxNodeInfo method.
367

368
    @return: a dict with the following keys (values in MiB):
369
          - memory_total: the total memory size on the node
370
          - memory_free: the available memory on the node for instances
371
          - memory_dom0: the memory used by the node itself, if available
372

373
    """
374
    return self.GetLinuxNodeInfo()
375

    
376
  @classmethod
377
  def GetShellCommandForConsole(cls, instance, hvparams, beparams):
378
    """Return a command for connecting to the console of an instance.
379

380
    """
381
    return "lxc-console -n %s" % instance.name
382

    
383
  def Verify(self):
384
    """Verify the hypervisor.
385

386
    For the chroot manager, it just checks the existence of the base dir.
387

388
    """
389
    if not os.path.exists(self._ROOT_DIR):
390
      return "The required directory '%s' does not exist." % self._ROOT_DIR
391

    
392
  @classmethod
393
  def PowercycleNode(cls):
394
    """LXC powercycle, just a wrapper over Linux powercycle.
395

396
    """
397
    cls.LinuxPowercycle()
398

    
399
  def MigrateInstance(self, instance, target, live):
400
    """Migrate an instance.
401

402
    @type instance: L{objects.Instance}
403
    @param instance: the instance to be migrated
404
    @type target: string
405
    @param target: hostname (usually ip) of the target node
406
    @type live: boolean
407
    @param live: whether to do a live or non-live migration
408

409
    """
410
    raise HypervisorError("Migration is not supported by the LXC hypervisor")