Statistics
| Branch: | Tag: | Revision:

root / lib / hypervisor / hv_lxc.py @ b8aa46ed

History | View | Annotate | Download (13.1 kB)

1
#
2
#
3

    
4
# Copyright (C) 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""LXC hypervisor
23

24
"""
25

    
26
import os
27
import os.path
28
import time
29
import logging
30

    
31
from ganeti import constants
32
from ganeti import errors # pylint: disable-msg=W0611
33
from ganeti import utils
34
from ganeti.hypervisor import hv_base
35
from ganeti.errors import HypervisorError
36

    
37

    
38
class LXCHypervisor(hv_base.BaseHypervisor):
39
  """LXC-based virtualization.
40

41
  Since current (Spring 2010) distributions are not yet ready for
42
  running under a container, the following changes must be done
43
  manually:
44
    - remove udev
45
    - disable the kernel log component of sysklogd/rsyslog/etc.,
46
      otherwise they will fail to read the log, and at least rsyslog
47
      will fill the filesystem with error messages
48

49
  TODO:
50
    - move hardcoded parameters into hypervisor parameters, once we
51
      have the container-parameter support
52
    - implement memory limits, but only optionally, depending on host
53
      kernel support
54

55
  Problems/issues:
56
    - LXC is very temperamental; in daemon mode, it succeeds or fails
57
      in launching the instance silently, without any error
58
      indication, and when failing it can leave network interfaces
59
      around, and future successful startups will list the instance
60
      twice
61
    - shutdown sequence of containers leaves the init 'dead', and the
62
      container effectively stopped, but LXC still believes the
63
      container to be running; need to investigate using the
64
      notify_on_release and release_agent feature of cgroups
65

66
  """
67
  _ROOT_DIR = constants.RUN_GANETI_DIR + "/lxc"
68
  _DEVS = [
69
    "c 1:3",   # /dev/null
70
    "c 1:5",   # /dev/zero
71
    "c 1:7",   # /dev/full
72
    "c 1:8",   # /dev/random
73
    "c 1:9",   # /dev/urandom
74
    "c 1:10",  # /dev/aio
75
    "c 5:0",   # /dev/tty
76
    "c 5:1",   # /dev/console
77
    "c 5:2",   # /dev/ptmx
78
    "c 136:*", # first block of Unix98 PTY slaves
79
    ]
80
  _DENIED_CAPABILITIES = [
81
    "mac_override",    # Allow MAC configuration or state changes
82
    # TODO: remove sys_admin too, for safety
83
    #"sys_admin",       # Perform  a range of system administration operations
84
    "sys_boot",        # Use reboot(2) and kexec_load(2)
85
    "sys_module",      # Load  and  unload kernel modules
86
    "sys_time",        # Set  system  clock, set real-time (hardware) clock
87
    ]
88
  _DIR_MODE = 0755
89

    
90
  PARAMETERS = {
91
    }
92

    
93
  def __init__(self):
94
    hv_base.BaseHypervisor.__init__(self)
95
    utils.EnsureDirs([(self._ROOT_DIR, self._DIR_MODE)])
96

    
97
  @staticmethod
98
  def _GetMountSubdirs(path):
99
    """Return the list of mountpoints under a given path.
100

101
    """
102
    result = []
103
    for _, mountpoint, _, _ in utils.GetMounts():
104
      if (mountpoint.startswith(path) and
105
          mountpoint != path):
106
        result.append(mountpoint)
107

    
108
    result.sort(key=lambda x: x.count("/"), reverse=True)
109
    return result
110

    
111
  @classmethod
112
  def _InstanceDir(cls, instance_name):
113
    """Return the root directory for an instance.
114

115
    """
116
    return utils.PathJoin(cls._ROOT_DIR, instance_name)
117

    
118
  @classmethod
119
  def _InstanceConfFile(cls, instance_name):
120
    """Return the configuration file for an instance.
121

122
    """
123
    return utils.PathJoin(cls._ROOT_DIR, instance_name + ".conf")
124

    
125
  @classmethod
126
  def _InstanceLogFile(cls, instance_name):
127
    """Return the log file for an instance.
128

129
    """
130
    return utils.PathJoin(cls._ROOT_DIR, instance_name + ".log")
131

    
132
  @classmethod
133
  def _GetCgroupMountPoint(cls):
134
    for _, mountpoint, fstype, _ in utils.GetMounts():
135
      if fstype == "cgroup":
136
        return mountpoint
137
    raise errors.HypervisorError("The cgroup filesystem is not mounted")
138

    
139
  @classmethod
140
  def _GetCgroupCpuList(cls, instance_name):
141
    """Return the list of CPU ids for an instance.
142

143
    """
144
    cgroup = cls._GetCgroupMountPoint()
145
    try:
146
      cpus = utils.ReadFile(utils.PathJoin(cgroup,
147
                                           instance_name,
148
                                           "cpuset.cpus"))
149
    except EnvironmentError, err:
150
      raise errors.HypervisorError("Getting CPU list for instance"
151
                                   " %s failed: %s" % (instance_name, err))
152
    # cpuset.cpus format: comma-separated list of CPU ids
153
    # or dash-separated id ranges
154
    # Example: "0-1,3"
155
    cpu_list = []
156
    for range_def in cpus.split(","):
157
      boundaries = range_def.split("-")
158
      n_elements = len(boundaries)
159
      lower = int(boundaries[0])
160
      higher = int(boundaries[n_elements - 1])
161
      cpu_list.extend(range(lower, higher + 1))
162
    return cpu_list
163

    
164
  def ListInstances(self):
165
    """Get the list of running instances.
166

167
    """
168
    result = utils.RunCmd(["lxc-ls"])
169
    if result.failed:
170
      raise errors.HypervisorError("Running lxc-ls failed: %s" % result.output)
171
    return result.stdout.splitlines()
172

    
173
  def GetInstanceInfo(self, instance_name):
174
    """Get instance properties.
175

176
    @type instance_name: string
177
    @param instance_name: the instance name
178

179
    @return: (name, id, memory, vcpus, stat, times)
180

181
    """
182
    # TODO: read container info from the cgroup mountpoint
183

    
184
    result = utils.RunCmd(["lxc-info", "-n", instance_name])
185
    if result.failed:
186
      raise errors.HypervisorError("Running lxc-info failed: %s" %
187
                                   result.output)
188
    # lxc-info output examples:
189
    # 'ganeti-lxc-test1' is STOPPED
190
    # 'ganeti-lxc-test1' is RUNNING
191
    _, state = result.stdout.rsplit(None, 1)
192
    if state != "RUNNING":
193
      return None
194

    
195
    cpu_list = self._GetCgroupCpuList(instance_name)
196
    return (instance_name, 0, 0, len(cpu_list), 0, 0)
197

    
198
  def GetAllInstancesInfo(self):
199
    """Get properties of all instances.
200

201
    @return: [(name, id, memory, vcpus, stat, times),...]
202

203
    """
204
    data = []
205
    for name in self.ListInstances():
206
      data.append(self.GetInstanceInfo(name))
207
    return data
208

    
209
  def _CreateConfigFile(self, instance, root_dir):
210
    """Create an lxc.conf file for an instance"""
211
    out = []
212
    # hostname
213
    out.append("lxc.utsname = %s" % instance.name)
214

    
215
    # separate pseudo-TTY instances
216
    out.append("lxc.pts = 255")
217
    # standard TTYs
218
    out.append("lxc.tty = 6")
219
    # console log file
220
    console_log = utils.PathJoin(self._ROOT_DIR, instance.name + ".console")
221
    try:
222
      utils.WriteFile(console_log, data="", mode=constants.SECURE_FILE_MODE)
223
    except EnvironmentError, err:
224
      raise errors.HypervisorError("Creating console log file %s for"
225
                                   " instance %s failed: %s" %
226
                                   (console_log, instance.name, err))
227
    out.append("lxc.console = %s" % console_log)
228

    
229
    # root FS
230
    out.append("lxc.rootfs = %s" % root_dir)
231

    
232
    # TODO: additional mounts, if we disable CAP_SYS_ADMIN
233

    
234
    # Device control
235
    # deny direct device access
236
    out.append("lxc.cgroup.devices.deny = a")
237
    for devinfo in self._DEVS:
238
      out.append("lxc.cgroup.devices.allow = %s rw" % devinfo)
239

    
240
    # Networking
241
    for idx, nic in enumerate(instance.nics):
242
      out.append("# NIC %d" % idx)
243
      mode = nic.nicparams[constants.NIC_MODE]
244
      link = nic.nicparams[constants.NIC_LINK]
245
      if mode == constants.NIC_MODE_BRIDGED:
246
        out.append("lxc.network.type = veth")
247
        out.append("lxc.network.link = %s" % link)
248
      else:
249
        raise errors.HypervisorError("LXC hypervisor only supports"
250
                                     " bridged mode (NIC %d has mode %s)" %
251
                                     (idx, mode))
252
      out.append("lxc.network.hwaddr = %s" % nic.mac)
253
      out.append("lxc.network.flags = up")
254

    
255
    # Capabilities
256
    for cap in self._DENIED_CAPABILITIES:
257
      out.append("lxc.cap.drop = %s" % cap)
258

    
259
    return "\n".join(out) + "\n"
260

    
261
  def StartInstance(self, instance, block_devices):
262
    """Start an instance.
263

264
    For LCX, we try to mount the block device and execute 'lxc-start
265
    start' (we use volatile containers).
266

267
    """
268
    root_dir = self._InstanceDir(instance.name)
269
    try:
270
      utils.EnsureDirs([(root_dir, self._DIR_MODE)])
271
    except errors.GenericError, err:
272
      raise HypervisorError("Creating instance directory failed: %s", str(err))
273

    
274
    conf_file = self._InstanceConfFile(instance.name)
275
    utils.WriteFile(conf_file, data=self._CreateConfigFile(instance, root_dir))
276

    
277
    log_file = self._InstanceLogFile(instance.name)
278
    if not os.path.exists(log_file):
279
      try:
280
        utils.WriteFile(log_file, data="", mode=constants.SECURE_FILE_MODE)
281
      except EnvironmentError, err:
282
        raise errors.HypervisorError("Creating hypervisor log file %s for"
283
                                     " instance %s failed: %s" %
284
                                     (log_file, instance.name, err))
285

    
286
    if not os.path.ismount(root_dir):
287
      if not block_devices:
288
        raise HypervisorError("LXC needs at least one disk")
289

    
290
      sda_dev_path = block_devices[0][1]
291
      result = utils.RunCmd(["mount", sda_dev_path, root_dir])
292
      if result.failed:
293
        raise HypervisorError("Mounting the root dir of LXC instance %s"
294
                              " failed: %s" % (instance.name, result.output))
295
    result = utils.RunCmd(["lxc-start", "-n", instance.name,
296
                           "-o", log_file,
297
                           "-l", "DEBUG",
298
                           "-f", conf_file, "-d"])
299
    if result.failed:
300
      raise HypervisorError("Running the lxc-start script failed: %s" %
301
                            result.output)
302

    
303
  def StopInstance(self, instance, force=False, retry=False, name=None):
304
    """Stop an instance.
305

306
    This method has complicated cleanup tests, as we must:
307
      - try to kill all leftover processes
308
      - try to unmount any additional sub-mountpoints
309
      - finally unmount the instance dir
310

311
    """
312
    if name is None:
313
      name = instance.name
314

    
315
    root_dir = self._InstanceDir(name)
316
    if not os.path.exists(root_dir):
317
      return
318

    
319
    if name in self.ListInstances():
320
      # Signal init to shutdown; this is a hack
321
      if not retry and not force:
322
        result = utils.RunCmd(["chroot", root_dir, "poweroff"])
323
        if result.failed:
324
          raise HypervisorError("Running 'poweroff' on the instance"
325
                                " failed: %s" % result.output)
326
      time.sleep(2)
327
      result = utils.RunCmd(["lxc-stop", "-n", name])
328
      if result.failed:
329
        logging.warning("Error while doing lxc-stop for %s: %s", name,
330
                        result.output)
331

    
332
    for mpath in self._GetMountSubdirs(root_dir):
333
      result = utils.RunCmd(["umount", mpath])
334
      if result.failed:
335
        logging.warning("Error while umounting subpath %s for instance %s: %s",
336
                        mpath, name, result.output)
337

    
338
    result = utils.RunCmd(["umount", root_dir])
339
    if result.failed and force:
340
      msg = ("Processes still alive in the chroot: %s" %
341
             utils.RunCmd("fuser -vm %s" % root_dir).output)
342
      logging.error(msg)
343
      raise HypervisorError("Unmounting the chroot dir failed: %s (%s)" %
344
                            (result.output, msg))
345

    
346
  def RebootInstance(self, instance):
347
    """Reboot an instance.
348

349
    This is not (yet) implemented (in Ganeti) for the LXC hypervisor.
350

351
    """
352
    # TODO: implement reboot
353
    raise HypervisorError("The LXC hypervisor doesn't implement the"
354
                          " reboot functionality")
355

    
356
  def GetNodeInfo(self):
357
    """Return information about the node.
358

359
    This is just a wrapper over the base GetLinuxNodeInfo method.
360

361
    @return: a dict with the following keys (values in MiB):
362
          - memory_total: the total memory size on the node
363
          - memory_free: the available memory on the node for instances
364
          - memory_dom0: the memory used by the node itself, if available
365

366
    """
367
    return self.GetLinuxNodeInfo()
368

    
369
  @classmethod
370
  def GetShellCommandForConsole(cls, instance, hvparams, beparams):
371
    """Return a command for connecting to the console of an instance.
372

373
    """
374
    return "lxc-console -n %s" % instance.name
375

    
376
  def Verify(self):
377
    """Verify the hypervisor.
378

379
    For the chroot manager, it just checks the existence of the base dir.
380

381
    """
382
    if not os.path.exists(self._ROOT_DIR):
383
      return "The required directory '%s' does not exist." % self._ROOT_DIR
384

    
385
  @classmethod
386
  def PowercycleNode(cls):
387
    """LXC powercycle, just a wrapper over Linux powercycle.
388

389
    """
390
    cls.LinuxPowercycle()
391

    
392
  def MigrateInstance(self, instance, target, live):
393
    """Migrate an instance.
394

395
    @type instance: L{objects.Instance}
396
    @param instance: the instance to be migrated
397
    @type target: string
398
    @param target: hostname (usually ip) of the target node
399
    @type live: boolean
400
    @param live: whether to do a live or non-live migration
401

402
    """
403
    raise HypervisorError("Migration is not supported by the LXC hypervisor")