Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ b2fddf63

History | View | Annotate | Download (16.1 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the ganeti cluster configuration.
25

26

27
The configuration data is stored on every node but is updated on the
28
master only. After each update, the master distributes the data to the
29
other nodes.
30

31
Currently the data storage format is pickle as yaml was initially not
32
available, then we used it but it was a memory-eating slow beast, so
33
we reverted to pickle using custom Unpicklers.
34

35
"""
36

    
37
import os
38
import socket
39
import tempfile
40
import random
41

    
42
from ganeti import errors
43
from ganeti import logger
44
from ganeti import utils
45
from ganeti import constants
46
from ganeti import rpc
47
from ganeti import objects
48

    
49

    
50
class ConfigWriter:
  """The interface to the cluster configuration.

  The configuration lives in a single file (constants.CLUSTER_CONF_FILE
  unless another path is passed to the constructor). Read accessors
  call _OpenConfig(), which re-reads the file only when its mtime, size
  or inode changed, followed by _ReleaseLock(). Modifying methods call
  _OpenConfig(), change the in-memory data and then _WriteConfig(),
  which saves the file and distributes it to the other nodes.

  """

  def __init__(self, cfg_file=None, offline=False):
    """Initialise the object; the configuration file is not read here.

    Args:
      cfg_file: path of the configuration file; None selects
                constants.CLUSTER_CONF_FILE
      offline: if true, _DistributeConfig() does not copy the file to
               the other nodes

    """
    # in-memory copy of the configuration, plus the stat() signature of
    # the file it was loaded from (both maintained by _OpenConfig)
    self._config_data = None
    self._config_time = None
    self._config_size = None
    self._config_inode = None
    self._offline = offline
    if cfg_file is None:
      self._cfg_file = constants.CLUSTER_CONF_FILE
    else:
      self._cfg_file = cfg_file

  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Check if the cluster is configured.

    Returns: True if constants.CLUSTER_CONF_FILE exists

    """
    return os.path.exists(constants.CLUSTER_CONF_FILE)

  def GenerateMAC(self):
    """Generate a MAC for an instance.

    The MAC is the cluster's mac_prefix followed by three random bytes;
    candidates already used by a NIC in the configuration are rejected
    and at most 64 candidates are tried.

    Returns: the new MAC address (string)

    Raises: errors.ConfigurationError if no unused MAC was found

    """
    self._OpenConfig()
    self._ReleaseLock()
    prefix = self._config_data.cluster.mac_prefix
    all_macs = self._AllMACs()
    retries = 64
    while retries > 0:
      byte1 = random.randrange(0, 256)
      byte2 = random.randrange(0, 256)
      byte3 = random.randrange(0, 256)
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
      if mac not in all_macs:
        break
      retries -= 1
    else:
      # the loop ended without a 'break', i.e. every candidate was taken
      raise errors.ConfigurationError("Can't generate unique MAC")
    return mac

  def _AllMACs(self):
    """Return all MACs present in the config.

    Returns: a list with the mac of every NIC of every instance

    """
    self._OpenConfig()
    self._ReleaseLock()

    return [nic.mac
            for instance in self._config_data.instances.values()
            for nic in instance.nics]

  def VerifyConfig(self):
    """Check the configuration for internal inconsistencies.

    The checks are: the primary and secondary nodes of every instance
    must be present in the node list, and no MAC address may be used by
    more than one NIC.

    Returns: a list of error messages, empty if no problem was found

    """
    self._OpenConfig()
    self._ReleaseLock()

    result = []
    # a set, so that the duplicate check stays cheap with many NICs
    seen_macs = set()
    data = self._config_data
    for instance_name in data.instances:
      instance = data.instances[instance_name]
      if instance.primary_node not in data.nodes:
        result.append("Instance '%s' has invalid primary node '%s'" %
                      (instance_name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("Instance '%s' has invalid secondary node '%s'" %
                        (instance_name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("Instance '%s' has NIC %d mac %s duplicate" %
                        (instance_name, idx, nic.mac))
        else:
          seen_macs.add(nic.mac)
    return result

  def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote
    node.

    Args:
      disk: the disk object; its physical_id attribute (and the one of
            its children) is updated in place
      node_name: the name of the node the ID is computed for

    Raises: errors.ConfigurationError if a drbd disk does not list
      node_name among its nodes, or if one of its nodes is not in the
      configuration

    """
    if disk.children:
      for child in disk.children:
        self.SetDiskID(child, node_name)

    # no logical ID to convert, and a physical ID is already present
    if disk.logical_id is None and disk.physical_id is not None:
      return
    if disk.dev_type == "drbd":
      pnode, snode, port = disk.logical_id
      if node_name not in (pnode, snode):
        raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                        node_name)
      pnode_info = self.GetNodeInfo(pnode)
      snode_info = self.GetNodeInfo(snode)
      if pnode_info is None or snode_info is None:
        raise errors.ConfigurationError("Can't find primary or secondary node"
                                        " for %s" % str(disk))
      # the physical ID is (local ip, port, remote ip, port) as seen
      # from node_name
      if pnode == node_name:
        disk.physical_id = (pnode_info.secondary_ip, port,
                            snode_info.secondary_ip, port)
      else: # it must be secondary, we tested above
        disk.physical_id = (snode_info.secondary_ip, port,
                            pnode_info.secondary_ip, port)
    else:
      disk.physical_id = disk.logical_id

  def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    Args:
      port: the port number (int)

    Raises: errors.ProgrammerError if port is not an int

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._OpenConfig()
    self._config_data.cluster.tcpudp_port_pool.add(port)
    self._WriteConfig()

  def GetPortList(self):
    """Returns a copy of the current port list.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.tcpudp_port_pool.copy()

  def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    Returns: the allocated port

    Raises: errors.ConfigurationError if the pool is empty and the next
      port of the default range would reach constants.LAST_DRBD_PORT

    """
    self._OpenConfig()

    # If there are TCP/IP ports configured, we use them first.
    if self._config_data.cluster.tcpudp_port_pool:
      port = self._config_data.cluster.tcpudp_port_pool.pop()
    else:
      port = self._config_data.cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      self._config_data.cluster.highest_used_port = port

    self._WriteConfig()
    return port

  def GetHostKey(self):
    """Return the rsa hostkey from the config.

    Args: None

    Returns: rsa hostkey
    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.rsahostkeypub

  def AddInstance(self, instance):
    """Add an instance to the config.

    This should be used after creating a new instance. An existing
    entry with the same name is replaced.

    Args:
      instance: the instance object

    Raises: errors.ProgrammerError if instance is not an
      objects.Instance

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    self._OpenConfig()
    self._config_data.instances[instance.name] = instance
    self._WriteConfig()

  def MarkInstanceUp(self, instance_name):
    """Mark the instance status to up in the config.

    Raises: errors.ConfigurationError if the instance is not known

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    instance = self._config_data.instances[instance_name]
    instance.status = "up"
    self._WriteConfig()

  def RemoveInstance(self, instance_name):
    """Remove the instance from the configuration.

    Raises: errors.ConfigurationError if the instance is not known

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    del self._config_data.instances[instance_name]
    self._WriteConfig()

  def MarkInstanceDown(self, instance_name):
    """Mark the status of an instance to down in the configuration.

    Raises: errors.ConfigurationError if the instance is not known

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    instance = self._config_data.instances[instance_name]
    instance.status = "down"
    self._WriteConfig()

  def GetInstanceList(self):
    """Get the list of instances.

    Returns:
      array of instances, ex. ['instance2.example.com','instance1.example.com']
      these contains all the instances, also the ones in Admin_down state

    """
    self._OpenConfig()
    self._ReleaseLock()

    return self._config_data.instances.keys()

  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    Returns: the result of utils.MatchNameComponent() applied to
      short_name and the names of the configured instances

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    self._config_data.instances.keys())

  def GetInstanceInfo(self, instance_name):
    """Returns informations about an instance.

    It takes the information from the configuration file. Other informations of
    an instance are taken from the live systems.

    Args:
      instance_name: name of the instance, ex instance1.example.com

    Returns:
      the instance object, or None if the instance is not known

    """
    self._OpenConfig()
    self._ReleaseLock()

    if instance_name not in self._config_data.instances:
      return None

    return self._config_data.instances[instance_name]

  def AddNode(self, node):
    """Add a node to the configuration.

    An existing entry with the same name is replaced.

    Args:
      node: an object.Node instance

    """
    self._OpenConfig()
    self._config_data.nodes[node.name] = node
    self._WriteConfig()

  def RemoveNode(self, node_name):
    """Remove a node from the configuration.

    Raises: errors.ConfigurationError if the node is not known

    """
    self._OpenConfig()
    if node_name not in self._config_data.nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del self._config_data.nodes[node_name]
    self._WriteConfig()

  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    Returns: the result of utils.MatchNameComponent() applied to
      short_name and the names of the configured nodes

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    self._config_data.nodes.keys())

  def GetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    Args:
      node_name: the name of the node, as used as key in the node list

    Returns: the node object, or None if the node is not known

    """
    self._OpenConfig()
    self._ReleaseLock()

    if node_name not in self._config_data.nodes:
      return None

    return self._config_data.nodes[node_name]

  def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.nodes.keys()

  def DumpConfig(self):
    """Return the entire configuration of the cluster.

    The object returned is the one held by this instance, not a copy.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data

  def _BumpSerialNo(self):
    """Bump up the serial number of the config.

    """
    self._config_data.cluster.serial_no += 1

  def _OpenConfig(self):
    """Read the config data from disk.

    In case we already have configuration data and the config file has
    the same mtime, size and inode as when we read it, we skip the
    parsing of the file, since de-serialisation could be slow.

    Raises: errors.ConfigurationError if the file cannot be stat()ed or
      loaded, if it has no cluster.config_version, or if that version
      differs from constants.CONFIG_VERSION

    """
    # sys.exc_info() is used below instead of binding the exception in
    # the except clause, so that this code parses on every Python
    # version
    import sys

    try:
      st = os.stat(self._cfg_file)
    except OSError:
      raise errors.ConfigurationError("Can't stat config file: %s" %
                                      sys.exc_info()[1])
    if (self._config_data is not None and
        self._config_time is not None and
        self._config_time == st.st_mtime and
        self._config_size == st.st_size and
        self._config_inode == st.st_ino):
      # data is current, so skip loading of config file
      return
    # NOTE(review): per the module docstring the on-disk format is
    # pickle, so this file must only ever come from a trusted source
    f = open(self._cfg_file, 'r')
    try:
      try:
        data = objects.ConfigObject.Load(f)
      except Exception:
        raise errors.ConfigurationError(sys.exc_info()[1])
    finally:
      f.close()
    if (not hasattr(data, 'cluster') or
        not hasattr(data.cluster, 'config_version')):
      raise errors.ConfigurationError("Incomplete configuration"
                                      " (missing cluster.config_version)")
    if data.cluster.config_version != constants.CONFIG_VERSION:
      raise errors.ConfigurationError("Cluster configuration version"
                                      " mismatch, got %s instead of %s" %
                                      (data.cluster.config_version,
                                       constants.CONFIG_VERSION))
    # only now, after all checks passed, replace the cached data and
    # remember the signature of the file it came from
    self._config_data = data
    self._config_time = st.st_mtime
    self._config_size = st.st_size
    self._config_inode = st.st_ino

  def _ReleaseLock(self):
    """Placeholder for releasing the configuration lock.

    Currently this does nothing; it is called by the read accessors
    after _OpenConfig().

    """

  def _DistributeConfig(self):
    """Distribute the configuration to the other nodes.

    Currently, this only copies the configuration file. In the future,
    it could be used to encapsulate the 2/3-phase update mechanism.

    Returns: True if we are offline or the upload succeeded on all
      nodes, False if it failed on at least one node (failures are
      logged)

    """
    if self._offline:
      return True
    bad = False
    nodelist = self.GetNodeList()
    myhostname = socket.gethostname()

    # every node except ourselves gets a copy
    tgt_list = []
    for node in nodelist:
      nodeinfo = self.GetNodeInfo(node)
      if nodeinfo.name == myhostname:
        continue
      tgt_list.append(node)

    result = rpc.call_upload_file(tgt_list, self._cfg_file)
    for node in tgt_list:
      if not result[node]:
        logger.Error("copy of file %s to node %s failed" %
                     (self._cfg_file, node))
        bad = True
    return not bad

  def _WriteConfig(self, destination=None):
    """Write the configuration data to persistent storage.

    The serial number is bumped, the data is dumped to a temporary file
    in the directory of the destination, synced to disk and renamed
    over the destination; afterwards the configuration is distributed
    to the other nodes. If writing fails the temporary file is removed
    and the destination is left untouched.

    Args:
      destination: the file to write to; None selects the configuration
                   file this object was created for

    """
    if destination is None:
      destination = self._cfg_file
    self._BumpSerialNo()
    dir_name, file_name = os.path.split(destination)
    fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
    f = os.fdopen(fd, 'w')
    installed = False
    try:
      try:
        self._config_data.Dump(f)
        # push Python's own buffer to the OS first, otherwise fsync()
        # would run before the data has been written to the descriptor
        f.flush()
        os.fsync(f.fileno())
      finally:
        # we don't need to do os.close(fd) as f.close() does it
        f.close()
      os.rename(name, destination)
      installed = True
    finally:
      if not installed:
        # don't leave a stale temporary file next to the config; a
        # failure to remove it must not hide the original error
        try:
          os.unlink(name)
        except OSError:
          pass
    self._DistributeConfig()

  def InitConfig(self, node, primary_ip, secondary_ip,
                 clustername, hostkeypub, mac_prefix, vg_name, def_bridge):
    """Create the initial cluster configuration.

    It will contain the current node, which will also be the master
    node, and no instances or operating systems.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host or None, in
                    which case the primary IP is used
      clustername: the name of the cluster
      hostkeypub: the public hostkey of this host
      mac_prefix: the prefix used by GenerateMAC() for new MACs
      vg_name: the volume group name
      def_bridge: the default bridge

    """
    # AllocatePort() hands out highest_used_port + 1, so start one
    # below the first port of the range
    hu_port = constants.FIRST_DRBD_PORT - 1
    globalconfig = objects.Cluster(config_version=constants.CONFIG_VERSION,
                                   serial_no=1, master_node=node,
                                   name=clustername,
                                   rsahostkeypub=hostkeypub,
                                   highest_used_port=hu_port,
                                   mac_prefix=mac_prefix,
                                   volume_group_name=vg_name,
                                   default_bridge=def_bridge,
                                   tcpudp_port_pool=set())
    if secondary_ip is None:
      secondary_ip = primary_ip
    nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: nodeconfig},
                                           instances={},
                                           cluster=globalconfig)
    self._WriteConfig()

  def GetClusterName(self):
    """Return the cluster name.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.name

  def GetVGName(self):
    """Return the volume group name.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.volume_group_name

  def GetDefBridge(self):
    """Return the default bridge.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.default_bridge

  def GetMACPrefix(self):
    """Return the mac prefix.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.mac_prefix