Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ 5fcdc80d

History | View | Annotate | Download (15.8 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the ganeti cluster configuration.
25

26

27
The configuration data is stored on every node but is updated on the
28
master only. After each update, the master distributes the data to the
29
other nodes.
30

31
Currently the data storage format is pickle as yaml was initially not
32
available, then we used it but it was a memory-eating slow beast, so
33
we reverted to pickle using custom Unpicklers.
34

35
"""
36

    
37
import os
38
import socket
39
import tempfile
40
import random
41

    
42
from ganeti import errors
43
from ganeti import logger
44
from ganeti import utils
45
from ganeti import constants
46
from ganeti import rpc
47
from ganeti import objects
48

    
49

    
50
class ConfigWriter:
51
  """The interface to the cluster configuration.
52

53
  """
54
  def __init__(self, cfg_file=None, offline=False):
55
    self._config_data = None
56
    self._config_time = None
57
    self._config_size = None
58
    self._config_inode = None
59
    self._offline = offline
60
    if cfg_file is None:
61
      self._cfg_file = constants.CLUSTER_CONF_FILE
62
    else:
63
      self._cfg_file = cfg_file
64

    
65
  # kept static so that it can be invoked on the class, without an instance
  @staticmethod
  def IsCluster():
    """Tell whether a cluster configuration file exists on this host.

    Returns:
      True if the path in constants.CLUSTER_CONF_FILE exists, False
      otherwise

    """
    conf_path = constants.CLUSTER_CONF_FILE
    return os.path.exists(conf_path)
72

    
73
  def GenerateMAC(self):
74
    """Generate a MAC for an instance.
75

76
    This should check the current instances for duplicates.
77

78
    """
79
    self._OpenConfig()
80
    self._ReleaseLock()
81
    prefix = self._config_data.cluster.mac_prefix
82
    all_macs = self._AllMACs()
83
    retries = 64
84
    while retries > 0:
85
      byte1 = random.randrange(0, 256)
86
      byte2 = random.randrange(0, 256)
87
      byte3 = random.randrange(0, 256)
88
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
89
      if mac not in all_macs:
90
        break
91
      retries -= 1
92
    else:
93
      raise errors.ConfigurationError, ("Can't generate unique MAC")
94
    return mac
95

    
96
  def _AllMACs(self):
97
    """Return all MACs present in the config.
98

99
    """
100
    self._OpenConfig()
101
    self._ReleaseLock()
102

    
103
    result = []
104
    for instance in self._config_data.instances.values():
105
      for nic in instance.nics:
106
        result.append(nic.mac)
107

    
108
    return result
109

    
110
  def VerifyConfig(self):
111
    """Stub verify function.
112
    """
113
    self._OpenConfig()
114
    self._ReleaseLock()
115

    
116
    result = []
117
    seen_macs = []
118
    data = self._config_data
119
    for instance_name in data.instances:
120
      instance = data.instances[instance_name]
121
      if instance.primary_node not in data.nodes:
122
        result.append("Instance '%s' has invalid primary node '%s'" %
123
                      (instance_name, instance.primary_node))
124
      for snode in instance.secondary_nodes:
125
        if snode not in data.nodes:
126
          result.append("Instance '%s' has invalid secondary node '%s'" %
127
                        (instance_name, snode))
128
      for idx, nic in enumerate(instance.nics):
129
        if nic.mac in seen_macs:
130
          result.append("Instance '%s' has NIC %d mac %s duplicate" %
131
                        (instance_name, idx, nic.mac))
132
        else:
133
          seen_macs.append(nic.mac)
134
    return result
135

    
136

    
137
  def SetDiskID(self, disk, node_name):
138
    """Convert the unique ID to the ID needed on the target nodes.
139

140
    This is used only for drbd, which needs ip/port configuration.
141

142
    The routine descends down and updates its children also, because
143
    this helps when the only the top device is passed to the remote
144
    node.
145

146
    """
147
    if disk.children:
148
      for child in disk.children:
149
        self.SetDiskID(child, node_name)
150

    
151
    if disk.logical_id is None and disk.physical_id is not None:
152
      return
153
    if disk.dev_type == "drbd":
154
      pnode, snode, port = disk.logical_id
155
      if node_name not in (pnode, snode):
156
        raise errors.ConfigurationError, ("DRBD device not knowing node %s" %
157
                                          node_name)
158
      pnode_info = self.GetNodeInfo(pnode)
159
      snode_info = self.GetNodeInfo(snode)
160
      if pnode_info is None or snode_info is None:
161
        raise errors.ConfigurationError("Can't find primary or secondary node"
162
                                        " for %s" % str(disk))
163
      if pnode == node_name:
164
        disk.physical_id = (pnode_info.secondary_ip, port,
165
                            snode_info.secondary_ip, port)
166
      else: # it must be secondary, we tested above
167
        disk.physical_id = (snode_info.secondary_ip, port,
168
                            pnode_info.secondary_ip, port)
169
    else:
170
      disk.physical_id = disk.logical_id
171
    return
172

    
173
  def AddTcpUdpPort(self, port):
    """Put a port into the pool of available ports and save the config.

    Args:
      port: the port number, as an int

    Raises:
      errors.ProgrammerError: if port is not an int

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._OpenConfig()
    pool = self._config_data.cluster.tcpudp_port_pool
    pool.add(port)
    self._WriteConfig()
183

    
184
  def GetPortList(self):
185
    """Returns a copy of the current port list.
186

187
    """
188
    self._OpenConfig()
189
    self._ReleaseLock()
190
    return self._config_data.cluster.tcpudp_port_pool.copy()
191

    
192
  def AllocatePort(self):
    """Allocate a port and save the configuration.

    A port from the available port pool is preferred; when the pool is
    empty, the port following highest_used_port is taken and
    highest_used_port is raised accordingly.

    Returns:
      the allocated port

    Raises:
      errors.ConfigurationError: if the pool is empty and the next port
        would be constants.LAST_DRBD_PORT or above

    """
    self._OpenConfig()
    cluster = self._config_data.cluster

    if cluster.tcpudp_port_pool:
      # explicitly configured ports are used up first
      port = cluster.tcpudp_port_pool.pop()
    else:
      port = cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      cluster.highest_used_port = port

    self._WriteConfig()
    return port
215

    
216
  def GetHostKey(self):
217
    """Return the rsa hostkey from the config.
218

219
    Args: None
220

221
    Returns: rsa hostkey
222
    """
223
    self._OpenConfig()
224
    self._ReleaseLock()
225
    return self._config_data.cluster.rsahostkeypub
226

    
227
  def AddInstance(self, instance):
    """Store an instance in the configuration and save it.

    Meant to be called after a new instance has been created. An
    existing entry with the same name is overwritten.

    Args:
      instance: the objects.Instance to store, keyed by its name

    Raises:
      errors.ProgrammerError: if instance is not an objects.Instance

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    self._OpenConfig()
    known = self._config_data.instances
    known[instance.name] = instance
    self._WriteConfig()
241

    
242
  def MarkInstanceUp(self, instance_name):
    """Set the status of an instance to "up" and save the configuration.

    Args:
      instance_name: the name of the instance to mark

    Raises:
      errors.ConfigurationError: if the instance is not in the config

    """
    self._OpenConfig()

    known = self._config_data.instances
    if instance_name not in known:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    known[instance_name].status = "up"
    self._WriteConfig()
254

    
255
  def RemoveInstance(self, instance_name):
    """Delete an instance from the configuration and save it.

    Args:
      instance_name: the name of the instance to delete

    Raises:
      errors.ConfigurationError: if the instance is not in the config

    """
    self._OpenConfig()

    known = self._config_data.instances
    if instance_name not in known:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    del known[instance_name]
    self._WriteConfig()
266

    
267
  def MarkInstanceDown(self, instance_name):
    """Set the status of an instance to "down" and save the configuration.

    Args:
      instance_name: the name of the instance to mark

    Raises:
      errors.ConfigurationError: if the instance is not in the config

    """
    self._OpenConfig()

    known = self._config_data.instances
    if instance_name not in known:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    known[instance_name].status = "down"
    self._WriteConfig()
279

    
280
  def GetInstanceList(self):
281
    """Get the list of instances.
282

283
    Returns:
284
      array of instances, ex. ['instance2.example.com','instance1.example.com']
285
      these contains all the instances, also the ones in Admin_down state
286

287
    """
288
    self._OpenConfig()
289
    self._ReleaseLock()
290

    
291
    return self._config_data.instances.keys()
292

    
293
  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    Args:
      short_name: the (possibly partial) instance name to look up

    Returns:
      the result of utils.MatchNameComponent for short_name against the
      names of the configured instances

    """
    self._OpenConfig()
    self._ReleaseLock()

    instance_names = self._config_data.instances.keys()
    return utils.MatchNameComponent(short_name, instance_names)
302

    
303
  def GetInstanceInfo(self, instance_name):
304
    """Returns informations about an instance.
305

306
    It takes the information from the configuration file. Other informations of
307
    an instance are taken from the live systems.
308

309
    Args:
310
      instance: name of the instance, ex instance1.example.com
311

312
    Returns:
313
      the instance object
314

315
    """
316
    self._OpenConfig()
317
    self._ReleaseLock()
318

    
319
    if instance_name not in self._config_data.instances:
320
      return None
321

    
322
    return self._config_data.instances[instance_name]
323

    
324
  def AddNode(self, node):
    """Store a node in the configuration and save it.

    An existing entry with the same name is overwritten.

    Args:
      node: an objects.Node instance, stored under its name

    """
    self._OpenConfig()
    known = self._config_data.nodes
    known[node.name] = node
    self._WriteConfig()
334

    
335
  def RemoveNode(self, node_name):
    """Delete a node from the configuration and save it.

    Args:
      node_name: the name of the node to delete

    Raises:
      errors.ConfigurationError: if the node is not in the config

    """
    self._OpenConfig()
    known = self._config_data.nodes
    if node_name not in known:
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del known[node_name]
    self._WriteConfig()
345

    
346
  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    Args:
      short_name: the (possibly partial) node name to look up

    Returns:
      the result of utils.MatchNameComponent for short_name against the
      names of the configured nodes

    """
    self._OpenConfig()
    self._ReleaseLock()

    node_names = self._config_data.nodes.keys()
    return utils.MatchNameComponent(short_name, node_names)
355

    
356
  def GetNodeInfo(self, node_name):
357
    """Get the configuration of a node, as stored in the config.
358

359
    Args: node: nodename (tuple) of the node
360

361
    Returns: the node object
362

363
    """
364
    self._OpenConfig()
365
    self._ReleaseLock()
366

    
367
    if node_name not in self._config_data.nodes:
368
      return None
369

    
370
    return self._config_data.nodes[node_name]
371

    
372
  def GetNodeList(self):
373
    """Return the list of nodes which are in the configuration.
374

375
    """
376
    self._OpenConfig()
377
    self._ReleaseLock()
378
    return self._config_data.nodes.keys()
379

    
380
  def DumpConfig(self):
381
    """Return the entire configuration of the cluster.
382
    """
383
    self._OpenConfig()
384
    self._ReleaseLock()
385
    return self._config_data
386

    
387
  def _BumpSerialNo(self):
388
    """Bump up the serial number of the config.
389

390
    """
391
    self._config_data.cluster.serial_no += 1
392

    
393
  def _OpenConfig(self):
394
    """Read the config data from disk.
395

396
    In case we already have configuration data and the config file has
397
    the same mtime as when we read it, we skip the parsing of the
398
    file, since de-serialisation could be slow.
399

400
    """
401
    try:
402
      st = os.stat(self._cfg_file)
403
    except OSError, err:
404
      raise errors.ConfigurationError, "Can't stat config file: %s" % err
405
    if (self._config_data is not None and
406
        self._config_time is not None and
407
        self._config_time == st.st_mtime and
408
        self._config_size == st.st_size and
409
        self._config_inode == st.st_ino):
410
      # data is current, so skip loading of config file
411
      return
412
    f = open(self._cfg_file, 'r')
413
    try:
414
      try:
415
        data = objects.ConfigObject.Load(f)
416
      except Exception, err:
417
        raise errors.ConfigurationError, err
418
    finally:
419
      f.close()
420
    if (not hasattr(data, 'cluster') or
421
        not hasattr(data.cluster, 'config_version')):
422
      raise errors.ConfigurationError, ("Incomplete configuration"
423
                                        " (missing cluster.config_version)")
424
    if data.cluster.config_version != constants.CONFIG_VERSION:
425
      raise errors.ConfigurationError, ("Cluster configuration version"
426
                                        " mismatch, got %s instead of %s" %
427
                                        (data.cluster.config_version,
428
                                         constants.CONFIG_VERSION))
429
    self._config_data = data
430
    self._config_time = st.st_mtime
431
    self._config_size = st.st_size
432
    self._config_inode = st.st_ino
433

    
434
  def _ReleaseLock(self):
435
    """xxxx
436
    """
437

    
438
  def _DistributeConfig(self):
439
    """Distribute the configuration to the other nodes.
440

441
    Currently, this only copies the configuration file. In the future,
442
    it could be used to encapsulate the 2/3-phase update mechanism.
443

444
    """
445
    if self._offline:
446
      return True
447
    bad = False
448
    nodelist = self.GetNodeList()
449
    myhostname = socket.gethostname()
450

    
451
    tgt_list = []
452
    for node in nodelist:
453
      nodeinfo = self.GetNodeInfo(node)
454
      if nodeinfo.name == myhostname:
455
        continue
456
      tgt_list.append(node)
457

    
458
    result = rpc.call_upload_file(tgt_list, self._cfg_file)
459
    for node in tgt_list:
460
      if not result[node]:
461
        logger.Error("copy of file %s to node %s failed" %
462
                     (self._cfg_file, node))
463
        bad = True
464
    return not bad
465

    
466
  def _WriteConfig(self, destination=None):
467
    """Write the configuration data to persistent storage.
468

469
    """
470
    if destination is None:
471
      destination = self._cfg_file
472
    self._BumpSerialNo()
473
    dir_name, file_name = os.path.split(destination)
474
    fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
475
    f = os.fdopen(fd, 'w')
476
    try:
477
      self._config_data.Dump(f)
478
      os.fsync(f.fileno())
479
    finally:
480
      f.close()
481
    # we don't need to do os.close(fd) as f.close() did it
482
    os.rename(name, destination)
483
    self._DistributeConfig()
484

    
485
  def InitConfig(self, node, primary_ip, secondary_ip,
                 hostkeypub, mac_prefix, vg_name, def_bridge):
    """Build and write the initial cluster configuration.

    It will contain the current node, which will also be the master
    node, and no instances or operating systems.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host; None means
                    that primary_ip is used for it as well
      hostkeypub: the public hostkey of this host
      mac_prefix: stored as the cluster's MAC prefix
      vg_name: stored as the cluster's volume group name
      def_bridge: stored as the cluster's default bridge

    """
    if secondary_ip is None:
      secondary_ip = primary_ip

    # AllocatePort hands out highest_used_port + 1, so starting one below
    # the first DRBD port makes the first allocation FIRST_DRBD_PORT
    initial_port = constants.FIRST_DRBD_PORT - 1
    cluster = objects.Cluster(config_version=constants.CONFIG_VERSION,
                              serial_no=1,
                              rsahostkeypub=hostkeypub,
                              highest_used_port=initial_port,
                              mac_prefix=mac_prefix,
                              volume_group_name=vg_name,
                              default_bridge=def_bridge,
                              tcpudp_port_pool=set())
    first_node = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: first_node},
                                           instances={},
                                           cluster=cluster)
    self._WriteConfig()
517

    
518
  def GetVGName(self):
519
    """Return the volume group name.
520

521
    """
522
    self._OpenConfig()
523
    self._ReleaseLock()
524
    return self._config_data.cluster.volume_group_name
525

    
526
  def GetDefBridge(self):
527
    """Return the default bridge.
528

529
    """
530
    self._OpenConfig()
531
    self._ReleaseLock()
532
    return self._config_data.cluster.default_bridge
533

    
534
  def GetMACPrefix(self):
535
    """Return the mac prefix.
536

537
    """
538
    self._OpenConfig()
539
    self._ReleaseLock()
540
    return self._config_data.cluster.mac_prefix