Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ 264bb3c5

History | View | Annotate | Download (15.9 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the ganeti cluster configuration.
25

26

27
The configuration data is stored on every node but is updated on the
28
master only. After each update, the master distributes the data to the
29
other nodes.
30

31
Currently the data storage format is pickle as yaml was initially not
32
available, then we used it but it was a memory-eating slow beast, so
33
we reverted to pickle using custom Unpicklers.
34

35
"""
36

    
37
import os
38
import socket
39
import tempfile
40
import random
41

    
42
from ganeti import errors
43
from ganeti import logger
44
from ganeti import utils
45
from ganeti import constants
46
from ganeti import rpc
47
from ganeti import objects
48

    
49

    
50
class ConfigWriter:
  """The interface to the cluster configuration.

  Read-only accessors call _OpenConfig() (which re-reads the file only
  when its stat data changed) followed by _ReleaseLock(). Mutators call
  _OpenConfig(), modify the in-memory data and then _WriteConfig(), which
  saves the file and distributes it to the other nodes.

  """

  def __init__(self, cfg_file=None, offline=False):
    """Initialise the writer; the configuration file is not accessed here.

    Args:
      cfg_file: path of the configuration file; None selects
                constants.CLUSTER_CONF_FILE
      offline: when true, _DistributeConfig() does not contact other nodes

    """
    self._config_data = None
    # stat data of the config file as of the last load; _OpenConfig
    # compares against these to decide whether a reload is needed
    self._config_time = None
    self._config_size = None
    self._config_inode = None
    self._offline = offline
    if cfg_file is None:
      cfg_file = constants.CLUSTER_CONF_FILE
    self._cfg_file = cfg_file

  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Check if the cluster is configured.

    Returns:
      True if the default cluster configuration file exists

    """
    return os.path.exists(constants.CLUSTER_CONF_FILE)

  def GenerateMAC(self):
    """Generate a MAC for an instance.

    The address is built from the cluster MAC prefix plus three random
    bytes and is checked against the MACs already present in the config.

    Returns:
      the new MAC address, as a string

    Raises:
      errors.ConfigurationError: no unused address was found in 64 tries

    """
    self._OpenConfig()
    self._ReleaseLock()
    prefix = self._config_data.cluster.mac_prefix
    # a set makes each of the (up to 64) membership tests constant-time
    used_macs = set(self._AllMACs())
    for _ in range(64):
      mac = "%s:%02x:%02x:%02x" % (prefix,
                                   random.randrange(0, 256),
                                   random.randrange(0, 256),
                                   random.randrange(0, 256))
      if mac not in used_macs:
        return mac
    raise errors.ConfigurationError("Can't generate unique MAC")

  def _AllMACs(self):
    """Return all MACs present in the config.

    Returns:
      a list with the MAC of every NIC of every instance

    """
    self._OpenConfig()
    self._ReleaseLock()

    return [nic.mac
            for instance in self._config_data.instances.values()
            for nic in instance.nics]

  def VerifyConfig(self):
    """Run consistency checks on the configuration.

    The checks are: every instance's primary and secondary nodes exist in
    the node list, and no MAC address is used by more than one NIC.

    Returns:
      a list of error messages (strings); empty if no problem was found

    """
    self._OpenConfig()
    self._ReleaseLock()

    result = []
    seen_macs = set()
    data = self._config_data
    for instance_name in data.instances:
      instance = data.instances[instance_name]
      if instance.primary_node not in data.nodes:
        result.append("Instance '%s' has invalid primary node '%s'" %
                      (instance_name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("Instance '%s' has invalid secondary node '%s'" %
                        (instance_name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("Instance '%s' has NIC %d mac %s duplicate" %
                        (instance_name, idx, nic.mac))
        else:
          seen_macs.add(nic.mac)
    return result

  def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates the children also, because
    this helps when only the top device is passed to the remote node.

    Args:
      disk: the disk object to update in place (its physical_id is set)
      node_name: the node for which the physical ID is computed

    Raises:
      errors.ConfigurationError: a drbd disk does not involve node_name,
        or one of its two nodes is missing from the configuration

    """
    if disk.children:
      for child in disk.children:
        self.SetDiskID(child, node_name)

    # nothing to derive from, and a physical ID is already present
    if disk.logical_id is None and disk.physical_id is not None:
      return

    if disk.dev_type != "drbd":
      disk.physical_id = disk.logical_id
      return

    pnode, snode, port = disk.logical_id
    if node_name not in (pnode, snode):
      raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                      node_name)
    pnode_info = self.GetNodeInfo(pnode)
    snode_info = self.GetNodeInfo(snode)
    if pnode_info is None or snode_info is None:
      raise errors.ConfigurationError("Can't find primary or secondary node"
                                      " for %s" % str(disk))
    # the physical ID is (local ip, port, remote ip, port) as seen from
    # node_name
    if pnode == node_name:
      disk.physical_id = (pnode_info.secondary_ip, port,
                          snode_info.secondary_ip, port)
    else: # it must be secondary, we tested above
      disk.physical_id = (snode_info.secondary_ip, port,
                          pnode_info.secondary_ip, port)

  def AddTcpIpPort(self, port):
    """Add a port to the pool of available ports and save the config.

    Args:
      port: the port number, an integer

    Raises:
      errors.ProgrammerError: port is not an integer

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._OpenConfig()
    self._config_data.tcpudp_port_pool.add(port)
    self._WriteConfig()

  def GetPortList(self):
    """Returns a copy of the current port list.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.tcpudp_port_pool.copy()

  def AllocatePort(self):
    """Allocate a port.

    A port from the pool is used if one is available; otherwise the port
    following the highest used one is taken. The port will be recorded in
    the cluster config.

    Returns:
      the allocated port number

    Raises:
      errors.ConfigurationError: the next port would reach
        constants.LAST_DRBD_PORT

    """
    self._OpenConfig()

    # If there are TCP/IP ports configured, we use them first.
    if self._config_data.tcpudp_port_pool:
      port = self._config_data.tcpudp_port_pool.pop()
    else:
      port = self._config_data.cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      self._config_data.cluster.highest_used_port = port

    self._WriteConfig()
    return port

  def GetHostKey(self):
    """Return the rsa hostkey from the config.

    Args: None

    Returns: rsa hostkey
    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.rsahostkeypub

  def AddInstance(self, instance):
    """Add an instance to the config.

    This should be used after creating a new instance.

    Args:
      instance: the instance object

    Raises:
      errors.ProgrammerError: instance is not an objects.Instance
    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    self._OpenConfig()
    self._config_data.instances[instance.name] = instance
    self._WriteConfig()

  def _SetInstanceStatus(self, instance_name, status):
    """Set the status of an instance and save the configuration.

    Args:
      instance_name: the name of the instance to change
      status: the new value of the instance's status attribute

    Raises:
      errors.ConfigurationError: the instance is not in the configuration

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    self._config_data.instances[instance_name].status = status
    self._WriteConfig()

  def MarkInstanceUp(self, instance_name):
    """Mark the instance status to up in the config.

    Raises:
      errors.ConfigurationError: the instance is not in the configuration

    """
    self._SetInstanceStatus(instance_name, "up")

  def RemoveInstance(self, instance_name):
    """Remove the instance from the configuration.

    Raises:
      errors.ConfigurationError: the instance is not in the configuration

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    del self._config_data.instances[instance_name]
    self._WriteConfig()

  def MarkInstanceDown(self, instance_name):
    """Mark the status of an instance to down in the configuration.

    Raises:
      errors.ConfigurationError: the instance is not in the configuration

    """
    self._SetInstanceStatus(instance_name, "down")

  def GetInstanceList(self):
    """Get the list of instances.

    Returns:
      array of instances, ex. ['instance2.example.com','instance1.example.com']
      these contains all the instances, also the ones in Admin_down state

    """
    self._OpenConfig()
    self._ReleaseLock()

    # list() guarantees a real list whatever keys() returns
    return list(self._config_data.instances.keys())

  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    Returns:
      the result of utils.MatchNameComponent on the known instance names

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    list(self._config_data.instances.keys()))

  def GetInstanceInfo(self, instance_name):
    """Returns informations about an instance.

    It takes the information from the configuration file. Other informations of
    an instance are taken from the live systems.

    Args:
      instance_name: name of the instance, ex instance1.example.com

    Returns:
      the instance object, or None if the instance is not known

    """
    self._OpenConfig()
    self._ReleaseLock()

    return self._config_data.instances.get(instance_name)

  def AddNode(self, node):
    """Add a node to the configuration.

    Args:
      node: an object.Node instance

    """
    self._OpenConfig()
    self._config_data.nodes[node.name] = node
    self._WriteConfig()

  def RemoveNode(self, node_name):
    """Remove a node from the configuration.

    Raises:
      errors.ConfigurationError: the node is not in the configuration

    """
    self._OpenConfig()
    if node_name not in self._config_data.nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del self._config_data.nodes[node_name]
    self._WriteConfig()

  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    Returns:
      the result of utils.MatchNameComponent on the known node names

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    list(self._config_data.nodes.keys()))

  def GetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    Args:
      node_name: the name of the node, as used as key in the node list

    Returns:
      the node object, or None if the node is not known

    """
    self._OpenConfig()
    self._ReleaseLock()

    return self._config_data.nodes.get(node_name)

  def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return list(self._config_data.nodes.keys())

  def DumpConfig(self):
    """Return the entire configuration of the cluster.

    The returned object is the live in-memory configuration, not a copy.
    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data

  def _BumpSerialNo(self):
    """Bump up the serial number of the config.

    """
    self._config_data.cluster.serial_no += 1

  def _OpenConfig(self):
    """Read the config data from disk.

    In case we already have configuration data and the config file has
    the same mtime, size and inode as when we read it, we skip the
    parsing of the file, since de-serialisation could be slow.

    Raises:
      errors.ConfigurationError: the file can't be stat'ed or loaded, or
        its cluster.config_version is missing or differs from
        constants.CONFIG_VERSION

    """
    # sys.exc_info() is used instead of an "except ..., err" clause so
    # that the exception handling is not tied to one interpreter syntax
    import sys

    try:
      st = os.stat(self._cfg_file)
    except OSError:
      err = sys.exc_info()[1]
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
    if (self._config_data is not None and
        self._config_time is not None and
        self._config_time == st.st_mtime and
        self._config_size == st.st_size and
        self._config_inode == st.st_ino):
      # data is current, so skip loading of config file
      return
    # NOTE(review): the module docstring says the storage format is
    # pickle, so this file must only be writable by trusted users
    f = open(self._cfg_file, 'r')
    try:
      try:
        data = objects.ConfigObject.Load(f)
      except Exception:
        raise errors.ConfigurationError(sys.exc_info()[1])
    finally:
      f.close()
    if (not hasattr(data, 'cluster') or
        not hasattr(data.cluster, 'config_version')):
      raise errors.ConfigurationError("Incomplete configuration"
                                      " (missing cluster.config_version)")
    if data.cluster.config_version != constants.CONFIG_VERSION:
      raise errors.ConfigurationError("Cluster configuration version"
                                      " mismatch, got %s instead of %s" %
                                      (data.cluster.config_version,
                                       constants.CONFIG_VERSION))
    self._config_data = data
    self._config_time = st.st_mtime
    self._config_size = st.st_size
    self._config_inode = st.st_ino

  def _ReleaseLock(self):
    """Placeholder; currently does nothing.

    Read-only accessors call this right after _OpenConfig(). No locking
    is implemented in this class.
    """

  def _DistributeConfig(self):
    """Distribute the configuration to the other nodes.

    Currently, this only copies the configuration file. In the future,
    it could be used to encapsulate the 2/3-phase update mechanism.

    Returns:
      True if the file was copied to all other nodes (or if the writer
      is in offline mode), False if at least one copy failed

    """
    if self._offline:
      return True

    myhostname = socket.gethostname()
    # every configured node except the one we are running on
    tgt_list = [node for node in self.GetNodeList()
                if self.GetNodeInfo(node).name != myhostname]

    result = rpc.call_upload_file(tgt_list, self._cfg_file)
    success = True
    for node in tgt_list:
      if not result[node]:
        logger.Error("copy of file %s to node %s failed" %
                     (self._cfg_file, node))
        success = False
    return success

  def _WriteConfig(self, destination=None):
    """Write the configuration data to persistent storage.

    The serial number is bumped, the data is dumped into a temporary file
    in the destination directory, synced to disk and renamed over the
    destination; afterwards the configuration is distributed.

    Args:
      destination: the file to write; None selects the configuration
                   file this writer was created for

    """
    if destination is None:
      destination = self._cfg_file
    self._BumpSerialNo()
    dir_name, file_name = os.path.split(destination)
    fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
    renamed = False
    try:
      f = os.fdopen(fd, 'w')
      try:
        self._config_data.Dump(f)
        # push Python's own buffer to the OS first, otherwise fsync has
        # nothing to sync for the data still held in the file object
        f.flush()
        os.fsync(f.fileno())
      finally:
        # we don't need to do os.close(fd) as f.close() does it
        f.close()
      os.rename(name, destination)
      renamed = True
    finally:
      if not renamed:
        # don't leave the temporary file behind on failure; this is
        # best-effort so that the original exception is not masked
        try:
          os.unlink(name)
        except OSError:
          pass
    self._DistributeConfig()

  def InitConfig(self, node, primary_ip, secondary_ip,
                 clustername, hostkeypub, mac_prefix, vg_name, def_bridge):
    """Create the initial cluster configuration.

    It will contain the current node, which will also be the master
    node, and no instances or operating systems.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host or None, in
                    which case the primary IP is used
      clustername: the name of the cluster
      hostkeypub: the public hostkey of this host
      mac_prefix: stored as the cluster's MAC prefix
      vg_name: stored as the cluster's volume group name
      def_bridge: stored as the cluster's default bridge

    """
    # AllocatePort hands out highest_used_port + 1, so start one below
    # the first usable port
    hu_port = constants.FIRST_DRBD_PORT - 1
    globalconfig = objects.Cluster(config_version=constants.CONFIG_VERSION,
                                   serial_no=1, master_node=node,
                                   name=clustername,
                                   rsahostkeypub=hostkeypub,
                                   highest_used_port=hu_port,
                                   mac_prefix=mac_prefix,
                                   volume_group_name=vg_name,
                                   default_bridge=def_bridge)
    if secondary_ip is None:
      secondary_ip = primary_ip
    nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: nodeconfig},
                                           instances={},
                                           cluster=globalconfig,
                                           tcpudp_port_pool=set())
    self._WriteConfig()

  def GetClusterName(self):
    """Return the cluster name.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.name

  def GetVGName(self):
    """Return the volume group name.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.volume_group_name

  def GetDefBridge(self):
    """Return the default bridge.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.default_bridge

  def GetMACPrefix(self):
    """Return the mac prefix.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.mac_prefix