Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ a8083063

History | View | Annotate | Download (15.6 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the ganeti cluster configuration.
25

26

27
The configuration data is stored on every node but is updated on the
28
master only. After each update, the master distributes the data to the
29
other nodes.
30

31
Currently the data storage format is pickle as yaml was initially not
32
available, then we used it but it was a memory-eating slow beast, so
33
we reverted to pickle using custom Unpicklers.
34

35
"""
36

    
37
import os
38
import socket
39
import tempfile
40
import random
41

    
42
from ganeti import errors
43
from ganeti import logger
44
from ganeti import utils
45
from ganeti import constants
46
from ganeti import rpc
47
from ganeti import objects
48

    
49

    
50
class ConfigWriter:
  """The interface to the cluster configuration.

  The configuration is kept in a single file (self._cfg_file). Read
  accessors call _OpenConfig(), which (re)loads the file only when its
  mtime differs from the one seen at the previous load. Mutators change
  the in-memory data and then call _WriteConfig(), which bumps the
  serial number, rewrites the file through a temporary file and a
  rename, and pushes it to the other nodes.

  """

  def __init__(self, cfg_file=None, offline=False):
    """Set up the writer; nothing is read from disk here.

    Args:
      cfg_file: path of the configuration file; when None,
                constants.CLUSTER_CONF_FILE is used
      offline: when true, _DistributeConfig() returns immediately, so
               written configuration is not copied to other nodes

    """
    self._config_data = None
    self._config_time = None
    self._offline = offline
    if cfg_file is None:
      self._cfg_file = constants.CLUSTER_CONF_FILE
    else:
      self._cfg_file = cfg_file

  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Check if the cluster is configured.

    Returns:
      True if constants.CLUSTER_CONF_FILE exists, False otherwise

    """
    return os.path.exists(constants.CLUSTER_CONF_FILE)

  def GenerateMAC(self):
    """Generate a MAC for an instance.

    The address is the cluster MAC prefix followed by three random
    bytes. Up to 64 candidates are tried; the first one not used by any
    NIC in the configuration is returned.

    Returns:
      the new MAC address, as a string

    Raises:
      errors.ConfigurationError: if all 64 candidates were already in use

    """
    self._OpenConfig()
    self._ReleaseLock()
    prefix = self._config_data.cluster.mac_prefix
    # a set gives constant-time membership tests inside the retry loop
    used_macs = set(self._AllMACs())
    for _ in range(64):
      byte1 = random.randrange(0, 256)
      byte2 = random.randrange(0, 256)
      byte3 = random.randrange(0, 256)
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
      if mac not in used_macs:
        return mac
    raise errors.ConfigurationError("Can't generate unique MAC")

  def _AllMACs(self):
    """Return all MACs present in the config.

    Returns:
      a list with the mac of every NIC of every instance (duplicates,
      if any, are kept)

    """
    self._OpenConfig()
    self._ReleaseLock()

    return [nic.mac
            for instance in self._config_data.instances.values()
            for nic in instance.nics]

  def VerifyConfig(self):
    """Run consistency checks over the configuration.

    The checks are: every instance's primary and secondary nodes must
    exist in the node list, and no MAC address may be used by more than
    one NIC.

    Returns:
      a list of human-readable error messages; empty if nothing was found

    """
    self._OpenConfig()
    self._ReleaseLock()

    result = []
    seen_macs = set()
    data = self._config_data
    for instance_name in data.instances:
      instance = data.instances[instance_name]
      if instance.primary_node not in data.nodes:
        result.append("Instance '%s' has invalid primary node '%s'" %
                      (instance_name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("Instance '%s' has invalid secondary node '%s'" %
                        (instance_name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("Instance '%s' has NIC %d mac %s duplicate" %
                        (instance_name, idx, nic.mac))
        else:
          seen_macs.add(nic.mac)
    return result

  def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote node.

    For a "drbd" device the logical_id is unpacked as (primary node,
    secondary node, port) and physical_id is set to (local secondary
    ip, port, remote secondary ip, port), where "local" is node_name.
    For any other device type physical_id is a copy of logical_id.

    Args:
      disk: the disk object to update in place
      node_name: the node for which the physical_id is computed

    Raises:
      errors.ConfigurationError: if node_name is neither of the drbd
        nodes, or if one of the drbd nodes is not in the configuration

    """
    if disk.children:
      for child in disk.children:
        self.SetDiskID(child, node_name)

    # nothing to derive from, and a physical_id is already present
    if disk.logical_id is None and disk.physical_id is not None:
      return

    if disk.dev_type != "drbd":
      disk.physical_id = disk.logical_id
      return

    pnode, snode, port = disk.logical_id
    if node_name not in (pnode, snode):
      raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                      node_name)
    pnode_info = self.GetNodeInfo(pnode)
    snode_info = self.GetNodeInfo(snode)
    if pnode_info is None or snode_info is None:
      raise errors.ConfigurationError("Can't find primary or secondary node"
                                      " for %s" % str(disk))
    if pnode == node_name:
      disk.physical_id = (pnode_info.secondary_ip, port,
                          snode_info.secondary_ip, port)
    else: # it must be secondary, we tested above
      disk.physical_id = (snode_info.secondary_ip, port,
                          pnode_info.secondary_ip, port)

  def AllocatePort(self):
    """Allocate a port.

    The port will be recorded in the cluster config.

    Returns:
      the newly allocated port (previous highest used port plus one)

    Raises:
      errors.ConfigurationError: if the new port would reach
        constants.LAST_DRBD_PORT

    """
    self._OpenConfig()

    cluster = self._config_data.cluster
    port = cluster.highest_used_port + 1
    # validate before storing, so that a refused allocation does not
    # leave a bumped counter in the cached in-memory configuration
    if port >= constants.LAST_DRBD_PORT:
      raise errors.ConfigurationError("The highest used port is greater"
                                      " than %s. Aborting." %
                                      constants.LAST_DRBD_PORT)
    cluster.highest_used_port = port

    self._WriteConfig()
    return port

  def GetHostKey(self):
    """Return the rsa hostkey from the config.

    Args: None

    Returns: rsa hostkey
    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.rsahostkeypub

  def AddInstance(self, instance):
    """Add an instance to the config.

    This should be used after creating a new instance. An existing
    entry with the same name is overwritten.

    Args:
      instance: the instance object

    Raises:
      errors.ProgrammerError: if instance is not an objects.Instance

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    self._OpenConfig()
    self._config_data.instances[instance.name] = instance
    self._WriteConfig()

  def _SetInstanceStatus(self, instance_name, status):
    """Store a new status for an instance and write the configuration.

    Args:
      instance_name: the name of the instance to update
      status: the value to store in the instance's status attribute

    Raises:
      errors.ConfigurationError: if the instance is not in the config

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    self._config_data.instances[instance_name].status = status
    self._WriteConfig()

  def MarkInstanceUp(self, instance_name):
    """Mark the instance status to up in the config.

    Raises:
      errors.ConfigurationError: if the instance is not in the config

    """
    self._SetInstanceStatus(instance_name, "up")

  def RemoveInstance(self, instance_name):
    """Remove the instance from the configuration.

    Raises:
      errors.ConfigurationError: if the instance is not in the config

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    del self._config_data.instances[instance_name]
    self._WriteConfig()

  def MarkInstanceDown(self, instance_name):
    """Mark the status of an instance to down in the configuration.

    Raises:
      errors.ConfigurationError: if the instance is not in the config

    """
    self._SetInstanceStatus(instance_name, "down")

  def GetInstanceList(self):
    """Get the list of instances.

    Returns:
      array of instances, ex. ['instance2.example.com','instance1.example.com']
      these contains all the instances, also the ones in Admin_down state

    """
    self._OpenConfig()
    self._ReleaseLock()

    return list(self._config_data.instances.keys())

  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    Returns:
      whatever utils.MatchNameComponent returns for short_name matched
      against the configured instance names

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    list(self._config_data.instances.keys()))

  def GetInstanceInfo(self, instance_name):
    """Returns informations about an instance.

    It takes the information from the configuration file. Other informations of
    an instance are taken from the live systems.

    Args:
      instance_name: name of the instance, ex instance1.example.com

    Returns:
      the instance object, or None if the instance is not in the config

    """
    self._OpenConfig()
    self._ReleaseLock()

    if instance_name not in self._config_data.instances:
      return None

    return self._config_data.instances[instance_name]

  def AddNode(self, node):
    """Add a node to the configuration.

    An existing entry with the same name is overwritten.

    Args:
      node: an object.Node instance

    """
    self._OpenConfig()
    self._config_data.nodes[node.name] = node
    self._WriteConfig()

  def RemoveNode(self, node_name):
    """Remove a node from the configuration.

    Raises:
      errors.ConfigurationError: if the node is not in the config

    """
    self._OpenConfig()
    if node_name not in self._config_data.nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del self._config_data.nodes[node_name]
    self._WriteConfig()

  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    Returns:
      whatever utils.MatchNameComponent returns for short_name matched
      against the configured node names

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    list(self._config_data.nodes.keys()))

  def GetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    Args:
      node_name: the name of the node, used as key in the node table

    Returns:
      the node object, or None if the node is not in the config

    """
    self._OpenConfig()
    self._ReleaseLock()

    if node_name not in self._config_data.nodes:
      return None

    return self._config_data.nodes[node_name]

  def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return list(self._config_data.nodes.keys())

  def DumpConfig(self):
    """Return the entire configuration of the cluster.

    The returned object is the live in-memory configuration, not a copy.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data

  def _BumpSerialNo(self):
    """Bump up the serial number of the config.

    """
    self._config_data.cluster.serial_no += 1

  def _OpenConfig(self):
    """Read the config data from disk.

    In case we already have configuration data and the config file has
    the same mtime as when we read it, we skip the parsing of the
    file, since de-serialisation could be slow.

    Raises:
      errors.ConfigurationError: if the file cannot be stat'ed, cannot
        be de-serialised, lacks cluster.config_version, or has a
        version different from constants.CONFIG_VERSION

    """
    try:
      st = os.stat(self._cfg_file)
    except OSError as err:
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
    if (self._config_data is not None and
        self._config_time is not None and
        self._config_time == st.st_mtime):
      # data is current, so skip loading of config file
      return
    # NOTE(review): the module docstring says the on-disk format is
    # pickle; the file must therefore never come from an untrusted
    # source - confirm what objects.ConfigObject.Load accepts.
    f = open(self._cfg_file, 'r')
    try:
      try:
        data = objects.ConfigObject.Load(f)
      except Exception as err:
        # any loader failure is reported as a configuration problem
        raise errors.ConfigurationError(err)
    finally:
      f.close()
    if (not hasattr(data, 'cluster') or
        not hasattr(data.cluster, 'config_version')):
      raise errors.ConfigurationError("Incomplete configuration"
                                      " (missing cluster.config_version)")
    if data.cluster.config_version != constants.CONFIG_VERSION:
      raise errors.ConfigurationError("Cluster configuration version"
                                      " mismatch, got %s instead of %s" %
                                      (data.cluster.config_version,
                                       constants.CONFIG_VERSION))
    # only replace the cached data once the new one has been validated
    self._config_data = data
    self._config_time = st.st_mtime

  def _ReleaseLock(self):
    """Placeholder called by the read accessors; currently does nothing.

    """

  def _DistributeConfig(self):
    """Distribute the configuration to the other nodes.

    Currently, this only copies the configuration file. In the future,
    it could be used to encapsulate the 2/3-phase update mechanism.

    Returns:
      True if we are offline or the upload succeeded on every target
      node, False if at least one upload failed (failures are logged)

    """
    if self._offline:
      return True
    nodelist = self.GetNodeList()
    myhostname = socket.gethostname()

    # every configured node except the one we are running on
    tgt_list = [node for node in nodelist
                if self.GetNodeInfo(node).name != myhostname]

    result = rpc.call_upload_file(tgt_list, self._cfg_file)
    bad = False
    for node in tgt_list:
      if not result[node]:
        logger.Error("copy of file %s to node %s failed" %
                     (self._cfg_file, node))
        bad = True
    return not bad

  def _WriteConfig(self, destination=None):
    """Write the configuration data to persistent storage.

    The serial number is bumped, the data is dumped into a temporary
    file created next to the destination, flushed and synced, and the
    temporary file is then renamed over the destination. Afterwards the
    configuration is distributed to the other nodes.

    Args:
      destination: the file to write to; when None, self._cfg_file

    """
    if destination is None:
      destination = self._cfg_file
    self._BumpSerialNo()
    dir_name, file_name = os.path.split(destination)
    fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
    renamed = False
    try:
      f = os.fdopen(fd, 'w')
      try:
        self._config_data.Dump(f)
        # push Python's own buffer to the OS first; fsync only covers
        # data that has already been handed to the kernel
        f.flush()
        os.fsync(f.fileno())
      finally:
        f.close()
      # we don't need to do os.close(fd) as f.close() did it
      os.rename(name, destination)
      renamed = True
    finally:
      if not renamed:
        # don't leave a half-written temporary file behind; this is
        # best-effort, the original error is the one worth reporting
        try:
          os.unlink(name)
        except OSError:
          pass
    self._DistributeConfig()

  def InitConfig(self, node, primary_ip, secondary_ip,
                 clustername, hostkeypub, mac_prefix, vg_name, def_bridge):
    """Create the initial cluster configuration.

    It will contain the current node, which will also be the master
    node, and no instances or operating systems.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host or None, in
                    which case the primary IP is used
      clustername: the name of the cluster
      hostkeypub: the public hostkey of this host
      mac_prefix: stored as the cluster's mac_prefix
      vg_name: stored as the cluster's volume_group_name
      def_bridge: stored as the cluster's default_bridge

    """
    # AllocatePort() pre-increments, so start just below the first port
    hu_port = constants.FIRST_DRBD_PORT - 1
    globalconfig = objects.Cluster(config_version=constants.CONFIG_VERSION,
                                   serial_no=1, master_node=node,
                                   name=clustername,
                                   rsahostkeypub=hostkeypub,
                                   highest_used_port=hu_port,
                                   mac_prefix=mac_prefix,
                                   volume_group_name=vg_name,
                                   default_bridge=def_bridge)
    if secondary_ip is None:
      secondary_ip = primary_ip
    nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: nodeconfig},
                                           instances={},
                                           cluster=globalconfig)
    self._WriteConfig()

  def GetClusterName(self):
    """Return the cluster name.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.name

  def GetVGName(self):
    """Return the volume group name.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.volume_group_name

  def GetDefBridge(self):
    """Return the default bridge.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.default_bridge

  def GetMACPrefix(self):
    """Return the mac prefix.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.mac_prefix

  def GetMaster(self):
    """Get the name of the master.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.master_node

  def SetMaster(self, master_node):
    """Change the master of the cluster.

    As with all changes, the configuration data will be distributed to
    all nodes.

    This function is used for manual master failover.

    Args:
      master_node: the value to store as the cluster's master_node

    """
    self._OpenConfig()
    self._config_data.cluster.master_node = master_node
    self._WriteConfig()
    self._ReleaseLock()