Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ a1f445d3

History | View | Annotate | Download (19.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the Ganeti cluster configuration.
25

26
The configuration data is stored on every node but is updated on the master
27
only. After each update, the master distributes the data to the other nodes.
28

29
Currently, the data storage format is JSON. YAML was slow and consuming too
30
much memory.
31

32
"""
33

    
34
import os
35
import tempfile
36
import random
37

    
38
from ganeti import errors
39
from ganeti import logger
40
from ganeti import utils
41
from ganeti import constants
42
from ganeti import rpc
43
from ganeti import objects
44

    
45

    
46
class ConfigWriter:
  """The interface to the cluster configuration.

  Read accessors call _OpenConfig(), which (re)loads the configuration
  file when it differs from the cached copy. Mutators change the
  in-memory data and then call _WriteConfig(), which rewrites the file
  and redistributes it to the other nodes.

  """
  def __init__(self, cfg_file=None, offline=False):
    """Create a configuration handle; no file is read yet.

    Args:
      cfg_file: path of the configuration file; when None,
                constants.CLUSTER_CONF_FILE is used
      offline: when true, _DistributeConfig() skips copying the file
               to the other nodes

    """
    self.write_count = 0
    # cached configuration plus the stat() fingerprint of the file it
    # was loaded from (see _OpenConfig)
    self._config_data = None
    self._config_time = None
    self._config_size = None
    self._config_inode = None
    self._offline = offline
    if cfg_file is None:
      self._cfg_file = constants.CLUSTER_CONF_FILE
    else:
      self._cfg_file = cfg_file
    # IDs handed out by GenerateUniqueID() during this object's lifetime
    self._temporary_ids = set()
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = utils.HostInfo().name

  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Check if the cluster is configured.

    Returns: True if constants.CLUSTER_CONF_FILE exists (a custom
    cfg_file passed to the constructor is not considered here)

    """
    return os.path.exists(constants.CLUSTER_CONF_FILE)

  def GenerateMAC(self):
    """Generate a MAC address for an instance NIC.

    Three random bytes are appended to the cluster MAC prefix; a
    candidate already used by any NIC in the configuration is rejected.

    Returns: the MAC as a string, "<prefix>:xx:xx:xx"

    Raises:
      errors.ConfigurationError: no unused MAC found in 64 attempts

    """
    self._OpenConfig()
    self._ReleaseLock()
    prefix = self._config_data.cluster.mac_prefix
    # a set keeps each membership test cheap regardless of cluster size
    used_macs = set(self._AllMACs())
    for _ in range(64):
      byte1 = random.randrange(0, 256)
      byte2 = random.randrange(0, 256)
      byte3 = random.randrange(0, 256)
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
      if mac not in used_macs:
        return mac
    raise errors.ConfigurationError("Can't generate unique MAC")

  def _ComputeAllLVs(self):
    """Compute the set of all LV names used by the instances.

    Returns: a set with every entry of every list returned by each
    instance's MapLVsByNode()

    """
    self._OpenConfig()
    self._ReleaseLock()
    lvnames = set()
    for instance in self._config_data.instances.values():
      for lv_list in instance.MapLVsByNode().values():
        lvnames.update(lv_list)
    return lvnames

  def GenerateUniqueID(self, exceptions=None):
    """Generate a unique ID.

    The ID is checked against the IDs previously returned by this
    method, all LV names, all instance names and all node names.

    Args:
      - exceptions: an iterable with some other names which should be
                    checked for uniqueness (used for example when you
                    want to get more than one id at one time without
                    adding each one in turn to the config file)

    Returns: the unique id as a string

    Raises:
      errors.ConfigurationError: no usable ID obtained in 64 attempts

    """
    existing = set()
    existing.update(self._temporary_ids)
    # _ComputeAllLVs() also makes sure the configuration is loaded
    # before _config_data is accessed directly below
    existing.update(self._ComputeAllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    if exceptions is not None:
      existing.update(exceptions)
    unique_id = None
    # bounded retry: the attempt counter must advance on every pass,
    # otherwise a colliding (or None) UUID would loop forever
    for _ in range(64):
      unique_id = utils.GetUUID()
      if unique_id is not None and unique_id not in existing:
        self._temporary_ids.add(unique_id)
        return unique_id
    raise errors.ConfigurationError("Not able to generate a unique ID"
                                    " (last tried ID: %s)" % unique_id)

  def _AllMACs(self):
    """Return all MACs present in the config.

    Returns: a list with the mac of every NIC of every instance
    (duplicates, if any, are kept)

    """
    self._OpenConfig()
    self._ReleaseLock()

    return [nic.mac
            for instance in self._config_data.instances.values()
            for nic in instance.nics]

  def VerifyConfig(self):
    """Run consistency checks over the configuration.

    Checked: every instance's primary and secondary nodes exist in the
    node list, and no MAC address is used by more than one NIC.

    Returns: a list of human-readable problem descriptions (empty when
    nothing was found)

    """
    self._OpenConfig()
    self._ReleaseLock()

    result = []
    # set instead of list: avoids a linear scan per NIC
    seen_macs = set()
    data = self._config_data
    for instance_name in data.instances:
      instance = data.instances[instance_name]
      if instance.primary_node not in data.nodes:
        result.append("Instance '%s' has invalid primary node '%s'" %
                      (instance_name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("Instance '%s' has invalid secondary node '%s'" %
                        (instance_name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("Instance '%s' has NIC %d mac %s duplicate" %
                        (instance_name, idx, nic.mac))
        else:
          seen_macs.add(nic.mac)
    return result

  def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration; for
    all other device types the physical ID is a copy of the logical ID.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote node.

    Args:
      disk: the disk object, updated in place (physical_id)
      node_name: the node for which the physical ID is computed

    Raises:
      errors.ConfigurationError: node_name is not one of the DRBD
        device's two nodes, or one of those nodes is not in the config

    """
    if disk.children:
      for child in disk.children:
        self.SetDiskID(child, node_name)

    # nothing to derive the physical ID from, and one is already set
    if disk.logical_id is None and disk.physical_id is not None:
      return
    if disk.dev_type not in constants.LDS_DRBD:
      disk.physical_id = disk.logical_id
      return

    pnode, snode, port = disk.logical_id
    if node_name not in (pnode, snode):
      raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                      node_name)
    pnode_info = self.GetNodeInfo(pnode)
    snode_info = self.GetNodeInfo(snode)
    if pnode_info is None or snode_info is None:
      raise errors.ConfigurationError("Can't find primary or secondary node"
                                      " for %s" % str(disk))
    # the tuple is (own ip, port, peer ip, port) as seen from node_name
    if pnode == node_name:
      disk.physical_id = (pnode_info.secondary_ip, port,
                          snode_info.secondary_ip, port)
    else: # it must be secondary, we tested above
      disk.physical_id = (snode_info.secondary_ip, port,
                          pnode_info.secondary_ip, port)

  def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool and saves the config.

    Raises:
      errors.ProgrammerError: port is not an int

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._OpenConfig()
    self._config_data.cluster.tcpudp_port_pool.add(port)
    self._WriteConfig()

  def GetPortList(self):
    """Returns a copy of the current port pool.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.tcpudp_port_pool.copy()

  def AllocatePort(self):
    """Allocate a port and save the config.

    The port will be taken from the available port pool or, when the
    pool is empty, from the default port range (and in this case we
    increase highest_used_port).

    Returns: the allocated port

    Raises:
      errors.ConfigurationError: the next port of the default range
        would be constants.LAST_DRBD_PORT or higher

    """
    self._OpenConfig()
    cluster = self._config_data.cluster

    # If there are TCP/IP ports configured, we use them first.
    if cluster.tcpudp_port_pool:
      port = cluster.tcpudp_port_pool.pop()
    else:
      port = cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      cluster.highest_used_port = port

    self._WriteConfig()
    return port

  def GetHostKey(self):
    """Return the rsa hostkey from the config.

    Args: None

    Returns: rsa hostkey (cluster.rsahostkeypub)
    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.rsahostkeypub

  def AddInstance(self, instance):
    """Add an instance to the config and save it.

    This should be used after creating a new instance. An existing
    entry with the same name is replaced.

    Args:
      instance: the instance object

    Raises:
      errors.ProgrammerError: instance is not an objects.Instance
    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
      all_lvs = instance.MapLVsByNode()
      logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, all_lvs))

    self._OpenConfig()
    self._config_data.instances[instance.name] = instance
    self._WriteConfig()

  def _SetInstanceStatus(self, instance_name, status):
    """Set the status field of an instance and save the config.

    Raises:
      errors.ConfigurationError: the instance is not in the config

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    self._config_data.instances[instance_name].status = status
    self._WriteConfig()

  def MarkInstanceUp(self, instance_name):
    """Mark the instance status to up in the config.

    Raises:
      errors.ConfigurationError: the instance is not in the config

    """
    self._SetInstanceStatus(instance_name, "up")

  def RemoveInstance(self, instance_name):
    """Remove the instance from the configuration and save it.

    Raises:
      errors.ConfigurationError: the instance is not in the config

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
    del self._config_data.instances[instance_name]
    self._WriteConfig()

  def RenameInstance(self, old_name, new_name):
    """Rename an instance.

    This needs to be done in ConfigWriter and not by RemoveInstance
    combined with AddInstance as only we can guarantee an atomic
    rename.

    No check is made that new_name is unused; an existing entry under
    that name is replaced.

    Raises:
      errors.ConfigurationError: old_name is not in the config

    """
    self._OpenConfig()
    instances = self._config_data.instances
    if old_name not in instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
    inst = instances[old_name]
    del instances[old_name]
    inst.name = new_name
    instances[inst.name] = inst
    self._WriteConfig()

  def MarkInstanceDown(self, instance_name):
    """Mark the status of an instance to down in the configuration.

    Raises:
      errors.ConfigurationError: the instance is not in the config

    """
    self._SetInstanceStatus(instance_name, "down")

  def GetInstanceList(self):
    """Get the list of instances.

    Returns:
      array of instances, ex. ['instance2.example.com','instance1.example.com']
      these contains all the instances, also the ones in Admin_down state

    """
    self._OpenConfig()
    self._ReleaseLock()

    # list() so callers get a real list on any interpreter version
    return list(self._config_data.instances.keys())

  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    Returns: the result of utils.MatchNameComponent() for short_name
    against the names of all configured instances

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    list(self._config_data.instances.keys()))

  def GetInstanceInfo(self, instance_name):
    """Returns information about an instance.

    It takes the information from the configuration file. Other
    information about an instance is taken from the live systems.

    Args:
      instance_name: name of the instance, ex instance1.example.com

    Returns:
      the instance object (the one held in the configuration, not a
      copy), or None if the instance is unknown

    """
    self._OpenConfig()
    self._ReleaseLock()

    if instance_name not in self._config_data.instances:
      return None

    return self._config_data.instances[instance_name]

  def AddNode(self, node):
    """Add a node to the configuration and save it.

    An existing entry with the same name is replaced.

    Args:
      node: an object.Node instance

    """
    self._OpenConfig()
    self._config_data.nodes[node.name] = node
    self._WriteConfig()

  def RemoveNode(self, node_name):
    """Remove a node from the configuration and save it.

    Raises:
      errors.ConfigurationError: the node is not in the config

    """
    self._OpenConfig()
    if node_name not in self._config_data.nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del self._config_data.nodes[node_name]
    self._WriteConfig()

  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    Returns: the result of utils.MatchNameComponent() for short_name
    against the names of all configured nodes

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    list(self._config_data.nodes.keys()))

  def GetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    Args:
      node_name: the name of the node

    Returns: the node object (the one held in the configuration, not a
    copy), or None if the node is unknown

    """
    self._OpenConfig()
    self._ReleaseLock()

    if node_name not in self._config_data.nodes:
      return None

    return self._config_data.nodes[node_name]

  def GetNodeList(self):
    """Return the list of names of the nodes in the configuration.

    """
    self._OpenConfig()
    self._ReleaseLock()
    # list() so callers get a real list on any interpreter version
    return list(self._config_data.nodes.keys())

  def DumpConfig(self):
    """Return the entire configuration of the cluster.

    The returned object is the cached configuration itself, not a copy.
    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data

  def _BumpSerialNo(self):
    """Bump up the serial number of the config.

    """
    self._config_data.cluster.serial_no += 1

  def _OpenConfig(self):
    """Read the config data from disk.

    In case we already have configuration data and the config file has
    the same mtime, size and inode as when we read (or wrote) it, we
    skip the parsing of the file, since de-serialisation could be slow.

    Raises:
      errors.ConfigurationError: the file cannot be stat()ed or loaded,
        lacks cluster.config_version, or has a version different from
        constants.CONFIG_VERSION

    """
    try:
      st = os.stat(self._cfg_file)
    except OSError as err:
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
    if (self._config_data is not None and
        self._config_time is not None and
        self._config_time == st.st_mtime and
        self._config_size == st.st_size and
        self._config_inode == st.st_ino):
      # data is current, so skip loading of config file
      return
    f = open(self._cfg_file, 'r')
    try:
      try:
        data = objects.ConfigData.Load(f)
      except Exception as err:
        # any loader failure is reported as a configuration problem
        raise errors.ConfigurationError(err)
    finally:
      f.close()
    if (not hasattr(data, 'cluster') or
        not hasattr(data.cluster, 'config_version')):
      raise errors.ConfigurationError("Incomplete configuration"
                                      " (missing cluster.config_version)")
    if data.cluster.config_version != constants.CONFIG_VERSION:
      raise errors.ConfigurationError("Cluster configuration version"
                                      " mismatch, got %s instead of %s" %
                                      (data.cluster.config_version,
                                       constants.CONFIG_VERSION))
    # only replace the cache once the new data passed all checks
    self._config_data = data
    self._config_time = st.st_mtime
    self._config_size = st.st_size
    self._config_inode = st.st_ino

  def _ReleaseLock(self):
    """Do nothing.

    Called by the read-only accessors right after _OpenConfig().
    NOTE(review): presumably a placeholder for releasing a
    configuration lock; no locking is implemented here - confirm.
    """

  def _DistributeConfig(self):
    """Distribute the configuration to the other nodes.

    Currently, this only copies the configuration file. In the future,
    it could be used to encapsulate the 2/3-phase update mechanism.

    Returns: True if running offline or if the upload succeeded on all
    nodes other than ourselves, False otherwise (each failure is also
    logged)

    """
    if self._offline:
      return True
    myhostname = self._my_hostname

    # every configured node except ourselves
    tgt_list = [node for node in self.GetNodeList()
                if self.GetNodeInfo(node).name != myhostname]

    result = rpc.call_upload_file(tgt_list, self._cfg_file)
    bad = False
    for node in tgt_list:
      if not result[node]:
        logger.Error("copy of file %s to node %s failed" %
                     (self._cfg_file, node))
        bad = True
    return not bad

  def _WriteConfig(self, destination=None):
    """Write the configuration data to persistent storage.

    The serial number is bumped, the data is dumped to a temporary file
    in the destination directory which is then renamed over the
    destination, the stat() cache used by _OpenConfig() is refreshed
    and the file is redistributed via _DistributeConfig().

    Args:
      destination: target path; when None, the configuration file this
                   object was created for is used

    Raises:
      errors.ConfigurationError: the written file cannot be stat()ed

    """
    if destination is None:
      destination = self._cfg_file
    self._BumpSerialNo()
    dir_name, file_name = os.path.split(destination)
    fd, tmp_name = tempfile.mkstemp('.newconfig', file_name, dir_name)
    renamed = False
    try:
      f = os.fdopen(fd, 'w')
      try:
        self._config_data.Dump(f)
        # push the buffered data to the OS first, otherwise fsync()
        # has nothing (or only part of the data) to sync
        f.flush()
        os.fsync(f.fileno())
      finally:
        # we don't need to do os.close(fd) as f.close() does it
        f.close()
      os.rename(tmp_name, destination)
      renamed = True
    finally:
      if not renamed:
        # don't leave the temporary file behind on failure; cleanup is
        # best-effort so that the original error is the one reported
        try:
          os.unlink(tmp_name)
        except OSError:
          pass
    self.write_count += 1
    # re-set our cache as not to re-read the config file
    try:
      st = os.stat(destination)
    except OSError as err:
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
    self._config_time = st.st_mtime
    self._config_size = st.st_size
    self._config_inode = st.st_ino
    # and redistribute the config file
    self._DistributeConfig()

  def InitConfig(self, node, primary_ip, secondary_ip,
                 hostkeypub, mac_prefix, vg_name, def_bridge):
    """Create the initial cluster configuration and save it.

    It will contain the current node, which will also be the master
    node, and no instances or operating systems.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host or None (in
                    which case primary_ip is used)
      hostkeypub: the public hostkey of this host
      mac_prefix: stored as the cluster's mac_prefix
      vg_name: stored as the cluster's volume_group_name
      def_bridge: stored as the cluster's default_bridge

    """
    # AllocatePort() hands out highest_used_port + 1, so start one
    # below the first port of the range
    hu_port = constants.FIRST_DRBD_PORT - 1
    globalconfig = objects.Cluster(config_version=constants.CONFIG_VERSION,
                                   serial_no=1,
                                   rsahostkeypub=hostkeypub,
                                   highest_used_port=hu_port,
                                   mac_prefix=mac_prefix,
                                   volume_group_name=vg_name,
                                   default_bridge=def_bridge,
                                   tcpudp_port_pool=set())
    if secondary_ip is None:
      secondary_ip = primary_ip
    nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: nodeconfig},
                                           instances={},
                                           cluster=globalconfig)
    self._WriteConfig()

  def GetVGName(self):
    """Return the volume group name.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.volume_group_name

  def GetDefBridge(self):
    """Return the default bridge.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.default_bridge

  def GetMACPrefix(self):
    """Return the mac prefix.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.mac_prefix

  def GetClusterInfo(self):
    """Returns information about the cluster.

    Returns:
      the cluster object (the one held in the configuration, not a copy)

    """
    self._OpenConfig()
    self._ReleaseLock()

    return self._config_data.cluster

  def Update(self, target):
    """Notify function to be called after updates.

    This function must be called when an object (as returned by
    GetInstanceInfo, GetNodeInfo, GetClusterInfo) has been updated and
    the caller wants the modifications saved to the backing store. Note
    that all modified objects will be saved, but the target argument
    is the one the caller wants to ensure that it's saved.

    Raises:
      errors.ProgrammerError: no configuration has been loaded yet, or
        target is not a Cluster, Node or Instance object
      errors.ConfigurationError: target is not part of the currently
        cached configuration

    """
    if self._config_data is None:
      raise errors.ProgrammerError("Configuration file not read,"
                                   " cannot save.")
    if isinstance(target, objects.Cluster):
      test = target == self._config_data.cluster
    elif isinstance(target, objects.Node):
      test = target in self._config_data.nodes.values()
    elif isinstance(target, objects.Instance):
      test = target in self._config_data.instances.values()
    else:
      raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                   " ConfigWriter.Update" % type(target))
    if not test:
      raise errors.ConfigurationError("Configuration updated since object"
                                      " has been read or unknown object")
    self._WriteConfig()