Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ 42a999d1

History | View | Annotate | Download (20.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the Ganeti cluster configuration.
25

26
The configuration data is stored on every node but is updated on the master
27
only. After each update, the master distributes the data to the other nodes.
28

29
Currently, the data storage format is JSON. YAML was slow and consuming too
30
much memory.
31

32
"""
33

    
34
import os
35
import tempfile
36
import random
37
import re
38

    
39
from ganeti import errors
40
from ganeti import logger
41
from ganeti import utils
42
from ganeti import constants
43
from ganeti import rpc
44
from ganeti import objects
45
from ganeti import serializer
46
from ganeti import ssconf
47

    
48

    
49
def ValidateConfig():
  """Check that the stored configuration version is the supported one.

  The version is obtained from a fresh ssconf.SimpleStore and compared
  with constants.CONFIG_VERSION.

  Raises:
    errors.ConfigurationError: if the two versions differ

  """
  sstore = ssconf.SimpleStore()
  # Query the version only once, so that the value shown in the error
  # message is exactly the one that failed the comparison.
  found_version = sstore.GetConfigVersion()
  if found_version != constants.CONFIG_VERSION:
    raise errors.ConfigurationError("Cluster configuration version"
                                    " mismatch, got %s instead of %s" %
                                    (found_version,
                                     constants.CONFIG_VERSION))
57

    
58

    
59
class ConfigWriter:
60
  """The interface to the cluster configuration.
61

62
  """
63
  def __init__(self, cfg_file=None, offline=False):
    """Set up a configuration handle without reading anything yet.

    Args:
      cfg_file: path of the configuration file; when None,
                constants.CLUSTER_CONF_FILE is used
      offline: stored as self._offline; when true, _DistributeConfig
               returns without contacting other nodes

    """
    if cfg_file is None:
      cfg_file = constants.CLUSTER_CONF_FILE
    self._cfg_file = cfg_file
    self._offline = offline
    self.write_count = 0
    # in-memory configuration plus the stat() signature (mtime, size,
    # inode) used by _OpenConfig to decide whether a reload is needed
    self._config_data = None
    self._config_time = None
    self._config_size = None
    self._config_inode = None
    self._temporary_ids = set()
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = utils.HostInfo().name
80

    
81
  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Tell whether the cluster configuration file exists on this host.

    Returns:
      True if constants.CLUSTER_CONF_FILE exists, False otherwise

    """
    conf_path = constants.CLUSTER_CONF_FILE
    return os.path.exists(conf_path)
88

    
89
  def GenerateMAC(self):
90
    """Generate a MAC for an instance.
91

92
    This should check the current instances for duplicates.
93

94
    """
95
    self._OpenConfig()
96
    prefix = self._config_data.cluster.mac_prefix
97
    all_macs = self._AllMACs()
98
    retries = 64
99
    while retries > 0:
100
      byte1 = random.randrange(0, 256)
101
      byte2 = random.randrange(0, 256)
102
      byte3 = random.randrange(0, 256)
103
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
104
      if mac not in all_macs:
105
        break
106
      retries -= 1
107
    else:
108
      raise errors.ConfigurationError("Can't generate unique MAC")
109
    return mac
110

    
111
  def IsMacInUse(self, mac):
112
    """Predicate: check if the specified MAC is in use in the Ganeti cluster.
113

114
    This only checks instances managed by this cluster, it does not
115
    check for potential collisions elsewhere.
116

117
    """
118
    self._OpenConfig()
119
    all_macs = self._AllMACs()
120
    return mac in all_macs
121

    
122
  def _ComputeAllLVs(self):
123
    """Compute the list of all LVs.
124

125
    """
126
    self._OpenConfig()
127
    lvnames = set()
128
    for instance in self._config_data.instances.values():
129
      node_data = instance.MapLVsByNode()
130
      for lv_list in node_data.values():
131
        lvnames.update(lv_list)
132
    return lvnames
133

    
134
  def GenerateUniqueID(self, exceptions=None):
    """Generate an unique disk name.

    This checks the current node, instances and disk names for
    duplicates, as well as the IDs handed out earlier by this object.

    Args:
      - exceptions: a list with some other names which should be checked
                    for uniqueness (used for example when you want to get
                    more than one id at one time without adding each one in
                    turn to the config file)

    Returns: the unique id as a string

    Raises:
      errors.ConfigurationError: if no unused ID was found in 64 attempts

    """
    existing = set()
    existing.update(self._temporary_ids)
    # _ComputeAllLVs also loads the configuration, which the accesses to
    # self._config_data below rely on
    existing.update(self._ComputeAllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    if exceptions is not None:
      existing.update(exceptions)
    unique_id = None
    # a bounded for loop, so that the retry limit is actually enforced
    # (the previous while loop never decremented its counter)
    for attempt in range(64):
      unique_id = utils.NewUUID()
      if unique_id is not None and unique_id not in existing:
        break
    else:
      raise errors.ConfigurationError("Not able generate an unique ID"
                                      " (last tried ID: %s)" % unique_id)
    # remember the ID so that later calls do not hand it out again
    self._temporary_ids.add(unique_id)
    return unique_id
166

    
167
  def _AllMACs(self):
168
    """Return all MACs present in the config.
169

170
    """
171
    self._OpenConfig()
172

    
173
    result = []
174
    for instance in self._config_data.instances.values():
175
      for nic in instance.nics:
176
        result.append(nic.mac)
177

    
178
    return result
179

    
180
  def VerifyConfig(self):
181
    """Stub verify function.
182
    """
183
    self._OpenConfig()
184

    
185
    result = []
186
    seen_macs = []
187
    data = self._config_data
188
    for instance_name in data.instances:
189
      instance = data.instances[instance_name]
190
      if instance.primary_node not in data.nodes:
191
        result.append("instance '%s' has invalid primary node '%s'" %
192
                      (instance_name, instance.primary_node))
193
      for snode in instance.secondary_nodes:
194
        if snode not in data.nodes:
195
          result.append("instance '%s' has invalid secondary node '%s'" %
196
                        (instance_name, snode))
197
      for idx, nic in enumerate(instance.nics):
198
        if nic.mac in seen_macs:
199
          result.append("instance '%s' has NIC %d mac %s duplicate" %
200
                        (instance_name, idx, nic.mac))
201
        else:
202
          seen_macs.append(nic.mac)
203
    return result
204

    
205
  def SetDiskID(self, disk, node_name):
206
    """Convert the unique ID to the ID needed on the target nodes.
207

208
    This is used only for drbd, which needs ip/port configuration.
209

210
    The routine descends down and updates its children also, because
211
    this helps when the only the top device is passed to the remote
212
    node.
213

214
    """
215
    if disk.children:
216
      for child in disk.children:
217
        self.SetDiskID(child, node_name)
218

    
219
    if disk.logical_id is None and disk.physical_id is not None:
220
      return
221
    if disk.dev_type in constants.LDS_DRBD:
222
      pnode, snode, port = disk.logical_id
223
      if node_name not in (pnode, snode):
224
        raise errors.ConfigurationError("DRBD device not knowing node %s" %
225
                                        node_name)
226
      pnode_info = self.GetNodeInfo(pnode)
227
      snode_info = self.GetNodeInfo(snode)
228
      if pnode_info is None or snode_info is None:
229
        raise errors.ConfigurationError("Can't find primary or secondary node"
230
                                        " for %s" % str(disk))
231
      if pnode == node_name:
232
        disk.physical_id = (pnode_info.secondary_ip, port,
233
                            snode_info.secondary_ip, port)
234
      else: # it must be secondary, we tested above
235
        disk.physical_id = (snode_info.secondary_ip, port,
236
                            pnode_info.secondary_ip, port)
237
    else:
238
      disk.physical_id = disk.logical_id
239
    return
240

    
241
  def AddTcpUdpPort(self, port):
242
    """Adds a new port to the available port pool.
243

244
    """
245
    if not isinstance(port, int):
246
      raise errors.ProgrammerError("Invalid type passed for port")
247

    
248
    self._OpenConfig()
249
    self._config_data.cluster.tcpudp_port_pool.add(port)
250
    self._WriteConfig()
251

    
252
  def GetPortList(self):
253
    """Returns a copy of the current port list.
254

255
    """
256
    self._OpenConfig()
257
    return self._config_data.cluster.tcpudp_port_pool.copy()
258

    
259
  def AllocatePort(self):
260
    """Allocate a port.
261

262
    The port will be taken from the available port pool or from the
263
    default port range (and in this case we increase
264
    highest_used_port).
265

266
    """
267
    self._OpenConfig()
268

    
269
    # If there are TCP/IP ports configured, we use them first.
270
    if self._config_data.cluster.tcpudp_port_pool:
271
      port = self._config_data.cluster.tcpudp_port_pool.pop()
272
    else:
273
      port = self._config_data.cluster.highest_used_port + 1
274
      if port >= constants.LAST_DRBD_PORT:
275
        raise errors.ConfigurationError("The highest used port is greater"
276
                                        " than %s. Aborting." %
277
                                        constants.LAST_DRBD_PORT)
278
      self._config_data.cluster.highest_used_port = port
279

    
280
    self._WriteConfig()
281
    return port
282

    
283
  def GetHostKey(self):
284
    """Return the rsa hostkey from the config.
285

286
    Args: None
287

288
    Returns: rsa hostkey
289
    """
290
    self._OpenConfig()
291
    return self._config_data.cluster.rsahostkeypub
292

    
293
  def AddInstance(self, instance):
    """Add an instance to the config and save it.

    This should be used after creating a new instance. The instance is
    stored under its name, replacing any entry with the same name.

    Args:
      instance: the instance object (an objects.Instance)

    Raises:
      errors.ProgrammerError: if instance is not an objects.Instance

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    # log the disk layout of anything that actually has disks
    if instance.disk_template != constants.DT_DISKLESS:
      lv_map = instance.MapLVsByNode()
      logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, lv_map))

    self._OpenConfig()
    known_instances = self._config_data.instances
    known_instances[instance.name] = instance
    self._WriteConfig()
311

    
312
  def _SetInstanceStatus(self, instance_name, status):
313
    """Set the instance's status to a given value.
314

315
    """
316
    if status not in ("up", "down"):
317
      raise errors.ProgrammerError("Invalid status '%s' passed to"
318
                                   " ConfigWriter._SetInstanceStatus()" %
319
                                   status)
320
    self._OpenConfig()
321

    
322
    if instance_name not in self._config_data.instances:
323
      raise errors.ConfigurationError("Unknown instance '%s'" %
324
                                      instance_name)
325
    instance = self._config_data.instances[instance_name]
326
    if instance.status != status:
327
      instance.status = status
328
      self._WriteConfig()
329

    
330
  def MarkInstanceUp(self, instance_name):
331
    """Mark the instance status to up in the config.
332

333
    """
334
    self._SetInstanceStatus(instance_name, "up")
335

    
336
  def RemoveInstance(self, instance_name):
337
    """Remove the instance from the configuration.
338

339
    """
340
    self._OpenConfig()
341

    
342
    if instance_name not in self._config_data.instances:
343
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
344
    del self._config_data.instances[instance_name]
345
    self._WriteConfig()
346

    
347
  def RenameInstance(self, old_name, new_name):
348
    """Rename an instance.
349

350
    This needs to be done in ConfigWriter and not by RemoveInstance
351
    combined with AddInstance as only we can guarantee an atomic
352
    rename.
353

354
    """
355
    self._OpenConfig()
356
    if old_name not in self._config_data.instances:
357
      raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
358
    inst = self._config_data.instances[old_name]
359
    del self._config_data.instances[old_name]
360
    inst.name = new_name
361

    
362
    for disk in inst.disks:
363
      if disk.dev_type == constants.LD_FILE:
364
        # rename the file paths in logical and physical id
365
        file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
366
        disk.physical_id = disk.logical_id = (disk.logical_id[0],
367
                                              os.path.join(file_storage_dir,
368
                                                           inst.name,
369
                                                           disk.iv_name))
370

    
371
    self._config_data.instances[inst.name] = inst
372
    self._WriteConfig()
373

    
374
  def MarkInstanceDown(self, instance_name):
375
    """Mark the status of an instance to down in the configuration.
376

377
    """
378
    self._SetInstanceStatus(instance_name, "down")
379

    
380
  def GetInstanceList(self):
381
    """Get the list of instances.
382

383
    Returns:
384
      array of instances, ex. ['instance2.example.com','instance1.example.com']
385
      these contains all the instances, also the ones in Admin_down state
386

387
    """
388
    self._OpenConfig()
389

    
390
    return self._config_data.instances.keys()
391

    
392
  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    Args:
      short_name: the (possibly partial) name to expand

    Returns:
      the result of utils.MatchNameComponent for short_name against the
      names of all configured instances

    """
    self._OpenConfig()

    candidates = self._config_data.instances.keys()
    return utils.MatchNameComponent(short_name, candidates)
400

    
401
  def GetInstanceInfo(self, instance_name):
402
    """Returns informations about an instance.
403

404
    It takes the information from the configuration file. Other informations of
405
    an instance are taken from the live systems.
406

407
    Args:
408
      instance: name of the instance, ex instance1.example.com
409

410
    Returns:
411
      the instance object
412

413
    """
414
    self._OpenConfig()
415

    
416
    if instance_name not in self._config_data.instances:
417
      return None
418

    
419
    return self._config_data.instances[instance_name]
420

    
421
  def AddNode(self, node):
422
    """Add a node to the configuration.
423

424
    Args:
425
      node: an object.Node instance
426

427
    """
428
    self._OpenConfig()
429
    self._config_data.nodes[node.name] = node
430
    self._WriteConfig()
431

    
432
  def RemoveNode(self, node_name):
433
    """Remove a node from the configuration.
434

435
    """
436
    self._OpenConfig()
437
    if node_name not in self._config_data.nodes:
438
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)
439

    
440
    del self._config_data.nodes[node_name]
441
    self._WriteConfig()
442

    
443
  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    Args:
      short_name: the (possibly partial) name to expand

    Returns:
      the result of utils.MatchNameComponent for short_name against the
      names of all configured nodes

    """
    self._OpenConfig()

    candidates = self._config_data.nodes.keys()
    return utils.MatchNameComponent(short_name, candidates)
451

    
452
  def GetNodeInfo(self, node_name):
453
    """Get the configuration of a node, as stored in the config.
454

455
    Args: node: nodename (tuple) of the node
456

457
    Returns: the node object
458

459
    """
460
    self._OpenConfig()
461

    
462
    if node_name not in self._config_data.nodes:
463
      return None
464

    
465
    return self._config_data.nodes[node_name]
466

    
467
  def GetNodeList(self):
468
    """Return the list of nodes which are in the configuration.
469

470
    """
471
    self._OpenConfig()
472
    return self._config_data.nodes.keys()
473

    
474
  def DumpConfig(self):
475
    """Return the entire configuration of the cluster.
476
    """
477
    self._OpenConfig()
478
    return self._config_data
479

    
480
  def _BumpSerialNo(self):
481
    """Bump up the serial number of the config.
482

483
    """
484
    self._config_data.cluster.serial_no += 1
485

    
486
  def _OpenConfig(self):
487
    """Read the config data from disk.
488

489
    In case we already have configuration data and the config file has
490
    the same mtime as when we read it, we skip the parsing of the
491
    file, since de-serialisation could be slow.
492

493
    """
494
    try:
495
      st = os.stat(self._cfg_file)
496
    except OSError, err:
497
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
498
    if (self._config_data is not None and
499
        self._config_time is not None and
500
        self._config_time == st.st_mtime and
501
        self._config_size == st.st_size and
502
        self._config_inode == st.st_ino):
503
      # data is current, so skip loading of config file
504
      return
505

    
506
    # Make sure the configuration has the right version
507
    ValidateConfig()
508

    
509
    f = open(self._cfg_file, 'r')
510
    try:
511
      try:
512
        data = objects.ConfigData.FromDict(serializer.Load(f.read()))
513
      except Exception, err:
514
        raise errors.ConfigurationError(err)
515
    finally:
516
      f.close()
517
    if (not hasattr(data, 'cluster') or
518
        not hasattr(data.cluster, 'rsahostkeypub')):
519
      raise errors.ConfigurationError("Incomplete configuration"
520
                                      " (missing cluster.rsahostkeypub)")
521
    self._config_data = data
522
    self._config_time = st.st_mtime
523
    self._config_size = st.st_size
524
    self._config_inode = st.st_ino
525

    
526
  def _DistributeConfig(self):
527
    """Distribute the configuration to the other nodes.
528

529
    Currently, this only copies the configuration file. In the future,
530
    it could be used to encapsulate the 2/3-phase update mechanism.
531

532
    """
533
    if self._offline:
534
      return True
535
    bad = False
536
    nodelist = self.GetNodeList()
537
    myhostname = self._my_hostname
538

    
539
    try:
540
      nodelist.remove(myhostname)
541
    except ValueError:
542
      pass
543

    
544
    result = rpc.call_upload_file(nodelist, self._cfg_file)
545
    for node in nodelist:
546
      if not result[node]:
547
        logger.Error("copy of file %s to node %s failed" %
548
                     (self._cfg_file, node))
549
        bad = True
550
    return not bad
551

    
552
  def _WriteConfig(self, destination=None):
553
    """Write the configuration data to persistent storage.
554

555
    """
556
    if destination is None:
557
      destination = self._cfg_file
558
    self._BumpSerialNo()
559
    txt = serializer.Dump(self._config_data.ToDict())
560
    dir_name, file_name = os.path.split(destination)
561
    fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
562
    f = os.fdopen(fd, 'w')
563
    try:
564
      f.write(txt)
565
      os.fsync(f.fileno())
566
    finally:
567
      f.close()
568
    # we don't need to do os.close(fd) as f.close() did it
569
    os.rename(name, destination)
570
    self.write_count += 1
571
    # re-set our cache as not to re-read the config file
572
    try:
573
      st = os.stat(destination)
574
    except OSError, err:
575
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
576
    self._config_time = st.st_mtime
577
    self._config_size = st.st_size
578
    self._config_inode = st.st_ino
579
    # and redistribute the config file
580
    self._DistributeConfig()
581

    
582
  def InitConfig(self, node, primary_ip, secondary_ip,
                 hostkeypub, mac_prefix, vg_name, def_bridge):
    """Create the initial cluster configuration and save it.

    It will contain the current node, which will also be the master
    node, and no instances or operating systems.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host or None, in
                    which case the primary IP is used
      hostkeypub: the public hostkey of this host
      mac_prefix: stored as the cluster's mac_prefix
      vg_name: stored as the cluster's volume_group_name
      def_bridge: stored as the cluster's default_bridge

    """
    if secondary_ip is None:
      secondary_ip = primary_ip

    # start just below the first DRBD port: AllocatePort hands out
    # highest_used_port + 1
    cluster_cfg = objects.Cluster(serial_no=1,
                                  rsahostkeypub=hostkeypub,
                                  highest_used_port=(
                                    constants.FIRST_DRBD_PORT - 1),
                                  mac_prefix=mac_prefix,
                                  volume_group_name=vg_name,
                                  default_bridge=def_bridge,
                                  tcpudp_port_pool=set())
    first_node = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: first_node},
                                           instances={},
                                           cluster=cluster_cfg)
    self._WriteConfig()
613

    
614
  def GetVGName(self):
615
    """Return the volume group name.
616

617
    """
618
    self._OpenConfig()
619
    return self._config_data.cluster.volume_group_name
620

    
621
  def SetVGName(self, vg_name):
622
    """Set the volume group name.
623

624
    """
625
    self._OpenConfig()
626
    self._config_data.cluster.volume_group_name = vg_name
627
    self._WriteConfig()
628

    
629
  def GetDefBridge(self):
630
    """Return the default bridge.
631

632
    """
633
    self._OpenConfig()
634
    return self._config_data.cluster.default_bridge
635

    
636
  def GetMACPrefix(self):
637
    """Return the mac prefix.
638

639
    """
640
    self._OpenConfig()
641
    return self._config_data.cluster.mac_prefix
642

    
643
  def GetClusterInfo(self):
644
    """Returns informations about the cluster
645

646
    Returns:
647
      the cluster object
648

649
    """
650
    self._OpenConfig()
651

    
652
    return self._config_data.cluster
653

    
654
  def Update(self, target):
    """Notify function to be called after updates.

    This function must be called when an object (as returned by
    GetInstanceInfo, GetNodeInfo, GetClusterInfo) has been updated and
    the caller wants the modifications saved to the backing store. Note
    that all modified objects will be saved, but the target argument
    is the one the caller wants to ensure that it's saved.

    Args:
      target: an objects.Cluster, objects.Node or objects.Instance that
              is part of the configuration currently held in memory

    Raises:
      errors.ProgrammerError: if no configuration has been loaded, or if
        target is of an unsupported type
      errors.ConfigurationError: if target is not found in the
        in-memory configuration

    """
    cfg = self._config_data
    if cfg is None:
      raise errors.ProgrammerError("Configuration file not read,"
                                   " cannot save.")
    # the object must be one we currently hold; the configuration is
    # deliberately not re-read here
    if isinstance(target, objects.Cluster):
      is_known = (target == cfg.cluster)
    elif isinstance(target, objects.Node):
      is_known = target in cfg.nodes.values()
    elif isinstance(target, objects.Instance):
      is_known = target in cfg.instances.values()
    else:
      raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                   " ConfigWriter.Update" % type(target))
    if not is_known:
      raise errors.ConfigurationError("Configuration updated since object"
                                      " has been read or unknown object")
    self._WriteConfig()