Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ 243cdbcc

History | View | Annotate | Download (21.1 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the Ganeti cluster configuration.
25

26
The configuration data is stored on every node but is updated on the master
27
only. After each update, the master distributes the data to the other nodes.
28

29
Currently, the data storage format is JSON. YAML was slow and consuming too
30
much memory.
31

32
"""
33

    
34
import os
35
import tempfile
36
import random
37
import re
38

    
39
from ganeti import errors
40
from ganeti import logger
41
from ganeti import utils
42
from ganeti import constants
43
from ganeti import rpc
44
from ganeti import objects
45
from ganeti import serializer
46
from ganeti import ssconf
47

    
48

    
49
def ValidateConfig():
  """Check that the stored configuration version is the supported one.

  The version is read through ssconf.SimpleStore and compared with
  constants.CONFIG_VERSION.

  Raises:
    errors.ConfigurationError: if the two versions differ

  """
  store = ssconf.SimpleStore()

  if store.GetConfigVersion() == constants.CONFIG_VERSION:
    return

  versions = (store.GetConfigVersion(), constants.CONFIG_VERSION)
  raise errors.ConfigurationError("Cluster configuration version"
                                  " mismatch, got %s instead of %s" %
                                  versions)
57

    
58

    
59
class ConfigWriter:
60
  """The interface to the cluster configuration.
61

62
  """
63
  def __init__(self, cfg_file=None, offline=False):
    """Set up a handle whose configuration has not been loaded yet.

    Args:
      cfg_file: path of the configuration file; when None,
                constants.CLUSTER_CONF_FILE is used
      offline: stored as-is; when true, _DistributeConfig returns
               without copying the file to other nodes

    """
    if cfg_file is None:
      cfg_file = constants.CLUSTER_CONF_FILE
    self._cfg_file = cfg_file
    self._offline = offline
    self.write_count = 0
    # cached configuration plus the stat() values it was loaded with
    self._config_data = None
    self._config_time = None
    self._config_size = None
    self._config_inode = None
    self._temporary_ids = set()
    # Our own name is needed by _DistributeConfig; resolving it here,
    # once, means a resolver failure is raised before the config file
    # is modified rather than after.
    self._my_hostname = utils.HostInfo().name
80

    
81
  # this method needs to be static, so that we can call it on the class
82
  @staticmethod
  def IsCluster():
    """Tell whether the default cluster configuration file exists.

    Returns: True if constants.CLUSTER_CONF_FILE exists, False otherwise

    """
    conf_path = constants.CLUSTER_CONF_FILE
    return os.path.exists(conf_path)
88

    
89
  def GenerateMAC(self):
    """Generate a MAC address that no configured NIC uses yet.

    The address is the cluster MAC prefix followed by three random
    bytes; up to 64 candidates are tried.

    Returns: the new MAC as a string

    Raises:
      errors.ConfigurationError: if every candidate was already in use

    """
    self._OpenConfig()
    self._ReleaseLock()
    mac_prefix = self._config_data.cluster.mac_prefix
    taken = self._AllMACs()
    for _attempt in range(64):
      octets = (random.randrange(0, 256),
                random.randrange(0, 256),
                random.randrange(0, 256))
      candidate = "%s:%02x:%02x:%02x" % ((mac_prefix,) + octets)
      if candidate not in taken:
        return candidate
    raise errors.ConfigurationError("Can't generate unique MAC")
111

    
112
  def IsMacInUse(self, mac):
    """Check whether a MAC is already assigned inside this cluster.

    Only the NICs of instances present in the configuration are looked
    at; collisions outside the cluster are not detected.

    Args:
      mac: the MAC address to look for

    Returns: True if some configured NIC has this MAC, False otherwise

    """
    self._OpenConfig()
    self._ReleaseLock()
    return mac in self._AllMACs()
123

    
124
  def _ComputeAllLVs(self):
125
    """Compute the list of all LVs.
126

127
    """
128
    self._OpenConfig()
129
    self._ReleaseLock()
130
    lvnames = set()
131
    for instance in self._config_data.instances.values():
132
      node_data = instance.MapLVsByNode()
133
      for lv_list in node_data.values():
134
        lvnames.update(lv_list)
135
    return lvnames
136

    
137
  def GenerateUniqueID(self, exceptions=None):
    """Generate an unique disk name.

    The candidate is checked against the IDs handed out earlier by this
    object, all LV names, and all instance and node names found in the
    configuration.

    Args:
      - exceptions: a list with some other names which should be checked
                    for uniqueness (used for example when you want to get
                    more than one id at one time without adding each one in
                    turn to the config file)

    Returns: the unique id as a string

    Raises:
      errors.ConfigurationError: if no unused ID was found in 64 attempts

    """
    existing = set()
    existing.update(self._temporary_ids)
    # _ComputeAllLVs also loads the configuration used just below
    existing.update(self._ComputeAllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    if exceptions is not None:
      existing.update(exceptions)
    retries = 64
    while retries > 0:
      unique_id = utils.NewUUID()
      if unique_id not in existing and unique_id is not None:
        break
      # count the failed attempt; without this the loop never ends and
      # the else branch below can never be reached
      retries -= 1
    else:
      raise errors.ConfigurationError("Not able generate an unique ID"
                                      " (last tried ID: %s" % unique_id)
    # remember the ID so that it is not handed out twice by this object
    self._temporary_ids.add(unique_id)
    return unique_id
169

    
170
  def _AllMACs(self):
171
    """Return all MACs present in the config.
172

173
    """
174
    self._OpenConfig()
175
    self._ReleaseLock()
176

    
177
    result = []
178
    for instance in self._config_data.instances.values():
179
      for nic in instance.nics:
180
        result.append(nic.mac)
181

    
182
    return result
183

    
184
  def VerifyConfig(self):
    """Run basic consistency checks over the configuration.

    For every instance this checks that its primary node and all its
    secondary nodes are present in the node list, and that no NIC MAC
    has already been seen on an earlier NIC (of this or of another
    instance).

    Returns: a list of error messages; empty if no problem was found

    """
    self._OpenConfig()
    self._ReleaseLock()

    result = []
    # a set keeps the duplicate check constant-time per NIC
    seen_macs = set()
    data = self._config_data
    for instance_name in data.instances:
      instance = data.instances[instance_name]
      if instance.primary_node not in data.nodes:
        result.append("instance '%s' has invalid primary node '%s'" %
                      (instance_name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("instance '%s' has invalid secondary node '%s'" %
                        (instance_name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("instance '%s' has NIC %d mac %s duplicate" %
                        (instance_name, idx, nic.mac))
        else:
          seen_macs.add(nic.mac)
    return result
209

    
210
  def SetDiskID(self, disk, node_name):
    """Fill in disk.physical_id for use on the given node.

    Children are processed first, recursively, so that passing only the
    top device is enough. For device types in constants.LDS_DRBD the
    physical id becomes (local secondary IP, port, remote secondary IP,
    port) as seen from node_name; for every other type it is a copy of
    the logical id.

    Args:
      disk: the disk object to update (modified in place)
      node_name: the node the physical id is computed for

    Raises:
      errors.ConfigurationError: if node_name is not one of the two
        nodes of a DRBD disk, or if one of those nodes is not in the
        configuration

    """
    for child in (disk.children or []):
      self.SetDiskID(child, node_name)

    # nothing to derive from, and a physical id is already there
    if disk.logical_id is None and disk.physical_id is not None:
      return

    if disk.dev_type not in constants.LDS_DRBD:
      disk.physical_id = disk.logical_id
      return

    pnode, snode, port = disk.logical_id
    if node_name not in (pnode, snode):
      raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                      node_name)
    pnode_info = self.GetNodeInfo(pnode)
    snode_info = self.GetNodeInfo(snode)
    if pnode_info is None or snode_info is None:
      raise errors.ConfigurationError("Can't find primary or secondary node"
                                      " for %s" % str(disk))
    if pnode == node_name:
      local_info, remote_info = pnode_info, snode_info
    else:
      # node_name can only be the secondary here, see the check above
      local_info, remote_info = snode_info, pnode_info
    disk.physical_id = (local_info.secondary_ip, port,
                        remote_info.secondary_ip, port)
    return
245

    
246
  def AddTcpUdpPort(self, port):
    """Put a port into the pool of available ports and save the config.

    Args:
      port: the port number, must be an int

    Raises:
      errors.ProgrammerError: if port is not an int

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._OpenConfig()
    pool = self._config_data.cluster.tcpudp_port_pool
    pool.add(port)
    self._WriteConfig()
256

    
257
  def GetPortList(self):
    """Return a copy of the pool of available ports.

    Returns: the result of copy() on the cluster's tcpudp_port_pool

    """
    self._OpenConfig()
    self._ReleaseLock()
    pool = self._config_data.cluster.tcpudp_port_pool
    return pool.copy()
264

    
265
  def AllocatePort(self):
    """Hand out a port and save the configuration.

    A port from the pool of available ports is preferred; when the pool
    is empty the port following highest_used_port is taken and
    highest_used_port is advanced to it.

    Returns: the allocated port

    Raises:
      errors.ConfigurationError: if the next port of the default range
        would be at or above constants.LAST_DRBD_PORT

    """
    self._OpenConfig()
    cluster = self._config_data.cluster

    if cluster.tcpudp_port_pool:
      # explicitly returned ports are reused first
      port = cluster.tcpudp_port_pool.pop()
    else:
      port = cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      cluster.highest_used_port = port

    self._WriteConfig()
    return port
288

    
289
  def GetHostKey(self):
    """Return the public RSA host key stored in the configuration.

    Args: None

    Returns: the value of the cluster's rsahostkeypub attribute

    """
    self._OpenConfig()
    self._ReleaseLock()
    cluster = self._config_data.cluster
    return cluster.rsahostkeypub
299

    
300
  def AddInstance(self, instance):
    """Store an instance in the configuration and save it.

    This should be used after creating a new instance. An existing
    entry with the same name is overwritten.

    Args:
      instance: the instance object

    Raises:
      errors.ProgrammerError: if instance is not an objects.Instance

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    has_disks = instance.disk_template != constants.DT_DISKLESS
    if has_disks:
      # the LV layout is computed only to be logged
      layout = instance.MapLVsByNode()
      logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, layout))

    self._OpenConfig()
    self._config_data.instances[instance.name] = instance
    self._WriteConfig()
318

    
319
  def _SetInstanceStatus(self, instance_name, status):
320
    """Set the instance's status to a given value.
321

322
    """
323
    if status not in ("up", "down"):
324
      raise errors.ProgrammerError("Invalid status '%s' passed to"
325
                                   " ConfigWriter._SetInstanceStatus()" %
326
                                   status)
327
    self._OpenConfig()
328

    
329
    if instance_name not in self._config_data.instances:
330
      raise errors.ConfigurationError("Unknown instance '%s'" %
331
                                      instance_name)
332
    instance = self._config_data.instances[instance_name]
333
    if instance.status != status:
334
      instance.status = status
335
      self._WriteConfig()
336

    
337
  def MarkInstanceUp(self, instance_name):
    """Record the instance as "up" in the configuration.

    Args:
      instance_name: the name of the instance

    """
    new_status = "up"
    self._SetInstanceStatus(instance_name, new_status)
342

    
343
  def RemoveInstance(self, instance_name):
    """Delete an instance from the configuration and save it.

    Args:
      instance_name: the name of the instance to drop

    Raises:
      errors.ConfigurationError: if the instance is not in the config

    """
    self._OpenConfig()

    instances = self._config_data.instances
    if instance_name not in instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
    del instances[instance_name]
    self._WriteConfig()
353

    
354
  def RenameInstance(self, old_name, new_name):
    """Rename an instance inside the configuration.

    This is done here, and not by RemoveInstance followed by
    AddInstance, so that the change reaches the config file in a single
    write. Disks of type constants.LD_FILE get their logical and
    physical id rewritten so that the path contains the new name.

    Args:
      old_name: the current name of the instance
      new_name: the name the instance should have afterwards

    Raises:
      errors.ConfigurationError: if old_name is not in the config

    """
    self._OpenConfig()
    instances = self._config_data.instances
    if old_name not in instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
    inst = instances.pop(old_name)
    inst.name = new_name

    for disk in inst.disks:
      if disk.dev_type != constants.LD_FILE:
        continue
      # the storage dir is two levels above the disk's file path
      old_path = disk.logical_id[1]
      file_storage_dir = os.path.dirname(os.path.dirname(old_path))
      new_path = os.path.join(file_storage_dir, inst.name, disk.iv_name)
      disk.physical_id = disk.logical_id = (disk.logical_id[0], new_path)

    instances[inst.name] = inst
    self._WriteConfig()
380

    
381
  def MarkInstanceDown(self, instance_name):
    """Record the instance as "down" in the configuration.

    Args:
      instance_name: the name of the instance

    """
    new_status = "down"
    self._SetInstanceStatus(instance_name, new_status)
386

    
387
  def GetInstanceList(self):
    """Return the names of all instances in the configuration.

    Returns:
      the instance names, ex. ['instance2.example.com',
      'instance1.example.com']; every configured instance is included,
      whatever its status

    """
    self._OpenConfig()
    self._ReleaseLock()

    instances = self._config_data.instances
    return instances.keys()
399

    
400
  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    Args:
      short_name: the possibly abbreviated instance name

    Returns: whatever utils.MatchNameComponent returns for short_name
      against the configured instance names

    """
    self._OpenConfig()
    self._ReleaseLock()

    known_names = self._config_data.instances.keys()
    return utils.MatchNameComponent(short_name, known_names)
409

    
410
  def GetInstanceInfo(self, instance_name):
    """Look up an instance in the configuration.

    Only the data stored in the configuration file is returned; it is
    not combined with information from the live systems.

    Args:
      instance_name: name of the instance, ex instance1.example.com

    Returns:
      the instance object, or None if the name is not in the config

    """
    self._OpenConfig()
    self._ReleaseLock()

    instances = self._config_data.instances
    return instances.get(instance_name, None)
430

    
431
  def AddNode(self, node):
    """Store a node in the configuration and save it.

    The node is filed under node.name; an existing entry with that name
    is overwritten.

    Args:
      node: an object.Node instance

    """
    self._OpenConfig()
    nodes = self._config_data.nodes
    nodes[node.name] = node
    self._WriteConfig()
441

    
442
  def RemoveNode(self, node_name):
    """Delete a node from the configuration and save it.

    Args:
      node_name: the name of the node to drop

    Raises:
      errors.ConfigurationError: if the node is not in the config

    """
    self._OpenConfig()
    nodes = self._config_data.nodes
    if node_name not in nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del nodes[node_name]
    self._WriteConfig()
452

    
453
  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    Args:
      short_name: the possibly abbreviated node name

    Returns: whatever utils.MatchNameComponent returns for short_name
      against the configured node names

    """
    self._OpenConfig()
    self._ReleaseLock()

    known_names = self._config_data.nodes.keys()
    return utils.MatchNameComponent(short_name, known_names)
462

    
463
  def GetNodeInfo(self, node_name):
    """Look up a node in the configuration.

    Args:
      node_name: the name of the node

    Returns: the node object, or None if the name is not in the config

    """
    self._OpenConfig()
    self._ReleaseLock()

    nodes = self._config_data.nodes
    return nodes.get(node_name, None)
478

    
479
  def GetNodeList(self):
    """Return the names of all nodes in the configuration.

    Returns: the keys of the configuration's node dictionary

    """
    self._OpenConfig()
    self._ReleaseLock()
    nodes = self._config_data.nodes
    return nodes.keys()
486

    
487
  def DumpConfig(self):
    """Return the whole cluster configuration.

    Returns: the cached configuration object itself, not a copy

    """
    self._OpenConfig()
    self._ReleaseLock()
    whole_config = self._config_data
    return whole_config
493

    
494
  def _BumpSerialNo(self):
495
    """Bump up the serial number of the config.
496

497
    """
498
    self._config_data.cluster.serial_no += 1
499

    
500
  def _OpenConfig(self):
501
    """Read the config data from disk.
502

503
    In case we already have configuration data and the config file has
504
    the same mtime as when we read it, we skip the parsing of the
505
    file, since de-serialisation could be slow.
506

507
    """
508
    try:
509
      st = os.stat(self._cfg_file)
510
    except OSError, err:
511
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
512
    if (self._config_data is not None and
513
        self._config_time is not None and
514
        self._config_time == st.st_mtime and
515
        self._config_size == st.st_size and
516
        self._config_inode == st.st_ino):
517
      # data is current, so skip loading of config file
518
      return
519

    
520
    # Make sure the configuration has the right version
521
    ValidateConfig()
522

    
523
    f = open(self._cfg_file, 'r')
524
    try:
525
      try:
526
        data = objects.ConfigData.FromDict(serializer.Load(f.read()))
527
      except Exception, err:
528
        raise errors.ConfigurationError(err)
529
    finally:
530
      f.close()
531
    if (not hasattr(data, 'cluster') or
532
        not hasattr(data.cluster, 'rsahostkeypub')):
533
      raise errors.ConfigurationError("Incomplete configuration"
534
                                      " (missing cluster.rsahostkeypub)")
535
    self._config_data = data
536
    self._config_time = st.st_mtime
537
    self._config_size = st.st_size
538
    self._config_inode = st.st_ino
539

    
540
  def _ReleaseLock(self):
541
    """xxxx
542
    """
543

    
544
  def _DistributeConfig(self):
545
    """Distribute the configuration to the other nodes.
546

547
    Currently, this only copies the configuration file. In the future,
548
    it could be used to encapsulate the 2/3-phase update mechanism.
549

550
    """
551
    if self._offline:
552
      return True
553
    bad = False
554
    nodelist = self.GetNodeList()
555
    myhostname = self._my_hostname
556

    
557
    try:
558
      nodelist.remove(myhostname)
559
    except ValueError:
560
      pass
561

    
562
    result = rpc.call_upload_file(nodelist, self._cfg_file)
563
    for node in nodelist:
564
      if not result[node]:
565
        logger.Error("copy of file %s to node %s failed" %
566
                     (self._cfg_file, node))
567
        bad = True
568
    return not bad
569

    
570
  def _WriteConfig(self, destination=None):
571
    """Write the configuration data to persistent storage.
572

573
    """
574
    if destination is None:
575
      destination = self._cfg_file
576
    self._BumpSerialNo()
577
    txt = serializer.Dump(self._config_data.ToDict())
578
    dir_name, file_name = os.path.split(destination)
579
    fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
580
    f = os.fdopen(fd, 'w')
581
    try:
582
      f.write(txt)
583
      os.fsync(f.fileno())
584
    finally:
585
      f.close()
586
    # we don't need to do os.close(fd) as f.close() did it
587
    os.rename(name, destination)
588
    self.write_count += 1
589
    # re-set our cache as not to re-read the config file
590
    try:
591
      st = os.stat(destination)
592
    except OSError, err:
593
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
594
    self._config_time = st.st_mtime
595
    self._config_size = st.st_size
596
    self._config_inode = st.st_ino
597
    # and redistribute the config file
598
    self._DistributeConfig()
599

    
600
  def InitConfig(self, node, primary_ip, secondary_ip,
                 hostkeypub, mac_prefix, vg_name, def_bridge):
    """Create and save the initial cluster configuration.

    The new configuration holds a single node (the one given), no
    instances, serial number 1, an empty port pool and a highest used
    port just below constants.FIRST_DRBD_PORT.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host or None, in
                    which case primary_ip is used
      hostkeypub: the public hostkey of this host
      mac_prefix: stored as the cluster's mac_prefix
      vg_name: stored as the cluster's volume_group_name
      def_bridge: stored as the cluster's default_bridge

    """
    cluster = objects.Cluster(serial_no=1,
                              rsahostkeypub=hostkeypub,
                              highest_used_port=constants.FIRST_DRBD_PORT - 1,
                              mac_prefix=mac_prefix,
                              volume_group_name=vg_name,
                              default_bridge=def_bridge,
                              tcpudp_port_pool=set())
    if secondary_ip is None:
      secondary_ip = primary_ip
    first_node = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: first_node},
                                           instances={},
                                           cluster=cluster)
    self._WriteConfig()
631

    
632
  def GetVGName(self):
    """Return the name of the cluster's volume group.

    """
    self._OpenConfig()
    self._ReleaseLock()
    cluster = self._config_data.cluster
    return cluster.volume_group_name
639

    
640
  def SetVGName(self, vg_name):
    """Store a new volume group name and save the configuration.

    Args:
      vg_name: the new volume group name

    """
    self._OpenConfig()
    cluster = self._config_data.cluster
    cluster.volume_group_name = vg_name
    self._WriteConfig()
647

    
648
  def GetDefBridge(self):
    """Return the cluster's default bridge.

    """
    self._OpenConfig()
    self._ReleaseLock()
    cluster = self._config_data.cluster
    return cluster.default_bridge
655

    
656
  def GetMACPrefix(self):
    """Return the cluster's MAC prefix.

    """
    self._OpenConfig()
    self._ReleaseLock()
    cluster = self._config_data.cluster
    return cluster.mac_prefix
663

    
664
  def GetClusterInfo(self):
    """Return the cluster-level part of the configuration.

    Returns:
      the cluster object held by the cached configuration (not a copy)

    """
    self._OpenConfig()
    self._ReleaseLock()

    cluster = self._config_data.cluster
    return cluster
675

    
676
  def Update(self, target):
    """Save the configuration after an object has been modified.

    To be called when an object obtained from this class (a cluster,
    node or instance object) has been changed and the change should
    reach the backing store. All modified objects get saved; target is
    only the one the caller wants to be sure about, and it must compare
    equal to an object of the currently loaded configuration.

    Args:
      target: an objects.Cluster, objects.Node or objects.Instance

    Raises:
      errors.ProgrammerError: if no configuration is loaded or target
        is of another type
      errors.ConfigurationError: if target is not found in the loaded
        configuration

    """
    data = self._config_data
    if data is None:
      raise errors.ProgrammerError("Configuration file not read,"
                                   " cannot save.")
    if isinstance(target, objects.Cluster):
      known = (target == data.cluster)
    elif isinstance(target, objects.Node):
      known = target in data.nodes.values()
    elif isinstance(target, objects.Instance):
      known = target in data.instances.values()
    else:
      raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                   " ConfigWriter.Update" % type(target))
    if not known:
      raise errors.ConfigurationError("Configuration updated since object"
                                      " has been read or unknown object")
    self._WriteConfig()