Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ b23c4333

History | View | Annotate | Download (20.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the Ganeti cluster configuration.
25

26
The configuration data is stored on every node but is updated on the master
27
only. After each update, the master distributes the data to the other nodes.
28

29
Currently, the data storage format is JSON. YAML was slow and consuming too
30
much memory.
31

32
"""
33

    
34
import os
import sys
import tempfile
import random
import re

from ganeti import errors
from ganeti import logger
from ganeti import utils
from ganeti import constants
from ganeti import rpc
from ganeti import objects
45

    
46

    
47
class ConfigWriter:
  """The interface to the cluster configuration.

  The configuration lives in a single file (constants.CLUSTER_CONF_FILE
  unless another path is given).  The parsed data is cached in memory
  and only re-read when the mtime, size or inode of the file differ
  from the values recorded at the last read or write.  Every modifying
  method writes the file back immediately and, unless the object was
  created with offline=True, pushes it to the other nodes through
  rpc.call_upload_file.

  """
  def __init__(self, cfg_file=None, offline=False):
    """Set up the writer; the config file itself is not read here.

    Args:
      cfg_file: path of the configuration file, or None to use
                constants.CLUSTER_CONF_FILE
      offline: if true, _DistributeConfig returns without contacting
               any node

    """
    self.write_count = 0
    self._config_data = None
    # stat() values of the config file at the time of the last
    # read/write; used by _OpenConfig to decide whether to re-parse
    self._config_time = None
    self._config_size = None
    self._config_inode = None
    self._offline = offline
    if cfg_file is None:
      self._cfg_file = constants.CLUSTER_CONF_FILE
    else:
      self._cfg_file = cfg_file
    # IDs handed out by GenerateUniqueID that may not be in the config yet
    self._temporary_ids = set()
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = utils.HostInfo().name

  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Check if the cluster is configured.

    Returns: True if constants.CLUSTER_CONF_FILE exists

    """
    return os.path.exists(constants.CLUSTER_CONF_FILE)

  def GenerateMAC(self):
    """Generate a MAC for an instance.

    The cluster MAC prefix is extended with three random bytes; the
    candidate is checked against the MACs of all instances in the
    configuration and up to 64 candidates are tried.

    Returns: the new MAC as a string
    Raises: errors.ConfigurationError if all candidates were in use

    """
    self._OpenConfig()
    self._ReleaseLock()
    prefix = self._config_data.cluster.mac_prefix
    all_macs = self._AllMACs()
    retries = 64
    while retries > 0:
      byte1 = random.randrange(0, 256)
      byte2 = random.randrange(0, 256)
      byte3 = random.randrange(0, 256)
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
      if mac not in all_macs:
        break
      retries -= 1
    else:
      # the loop ran out of retries without hitting 'break'
      raise errors.ConfigurationError("Can't generate unique MAC")
    return mac

  def IsMacInUse(self, mac):
    """Predicate: check if the specified MAC is in use in the Ganeti cluster.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    """
    self._OpenConfig()
    self._ReleaseLock()
    all_macs = self._AllMACs()
    return mac in all_macs

  def _ComputeAllLVs(self):
    """Compute the set of all LV names used by the instances.

    Returns: a set with every entry of every per-node list returned by
    instance.MapLVsByNode(), over all instances

    """
    self._OpenConfig()
    self._ReleaseLock()
    lvnames = set()
    for instance in self._config_data.instances.values():
      node_data = instance.MapLVsByNode()
      for lv_list in node_data.values():
        lvnames.update(lv_list)
    return lvnames

  def GenerateUniqueID(self, exceptions=None):
    """Generate an unique disk name.

    This checks the current node, instances and disk names for
    duplicates, as well as the IDs previously returned by this method
    on the same object.

    Args:
      - exceptions: a list with some other names which should be checked
                    for uniqueness (used for example when you want to get
                    more than one id at one time without adding each one in
                    turn to the config file)

    Returns: the unique id as a string

    Raises: errors.ConfigurationError if no unused ID was found in 64
    attempts

    """
    existing = set()
    existing.update(self._temporary_ids)
    # _ComputeAllLVs also (re)loads the configuration used below
    existing.update(self._ComputeAllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    if exceptions is not None:
      existing.update(exceptions)
    retries = 64
    while retries > 0:
      unique_id = utils.NewUUID()
      if unique_id not in existing and unique_id is not None:
        break
      # without this the loop could never end and the 'else' branch
      # below would be unreachable
      retries -= 1
    else:
      raise errors.ConfigurationError("Not able generate an unique ID"
                                      " (last tried ID: %s)" % unique_id)
    self._temporary_ids.add(unique_id)
    return unique_id

  def _AllMACs(self):
    """Return all MACs present in the config.

    Returns: a list with the mac of every NIC of every instance

    """
    self._OpenConfig()
    self._ReleaseLock()

    return [nic.mac
            for instance in self._config_data.instances.values()
            for nic in instance.nics]

  def VerifyConfig(self):
    """Run consistency checks on the configuration.

    Checked are: the primary and secondary nodes of each instance must
    exist in the node list, and no MAC may be used by more than one NIC.

    Returns: a list of error messages, empty if no problem was found

    """
    self._OpenConfig()
    self._ReleaseLock()

    result = []
    # a set keeps the duplicate check cheap for large clusters
    seen_macs = set()
    data = self._config_data
    for instance_name in data.instances:
      instance = data.instances[instance_name]
      if instance.primary_node not in data.nodes:
        result.append("instance '%s' has invalid primary node '%s'" %
                      (instance_name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("instance '%s' has invalid secondary node '%s'" %
                        (instance_name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("instance '%s' has NIC %d mac %s duplicate" %
                        (instance_name, idx, nic.mac))
        else:
          seen_macs.add(nic.mac)
    return result

  def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote
    node.

    Args:
      disk: the disk object whose physical_id is to be set
      node_name: the node for which the physical_id is computed

    Raises: errors.ConfigurationError if a DRBD disk does not list
    node_name in its logical_id, or if one of its nodes is unknown

    """
    if disk.children:
      for child in disk.children:
        self.SetDiskID(child, node_name)

    # nothing to compute from, and a physical_id is already present
    if disk.logical_id is None and disk.physical_id is not None:
      return
    if disk.dev_type in constants.LDS_DRBD:
      pnode, snode, port = disk.logical_id
      if node_name not in (pnode, snode):
        raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                        node_name)
      pnode_info = self.GetNodeInfo(pnode)
      snode_info = self.GetNodeInfo(snode)
      if pnode_info is None or snode_info is None:
        raise errors.ConfigurationError("Can't find primary or secondary node"
                                        " for %s" % str(disk))
      # the tuple starts with the address of node_name itself, followed
      # by the address of the other node
      if pnode == node_name:
        disk.physical_id = (pnode_info.secondary_ip, port,
                            snode_info.secondary_ip, port)
      else: # it must be secondary, we tested above
        disk.physical_id = (snode_info.secondary_ip, port,
                            pnode_info.secondary_ip, port)
    else:
      disk.physical_id = disk.logical_id
    return

  def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    Args:
      port: the port number, must be an int

    Raises: errors.ProgrammerError if port is not an int

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._OpenConfig()
    self._config_data.cluster.tcpudp_port_pool.add(port)
    self._WriteConfig()

  def GetPortList(self):
    """Returns a copy of the current port list.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.tcpudp_port_pool.copy()

  def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    Returns: the allocated port
    Raises: errors.ConfigurationError if the pool is empty and the next
    port of the default range would reach constants.LAST_DRBD_PORT

    """
    self._OpenConfig()

    # If there are TCP/IP ports configured, we use them first.
    if self._config_data.cluster.tcpudp_port_pool:
      port = self._config_data.cluster.tcpudp_port_pool.pop()
    else:
      port = self._config_data.cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      self._config_data.cluster.highest_used_port = port

    self._WriteConfig()
    return port

  def GetHostKey(self):
    """Return the rsa hostkey from the config.

    Args: None

    Returns: rsa hostkey
    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.rsahostkeypub

  def AddInstance(self, instance):
    """Add an instance to the config.

    This should be used after creating a new instance.

    Args:
      instance: the instance object

    Raises: errors.ProgrammerError if instance is not an
    objects.Instance
    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
      all_lvs = instance.MapLVsByNode()
      logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, all_lvs))

    self._OpenConfig()
    self._config_data.instances[instance.name] = instance
    self._WriteConfig()

  def _SetInstanceStatus(self, instance_name, status):
    """Store a new status for an instance and write the config.

    Raises: errors.ConfigurationError if the instance is not known

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
    instance = self._config_data.instances[instance_name]
    instance.status = status
    self._WriteConfig()

  def MarkInstanceUp(self, instance_name):
    """Mark the instance status to up in the config.

    Raises: errors.ConfigurationError if the instance is not known

    """
    self._SetInstanceStatus(instance_name, "up")

  def RemoveInstance(self, instance_name):
    """Remove the instance from the configuration.

    Raises: errors.ConfigurationError if the instance is not known

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
    del self._config_data.instances[instance_name]
    self._WriteConfig()

  def RenameInstance(self, old_name, new_name):
    """Rename an instance.

    This needs to be done in ConfigWriter and not by RemoveInstance
    combined with AddInstance as only we can guarantee an atomic
    rename.

    Raises: errors.ConfigurationError if old_name is not known

    """
    self._OpenConfig()
    if old_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
    inst = self._config_data.instances[old_name]
    del self._config_data.instances[old_name]
    inst.name = new_name

    for disk in inst.disks:
      if disk.dev_type == constants.LD_FILE:
        # rename the file paths in logical and physical id: the path is
        # rebuilt as <two levels above the old path>/<new name>/<iv_name>
        file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
        disk.physical_id = disk.logical_id = (disk.logical_id[0],
                                              os.path.join(file_storage_dir,
                                                           inst.name,
                                                           disk.iv_name))

    self._config_data.instances[inst.name] = inst
    self._WriteConfig()

  def MarkInstanceDown(self, instance_name):
    """Mark the status of an instance to down in the configuration.

    Raises: errors.ConfigurationError if the instance is not known

    """
    self._SetInstanceStatus(instance_name, "down")

  def GetInstanceList(self):
    """Get the list of instances.

    Returns:
      array of instances, ex. ['instance2.example.com','instance1.example.com']
      these contains all the instances, also the ones in Admin_down state

    """
    self._OpenConfig()
    self._ReleaseLock()

    return self._config_data.instances.keys()

  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    Returns: the result of utils.MatchNameComponent on the instance
    names of the configuration

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    self._config_data.instances.keys())

  def GetInstanceInfo(self, instance_name):
    """Returns informations about an instance.

    It takes the information from the configuration file. Other informations of
    an instance are taken from the live systems.

    Args:
      instance_name: name of the instance, ex instance1.example.com

    Returns:
      the instance object, or None if the name is not known

    """
    self._OpenConfig()
    self._ReleaseLock()

    if instance_name not in self._config_data.instances:
      return None

    return self._config_data.instances[instance_name]

  def AddNode(self, node):
    """Add a node to the configuration.

    Args:
      node: an object.Node instance

    """
    self._OpenConfig()
    self._config_data.nodes[node.name] = node
    self._WriteConfig()

  def RemoveNode(self, node_name):
    """Remove a node from the configuration.

    Raises: errors.ConfigurationError if the node is not known

    """
    self._OpenConfig()
    if node_name not in self._config_data.nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del self._config_data.nodes[node_name]
    self._WriteConfig()

  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    Returns: the result of utils.MatchNameComponent on the node names
    of the configuration

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    self._config_data.nodes.keys())

  def GetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    Args:
      node_name: the name of the node

    Returns: the node object, or None if the name is not known

    """
    self._OpenConfig()
    self._ReleaseLock()

    if node_name not in self._config_data.nodes:
      return None

    return self._config_data.nodes[node_name]

  def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.nodes.keys()

  def DumpConfig(self):
    """Return the entire configuration of the cluster.

    Note that this is the cached object itself, not a copy.
    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data

  def _BumpSerialNo(self):
    """Bump up the serial number of the config.

    """
    self._config_data.cluster.serial_no += 1

  def _StatFile(self, path):
    """Return os.stat(path), mapping OSError to ConfigurationError.

    """
    try:
      return os.stat(path)
    except OSError:
      # sys.exc_info() instead of a bound name keeps this parseable by
      # both old and new Python versions
      err = sys.exc_info()[1]
      raise errors.ConfigurationError("Can't stat config file: %s" % err)

  def _OpenConfig(self):
    """Read the config data from disk.

    In case we already have configuration data and the config file has
    the same mtime, size and inode as when we read it, we skip the
    parsing of the file, since de-serialisation could be slow.

    Raises: errors.ConfigurationError if the file cannot be stat'ed or
    parsed, or if it lacks cluster.config_version or has a version
    different from constants.CONFIG_VERSION

    """
    st = self._StatFile(self._cfg_file)
    if (self._config_data is not None and
        self._config_time is not None and
        self._config_time == st.st_mtime and
        self._config_size == st.st_size and
        self._config_inode == st.st_ino):
      # data is current, so skip loading of config file
      return
    f = open(self._cfg_file, 'r')
    try:
      try:
        data = objects.ConfigData.Load(f)
      except Exception:
        # any loading problem is reported as a configuration error
        raise errors.ConfigurationError(sys.exc_info()[1])
    finally:
      f.close()
    if (not hasattr(data, 'cluster') or
        not hasattr(data.cluster, 'config_version')):
      raise errors.ConfigurationError("Incomplete configuration"
                                      " (missing cluster.config_version)")
    if data.cluster.config_version != constants.CONFIG_VERSION:
      raise errors.ConfigurationError("Cluster configuration version"
                                      " mismatch, got %s instead of %s" %
                                      (data.cluster.config_version,
                                       constants.CONFIG_VERSION))
    # only replace the cache once the new data passed all checks
    self._config_data = data
    self._config_time = st.st_mtime
    self._config_size = st.st_size
    self._config_inode = st.st_ino

  def _ReleaseLock(self):
    """Placeholder called by the read-only accessors; does nothing.

    """

  def _DistributeConfig(self):
    """Distribute the configuration to the other nodes.

    Currently, this only copies the configuration file. In the future,
    it could be used to encapsulate the 2/3-phase update mechanism.

    Returns: True if the object is offline or the upload succeeded on
    all nodes, False if it failed on at least one node

    """
    if self._offline:
      return True
    bad = False
    nodelist = self.GetNodeList()
    myhostname = self._my_hostname

    # every node except ourselves gets a copy
    tgt_list = []
    for node in nodelist:
      nodeinfo = self.GetNodeInfo(node)
      if nodeinfo.name == myhostname:
        continue
      tgt_list.append(node)

    result = rpc.call_upload_file(tgt_list, self._cfg_file)
    for node in tgt_list:
      if not result[node]:
        logger.Error("copy of file %s to node %s failed" %
                     (self._cfg_file, node))
        bad = True
    return not bad

  def _WriteConfig(self, destination=None):
    """Write the configuration data to persistent storage.

    The serial number is bumped, the data is dumped to a temporary file
    in the directory of the destination and that file is then renamed
    over the destination.  Afterwards the stat cache is refreshed and
    the configuration is distributed.

    Args:
      destination: the file to write to, or None for the config file
                   this object was created with

    Raises: errors.ConfigurationError if the written file cannot be
    stat'ed; errors from dumping or renaming are passed through

    """
    if destination is None:
      destination = self._cfg_file
    self._BumpSerialNo()
    dir_name, file_name = os.path.split(destination)
    fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
    renamed = False
    try:
      f = os.fdopen(fd, 'w')
      try:
        self._config_data.Dump(f)
        # fsync only covers what has reached the file descriptor, so
        # the buffer of the file object has to be flushed first
        f.flush()
        os.fsync(f.fileno())
      finally:
        # we don't need to do os.close(fd) as f.close() does it
        f.close()
      os.rename(name, destination)
      renamed = True
    finally:
      if not renamed:
        # don't leave the temporary file behind on errors; a failure
        # to remove it must not hide the original error
        try:
          os.unlink(name)
        except OSError:
          pass
    self.write_count += 1
    # re-set our cache as not to re-read the config file
    st = self._StatFile(destination)
    self._config_time = st.st_mtime
    self._config_size = st.st_size
    self._config_inode = st.st_ino
    # and redistribute the config file
    self._DistributeConfig()

  def InitConfig(self, node, primary_ip, secondary_ip,
                 hostkeypub, mac_prefix, vg_name, def_bridge):
    """Create the initial cluster configuration.

    It will contain the current node, which will also be the master
    node, and no instances or operating systems.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host or None, in
                    which case the primary IP is used
      hostkeypub: the public hostkey of this host
      mac_prefix: the MAC prefix of the cluster
      vg_name: the volume group name of the cluster
      def_bridge: the default bridge of the cluster

    """
    # the first allocated port will be FIRST_DRBD_PORT, see AllocatePort
    hu_port = constants.FIRST_DRBD_PORT - 1
    globalconfig = objects.Cluster(config_version=constants.CONFIG_VERSION,
                                   serial_no=1,
                                   rsahostkeypub=hostkeypub,
                                   highest_used_port=hu_port,
                                   mac_prefix=mac_prefix,
                                   volume_group_name=vg_name,
                                   default_bridge=def_bridge,
                                   tcpudp_port_pool=set())
    if secondary_ip is None:
      secondary_ip = primary_ip
    nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: nodeconfig},
                                           instances={},
                                           cluster=globalconfig)
    self._WriteConfig()

  def GetVGName(self):
    """Return the volume group name.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.volume_group_name

  def SetVGName(self, vg_name):
    """Set the volume group name.

    """
    self._OpenConfig()
    self._config_data.cluster.volume_group_name = vg_name
    self._WriteConfig()

  def GetDefBridge(self):
    """Return the default bridge.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.default_bridge

  def GetMACPrefix(self):
    """Return the mac prefix.

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.mac_prefix

  def GetClusterInfo(self):
    """Returns informations about the cluster

    Returns:
      the cluster object

    """
    self._OpenConfig()
    self._ReleaseLock()

    return self._config_data.cluster

  def Update(self, target):
    """Notify function to be called after updates.

    This function must be called when an object (as returned by
    GetInstanceInfo, GetNodeInfo, GetClusterInfo) has been updated and
    the caller wants the modifications saved to the backing store. Note
    that all modified objects will be saved, but the target argument
    is the one the caller wants to ensure that it's saved.

    Raises:
      errors.ProgrammerError: if no configuration has been read yet or
        the target is not a cluster, node or instance object
      errors.ConfigurationError: if the target is not part of the
        currently cached configuration

    """
    if self._config_data is None:
      raise errors.ProgrammerError("Configuration file not read,"
                                   " cannot save.")
    if isinstance(target, objects.Cluster):
      test = target == self._config_data.cluster
    elif isinstance(target, objects.Node):
      test = target in self._config_data.nodes.values()
    elif isinstance(target, objects.Instance):
      test = target in self._config_data.instances.values()
    else:
      raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                   " ConfigWriter.Update" % type(target))
    if not test:
      raise errors.ConfigurationError("Configuration updated since object"
                                      " has been read or unknown object")
    self._WriteConfig()