Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ 89e1fc26

History | View | Annotate | Download (20.2 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the ganeti cluster configuration.
25

26

27
The configuration data is stored on every node but is updated on the
28
master only. After each update, the master distributes the data to the
29
other nodes.
30

31
Currently the data storage format is pickle as yaml was initially not
32
available, then we used it but it was a memory-eating slow beast, so
33
we reverted to pickle using custom Unpicklers.
34

35
"""
36

    
37
import os
38
import tempfile
39
import random
40

    
41
from ganeti import errors
42
from ganeti import logger
43
from ganeti import utils
44
from ganeti import constants
45
from ganeti import rpc
46
from ganeti import objects
47

    
48
def _my_uuidgen():
  """Generate a random UUID by running the uuidgen binary.

  Returns: the output of "uuidgen -r" with trailing newlines removed,
  or None if the command failed

  """
  cmd_result = utils.RunCmd(["uuidgen", "-r"])
  if not cmd_result.failed:
    return cmd_result.stdout.rstrip('\n')
  return None
56

    
57

    
58
try:
  import uuid
except ImportError:
  # no uuid module available, fall back to the external binary
  _uuidgen = _my_uuidgen
else:
  def _uuidgen():
    """Return a random (version 4) UUID as a string.

    uuid.uuid4() returns an UUID object; it is converted here because
    the generated ID is compared against, and stored next to, plain
    string names.

    """
    return str(uuid.uuid4())
63

    
64

    
65
class ConfigWriter:
66
  """The interface to the cluster configuration.
67

68
  """
69
  def __init__(self, cfg_file=None, offline=False):
    """Prepare a configuration handle; no data is read yet.

    Args:
      cfg_file: path of the configuration file; if None,
                constants.CLUSTER_CONF_FILE is used
      offline: if true, _DistributeConfig returns without copying
               the file to the other nodes

    """
    if cfg_file is None:
      cfg_file = constants.CLUSTER_CONF_FILE
    self._cfg_file = cfg_file
    self._offline = offline
    # in-memory copy of the config, filled in by _OpenConfig
    self._config_data = None
    # stat values of the file the in-memory copy corresponds to
    self._config_time = self._config_size = self._config_inode = None
    self._temporary_ids = set()
    # Resolve our own name right away and remember it for
    # _DistributeConfig: a resolver failure should surface before the
    # config file has been modified, not afterwards
    self._my_hostname = utils.HostInfo().name
85

    
86
  # this method needs to be static, so that we can call it on the class
87
  @staticmethod
  def IsCluster():
    """Tell whether a cluster configuration file is present.

    Returns: true if constants.CLUSTER_CONF_FILE exists

    """
    cfg_path = constants.CLUSTER_CONF_FILE
    return os.path.exists(cfg_path)
93

    
94
  def GenerateMAC(self):
95
    """Generate a MAC for an instance.
96

97
    This should check the current instances for duplicates.
98

99
    """
100
    self._OpenConfig()
101
    self._ReleaseLock()
102
    prefix = self._config_data.cluster.mac_prefix
103
    all_macs = self._AllMACs()
104
    retries = 64
105
    while retries > 0:
106
      byte1 = random.randrange(0, 256)
107
      byte2 = random.randrange(0, 256)
108
      byte3 = random.randrange(0, 256)
109
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
110
      if mac not in all_macs:
111
        break
112
      retries -= 1
113
    else:
114
      raise errors.ConfigurationError("Can't generate unique MAC")
115
    return mac
116

    
117
  def _ComputeAllLVs(self):
118
    """Compute the list of all LVs.
119

120
    """
121
    self._OpenConfig()
122
    self._ReleaseLock()
123
    lvnames = set()
124
    for instance in self._config_data.instances.values():
125
      node_data = instance.MapLVsByNode()
126
      for lv_list in node_data.values():
127
        lvnames.update(lv_list)
128
    return lvnames
129

    
130
  def GenerateUniqueID(self, exceptions=None):
    """Generate an unique disk name.

    This checks the current node, instances and disk names for
    duplicates, as well as the IDs already handed out by this object.

    Args:
      - exceptions: a list with some other names which should be checked
                    for uniqueness (used for example when you want to get
                    more than one id at one time without adding each one in
                    turn to the config file)

    Returns: the unique id as a string

    Raises:
      errors.ConfigurationError: if no unused ID could be generated in
                                 64 attempts

    """
    existing = set()
    existing.update(self._temporary_ids)
    existing.update(self._ComputeAllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    if exceptions is not None:
      existing.update(exceptions)
    unique_id = None
    # a bounded loop: the retry counter must actually advance, otherwise
    # a generator that keeps failing would hang the caller forever
    for _ in range(64):
      unique_id = _uuidgen()
      if unique_id is None:
        # the generator failed, try again
        continue
      # the generator may return an UUID object, while all the names we
      # compare against (and the documented return type) are strings
      unique_id = str(unique_id)
      if unique_id not in existing:
        break
    else:
      raise errors.ConfigurationError("Not able generate an unique ID"
                                      " (last tried ID: %s)" % unique_id)
    self._temporary_ids.add(unique_id)
    return unique_id
162

    
163
  def _AllMACs(self):
164
    """Return all MACs present in the config.
165

166
    """
167
    self._OpenConfig()
168
    self._ReleaseLock()
169

    
170
    result = []
171
    for instance in self._config_data.instances.values():
172
      for nic in instance.nics:
173
        result.append(nic.mac)
174

    
175
    return result
176

    
177
  def VerifyConfig(self):
178
    """Stub verify function.
179
    """
180
    self._OpenConfig()
181
    self._ReleaseLock()
182

    
183
    result = []
184
    seen_macs = []
185
    data = self._config_data
186
    for instance_name in data.instances:
187
      instance = data.instances[instance_name]
188
      if instance.primary_node not in data.nodes:
189
        result.append("Instance '%s' has invalid primary node '%s'" %
190
                      (instance_name, instance.primary_node))
191
      for snode in instance.secondary_nodes:
192
        if snode not in data.nodes:
193
          result.append("Instance '%s' has invalid secondary node '%s'" %
194
                        (instance_name, snode))
195
      for idx, nic in enumerate(instance.nics):
196
        if nic.mac in seen_macs:
197
          result.append("Instance '%s' has NIC %d mac %s duplicate" %
198
                        (instance_name, idx, nic.mac))
199
        else:
200
          seen_macs.append(nic.mac)
201
    return result
202

    
203
  def SetDiskID(self, disk, node_name):
204
    """Convert the unique ID to the ID needed on the target nodes.
205

206
    This is used only for drbd, which needs ip/port configuration.
207

208
    The routine descends down and updates its children also, because
209
    this helps when the only the top device is passed to the remote
210
    node.
211

212
    """
213
    if disk.children:
214
      for child in disk.children:
215
        self.SetDiskID(child, node_name)
216

    
217
    if disk.logical_id is None and disk.physical_id is not None:
218
      return
219
    if disk.dev_type == "drbd":
220
      pnode, snode, port = disk.logical_id
221
      if node_name not in (pnode, snode):
222
        raise errors.ConfigurationError("DRBD device not knowing node %s" %
223
                                        node_name)
224
      pnode_info = self.GetNodeInfo(pnode)
225
      snode_info = self.GetNodeInfo(snode)
226
      if pnode_info is None or snode_info is None:
227
        raise errors.ConfigurationError("Can't find primary or secondary node"
228
                                        " for %s" % str(disk))
229
      if pnode == node_name:
230
        disk.physical_id = (pnode_info.secondary_ip, port,
231
                            snode_info.secondary_ip, port)
232
      else: # it must be secondary, we tested above
233
        disk.physical_id = (snode_info.secondary_ip, port,
234
                            pnode_info.secondary_ip, port)
235
    else:
236
      disk.physical_id = disk.logical_id
237
    return
238

    
239
  def AddTcpUdpPort(self, port):
240
    """Adds a new port to the available port pool.
241

242
    """
243
    if not isinstance(port, int):
244
      raise errors.ProgrammerError("Invalid type passed for port")
245

    
246
    self._OpenConfig()
247
    self._config_data.cluster.tcpudp_port_pool.add(port)
248
    self._WriteConfig()
249

    
250
  def GetPortList(self):
251
    """Returns a copy of the current port list.
252

253
    """
254
    self._OpenConfig()
255
    self._ReleaseLock()
256
    return self._config_data.cluster.tcpudp_port_pool.copy()
257

    
258
  def AllocatePort(self):
259
    """Allocate a port.
260

261
    The port will be taken from the available port pool or from the
262
    default port range (and in this case we increase
263
    highest_used_port).
264

265
    """
266
    self._OpenConfig()
267

    
268
    # If there are TCP/IP ports configured, we use them first.
269
    if self._config_data.cluster.tcpudp_port_pool:
270
      port = self._config_data.cluster.tcpudp_port_pool.pop()
271
    else:
272
      port = self._config_data.cluster.highest_used_port + 1
273
      if port >= constants.LAST_DRBD_PORT:
274
        raise errors.ConfigurationError("The highest used port is greater"
275
                                        " than %s. Aborting." %
276
                                        constants.LAST_DRBD_PORT)
277
      self._config_data.cluster.highest_used_port = port
278

    
279
    self._WriteConfig()
280
    return port
281

    
282
  def GetHostKey(self):
283
    """Return the rsa hostkey from the config.
284

285
    Args: None
286

287
    Returns: rsa hostkey
288
    """
289
    self._OpenConfig()
290
    self._ReleaseLock()
291
    return self._config_data.cluster.rsahostkeypub
292

    
293
  def AddInstance(self, instance):
    """Store a newly created instance in the configuration.

    For instances which are not diskless, the LV layout is logged
    first. An existing entry with the same name is replaced.

    Args:
      instance: the objects.Instance to add

    Raises:
      errors.ProgrammerError: if instance is not an objects.Instance

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
      lv_map = instance.MapLVsByNode()
      message = "Instance '%s' DISK_LAYOUT: %s" % (instance.name, lv_map)
      logger.Info(message)

    self._OpenConfig()
    self._config_data.instances[instance.name] = instance
    self._WriteConfig()
311

    
312
  def MarkInstanceUp(self, instance_name):
313
    """Mark the instance status to up in the config.
314

315
    """
316
    self._OpenConfig()
317

    
318
    if instance_name not in self._config_data.instances:
319
      raise errors.ConfigurationError("Unknown instance '%s'" %
320
                                      instance_name)
321
    instance = self._config_data.instances[instance_name]
322
    instance.status = "up"
323
    self._WriteConfig()
324

    
325
  def RemoveInstance(self, instance_name):
326
    """Remove the instance from the configuration.
327

328
    """
329
    self._OpenConfig()
330

    
331
    if instance_name not in self._config_data.instances:
332
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
333
    del self._config_data.instances[instance_name]
334
    self._WriteConfig()
335

    
336
  def RenameInstance(self, old_name, new_name):
337
    """Rename an instance.
338

339
    This needs to be done in ConfigWriter and not by RemoveInstance
340
    combined with AddInstance as only we can guarantee an atomic
341
    rename.
342

343
    """
344
    self._OpenConfig()
345
    if old_name not in self._config_data.instances:
346
      raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
347
    inst = self._config_data.instances[old_name]
348
    del self._config_data.instances[old_name]
349
    inst.name = new_name
350
    self._config_data.instances[inst.name] = inst
351
    self._WriteConfig()
352

    
353
  def MarkInstanceDown(self, instance_name):
354
    """Mark the status of an instance to down in the configuration.
355

356
    """
357
    self._OpenConfig()
358

    
359
    if instance_name not in self._config_data.instances:
360
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
361
    instance = self._config_data.instances[instance_name]
362
    instance.status = "down"
363
    self._WriteConfig()
364

    
365
  def GetInstanceList(self):
366
    """Get the list of instances.
367

368
    Returns:
369
      array of instances, ex. ['instance2.example.com','instance1.example.com']
370
      these contains all the instances, also the ones in Admin_down state
371

372
    """
373
    self._OpenConfig()
374
    self._ReleaseLock()
375

    
376
    return self._config_data.instances.keys()
377

    
378
  def ExpandInstanceName(self, short_name):
    """Try to expand a partial instance name to a full one.

    Returns: the result of utils.MatchNameComponent for short_name
    against the names of the configured instances

    """
    self._OpenConfig()
    self._ReleaseLock()

    candidates = self._config_data.instances.keys()
    return utils.MatchNameComponent(short_name, candidates)
387

    
388
  def GetInstanceInfo(self, instance_name):
389
    """Returns informations about an instance.
390

391
    It takes the information from the configuration file. Other informations of
392
    an instance are taken from the live systems.
393

394
    Args:
395
      instance: name of the instance, ex instance1.example.com
396

397
    Returns:
398
      the instance object
399

400
    """
401
    self._OpenConfig()
402
    self._ReleaseLock()
403

    
404
    if instance_name not in self._config_data.instances:
405
      return None
406

    
407
    return self._config_data.instances[instance_name]
408

    
409
  def AddNode(self, node):
410
    """Add a node to the configuration.
411

412
    Args:
413
      node: an object.Node instance
414

415
    """
416
    self._OpenConfig()
417
    self._config_data.nodes[node.name] = node
418
    self._WriteConfig()
419

    
420
  def RemoveNode(self, node_name):
421
    """Remove a node from the configuration.
422

423
    """
424
    self._OpenConfig()
425
    if node_name not in self._config_data.nodes:
426
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)
427

    
428
    del self._config_data.nodes[node_name]
429
    self._WriteConfig()
430

    
431
  def ExpandNodeName(self, short_name):
    """Try to expand a partial node name to a full one.

    Returns: the result of utils.MatchNameComponent for short_name
    against the names of the configured nodes

    """
    self._OpenConfig()
    self._ReleaseLock()

    candidates = self._config_data.nodes.keys()
    return utils.MatchNameComponent(short_name, candidates)
440

    
441
  def GetNodeInfo(self, node_name):
442
    """Get the configuration of a node, as stored in the config.
443

444
    Args: node: nodename (tuple) of the node
445

446
    Returns: the node object
447

448
    """
449
    self._OpenConfig()
450
    self._ReleaseLock()
451

    
452
    if node_name not in self._config_data.nodes:
453
      return None
454

    
455
    return self._config_data.nodes[node_name]
456

    
457
  def GetNodeList(self):
458
    """Return the list of nodes which are in the configuration.
459

460
    """
461
    self._OpenConfig()
462
    self._ReleaseLock()
463
    return self._config_data.nodes.keys()
464

    
465
  def DumpConfig(self):
466
    """Return the entire configuration of the cluster.
467
    """
468
    self._OpenConfig()
469
    self._ReleaseLock()
470
    return self._config_data
471

    
472
  def _BumpSerialNo(self):
473
    """Bump up the serial number of the config.
474

475
    """
476
    self._config_data.cluster.serial_no += 1
477

    
478
  def _OpenConfig(self):
479
    """Read the config data from disk.
480

481
    In case we already have configuration data and the config file has
482
    the same mtime as when we read it, we skip the parsing of the
483
    file, since de-serialisation could be slow.
484

485
    """
486
    try:
487
      st = os.stat(self._cfg_file)
488
    except OSError, err:
489
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
490
    if (self._config_data is not None and
491
        self._config_time is not None and
492
        self._config_time == st.st_mtime and
493
        self._config_size == st.st_size and
494
        self._config_inode == st.st_ino):
495
      # data is current, so skip loading of config file
496
      return
497
    f = open(self._cfg_file, 'r')
498
    try:
499
      try:
500
        data = objects.ConfigObject.Load(f)
501
      except Exception, err:
502
        raise errors.ConfigurationError(err)
503
    finally:
504
      f.close()
505
    if (not hasattr(data, 'cluster') or
506
        not hasattr(data.cluster, 'config_version')):
507
      raise errors.ConfigurationError("Incomplete configuration"
508
                                      " (missing cluster.config_version)")
509
    if data.cluster.config_version != constants.CONFIG_VERSION:
510
      raise errors.ConfigurationError("Cluster configuration version"
511
                                      " mismatch, got %s instead of %s" %
512
                                      (data.cluster.config_version,
513
                                       constants.CONFIG_VERSION))
514
    self._config_data = data
515
    self._config_time = st.st_mtime
516
    self._config_size = st.st_size
517
    self._config_inode = st.st_ino
518

    
519
  def _ReleaseLock(self):
    """Placeholder hook, currently a no-op.

    The method has no body besides this docstring. The read-only
    accessors of this class call it right after _OpenConfig().
    NOTE(review): presumably meant to release a configuration lock once
    locking is implemented - confirm.

    """
522

    
523
  def _DistributeConfig(self):
524
    """Distribute the configuration to the other nodes.
525

526
    Currently, this only copies the configuration file. In the future,
527
    it could be used to encapsulate the 2/3-phase update mechanism.
528

529
    """
530
    if self._offline:
531
      return True
532
    bad = False
533
    nodelist = self.GetNodeList()
534
    myhostname = self._my_hostname
535

    
536
    tgt_list = []
537
    for node in nodelist:
538
      nodeinfo = self.GetNodeInfo(node)
539
      if nodeinfo.name == myhostname:
540
        continue
541
      tgt_list.append(node)
542

    
543
    result = rpc.call_upload_file(tgt_list, self._cfg_file)
544
    for node in tgt_list:
545
      if not result[node]:
546
        logger.Error("copy of file %s to node %s failed" %
547
                     (self._cfg_file, node))
548
        bad = True
549
    return not bad
550

    
551
  def _WriteConfig(self, destination=None):
552
    """Write the configuration data to persistent storage.
553

554
    """
555
    if destination is None:
556
      destination = self._cfg_file
557
    self._BumpSerialNo()
558
    dir_name, file_name = os.path.split(destination)
559
    fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
560
    f = os.fdopen(fd, 'w')
561
    try:
562
      self._config_data.Dump(f)
563
      os.fsync(f.fileno())
564
    finally:
565
      f.close()
566
    # we don't need to do os.close(fd) as f.close() did it
567
    os.rename(name, destination)
568
    # re-set our cache as not to re-read the config file
569
    try:
570
      st = os.stat(destination)
571
    except OSError, err:
572
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
573
    self._config_time = st.st_mtime
574
    self._config_size = st.st_size
575
    self._config_inode = st.st_ino
576
    # and redistribute the config file
577
    self._DistributeConfig()
578

    
579
  def InitConfig(self, node, primary_ip, secondary_ip,
                 hostkeypub, mac_prefix, vg_name, def_bridge):
    """Create and write the initial cluster configuration.

    The new configuration contains a single node (the one passed in)
    and no instances; its serial number starts at 1 (and is bumped by
    the write) and highest_used_port is set just below
    constants.FIRST_DRBD_PORT.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host; if None, the
                    primary IP is used
      hostkeypub: the public hostkey of this host
      mac_prefix: stored as the mac_prefix of the cluster
      vg_name: stored as the volume_group_name of the cluster
      def_bridge: stored as the default_bridge of the cluster

    """
    if secondary_ip is None:
      secondary_ip = primary_ip
    cluster_args = {
      "config_version": constants.CONFIG_VERSION,
      "serial_no": 1,
      "rsahostkeypub": hostkeypub,
      "highest_used_port": constants.FIRST_DRBD_PORT - 1,
      "mac_prefix": mac_prefix,
      "volume_group_name": vg_name,
      "default_bridge": def_bridge,
      "tcpudp_port_pool": set(),
      }
    cluster_obj = objects.Cluster(**cluster_args)
    node_obj = objects.Node(name=node, primary_ip=primary_ip,
                            secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: node_obj},
                                           instances={},
                                           cluster=cluster_obj)
    self._WriteConfig()
611

    
612
  def GetVGName(self):
613
    """Return the volume group name.
614

615
    """
616
    self._OpenConfig()
617
    self._ReleaseLock()
618
    return self._config_data.cluster.volume_group_name
619

    
620
  def GetDefBridge(self):
621
    """Return the default bridge.
622

623
    """
624
    self._OpenConfig()
625
    self._ReleaseLock()
626
    return self._config_data.cluster.default_bridge
627

    
628
  def GetMACPrefix(self):
629
    """Return the mac prefix.
630

631
    """
632
    self._OpenConfig()
633
    self._ReleaseLock()
634
    return self._config_data.cluster.mac_prefix
635

    
636
  def GetClusterInfo(self):
637
    """Returns informations about the cluster
638

639
    Returns:
640
      the cluster object
641

642
    """
643
    self._OpenConfig()
644
    self._ReleaseLock()
645

    
646
    return self._config_data.cluster
647

    
648
  def Update(self, target):
    """Save the configuration after an object has been modified.

    This must be called when an object (as returned by
    GetInstanceInfo, GetNodeInfo, GetClusterInfo) has been changed and
    the change should reach the backing store. The whole configuration
    is written, so all modified objects are saved; target is only the
    one which is verified to be part of the current configuration.

    Args:
      target: an objects.Cluster, objects.Node or objects.Instance

    Raises:
      errors.ProgrammerError: if no configuration has been read yet, or
                              target is of another type
      errors.ConfigurationError: if target is not part of the current
                                 configuration data

    """
    data = self._config_data
    if data is None:
      raise errors.ProgrammerError("Configuration file not read,"
                                   " cannot save.")
    if isinstance(target, objects.Cluster):
      is_known = target == data.cluster
    elif isinstance(target, objects.Node):
      is_known = target in data.nodes.values()
    elif isinstance(target, objects.Instance):
      is_known = target in data.instances.values()
    else:
      raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                   " ConfigWriter.Update" % type(target))
    if not is_known:
      raise errors.ConfigurationError("Configuration updated since object"
                                      " has been read or unknown object")
    self._WriteConfig()