Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ 319856a9

History | View | Annotate | Download (20.1 kB)

1
#!/usr/bin/python
2
#
3

    
4
# Copyright (C) 2006, 2007 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the Ganeti cluster configuration.
25

26
The configuration data is stored on every node but is updated on the master
27
only. After each update, the master distributes the data to the other nodes.
28

29
Currently, the data storage format is JSON. YAML was slow and consuming too
30
much memory.
31

32
"""
33

    
34
import os
35
import tempfile
36
import random
37

    
38
from ganeti import errors
39
from ganeti import logger
40
from ganeti import utils
41
from ganeti import constants
42
from ganeti import rpc
43
from ganeti import objects
44

    
45

    
46
def _my_uuidgen():
  """Generate a UUID by running the external uuidgen binary.

  This is the fallback used when the uuid module cannot be imported.

  Returns: the output of "uuidgen -r" with trailing newlines removed,
    or None if the command is reported as failed

  """
  cmd_result = utils.RunCmd(["uuidgen", "-r"])
  if not cmd_result.failed:
    return cmd_result.stdout.rstrip('\n')
  return None
54

    
55

    
56
try:
  import uuid
except ImportError:
  # uuid module not importable: fall back to the external binary
  _uuidgen = _my_uuidgen
else:
  def _uuidgen():
    """Return a random (version 4) UUID as a string.

    uuid.uuid4() yields a uuid.UUID object, which never compares equal
    to the plain strings used as names in the configuration; converting
    it here makes both _uuidgen implementations hand out strings.

    """
    return str(uuid.uuid4())
61

    
62

    
63
class ConfigWriter:
64
  """The interface to the cluster configuration.
65

66
  """
67
  def __init__(self, cfg_file=None, offline=False):
    """Set up the writer; the configuration itself is not read here.

    Args:
      cfg_file: path of the configuration file; None selects
                constants.CLUSTER_CONF_FILE
      offline: stored as-is; when true, _DistributeConfig returns
               without copying the file to the other nodes

    """
    if cfg_file is not None:
      self._cfg_file = cfg_file
    else:
      self._cfg_file = constants.CLUSTER_CONF_FILE
    self._offline = offline
    # parsed configuration and the stat() signature it corresponds to
    self._config_data = None
    self._config_time = None
    self._config_size = None
    self._config_inode = None
    self._temporary_ids = set()
    # Our own name is resolved once, up front: _DistributeConfig needs
    # it, and a resolver error is better raised now than after the
    # configuration file has already been modified.
    self._my_hostname = utils.HostInfo().name
83

    
84
  # this method needs to be static, so that we can call it on the class
85
  @staticmethod
  def IsCluster():
    """Tell whether the cluster configuration file exists.

    Returns: the result of os.path.exists on constants.CLUSTER_CONF_FILE

    """
    conf_path = constants.CLUSTER_CONF_FILE
    return os.path.exists(conf_path)
91

    
92
  def GenerateMAC(self):
93
    """Generate a MAC for an instance.
94

95
    This should check the current instances for duplicates.
96

97
    """
98
    self._OpenConfig()
99
    self._ReleaseLock()
100
    prefix = self._config_data.cluster.mac_prefix
101
    all_macs = self._AllMACs()
102
    retries = 64
103
    while retries > 0:
104
      byte1 = random.randrange(0, 256)
105
      byte2 = random.randrange(0, 256)
106
      byte3 = random.randrange(0, 256)
107
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
108
      if mac not in all_macs:
109
        break
110
      retries -= 1
111
    else:
112
      raise errors.ConfigurationError("Can't generate unique MAC")
113
    return mac
114

    
115
  def _ComputeAllLVs(self):
116
    """Compute the list of all LVs.
117

118
    """
119
    self._OpenConfig()
120
    self._ReleaseLock()
121
    lvnames = set()
122
    for instance in self._config_data.instances.values():
123
      node_data = instance.MapLVsByNode()
124
      for lv_list in node_data.values():
125
        lvnames.update(lv_list)
126
    return lvnames
127

    
128
  def GenerateUniqueID(self, exceptions=None):
129
    """Generate an unique disk name.
130

131
    This checks the current node, instances and disk names for
132
    duplicates.
133

134
    Args:
135
      - exceptions: a list with some other names which should be checked
136
                    for uniqueness (used for example when you want to get
137
                    more than one id at one time without adding each one in
138
                    turn to the config file
139

140
    Returns: the unique id as a string
141

142
    """
143
    existing = set()
144
    existing.update(self._temporary_ids)
145
    existing.update(self._ComputeAllLVs())
146
    existing.update(self._config_data.instances.keys())
147
    existing.update(self._config_data.nodes.keys())
148
    if exceptions is not None:
149
      existing.update(exceptions)
150
    retries = 64
151
    while retries > 0:
152
      unique_id = _uuidgen()
153
      if unique_id not in existing and unique_id is not None:
154
        break
155
    else:
156
      raise errors.ConfigurationError("Not able generate an unique ID"
157
                                      " (last tried ID: %s" % unique_id)
158
    self._temporary_ids.add(unique_id)
159
    return unique_id
160

    
161
  def _AllMACs(self):
162
    """Return all MACs present in the config.
163

164
    """
165
    self._OpenConfig()
166
    self._ReleaseLock()
167

    
168
    result = []
169
    for instance in self._config_data.instances.values():
170
      for nic in instance.nics:
171
        result.append(nic.mac)
172

    
173
    return result
174

    
175
  def VerifyConfig(self):
176
    """Stub verify function.
177
    """
178
    self._OpenConfig()
179
    self._ReleaseLock()
180

    
181
    result = []
182
    seen_macs = []
183
    data = self._config_data
184
    for instance_name in data.instances:
185
      instance = data.instances[instance_name]
186
      if instance.primary_node not in data.nodes:
187
        result.append("Instance '%s' has invalid primary node '%s'" %
188
                      (instance_name, instance.primary_node))
189
      for snode in instance.secondary_nodes:
190
        if snode not in data.nodes:
191
          result.append("Instance '%s' has invalid secondary node '%s'" %
192
                        (instance_name, snode))
193
      for idx, nic in enumerate(instance.nics):
194
        if nic.mac in seen_macs:
195
          result.append("Instance '%s' has NIC %d mac %s duplicate" %
196
                        (instance_name, idx, nic.mac))
197
        else:
198
          seen_macs.append(nic.mac)
199
    return result
200

    
201
  def SetDiskID(self, disk, node_name):
202
    """Convert the unique ID to the ID needed on the target nodes.
203

204
    This is used only for drbd, which needs ip/port configuration.
205

206
    The routine descends down and updates its children also, because
207
    this helps when the only the top device is passed to the remote
208
    node.
209

210
    """
211
    if disk.children:
212
      for child in disk.children:
213
        self.SetDiskID(child, node_name)
214

    
215
    if disk.logical_id is None and disk.physical_id is not None:
216
      return
217
    if disk.dev_type == "drbd":
218
      pnode, snode, port = disk.logical_id
219
      if node_name not in (pnode, snode):
220
        raise errors.ConfigurationError("DRBD device not knowing node %s" %
221
                                        node_name)
222
      pnode_info = self.GetNodeInfo(pnode)
223
      snode_info = self.GetNodeInfo(snode)
224
      if pnode_info is None or snode_info is None:
225
        raise errors.ConfigurationError("Can't find primary or secondary node"
226
                                        " for %s" % str(disk))
227
      if pnode == node_name:
228
        disk.physical_id = (pnode_info.secondary_ip, port,
229
                            snode_info.secondary_ip, port)
230
      else: # it must be secondary, we tested above
231
        disk.physical_id = (snode_info.secondary_ip, port,
232
                            pnode_info.secondary_ip, port)
233
    else:
234
      disk.physical_id = disk.logical_id
235
    return
236

    
237
  def AddTcpUdpPort(self, port):
238
    """Adds a new port to the available port pool.
239

240
    """
241
    if not isinstance(port, int):
242
      raise errors.ProgrammerError("Invalid type passed for port")
243

    
244
    self._OpenConfig()
245
    self._config_data.cluster.tcpudp_port_pool.add(port)
246
    self._WriteConfig()
247

    
248
  def GetPortList(self):
249
    """Returns a copy of the current port list.
250

251
    """
252
    self._OpenConfig()
253
    self._ReleaseLock()
254
    return self._config_data.cluster.tcpudp_port_pool.copy()
255

    
256
  def AllocatePort(self):
257
    """Allocate a port.
258

259
    The port will be taken from the available port pool or from the
260
    default port range (and in this case we increase
261
    highest_used_port).
262

263
    """
264
    self._OpenConfig()
265

    
266
    # If there are TCP/IP ports configured, we use them first.
267
    if self._config_data.cluster.tcpudp_port_pool:
268
      port = self._config_data.cluster.tcpudp_port_pool.pop()
269
    else:
270
      port = self._config_data.cluster.highest_used_port + 1
271
      if port >= constants.LAST_DRBD_PORT:
272
        raise errors.ConfigurationError("The highest used port is greater"
273
                                        " than %s. Aborting." %
274
                                        constants.LAST_DRBD_PORT)
275
      self._config_data.cluster.highest_used_port = port
276

    
277
    self._WriteConfig()
278
    return port
279

    
280
  def GetHostKey(self):
281
    """Return the rsa hostkey from the config.
282

283
    Args: None
284

285
    Returns: rsa hostkey
286
    """
287
    self._OpenConfig()
288
    self._ReleaseLock()
289
    return self._config_data.cluster.rsahostkeypub
290

    
291
  def AddInstance(self, instance):
    """Store a newly created instance in the configuration.

    Args:
      instance: the objects.Instance to add; it is stored under its name

    Raises errors.ProgrammerError if instance is not an objects.Instance.

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    # the LV layout is only logged for instances that have disks
    has_disks = instance.disk_template != constants.DT_DISKLESS
    if has_disks:
      lv_map = instance.MapLVsByNode()
      logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, lv_map))

    self._OpenConfig()
    instances = self._config_data.instances
    instances[instance.name] = instance
    self._WriteConfig()
309

    
310
  def MarkInstanceUp(self, instance_name):
311
    """Mark the instance status to up in the config.
312

313
    """
314
    self._OpenConfig()
315

    
316
    if instance_name not in self._config_data.instances:
317
      raise errors.ConfigurationError("Unknown instance '%s'" %
318
                                      instance_name)
319
    instance = self._config_data.instances[instance_name]
320
    instance.status = "up"
321
    self._WriteConfig()
322

    
323
  def RemoveInstance(self, instance_name):
324
    """Remove the instance from the configuration.
325

326
    """
327
    self._OpenConfig()
328

    
329
    if instance_name not in self._config_data.instances:
330
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
331
    del self._config_data.instances[instance_name]
332
    self._WriteConfig()
333

    
334
  def RenameInstance(self, old_name, new_name):
335
    """Rename an instance.
336

337
    This needs to be done in ConfigWriter and not by RemoveInstance
338
    combined with AddInstance as only we can guarantee an atomic
339
    rename.
340

341
    """
342
    self._OpenConfig()
343
    if old_name not in self._config_data.instances:
344
      raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
345
    inst = self._config_data.instances[old_name]
346
    del self._config_data.instances[old_name]
347
    inst.name = new_name
348
    self._config_data.instances[inst.name] = inst
349
    self._WriteConfig()
350

    
351
  def MarkInstanceDown(self, instance_name):
352
    """Mark the status of an instance to down in the configuration.
353

354
    """
355
    self._OpenConfig()
356

    
357
    if instance_name not in self._config_data.instances:
358
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
359
    instance = self._config_data.instances[instance_name]
360
    instance.status = "down"
361
    self._WriteConfig()
362

    
363
  def GetInstanceList(self):
364
    """Get the list of instances.
365

366
    Returns:
367
      array of instances, ex. ['instance2.example.com','instance1.example.com']
368
      these contains all the instances, also the ones in Admin_down state
369

370
    """
371
    self._OpenConfig()
372
    self._ReleaseLock()
373

    
374
    return self._config_data.instances.keys()
375

    
376
  def ExpandInstanceName(self, short_name):
    """Try to expand a (possibly partial) instance name.

    Returns: whatever utils.MatchNameComponent yields for short_name
      against the names of the configured instances

    """
    self._OpenConfig()
    self._ReleaseLock()

    known_names = self._config_data.instances.keys()
    return utils.MatchNameComponent(short_name, known_names)
385

    
386
  def GetInstanceInfo(self, instance_name):
387
    """Returns informations about an instance.
388

389
    It takes the information from the configuration file. Other informations of
390
    an instance are taken from the live systems.
391

392
    Args:
393
      instance: name of the instance, ex instance1.example.com
394

395
    Returns:
396
      the instance object
397

398
    """
399
    self._OpenConfig()
400
    self._ReleaseLock()
401

    
402
    if instance_name not in self._config_data.instances:
403
      return None
404

    
405
    return self._config_data.instances[instance_name]
406

    
407
  def AddNode(self, node):
408
    """Add a node to the configuration.
409

410
    Args:
411
      node: an object.Node instance
412

413
    """
414
    self._OpenConfig()
415
    self._config_data.nodes[node.name] = node
416
    self._WriteConfig()
417

    
418
  def RemoveNode(self, node_name):
419
    """Remove a node from the configuration.
420

421
    """
422
    self._OpenConfig()
423
    if node_name not in self._config_data.nodes:
424
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)
425

    
426
    del self._config_data.nodes[node_name]
427
    self._WriteConfig()
428

    
429
  def ExpandNodeName(self, short_name):
    """Try to expand a (possibly partial) node name.

    Returns: whatever utils.MatchNameComponent yields for short_name
      against the names of the configured nodes

    """
    self._OpenConfig()
    self._ReleaseLock()

    known_names = self._config_data.nodes.keys()
    return utils.MatchNameComponent(short_name, known_names)
438

    
439
  def GetNodeInfo(self, node_name):
440
    """Get the configuration of a node, as stored in the config.
441

442
    Args: node: nodename (tuple) of the node
443

444
    Returns: the node object
445

446
    """
447
    self._OpenConfig()
448
    self._ReleaseLock()
449

    
450
    if node_name not in self._config_data.nodes:
451
      return None
452

    
453
    return self._config_data.nodes[node_name]
454

    
455
  def GetNodeList(self):
456
    """Return the list of nodes which are in the configuration.
457

458
    """
459
    self._OpenConfig()
460
    self._ReleaseLock()
461
    return self._config_data.nodes.keys()
462

    
463
  def DumpConfig(self):
464
    """Return the entire configuration of the cluster.
465
    """
466
    self._OpenConfig()
467
    self._ReleaseLock()
468
    return self._config_data
469

    
470
  def _BumpSerialNo(self):
471
    """Bump up the serial number of the config.
472

473
    """
474
    self._config_data.cluster.serial_no += 1
475

    
476
  def _OpenConfig(self):
477
    """Read the config data from disk.
478

479
    In case we already have configuration data and the config file has
480
    the same mtime as when we read it, we skip the parsing of the
481
    file, since de-serialisation could be slow.
482

483
    """
484
    try:
485
      st = os.stat(self._cfg_file)
486
    except OSError, err:
487
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
488
    if (self._config_data is not None and
489
        self._config_time is not None and
490
        self._config_time == st.st_mtime and
491
        self._config_size == st.st_size and
492
        self._config_inode == st.st_ino):
493
      # data is current, so skip loading of config file
494
      return
495
    f = open(self._cfg_file, 'r')
496
    try:
497
      try:
498
        data = objects.ConfigData.Load(f)
499
      except Exception, err:
500
        raise
501
        raise errors.ConfigurationError(err)
502
    finally:
503
      f.close()
504
    if (not hasattr(data, 'cluster') or
505
        not hasattr(data.cluster, 'config_version')):
506
      raise errors.ConfigurationError("Incomplete configuration"
507
                                      " (missing cluster.config_version)")
508
    if data.cluster.config_version != constants.CONFIG_VERSION:
509
      raise errors.ConfigurationError("Cluster configuration version"
510
                                      " mismatch, got %s instead of %s" %
511
                                      (data.cluster.config_version,
512
                                       constants.CONFIG_VERSION))
513
    self._config_data = data
514
    self._config_time = st.st_mtime
515
    self._config_size = st.st_size
516
    self._config_inode = st.st_ino
517

    
518
  def _ReleaseLock(self):
    """Placeholder invoked by the read-only accessors after _OpenConfig.

    The method has no body besides this docstring, so calling it does
    nothing; no lock is taken or released here.

    """
521

    
522
  def _DistributeConfig(self):
523
    """Distribute the configuration to the other nodes.
524

525
    Currently, this only copies the configuration file. In the future,
526
    it could be used to encapsulate the 2/3-phase update mechanism.
527

528
    """
529
    if self._offline:
530
      return True
531
    bad = False
532
    nodelist = self.GetNodeList()
533
    myhostname = self._my_hostname
534

    
535
    tgt_list = []
536
    for node in nodelist:
537
      nodeinfo = self.GetNodeInfo(node)
538
      if nodeinfo.name == myhostname:
539
        continue
540
      tgt_list.append(node)
541

    
542
    result = rpc.call_upload_file(tgt_list, self._cfg_file)
543
    for node in tgt_list:
544
      if not result[node]:
545
        logger.Error("copy of file %s to node %s failed" %
546
                     (self._cfg_file, node))
547
        bad = True
548
    return not bad
549

    
550
  def _WriteConfig(self, destination=None):
551
    """Write the configuration data to persistent storage.
552

553
    """
554
    if destination is None:
555
      destination = self._cfg_file
556
    self._BumpSerialNo()
557
    dir_name, file_name = os.path.split(destination)
558
    fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
559
    f = os.fdopen(fd, 'w')
560
    try:
561
      self._config_data.Dump(f)
562
      os.fsync(f.fileno())
563
    finally:
564
      f.close()
565
    # we don't need to do os.close(fd) as f.close() did it
566
    os.rename(name, destination)
567
    # re-set our cache as not to re-read the config file
568
    try:
569
      st = os.stat(destination)
570
    except OSError, err:
571
      raise errors.ConfigurationError("Can't stat config file: %s" % err)
572
    self._config_time = st.st_mtime
573
    self._config_size = st.st_size
574
    self._config_inode = st.st_ino
575
    # and redistribute the config file
576
    self._DistributeConfig()
577

    
578
  def InitConfig(self, node, primary_ip, secondary_ip,
                 hostkeypub, mac_prefix, vg_name, def_bridge):
    """Build the first cluster configuration and write it out.

    The new configuration holds a single node (the one given) and no
    instances; highest_used_port starts just below
    constants.FIRST_DRBD_PORT and the port pool is empty.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host, or None to
                    use the primary IP
      hostkeypub: the public hostkey of this host
      mac_prefix: stored as the cluster's mac_prefix
      vg_name: stored as the cluster's volume_group_name
      def_bridge: stored as the cluster's default_bridge

    """
    cluster_cfg = objects.Cluster(
      config_version=constants.CONFIG_VERSION,
      serial_no=1,
      rsahostkeypub=hostkeypub,
      highest_used_port=constants.FIRST_DRBD_PORT - 1,
      mac_prefix=mac_prefix,
      volume_group_name=vg_name,
      default_bridge=def_bridge,
      tcpudp_port_pool=set())

    if secondary_ip is None:
      secondary_ip = primary_ip
    first_node = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: first_node},
                                           instances={},
                                           cluster=cluster_cfg)
    self._WriteConfig()
610

    
611
  def GetVGName(self):
612
    """Return the volume group name.
613

614
    """
615
    self._OpenConfig()
616
    self._ReleaseLock()
617
    return self._config_data.cluster.volume_group_name
618

    
619
  def GetDefBridge(self):
620
    """Return the default bridge.
621

622
    """
623
    self._OpenConfig()
624
    self._ReleaseLock()
625
    return self._config_data.cluster.default_bridge
626

    
627
  def GetMACPrefix(self):
628
    """Return the mac prefix.
629

630
    """
631
    self._OpenConfig()
632
    self._ReleaseLock()
633
    return self._config_data.cluster.mac_prefix
634

    
635
  def GetClusterInfo(self):
636
    """Returns informations about the cluster
637

638
    Returns:
639
      the cluster object
640

641
    """
642
    self._OpenConfig()
643
    self._ReleaseLock()
644

    
645
    return self._config_data.cluster
646

    
647
  def Update(self, target):
    """Save the configuration after an object has been modified.

    To be called once an object obtained from this class (cluster, node
    or instance) has been changed and the change should reach the
    backing store. The whole configuration is written, so every modified
    object gets saved; target is merely the one that is verified to
    still be part of the current configuration.

    Args:
      target: an objects.Cluster, objects.Node or objects.Instance

    Raises errors.ProgrammerError if no configuration has been read yet
    or target is of another type, and errors.ConfigurationError if
    target is not found in the current configuration.

    """
    if self._config_data is None:
      raise errors.ProgrammerError("Configuration file not read,"
                                   " cannot save.")
    data = self._config_data
    if isinstance(target, objects.Cluster):
      is_known = target == data.cluster
    elif isinstance(target, objects.Node):
      is_known = target in data.nodes.values()
    elif isinstance(target, objects.Instance):
      is_known = target in data.instances.values()
    else:
      raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                   " ConfigWriter.Update" % type(target))
    if not is_known:
      raise errors.ConfigurationError("Configuration updated since object"
                                      " has been read or unknown object")
    self._WriteConfig()