Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ 4e7f986e

History | View | Annotate | Download (101.5 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the Ganeti cluster configuration.
25

26
The configuration data is stored on every node but is updated on the master
27
only. After each update, the master distributes the data to the other nodes.
28

29
Currently, the data storage format is JSON. YAML was slow and consuming too
30
much memory.
31

32
"""
33

    
34
# pylint: disable=R0904
35
# R0904: Too many public methods
36

    
37
import copy
38
import os
39
import random
40
import logging
41
import time
42
import itertools
43

    
44
from ganeti import errors
45
from ganeti import locking
46
from ganeti import utils
47
from ganeti import constants
48
import ganeti.rpc.node as rpc
49
from ganeti import objects
50
from ganeti import serializer
51
from ganeti import uidpool
52
from ganeti import netutils
53
from ganeti import runtime
54
from ganeti import pathutils
55
from ganeti import network
56

    
57

    
58
_config_lock = locking.SharedLock("ConfigWriter")
59

    
60
# job id used for resource management at config upgrade time
61
_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
62

    
63

    
64
def _ValidateConfig(data):
  """Verifies that a configuration object looks valid.

  This only verifies the version of the configuration.

  @raise errors.ConfigVersionMismatch: if the version differs from what
      we expect

  """
  if data.version == constants.CONFIG_VERSION:
    return
  raise errors.ConfigVersionMismatch(constants.CONFIG_VERSION, data.version)
75

    
76

    
77
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  """
  def __init__(self):
    # maps execution context (job) id -> set of resources it holds
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Tell whether C{resource} is currently reserved by any holder."""
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve C{resource} on behalf of execution context C{ec_id}.

    @raise errors.ReservationError: if the resource is already reserved

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    if ec_id not in self._ec_reserved:
      self._ec_reserved[ec_id] = set([resource])
    else:
      self._ec_reserved[ec_id].add(resource)

  def DropECReservations(self, ec_id):
    """Drop all reservations held by execution context C{ec_id}."""
    if ec_id in self._ec_reserved:
      del self._ec_reserved[ec_id]

  def GetReserved(self):
    """Return the set of all currently reserved resources."""
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def GetECReserved(self, ec_id):
    """ Used when you want to retrieve all reservations for a specific
        execution context. E.g when commiting reserved IPs for a specific
        network.

    """
    ec_reserved = set()
    if ec_id in self._ec_reserved:
      ec_reserved.update(self._ec_reserved[ec_id])
    return ec_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type

    @param existing: already-used resources to avoid
    @param generate_one_fn: callable producing one candidate (or None)
    @param ec_id: execution context reserving the result
    @raise errors.ConfigurationError: if no free resource could be
        generated within the retry limit

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    retries = 64
    while retries > 0:
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
      # FIX: decrement the counter; previously it was never decreased, so a
      # generator that only produced duplicates (or None) looped forever and
      # the while/else raise below was unreachable
      retries -= 1
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource
141

    
142

    
143
def _MatchNameComponentIgnoreCase(short_name, names):
  """Wrapper around L{utils.text.MatchNameComponent}.

  Performs a case-insensitive match of C{short_name} against C{names}.

  @type short_name: string
  @param short_name: the (possibly abbreviated) name to look up
  @param names: iterable of candidate full names
  @return: whatever L{utils.MatchNameComponent} returns (presumably the
      matched full name, or None -- see its documentation)

  """
  return utils.MatchNameComponent(short_name, names, case_sensitive=False)
148

    
149

    
150
def _CheckInstanceDiskIvNames(disks):
151
  """Checks if instance's disks' C{iv_name} attributes are in order.
152

153
  @type disks: list of L{objects.Disk}
154
  @param disks: List of disks
155
  @rtype: list of tuples; (int, string, string)
156
  @return: List of wrongly named disks, each tuple contains disk index,
157
    expected and actual name
158

159
  """
160
  result = []
161

    
162
  for (idx, disk) in enumerate(disks):
163
    exp_iv_name = "disk/%s" % idx
164
    if disk.iv_name != exp_iv_name:
165
      result.append((idx, exp_iv_name, disk.iv_name))
166

    
167
  return result
168

    
169

    
170
class ConfigWriter(object):
  """The interface to the cluster configuration.

  @ivar _temporary_lvs: reservation manager for temporary LVs
  @ivar _all_rms: a list of all temporary reservation managers

  """
  def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
               accept_foreign=False):
    """Initialize the writer and open the configuration file.

    @type cfg_file: string or None
    @param cfg_file: path to the configuration file; defaults to
        L{pathutils.CLUSTER_CONF_FILE} when None
    @type offline: boolean
    @param offline: stored as C{_offline}; presumably disables distributing
        the config to other nodes -- consumers are outside this chunk
    @param _getents: callable providing runtime entities (defaults to
        L{runtime.GetEnts}); the underscore suggests test-only override
    @type accept_foreign: boolean
    @param accept_foreign: passed straight to C{_OpenConfig}

    """
    self.write_count = 0
    self._lock = _config_lock
    self._config_data = None
    self._offline = offline
    if cfg_file is None:
      self._cfg_file = pathutils.CLUSTER_CONF_FILE
    else:
      self._cfg_file = cfg_file
    self._getents = _getents
    # one reservation manager per temporary resource type
    self._temporary_ids = TemporaryReservationManager()
    self._temporary_drbds = {}
    self._temporary_macs = TemporaryReservationManager()
    self._temporary_secrets = TemporaryReservationManager()
    self._temporary_lvs = TemporaryReservationManager()
    self._temporary_ips = TemporaryReservationManager()
    # _temporary_drbds is a plain dict, hence not part of _all_rms
    self._all_rms = [self._temporary_ids, self._temporary_macs,
                     self._temporary_secrets, self._temporary_lvs,
                     self._temporary_ips]
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = netutils.Hostname.GetSysName()
    self._last_cluster_serial = -1
    self._cfg_id = None
    self._context = None
    self._OpenConfig(accept_foreign)
206

    
207
  def _GetRpc(self, address_list):
    """Returns RPC runner for configuration.

    @param address_list: forwarded to L{rpc.ConfigRunner}; presumably the
        node addresses to contact -- see that class for the semantics
    @return: a L{rpc.ConfigRunner} bound to the current Ganeti context

    """
    return rpc.ConfigRunner(self._context, address_list)
212

    
213
  def SetContext(self, context):
    """Sets Ganeti context.

    The context is stored and later used when building RPC runners
    (see L{_GetRpc}).

    """
    self._context = context
218

    
219
  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Check if the cluster is configured.

    @rtype: boolean
    @return: True if the cluster configuration file exists on this node

    """
    return os.path.exists(pathutils.CLUSTER_CONF_FILE)
226

    
227
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNdParams(self, node):
    """Get the node params populated with cluster defaults.

    @type node: L{objects.Node}
    @param node: The node we want to know the params for
    @return: A dict with the filled in node params

    """
    return self._config_data.cluster.FillND(
      node, self._UnlockedGetNodeGroup(node.group))
238

    
239
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNdGroupParams(self, nodegroup):
    """Get the node groups params populated with cluster defaults.

    Holds the config lock shared while delegating to the cluster object.

    @type nodegroup: L{objects.NodeGroup}
    @param nodegroup: The node group we want to know the params for
    @return: A dict with the filled in node group params

    """
    return self._config_data.cluster.FillNDGroup(nodegroup)
249

    
250
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceDiskParams(self, instance):
    """Get the disk params populated with inherit chain.

    The inherit chain goes instance's primary node -> its node group ->
    cluster defaults.

    @type instance: L{objects.Instance}
    @param instance: The instance we want to know the params for
    @return: A dict with the filled in disk params

    """
    node_info = self._UnlockedGetNodeInfo(instance.primary_node)
    group = self._UnlockedGetNodeGroup(node_info.group)
    return self._UnlockedGetGroupDiskParams(group)
262

    
263
  # pylint: disable=R0201
264
  def _UnlockedGetInstanceNodes(self, instance, disks=None):
265
    """Get all disk-releated nodes for an instance.
266

267
    This function is for internal use, when the config lock is already held.
268

269
    """
270
    all_nodes = [instance.primary_node]
271
    inst_disks = instance.disks
272
    if disks is not None:
273
      inst_disks.extend(disks)
274
    for disk in inst_disks:
275
      all_nodes.extend(disk.all_nodes)
276
    return tuple(set(all_nodes))
277

    
278
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceNodes(self, instance, disks=None):
    """Get all disk-related nodes for an instance.

    The result always contains the primary node, plus every node referenced
    by the instance's disks (for DRBD these are the primary and the
    secondaries).
    If additional disks are given, include their nodes to the result.
    This is done because these disks may not be attached to the instance yet.

    @type instance: L{objects.Instance}
    @param instance: The instance we want to get nodes for
    @type disks: list of L{objects.Disk}
    @param disks: If given, include these disks to the result
    @return: a tuple of node UUIDs for all the nodes of the instance

    """
    return self._UnlockedGetInstanceNodes(instance, disks=disks)
295

    
296
  def _UnlockedGetInstanceSecondaryNodes(self, instance):
297
    """Get the list of secondary nodes.
298

299
    This function is for internal use, when the config lock is already held.
300

301
    """
302
    all_nodes = set(self._UnlockedGetInstanceNodes(instance))
303
    all_nodes.discard(instance.primary_node)
304
    return tuple(all_nodes)
305

    
306
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceSecondaryNodes(self, instance):
    """Get the list of secondary nodes.

    This is a simple wrapper over _UnlockedGetInstanceSecondaryNodes,
    holding the config lock shared.

    @return: a tuple with all instance nodes except the primary

    """
    return self._UnlockedGetInstanceSecondaryNodes(instance)
314

    
315
  # pylint: disable=R0201
  def _UnlockedGetInstanceLVsByNode(self, instance, lvmap=None):
    """Provide a mapping of node to LVs a given instance owns.

    This function is for internal use, when the config lock is already held.

    @param instance: the instance whose disk tree to walk
    @param lvmap: optional dict to fill in place; when given, the return
        value is None
    @return: None if C{lvmap} was given, otherwise a new dict mapping
        node_uuid -> list of "vg/lv" strings

    """
    def _MapLVsByNode(lvmap, devs, node_uuid):
      """Recursively helper function."""
      if not node_uuid in lvmap:
        lvmap[node_uuid] = []

      for dev in devs:
        if dev.dev_type == constants.DT_PLAIN:
          lvmap[node_uuid].append(dev.logical_id[0] + "/" + dev.logical_id[1])

        elif dev.dev_type in constants.DTS_DRBD:
          # DRBD disks live on the two nodes named in their logical id
          if dev.children:
            _MapLVsByNode(lvmap, dev.children, dev.logical_id[0])
            _MapLVsByNode(lvmap, dev.children, dev.logical_id[1])

        elif dev.children:
          # FIX: recurse into this device's children; the previous code read
          # "devs.children" (the list being iterated), which would raise
          # AttributeError for any nested non-DRBD device
          _MapLVsByNode(lvmap, dev.children, node_uuid)

    if lvmap is None:
      lvmap = {}
      ret = lvmap
    else:
      ret = None

    node_uuid = instance.primary_node
    devs = instance.disks
    _MapLVsByNode(lvmap, devs, node_uuid)
    return ret
349

    
350
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceLVsByNode(self, instance, lvmap=None):
    """Provide a mapping of node to LVs a given instance owns.

    This function figures out what logical volumes should belong on
    which nodes, recursing through a device tree.

    @type instance: L{objects.Instance}
    @param instance: The instance we want to compute the LVsByNode for
    @type lvmap: dict
    @param lvmap: optional dictionary to receive the
        'node' : ['lv', ...] data.
    @return: None if lvmap arg is given, otherwise, a dictionary of
        the form { 'node_uuid' : ['volume1', 'volume2', ...], ... };
        volumeN is of the form "vg_name/lv_name", compatible with
        GetVolumeList()

    """
    return self._UnlockedGetInstanceLVsByNode(instance, lvmap=lvmap)
369

    
370
  @locking.ssynchronized(_config_lock, shared=1)
  def GetGroupDiskParams(self, group):
    """Get the disk params populated with inherit chain.

    Simple locked wrapper over L{_UnlockedGetGroupDiskParams}.

    @type group: L{objects.NodeGroup}
    @param group: The group we want to know the params for
    @return: A dict with the filled in disk params

    """
    return self._UnlockedGetGroupDiskParams(group)
380

    
381
  def _UnlockedGetGroupDiskParams(self, group):
    """Get the disk params populated with inherit chain down to node-group.

    This function is for internal use, when the config lock is already held.

    @type group: L{objects.NodeGroup}
    @param group: The group we want to know the params for
    @return: A dict with the filled in disk params

    """
    return self._config_data.cluster.SimpleFillDP(group.diskparams)
390

    
391
  def _UnlockedGetNetworkMACPrefix(self, net_uuid):
392
    """Return the network mac prefix if it exists or the cluster level default.
393

394
    """
395
    prefix = None
396
    if net_uuid:
397
      nobj = self._UnlockedGetNetwork(net_uuid)
398
      if nobj.mac_prefix:
399
        prefix = nobj.mac_prefix
400

    
401
    return prefix
402

    
403
  def _GenerateOneMAC(self, prefix=None):
404
    """Return a function that randomly generates a MAC suffic
405
       and appends it to the given prefix. If prefix is not given get
406
       the cluster level default.
407

408
    """
409
    if not prefix:
410
      prefix = self._config_data.cluster.mac_prefix
411

    
412
    def GenMac():
413
      byte1 = random.randrange(0, 256)
414
      byte2 = random.randrange(0, 256)
415
      byte3 = random.randrange(0, 256)
416
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
417
      return mac
418

    
419
    return GenMac
420

    
421
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateMAC(self, net_uuid, ec_id):
    """Generate a MAC for an instance.

    This should check the current instances for duplicates.

    @param net_uuid: network whose MAC prefix to use; falls back to the
        cluster default when it has none (see L{_GenerateOneMAC})
    @param ec_id: execution context id that will hold the reservation
    @return: a newly generated MAC not present in the config

    """
    existing = self._AllMACs()
    prefix = self._UnlockedGetNetworkMACPrefix(net_uuid)
    gen_mac = self._GenerateOneMAC(prefix)
    # NOTE(review): the reservation goes through _temporary_ids rather than
    # _temporary_macs -- confirm this is intentional
    return self._temporary_ids.Generate(existing, gen_mac, ec_id)
432

    
433
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveMAC(self, mac, ec_id):
    """Reserve a MAC for an instance.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    @raise errors.ReservationError: if the MAC is already used by an
        instance in the configuration

    """
    if mac in self._AllMACs():
      raise errors.ReservationError("mac already in use")
    self._temporary_macs.Reserve(ec_id, mac)
446

    
447
  def _UnlockedCommitTemporaryIps(self, ec_id):
    """Commit all reserved IP address to their respective pools

    Reservations are stored as (action, address, net_uuid) tuples; note the
    argument order of L{_UnlockedCommitIp} is (action, net_uuid, address).

    """
    for action, address, net_uuid in self._temporary_ips.GetECReserved(ec_id):
      self._UnlockedCommitIp(action, net_uuid, address)
453

    
454
  def _UnlockedCommitIp(self, action, net_uuid, address):
    """Commit a reserved IP address to an IP pool.

    Depending on C{action}, the address is either reserved in or released
    from the network's IP pool; unknown actions are silently ignored, as
    before.

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)
    handlers = {
      constants.RESERVE_ACTION: pool.Reserve,
      constants.RELEASE_ACTION: pool.Release,
    }
    handler = handlers.get(action)
    if handler is not None:
      handler(address)
466

    
467
  def _UnlockedReleaseIp(self, net_uuid, address, ec_id):
    """Give a specific IP address back to an IP pool.

    The release is only recorded as a temporary reservation here; the pool
    itself is updated later, when the reservation is committed via
    L{_UnlockedCommitTemporaryIps}.

    """
    self._temporary_ips.Reserve(ec_id,
                                (constants.RELEASE_ACTION, address, net_uuid))
476

    
477
  @locking.ssynchronized(_config_lock, shared=1)
  def ReleaseIp(self, net_uuid, address, ec_id):
    """Give a specified IP address back to an IP pool.

    This is just a wrapper around _UnlockedReleaseIp.

    Silently does nothing when no network UUID is given.

    """
    if net_uuid:
      self._UnlockedReleaseIp(net_uuid, address, ec_id)
486

    
487
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateIp(self, net_uuid, ec_id):
    """Find a free IPv4 address for an instance.

    @raise errors.ReservationError: when the network has no free address

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)

    def _TryGenerate():
      try:
        free_ip = pool.GenerateFree()
      except errors.AddressPoolError:
        raise errors.ReservationError("Cannot generate IP. Network is full")
      return (constants.RESERVE_ACTION, free_ip, net_uuid)

    _, address, _ = self._temporary_ips.Generate([], _TryGenerate, ec_id)
    return address
504

    
505
  def _UnlockedReserveIp(self, net_uuid, address, ec_id, check=True):
    """Reserve a given IPv4 address for use by an instance.

    @param check: whether an externally reserved address should be rejected
    @raise errors.ReservationError: if the address is outside the network,
        already in use, or (with C{check}) externally reserved

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)
    try:
      in_use = pool.IsReserved(address)
      ext_reserved = pool.IsReserved(address, external=True)
    except errors.AddressPoolError:
      raise errors.ReservationError("IP address not in network")
    if in_use:
      raise errors.ReservationError("IP address already in use")
    if check and ext_reserved:
      raise errors.ReservationError("IP is externally reserved")

    return self._temporary_ips.Reserve(ec_id,
                                       (constants.RESERVE_ACTION,
                                        address, net_uuid))
524

    
525
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveIp(self, net_uuid, address, ec_id, check=True):
    """Reserve a given IPv4 address for use by an instance.

    Implicitly returns None (reserving nothing) when no network UUID is
    given.

    """
    if net_uuid:
      return self._UnlockedReserveIp(net_uuid, address, ec_id, check)
532

    
533
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveLV(self, lv_name, ec_id):
    """Reserve an VG/LV pair for an instance.

    @type lv_name: string
    @param lv_name: the logical volume name to reserve
    @raise errors.ReservationError: if the LV is already in use

    """
    if lv_name in self._AllLVs():
      raise errors.ReservationError("LV already in use")
    self._temporary_lvs.Reserve(ec_id, lv_name)
546

    
547
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateDRBDSecret(self, ec_id):
    """Generate a DRBD secret.

    This checks the current disks for duplicates.

    @param ec_id: execution context id that will hold the reservation
    @return: a new secret not used by any existing DRBD disk

    """
    return self._temporary_secrets.Generate(self._AllDRBDSecrets(),
                                            utils.GenerateSecret,
                                            ec_id)
557

    
558
  def _AllLVs(self):
559
    """Compute the list of all LVs.
560

561
    """
562
    lvnames = set()
563
    for instance in self._config_data.instances.values():
564
      node_data = instance.MapLVsByNode()
565
      for lv_list in node_data.values():
566
        lvnames.update(lv_list)
567
    return lvnames
568

    
569
  def _AllDisks(self):
570
    """Compute the list of all Disks (recursively, including children).
571

572
    """
573
    def DiskAndAllChildren(disk):
574
      """Returns a list containing the given disk and all of his children.
575

576
      """
577
      disks = [disk]
578
      if disk.children:
579
        for child_disk in disk.children:
580
          disks.extend(DiskAndAllChildren(child_disk))
581
      return disks
582

    
583
    disks = []
584
    for instance in self._config_data.instances.values():
585
      for disk in instance.disks:
586
        disks.extend(DiskAndAllChildren(disk))
587
    return disks
588

    
589
  def _AllNICs(self):
590
    """Compute the list of all NICs.
591

592
    """
593
    nics = []
594
    for instance in self._config_data.instances.values():
595
      nics.extend(instance.nics)
596
    return nics
597

    
598
  def _AllIDs(self, include_temporary):
    """Compute the list of all UUIDs and names we have.

    Collects LV names, instance and node UUIDs (the dict keys), the UUIDs
    of every other UUID-carrying object, and optionally the temporary
    reservations.

    @type include_temporary: boolean
    @param include_temporary: whether to include the _temporary_ids set
    @rtype: set
    @return: a set of IDs

    """
    existing = set()
    if include_temporary:
      existing.update(self._temporary_ids.GetReserved())
    existing.update(self._AllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
    return existing
615

    
616
  def _GenerateUniqueID(self, ec_id):
    """Generate an unique UUID.

    This checks the current node, instances and disk names for
    duplicates.

    @param ec_id: execution context id that will hold the reservation
    @rtype: string
    @return: the unique id

    """
    # temporary IDs are excluded here; Generate() below consults the
    # _temporary_ids manager itself
    existing = self._AllIDs(include_temporary=False)
    return self._temporary_ids.Generate(existing, utils.NewUUID, ec_id)
628

    
629
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateUniqueID(self, ec_id):
    """Generate an unique ID.

    This is just a wrapper over the unlocked version.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the id to

    """
    return self._GenerateUniqueID(ec_id)
640

    
641
  def _AllMACs(self):
642
    """Return all MACs present in the config.
643

644
    @rtype: list
645
    @return: the list of all MACs
646

647
    """
648
    result = []
649
    for instance in self._config_data.instances.values():
650
      for nic in instance.nics:
651
        result.append(nic.mac)
652

    
653
    return result
654

    
655
  def _AllDRBDSecrets(self):
    """Return all DRBD secrets present in the config.

    @rtype: list
    @return: the list of all DRBD secrets

    """
    def _Collect(disk, acc):
      """Recursively gather secrets from this disk and its children."""
      if disk.dev_type == constants.DT_DRBD8:
        # the secret is the sixth element of a DRBD8 logical id
        acc.append(disk.logical_id[5])
      for child in (disk.children or []):
        _Collect(child, acc)

    secrets = []
    for instance in self._config_data.instances.values():
      for disk in instance.disks:
        _Collect(disk, secrets)

    return secrets
676

    
677
  def _CheckDiskIDs(self, disk, l_ids):
678
    """Compute duplicate disk IDs
679

680
    @type disk: L{objects.Disk}
681
    @param disk: the disk at which to start searching
682
    @type l_ids: list
683
    @param l_ids: list of current logical ids
684
    @rtype: list
685
    @return: a list of error messages
686

687
    """
688
    result = []
689
    if disk.logical_id is not None:
690
      if disk.logical_id in l_ids:
691
        result.append("duplicate logical id %s" % str(disk.logical_id))
692
      else:
693
        l_ids.append(disk.logical_id)
694

    
695
    if disk.children:
696
      for child in disk.children:
697
        result.extend(self._CheckDiskIDs(child, l_ids))
698
    return result
699

    
700
  def _UnlockedVerifyConfig(self):
701
    """Verify function.
702

703
    @rtype: list
704
    @return: a list of error messages; a non-empty list signifies
705
        configuration errors
706

707
    """
708
    # pylint: disable=R0914
709
    result = []
710
    seen_macs = []
711
    ports = {}
712
    data = self._config_data
713
    cluster = data.cluster
714
    seen_lids = []
715

    
716
    # global cluster checks
717
    if not cluster.enabled_hypervisors:
718
      result.append("enabled hypervisors list doesn't have any entries")
719
    invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
720
    if invalid_hvs:
721
      result.append("enabled hypervisors contains invalid entries: %s" %
722
                    utils.CommaJoin(invalid_hvs))
723
    missing_hvp = (set(cluster.enabled_hypervisors) -
724
                   set(cluster.hvparams.keys()))
725
    if missing_hvp:
726
      result.append("hypervisor parameters missing for the enabled"
727
                    " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
728

    
729
    if not cluster.enabled_disk_templates:
730
      result.append("enabled disk templates list doesn't have any entries")
731
    invalid_disk_templates = set(cluster.enabled_disk_templates) \
732
                               - constants.DISK_TEMPLATES
733
    if invalid_disk_templates:
734
      result.append("enabled disk templates list contains invalid entries:"
735
                    " %s" % utils.CommaJoin(invalid_disk_templates))
736

    
737
    if cluster.master_node not in data.nodes:
738
      result.append("cluster has invalid primary node '%s'" %
739
                    cluster.master_node)
740

    
741
    def _helper(owner, attr, value, template):
742
      try:
743
        utils.ForceDictType(value, template)
744
      except errors.GenericError, err:
745
        result.append("%s has invalid %s: %s" % (owner, attr, err))
746

    
747
    def _helper_nic(owner, params):
748
      try:
749
        objects.NIC.CheckParameterSyntax(params)
750
      except errors.ConfigurationError, err:
751
        result.append("%s has invalid nicparams: %s" % (owner, err))
752

    
753
    def _helper_ipolicy(owner, ipolicy, iscluster):
754
      try:
755
        objects.InstancePolicy.CheckParameterSyntax(ipolicy, iscluster)
756
      except errors.ConfigurationError, err:
757
        result.append("%s has invalid instance policy: %s" % (owner, err))
758
      for key, value in ipolicy.items():
759
        if key == constants.ISPECS_MINMAX:
760
          for k in range(len(value)):
761
            _helper_ispecs(owner, "ipolicy/%s[%s]" % (key, k), value[k])
762
        elif key == constants.ISPECS_STD:
763
          _helper(owner, "ipolicy/" + key, value,
764
                  constants.ISPECS_PARAMETER_TYPES)
765
        else:
766
          # FIXME: assuming list type
767
          if key in constants.IPOLICY_PARAMETERS:
768
            exp_type = float
769
          else:
770
            exp_type = list
771
          if not isinstance(value, exp_type):
772
            result.append("%s has invalid instance policy: for %s,"
773
                          " expecting %s, got %s" %
774
                          (owner, key, exp_type.__name__, type(value)))
775

    
776
    def _helper_ispecs(owner, parentkey, params):
777
      for (key, value) in params.items():
778
        fullkey = "/".join([parentkey, key])
779
        _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)
780

    
781
    # check cluster parameters
782
    _helper("cluster", "beparams", cluster.SimpleFillBE({}),
783
            constants.BES_PARAMETER_TYPES)
784
    _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
785
            constants.NICS_PARAMETER_TYPES)
786
    _helper_nic("cluster", cluster.SimpleFillNIC({}))
787
    _helper("cluster", "ndparams", cluster.SimpleFillND({}),
788
            constants.NDS_PARAMETER_TYPES)
789
    _helper_ipolicy("cluster", cluster.ipolicy, True)
790

    
791
    for disk_template in cluster.diskparams:
792
      if disk_template not in constants.DTS_HAVE_ACCESS:
793
        continue
794

    
795
      access = cluster.diskparams[disk_template].get(constants.LDP_ACCESS,
796
                                                     constants.DISK_KERNELSPACE)
797
      if access not in constants.DISK_VALID_ACCESS_MODES:
798
        result.append(
799
          "Invalid value of '%s:%s': '%s' (expected one of %s)" % (
800
            disk_template, constants.LDP_ACCESS, access,
801
            utils.CommaJoin(constants.DISK_VALID_ACCESS_MODES)
802
          )
803
        )
804

    
805
    # per-instance checks
806
    for instance_uuid in data.instances:
807
      instance = data.instances[instance_uuid]
808
      if instance.uuid != instance_uuid:
809
        result.append("instance '%s' is indexed by wrong UUID '%s'" %
810
                      (instance.name, instance_uuid))
811
      if instance.primary_node not in data.nodes:
812
        result.append("instance '%s' has invalid primary node '%s'" %
813
                      (instance.name, instance.primary_node))
814
      for snode in self._UnlockedGetInstanceSecondaryNodes(instance):
815
        if snode not in data.nodes:
816
          result.append("instance '%s' has invalid secondary node '%s'" %
817
                        (instance.name, snode))
818
      for idx, nic in enumerate(instance.nics):
819
        if nic.mac in seen_macs:
820
          result.append("instance '%s' has NIC %d mac %s duplicate" %
821
                        (instance.name, idx, nic.mac))
822
        else:
823
          seen_macs.append(nic.mac)
824
        if nic.nicparams:
825
          filled = cluster.SimpleFillNIC(nic.nicparams)
826
          owner = "instance %s nic %d" % (instance.name, idx)
827
          _helper(owner, "nicparams",
828
                  filled, constants.NICS_PARAMETER_TYPES)
829
          _helper_nic(owner, filled)
830

    
831
      # disk template checks
832
      if not instance.disk_template in data.cluster.enabled_disk_templates:
833
        result.append("instance '%s' uses the disabled disk template '%s'." %
834
                      (instance.name, instance.disk_template))
835

    
836
      # parameter checks
837
      if instance.beparams:
838
        _helper("instance %s" % instance.name, "beparams",
839
                cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)
840

    
841
      # gather the drbd ports for duplicate checks
842
      for (idx, dsk) in enumerate(instance.disks):
843
        if dsk.dev_type in constants.DTS_DRBD:
844
          tcp_port = dsk.logical_id[2]
845
          if tcp_port not in ports:
846
            ports[tcp_port] = []
847
          ports[tcp_port].append((instance.name, "drbd disk %s" % idx))
848
      # gather network port reservation
849
      net_port = getattr(instance, "network_port", None)
850
      if net_port is not None:
851
        if net_port not in ports:
852
          ports[net_port] = []
853
        ports[net_port].append((instance.name, "network port"))
854

    
855
      # instance disk verify
856
      for idx, disk in enumerate(instance.disks):
857
        result.extend(["instance '%s' disk %d error: %s" %
858
                       (instance.name, idx, msg) for msg in disk.Verify()])
859
        result.extend(self._CheckDiskIDs(disk, seen_lids))
860

    
861
      wrong_names = _CheckInstanceDiskIvNames(instance.disks)
862
      if wrong_names:
863
        tmp = "; ".join(("name of disk %s should be '%s', but is '%s'" %
864
                         (idx, exp_name, actual_name))
865
                        for (idx, exp_name, actual_name) in wrong_names)
866

    
867
        result.append("Instance '%s' has wrongly named disks: %s" %
868
                      (instance.name, tmp))
869

    
870
    # cluster-wide pool of free ports
871
    for free_port in cluster.tcpudp_port_pool:
872
      if free_port not in ports:
873
        ports[free_port] = []
874
      ports[free_port].append(("cluster", "port marked as free"))
875

    
876
    # compute tcp/udp duplicate ports
877
    keys = ports.keys()
878
    keys.sort()
879
    for pnum in keys:
880
      pdata = ports[pnum]
881
      if len(pdata) > 1:
882
        txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
883
        result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
884

    
885
    # highest used tcp port check
886
    if keys:
887
      if keys[-1] > cluster.highest_used_port:
888
        result.append("Highest used port mismatch, saved %s, computed %s" %
889
                      (cluster.highest_used_port, keys[-1]))
890

    
891
    if not data.nodes[cluster.master_node].master_candidate:
892
      result.append("Master node is not a master candidate")
893

    
894
    # master candidate checks
895
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
896
    if mc_now < mc_max:
897
      result.append("Not enough master candidates: actual %d, target %d" %
898
                    (mc_now, mc_max))
899

    
900
    # node checks
901
    for node_uuid, node in data.nodes.items():
902
      if node.uuid != node_uuid:
903
        result.append("Node '%s' is indexed by wrong UUID '%s'" %
904
                      (node.name, node_uuid))
905
      if [node.master_candidate, node.drained, node.offline].count(True) > 1:
906
        result.append("Node %s state is invalid: master_candidate=%s,"
907
                      " drain=%s, offline=%s" %
908
                      (node.name, node.master_candidate, node.drained,
909
                       node.offline))
910
      if node.group not in data.nodegroups:
911
        result.append("Node '%s' has invalid group '%s'" %
912
                      (node.name, node.group))
913
      else:
914
        _helper("node %s" % node.name, "ndparams",
915
                cluster.FillND(node, data.nodegroups[node.group]),
916
                constants.NDS_PARAMETER_TYPES)
917
      used_globals = constants.NDC_GLOBALS.intersection(node.ndparams)
918
      if used_globals:
919
        result.append("Node '%s' has some global parameters set: %s" %
920
                      (node.name, utils.CommaJoin(used_globals)))
921

    
922
    # nodegroups checks
923
    nodegroups_names = set()
924
    for nodegroup_uuid in data.nodegroups:
925
      nodegroup = data.nodegroups[nodegroup_uuid]
926
      if nodegroup.uuid != nodegroup_uuid:
927
        result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
928
                      % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
929
      if utils.UUID_RE.match(nodegroup.name.lower()):
930
        result.append("node group '%s' (uuid: '%s') has uuid-like name" %
931
                      (nodegroup.name, nodegroup.uuid))
932
      if nodegroup.name in nodegroups_names:
933
        result.append("duplicate node group name '%s'" % nodegroup.name)
934
      else:
935
        nodegroups_names.add(nodegroup.name)
936
      group_name = "group %s" % nodegroup.name
937
      _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy),
938
                      False)
939
      if nodegroup.ndparams:
940
        _helper(group_name, "ndparams",
941
                cluster.SimpleFillND(nodegroup.ndparams),
942
                constants.NDS_PARAMETER_TYPES)
943

    
944
    # drbd minors check
945
    _, duplicates = self._UnlockedComputeDRBDMap()
946
    for node, minor, instance_a, instance_b in duplicates:
947
      result.append("DRBD minor %d on node %s is assigned twice to instances"
948
                    " %s and %s" % (minor, node, instance_a, instance_b))
949

    
950
    # IP checks
951
    default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
952
    ips = {}
953

    
954
    def _AddIpAddress(ip, name):
955
      ips.setdefault(ip, []).append(name)
956

    
957
    _AddIpAddress(cluster.master_ip, "cluster_ip")
958

    
959
    for node in data.nodes.values():
960
      _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
961
      if node.secondary_ip != node.primary_ip:
962
        _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
963

    
964
    for instance in data.instances.values():
965
      for idx, nic in enumerate(instance.nics):
966
        if nic.ip is None:
967
          continue
968

    
969
        nicparams = objects.FillDict(default_nicparams, nic.nicparams)
970
        nic_mode = nicparams[constants.NIC_MODE]
971
        nic_link = nicparams[constants.NIC_LINK]
972

    
973
        if nic_mode == constants.NIC_MODE_BRIDGED:
974
          link = "bridge:%s" % nic_link
975
        elif nic_mode == constants.NIC_MODE_ROUTED:
976
          link = "route:%s" % nic_link
977
        else:
978
          raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
979

    
980
        _AddIpAddress("%s/%s/%s" % (link, nic.ip, nic.network),
981
                      "instance:%s/nic:%d" % (instance.name, idx))
982

    
983
    for ip, owners in ips.items():
984
      if len(owners) > 1:
985
        result.append("IP address %s is used by multiple owners: %s" %
986
                      (ip, utils.CommaJoin(owners)))
987

    
988
    return result
989

    
990
  @locking.ssynchronized(_config_lock, shared=1)
  def VerifyConfig(self):
    """Verify the configuration and report any problems.

    This is just a wrapper over L{_UnlockedVerifyConfig}.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    verify_errors = self._UnlockedVerifyConfig()
    return verify_errors
1002

    
1003
  @locking.ssynchronized(_config_lock)
  def AddTcpUdpPort(self, port):
    """Return a TCP/UDP port to the cluster's pool of free ports.

    @type port: int
    @param port: the port number to add to the pool
    @warning: this method does not "flush" the configuration (via
        L{_WriteConfig}); callers should do that themselves once the
        configuration is stable

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    port_pool = self._config_data.cluster.tcpudp_port_pool
    port_pool.add(port)
1016

    
1017
  @locking.ssynchronized(_config_lock, shared=1)
  def GetPortList(self):
    """Return a copy of the pool of free TCP/UDP ports.

    """
    port_pool = self._config_data.cluster.tcpudp_port_pool
    return port_pool.copy()
1023

    
1024
  @locking.ssynchronized(_config_lock)
  def AllocatePort(self):
    """Allocate a TCP/UDP port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    @rtype: int
    @return: the allocated port number
    @raise errors.ConfigurationError: if the port range is exhausted

    """
    cluster = self._config_data.cluster
    if cluster.tcpudp_port_pool:
      # Prefer reusing a previously released port.
      port = cluster.tcpudp_port_pool.pop()
    else:
      port = cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      cluster.highest_used_port = port

    self._WriteConfig()
    return port
1046

    
1047
  def _UnlockedComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    @rtype: (dict, list)
    @return: dictionary of node_uuid: dict of minor: instance_uuid;
        the returned dict will have all the nodes in it (even if with
        an empty list), and a list of duplicates; if the duplicates
        list is not empty, the configuration is corrupted and its caller
        should raise an exception

    """
    def _AppendUsedMinors(get_node_name_fn, instance, disk, used):
      # Recursively record every DRBD minor used by 'disk' (and its
      # children) in 'used', returning any duplicate assignments found.
      duplicates = []
      if disk.dev_type == constants.DT_DRBD8 and len(disk.logical_id) >= 5:
        node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
        for node_uuid, minor in ((node_a, minor_a), (node_b, minor_b)):
          assert node_uuid in used, \
            ("Node '%s' of instance '%s' not found in node list" %
             (get_node_name_fn(node_uuid), instance.name))
          if minor in used[node_uuid]:
            duplicates.append((node_uuid, minor, instance.uuid,
                               used[node_uuid][minor]))
          else:
            used[node_uuid][minor] = instance.uuid
      if disk.children:
        for child in disk.children:
          duplicates.extend(_AppendUsedMinors(get_node_name_fn, instance, child,
                                              used))
      return duplicates

    duplicates = []
    # Pre-populate an (empty) entry for every known node, so the assert
    # above can rely on the node key being present.
    my_dict = dict((node_uuid, {}) for node_uuid in self._config_data.nodes)
    for instance in self._config_data.instances.itervalues():
      for disk in instance.disks:
        duplicates.extend(_AppendUsedMinors(self._UnlockedGetNodeName,
                                            instance, disk, my_dict))
    # Also merge in the minors that are only reserved (not yet committed
    # to the configuration); a reservation clashing with a committed
    # minor of a different instance is a duplicate too.
    for (node_uuid, minor), inst_uuid in self._temporary_drbds.iteritems():
      if minor in my_dict[node_uuid] and my_dict[node_uuid][minor] != inst_uuid:
        duplicates.append((node_uuid, minor, inst_uuid,
                           my_dict[node_uuid][minor]))
      else:
        my_dict[node_uuid][minor] = inst_uuid
    return my_dict, duplicates
1090

    
1091
  @locking.ssynchronized(_config_lock)
  def ComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    This is just a wrapper over L{_UnlockedComputeDRBDMap}; unlike the
    unlocked variant, it treats duplicates as a fatal error.

    @return: dictionary of node_uuid: dict of minor: instance_uuid;
        the returned dict will have all the nodes in it (even if with
        an empty list).
    @raise errors.ConfigurationError: if duplicate minors are found

    """
    (minor_map, duplicates) = self._UnlockedComputeDRBDMap()
    if duplicates:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(duplicates))
    return minor_map
1107

    
1108
  @locking.ssynchronized(_config_lock)
  def AllocateDRBDMinor(self, node_uuids, inst_uuid):
    """Allocate a drbd minor.

    The free minor will be automatically computed from the existing
    devices. A node can be given multiple times in order to allocate
    multiple minors. The result is the list of minors, in the same
    order as the passed nodes.

    @type node_uuids: list of strings
    @param node_uuids: the nodes (possibly repeated) on which to allocate
    @type inst_uuid: string
    @param inst_uuid: the instance for which we allocate minors
    @rtype: list of ints
    @return: one allocated minor per entry of C{node_uuids}, in order
    @raise errors.ConfigurationError: if the configuration already
        contains duplicate DRBD minors

    """
    assert isinstance(inst_uuid, basestring), \
           "Invalid argument '%s' passed to AllocateDRBDMinor" % inst_uuid

    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(duplicates))
    result = []
    for nuuid in node_uuids:
      ndata = d_map[nuuid]
      if not ndata:
        # no minors used, we can start at 0
        result.append(0)
        ndata[0] = inst_uuid
        self._temporary_drbds[(nuuid, 0)] = inst_uuid
        continue
      keys = ndata.keys()
      keys.sort()
      # look for the first gap in the sorted list of used minors
      ffree = utils.FirstFree(keys)
      if ffree is None:
        # return the next minor
        # TODO: implement high-limit check
        minor = keys[-1] + 1
      else:
        minor = ffree
      # double-check minor against current instances
      assert minor not in d_map[nuuid], \
             ("Attempt to reuse allocated DRBD minor %d on node %s,"
              " already allocated to instance %s" %
              (minor, nuuid, d_map[nuuid][minor]))
      ndata[minor] = inst_uuid
      # double-check minor against reservation
      r_key = (nuuid, minor)
      assert r_key not in self._temporary_drbds, \
             ("Attempt to reuse reserved DRBD minor %d on node %s,"
              " reserved for instance %s" %
              (minor, nuuid, self._temporary_drbds[r_key]))
      self._temporary_drbds[r_key] = inst_uuid
      result.append(minor)
    logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                  node_uuids, result)
    return result
1163

    
1164
  def _UnlockedReleaseDRBDMinors(self, inst_uuid):
    """Release temporary drbd minors allocated for a given instance.

    @type inst_uuid: string
    @param inst_uuid: the instance for which temporary minors should be
                      released

    """
    assert isinstance(inst_uuid, basestring), \
           "Invalid argument passed to ReleaseDRBDMinors"
    # Collect the matching reservations first, then drop them, so the
    # mapping is not mutated while being inspected.
    stale_keys = [key for (key, uuid) in self._temporary_drbds.items()
                  if uuid == inst_uuid]
    for key in stale_keys:
      del self._temporary_drbds[key]
1177

    
1178
  @locking.ssynchronized(_config_lock)
  def ReleaseDRBDMinors(self, inst_uuid):
    """Release temporary drbd minors allocated for a given instance.

    This should be called on the error paths, on the success paths
    it's automatically called by the ConfigWriter add and update
    functions.

    This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.

    @type inst_uuid: string
    @param inst_uuid: the instance for which temporary minors should be
                      released

    """
    return self._UnlockedReleaseDRBDMinors(inst_uuid)
1194

    
1195
  @locking.ssynchronized(_config_lock, shared=1)
  def GetConfigVersion(self):
    """Retrieve the version of the current configuration.

    @return: Config version

    """
    data = self._config_data
    return data.version
1203

    
1204
  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterName(self):
    """Retrieve the name of this cluster.

    @return: Cluster name

    """
    cluster = self._config_data.cluster
    return cluster.cluster_name
1212

    
1213
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNode(self):
    """Retrieve the UUID of this cluster's master node.

    @return: Master node UUID

    """
    cluster = self._config_data.cluster
    return cluster.master_node
1221

    
1222
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNodeName(self):
    """Retrieve the hostname of this cluster's master node.

    @return: Master node hostname

    """
    master_uuid = self._config_data.cluster.master_node
    return self._UnlockedGetNodeName(master_uuid)
1230

    
1231
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNodeInfo(self):
    """Retrieve the node object of this cluster's master node.

    @rtype: objects.Node
    @return: Master node L{objects.Node} object

    """
    master_uuid = self._config_data.cluster.master_node
    return self._UnlockedGetNodeInfo(master_uuid)
1240

    
1241
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterIP(self):
    """Retrieve the IP address of this cluster's master node.

    @return: Master IP

    """
    cluster = self._config_data.cluster
    return cluster.master_ip
1249

    
1250
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetdev(self):
    """Retrieve the network device used by the cluster's master IP.

    """
    cluster = self._config_data.cluster
    return cluster.master_netdev
1256

    
1257
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetmask(self):
    """Retrieve the netmask used by the cluster's master IP.

    """
    cluster = self._config_data.cluster
    return cluster.master_netmask
1263

    
1264
  @locking.ssynchronized(_config_lock, shared=1)
  def GetUseExternalMipScript(self):
    """Get flag representing whether to use the external master IP setup script.

    """
    cluster = self._config_data.cluster
    return cluster.use_external_mip_script
1270

    
1271
  @locking.ssynchronized(_config_lock, shared=1)
  def GetFileStorageDir(self):
    """Retrieve the cluster's file storage directory.

    """
    cluster = self._config_data.cluster
    return cluster.file_storage_dir
1277

    
1278
  @locking.ssynchronized(_config_lock, shared=1)
  def GetSharedFileStorageDir(self):
    """Retrieve the cluster's shared file storage directory.

    """
    cluster = self._config_data.cluster
    return cluster.shared_file_storage_dir
1284

    
1285
  @locking.ssynchronized(_config_lock, shared=1)
  def GetGlusterStorageDir(self):
    """Retrieve the cluster's Gluster storage directory.

    """
    cluster = self._config_data.cluster
    return cluster.gluster_storage_dir
1291

    
1292
  @locking.ssynchronized(_config_lock, shared=1)
  def GetHypervisorType(self):
    """Retrieve the cluster's default hypervisor type.

    """
    enabled = self._config_data.cluster.enabled_hypervisors
    # The first enabled hypervisor is the cluster-wide default.
    return enabled[0]
1298

    
1299
  @locking.ssynchronized(_config_lock, shared=1)
  def GetRsaHostKey(self):
    """Retrieve the cluster's public RSA host key.

    @rtype: string
    @return: the rsa hostkey

    """
    cluster = self._config_data.cluster
    return cluster.rsahostkeypub
1308

    
1309
  @locking.ssynchronized(_config_lock, shared=1)
  def GetDsaHostKey(self):
    """Retrieve the cluster's public DSA host key.

    @rtype: string
    @return: the dsa hostkey

    """
    cluster = self._config_data.cluster
    return cluster.dsahostkeypub
1318

    
1319
  @locking.ssynchronized(_config_lock, shared=1)
  def GetDefaultIAllocator(self):
    """Retrieve the cluster's default instance allocator.

    """
    cluster = self._config_data.cluster
    return cluster.default_iallocator
1325

    
1326
  @locking.ssynchronized(_config_lock, shared=1)
  def GetDefaultIAllocatorParameters(self):
    """Retrieve the parameters of the cluster's default instance allocator.

    @rtype: dict
    @return: dict of iallocator parameters

    """
    cluster = self._config_data.cluster
    return cluster.default_iallocator_params
1335

    
1336
  @locking.ssynchronized(_config_lock, shared=1)
  def GetPrimaryIPFamily(self):
    """Retrieve the cluster's primary IP family.

    @return: primary ip family

    """
    cluster = self._config_data.cluster
    return cluster.primary_ip_family
1344

    
1345
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetworkParameters(self):
    """Collect the network parameters of the master node.

    @rtype: L{object.MasterNetworkParameters}
    @return: network parameters of the master node

    """
    cluster = self._config_data.cluster
    return objects.MasterNetworkParameters(
      uuid=cluster.master_node, ip=cluster.master_ip,
      netmask=cluster.master_netmask, netdev=cluster.master_netdev,
      ip_family=cluster.primary_ip_family)
1360

    
1361
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceCommunicationNetwork(self):
    """Retrieve the cluster's instance communication network.

    @rtype: string
    @return: instance communication network, which is the name of the
             network used for instance communication

    """
    cluster = self._config_data.cluster
    return cluster.instance_communication_network
1371

    
1372
  @locking.ssynchronized(_config_lock)
  def AddNodeGroup(self, group, ec_id, check_uuid=True):
    """Add a node group to the configuration and persist it.

    This method calls group.UpgradeConfig() to fill any missing attributes
    according to their default values.

    @type group: L{objects.NodeGroup}
    @param group: the NodeGroup object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: bool
    @param check_uuid: add an UUID to the group if it doesn't have one or, if
                       it does, ensure that it does not exist in the
                       configuration already

    """
    self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
    self._WriteConfig()
1391

    
1392
  def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
    """Add a node group to the configuration.

    @type group: L{objects.NodeGroup}
    @param group: the NodeGroup object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: bool
    @param check_uuid: add an UUID to the group if it doesn't have one or, if
                       it does, ensure that it does not exist in the
                       configuration already
    @raise errors.OpPrereqError: if a node group with the same name
        already exists

    """
    logging.info("Adding node group %s to configuration", group.name)

    # Some code might need to add a node group with a pre-populated UUID
    # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
    # the "does this UUID" exist already check.
    if check_uuid:
      self._EnsureUUID(group, ec_id)

    # A failed lookup is the good case here: it means no group with this
    # name exists yet; a successful one means a duplicate name.
    try:
      existing_uuid = self._UnlockedLookupNodeGroup(group.name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (group.name, existing_uuid),
                                 errors.ECODE_EXISTS)

    group.serial_no = 1
    group.ctime = group.mtime = time.time()
    # Fill any missing attributes with their defaults.
    group.UpgradeConfig()

    self._config_data.nodegroups[group.uuid] = group
    self._config_data.cluster.serial_no += 1
1420

    
1421
  @locking.ssynchronized(_config_lock)
  def RemoveNodeGroup(self, group_uuid):
    """Remove a node group from the configuration.

    @type group_uuid: string
    @param group_uuid: the UUID of the node group to remove
    @raise errors.ConfigurationError: if the group is unknown

    """
    logging.info("Removing node group %s from configuration", group_uuid)

    nodegroups = self._config_data.nodegroups
    if group_uuid not in nodegroups:
      raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)

    # Removing the last group would leave the cluster without any group.
    assert len(nodegroups) != 1, \
            "Group '%s' is the only group, cannot be removed" % group_uuid

    del nodegroups[group_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
1440

    
1441
  def _UnlockedLookupNodeGroup(self, target):
1442
    """Lookup a node group's UUID.
1443

1444
    @type target: string or None
1445
    @param target: group name or UUID or None to look for the default
1446
    @rtype: string
1447
    @return: nodegroup UUID
1448
    @raises errors.OpPrereqError: when the target group cannot be found
1449

1450
    """
1451
    if target is None:
1452
      if len(self._config_data.nodegroups) != 1:
1453
        raise errors.OpPrereqError("More than one node group exists. Target"
1454
                                   " group must be specified explicitly.")
1455
      else:
1456
        return self._config_data.nodegroups.keys()[0]
1457
    if target in self._config_data.nodegroups:
1458
      return target
1459
    for nodegroup in self._config_data.nodegroups.values():
1460
      if nodegroup.name == target:
1461
        return nodegroup.uuid
1462
    raise errors.OpPrereqError("Node group '%s' not found" % target,
1463
                               errors.ECODE_NOENT)
1464

    
1465
  @locking.ssynchronized(_config_lock, shared=1)
  def LookupNodeGroup(self, target):
    """Lookup a node group's UUID.

    This function is just a wrapper over L{_UnlockedLookupNodeGroup}.

    @type target: string or None
    @param target: group name or UUID or None to look for the default
    @rtype: string
    @return: nodegroup UUID

    """
    group_uuid = self._UnlockedLookupNodeGroup(target)
    return group_uuid
1478

    
1479
  def _UnlockedGetNodeGroup(self, uuid):
1480
    """Lookup a node group.
1481

1482
    @type uuid: string
1483
    @param uuid: group UUID
1484
    @rtype: L{objects.NodeGroup} or None
1485
    @return: nodegroup object, or None if not found
1486

1487
    """
1488
    if uuid not in self._config_data.nodegroups:
1489
      return None
1490

    
1491
    return self._config_data.nodegroups[uuid]
1492

    
1493
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroup(self, uuid):
    """Lookup a node group by UUID.

    This is just a locked wrapper over L{_UnlockedGetNodeGroup}.

    @type uuid: string
    @param uuid: group UUID
    @rtype: L{objects.NodeGroup} or None
    @return: nodegroup object, or None if not found

    """
    group = self._UnlockedGetNodeGroup(uuid)
    return group
1504

    
1505
  def _UnlockedGetAllNodeGroupsInfo(self):
1506
    """Get the configuration of all node groups.
1507

1508
    """
1509
    return dict(self._config_data.nodegroups)
1510

    
1511
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNodeGroupsInfo(self):
    """Get the configuration of all node groups.

    This is just a locked wrapper over L{_UnlockedGetAllNodeGroupsInfo}.

    """
    groups = self._UnlockedGetAllNodeGroupsInfo()
    return groups
1517

    
1518
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNodeGroupsInfoDict(self):
    """Get the configuration of all node groups expressed as a dictionary of
    dictionaries.

    """
    return dict((uuid, ng.ToDict())
                for (uuid, ng) in self._UnlockedGetAllNodeGroupsInfo().items())
1526

    
1527
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupList(self):
    """Get the list of node group UUIDs.

    """
    nodegroups = self._config_data.nodegroups
    return nodegroups.keys()
1533

    
1534
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupMembersByNodes(self, nodes):
    """Get nodes which are member in the same nodegroups as the given nodes.

    @rtype: frozenset
    @return: the union of the member lists of all groups containing one
        of the given nodes

    """
    members = set()
    for node_uuid in nodes:
      group_uuid = self._UnlockedGetNodeInfo(node_uuid).group
      members.update(self._UnlockedGetNodeGroup(group_uuid).members)
    return frozenset(members)
1544

    
1545
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMultiNodeGroupInfo(self, group_uuids):
    """Get the configuration of multiple node groups.

    @param group_uuids: List of node group UUIDs
    @rtype: list
    @return: List of tuples of (group_uuid, group_info)

    """
    return [(group_uuid, self._UnlockedGetNodeGroup(group_uuid))
            for group_uuid in group_uuids]
1555

    
1556
  @locking.ssynchronized(_config_lock)
  def AddInstance(self, instance, ec_id):
    """Add an instance to the config.

    This should be used after creating a new instance.

    @type instance: L{objects.Instance}
    @param instance: the instance object
    @type ec_id: string
    @param ec_id: execution context id, used to commit the IP addresses
        temporarily reserved for this instance
    @raise errors.ProgrammerError: if C{instance} is not an
        L{objects.Instance}
    @raise errors.ConfigurationError: if a NIC MAC address or the
        instance UUID is already in use

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
      all_lvs = instance.MapLVsByNode()
      logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)

    # Refuse the instance if any of its MAC addresses is already taken.
    all_macs = self._AllMACs()
    for nic in instance.nics:
      if nic.mac in all_macs:
        raise errors.ConfigurationError("Cannot add instance %s:"
                                        " MAC address '%s' already in use." %
                                        (instance.name, nic.mac))

    # The instance's own UUID is a temporary reservation at this point,
    # so it must be excluded from the uniqueness check.
    self._CheckUniqueUUID(instance, include_temporary=False)

    instance.serial_no = 1
    instance.ctime = instance.mtime = time.time()
    self._config_data.instances[instance.uuid] = instance
    self._config_data.cluster.serial_no += 1
    # The reserved DRBD minors are now part of the configuration, so the
    # temporary reservations can be dropped.
    self._UnlockedReleaseDRBDMinors(instance.uuid)
    self._UnlockedCommitTemporaryIps(ec_id)
    self._WriteConfig()
1589

    
1590
  def _EnsureUUID(self, item, ec_id):
1591
    """Ensures a given object has a valid UUID.
1592

1593
    @param item: the instance or node to be checked
1594
    @param ec_id: the execution context id for the uuid reservation
1595

1596
    """
1597
    if not item.uuid:
1598
      item.uuid = self._GenerateUniqueID(ec_id)
1599
    else:
1600
      self._CheckUniqueUUID(item, include_temporary=True)
1601

    
1602
  def _CheckUniqueUUID(self, item, include_temporary):
1603
    """Checks that the UUID of the given object is unique.
1604

1605
    @param item: the instance or node to be checked
1606
    @param include_temporary: whether temporarily generated UUID's should be
1607
              included in the check. If the UUID of the item to be checked is
1608
              a temporarily generated one, this has to be C{False}.
1609

1610
    """
1611
    if not item.uuid:
1612
      raise errors.ConfigurationError("'%s' must have an UUID" % (item.name,))
1613
    if item.uuid in self._AllIDs(include_temporary=include_temporary):
1614
      raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
1615
                                      " in use" % (item.name, item.uuid))
1616

    
1617
  def _SetInstanceStatus(self, inst_uuid, status, disks_active):
    """Set the instance's status to a given value.

    @type inst_uuid: string
    @param inst_uuid: the UUID of the instance to modify
    @param status: the new admin state, or None to keep the current one
    @param disks_active: the new disks_active flag, or None to keep the
        current one
    @raise errors.ConfigurationError: if the instance is unknown

    """
    if inst_uuid not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      inst_uuid)
    instance = self._config_data.instances[inst_uuid]

    # None means "leave this field unchanged"
    if status is None:
      status = instance.admin_state
    if disks_active is None:
      disks_active = instance.disks_active

    assert status in constants.ADMINST_ALL, \
           "Invalid status '%s' passed to SetInstanceStatus" % (status,)

    # Only bump the serial number and rewrite the configuration when
    # something actually changed.
    if instance.admin_state != status or \
       instance.disks_active != disks_active:
      instance.admin_state = status
      instance.disks_active = disks_active
      instance.serial_no += 1
      instance.mtime = time.time()
      self._WriteConfig()
1641

    
1642
  @locking.ssynchronized(_config_lock)
  def MarkInstanceUp(self, inst_uuid):
    """Mark the instance status to up in the config.

    This also sets the instance disks active flag.

    @type inst_uuid: string
    @param inst_uuid: the UUID of the instance to mark as up

    """
    self._SetInstanceStatus(inst_uuid, constants.ADMINST_UP, True)
1650

    
1651
  @locking.ssynchronized(_config_lock)
  def MarkInstanceOffline(self, inst_uuid):
    """Mark the instance status to down in the config.

    This also clears the instance disks active flag.

    @type inst_uuid: string
    @param inst_uuid: the UUID of the instance to mark as offline

    """
    self._SetInstanceStatus(inst_uuid, constants.ADMINST_OFFLINE, False)
1659

    
1660
  @locking.ssynchronized(_config_lock)
  def RemoveInstance(self, inst_uuid):
    """Remove the instance from the configuration.

    Before deleting the entry, the instance's network port and NIC IP
    addresses are returned to their respective pools.

    @type inst_uuid: string
    @param inst_uuid: the UUID of the instance to remove
    @raise errors.ConfigurationError: if the instance is unknown

    """
    if inst_uuid not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    # If a network port has been allocated to the instance,
    # return it to the pool of free ports.
    inst = self._config_data.instances[inst_uuid]
    network_port = getattr(inst, "network_port", None)
    if network_port is not None:
      self._config_data.cluster.tcpudp_port_pool.add(network_port)

    instance = self._UnlockedGetInstanceInfo(inst_uuid)

    for nic in instance.nics:
      if nic.network and nic.ip:
        # Return all IP addresses to the respective address pools
        self._UnlockedCommitIp(constants.RELEASE_ACTION, nic.network, nic.ip)

    del self._config_data.instances[inst_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
1685

    
1686
  @locking.ssynchronized(_config_lock)
  def RenameInstance(self, inst_uuid, new_name):
    """Rename an instance.

    This needs to be done in ConfigWriter and not by RemoveInstance
    combined with AddInstance as only we can guarantee an atomic
    rename.

    @param inst_uuid: UUID of the instance to rename
    @param new_name: the new instance name
    @raise errors.ConfigurationError: if the instance is unknown

    """
    if inst_uuid not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    inst = self._config_data.instances[inst_uuid]
    inst.name = new_name

    # The previous "enumerate" discarded its index; iterate directly.
    for disk in inst.disks:
      if disk.dev_type in [constants.DT_FILE, constants.DT_SHARED_FILE]:
        # rename the file paths in logical and physical id
        file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
        disk.logical_id = (disk.logical_id[0],
                           utils.PathJoin(file_storage_dir, inst.name,
                                          os.path.basename(disk.logical_id[1])))

    # Force update of ssconf files
    self._config_data.cluster.serial_no += 1

    self._WriteConfig()
  @locking.ssynchronized(_config_lock)
  def MarkInstanceDown(self, inst_uuid):
    """Set an instance's administrative state to "down".

    The disks active flag is left untouched, as shut down instances
    can still have active disks.

    @param inst_uuid: UUID of the instance

    """
    self._SetInstanceStatus(inst_uuid, constants.ADMINST_DOWN, None)
  @locking.ssynchronized(_config_lock)
  def MarkInstanceDisksActive(self, inst_uuid):
    """Flag an instance's disks as active, leaving the admin state alone.

    @param inst_uuid: UUID of the instance

    """
    self._SetInstanceStatus(inst_uuid, None, True)
  @locking.ssynchronized(_config_lock)
  def MarkInstanceDisksInactive(self, inst_uuid):
    """Flag an instance's disks as inactive, leaving the admin state alone.

    @param inst_uuid: UUID of the instance

    """
    self._SetInstanceStatus(inst_uuid, None, False)
  def _UnlockedGetInstanceList(self):
1739
    """Get the list of instances.
1740

1741
    This function is for internal use, when the config lock is already held.
1742

1743
    """
1744
    return self._config_data.instances.keys()
1745

    
1746
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceList(self):
    """Get the list of instances.

    @return: array of instances, ex. ['instance2-uuid', 'instance1-uuid']

    """
    uuids = self._UnlockedGetInstanceList()
    return uuids
  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    @return: tuple of (UUID, full name), or (None, None) when there is
        no unambiguous match

    """
    # Locking is done in L{ConfigWriter.GetAllInstancesInfo}
    all_insts = self.GetAllInstancesInfo().values()
    expanded_name = _MatchNameComponentIgnoreCase(
                      short_name, [inst.name for inst in all_insts])

    if expanded_name is None:
      return (None, None)

    # there has to be exactly one instance with that name
    inst = [n for n in all_insts if n.name == expanded_name][0]
    return (inst.uuid, inst.name)
  def _UnlockedGetInstanceInfo(self, inst_uuid):
1772
    """Returns information about an instance.
1773

1774
    This function is for internal use, when the config lock is already held.
1775

1776
    """
1777
    if inst_uuid not in self._config_data.instances:
1778
      return None
1779

    
1780
    return self._config_data.instances[inst_uuid]
1781

    
1782
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceInfo(self, inst_uuid):
    """Returns information about an instance.

    It takes the information from the configuration file. Other information of
    an instance are taken from the live systems.

    @param inst_uuid: UUID of the instance
    @rtype: L{objects.Instance}
    @return: the instance object

    """
    info = self._UnlockedGetInstanceInfo(inst_uuid)
    return info
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceNodeGroups(self, inst_uuid, primary_only=False):
    """Returns set of node group UUIDs for instance's nodes.

    @param inst_uuid: UUID of the instance
    @param primary_only: only consider the primary node when True
    @rtype: frozenset

    """
    instance = self._UnlockedGetInstanceInfo(inst_uuid)
    if not instance:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    if primary_only:
      node_uuids = [instance.primary_node]
    else:
      node_uuids = self._UnlockedGetInstanceNodes(instance)

    groups = set()
    for node_uuid in node_uuids:
      groups.add(self._UnlockedGetNodeInfo(node_uuid).group)
    return frozenset(groups)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceNetworks(self, inst_uuid):
    """Returns set of network UUIDs for instance's nics.

    @param inst_uuid: UUID of the instance
    @rtype: frozenset

    """
    instance = self._UnlockedGetInstanceInfo(inst_uuid)
    if not instance:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    # NICs without a network are skipped
    return frozenset(nic.network for nic in instance.nics if nic.network)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMultiInstanceInfo(self, inst_uuids):
    """Get the configuration of multiple instances.

    @param inst_uuids: list of instance UUIDs
    @rtype: list
    @return: list of tuples (instance UUID, instance_info), where
        instance_info is what would GetInstanceInfo return for the
        node, while keeping the original order

    """
    result = []
    for inst_uuid in inst_uuids:
      result.append((inst_uuid, self._UnlockedGetInstanceInfo(inst_uuid)))
    return result
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMultiInstanceInfoByName(self, inst_names):
    """Get the configuration of multiple instances.

    @param inst_names: list of instance names
    @rtype: list
    @return: list of tuples (instance UUID, instance_info), where
        instance_info is what would GetInstanceInfo return for the
        instance, while keeping the original order

    """
    return [(instance.uuid, instance)
            for instance in map(self._UnlockedGetInstanceInfoByName,
                                inst_names)]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllInstancesInfo(self):
    """Get the configuration of all instances.

    @rtype: dict
    @return: dict of (instance, instance_info), where instance_info is what
              would GetInstanceInfo return for the node

    """
    all_instances = self._UnlockedGetAllInstancesInfo()
    return all_instances
  def _UnlockedGetAllInstancesInfo(self):
1876
    my_dict = dict([(inst_uuid, self._UnlockedGetInstanceInfo(inst_uuid))
1877
                    for inst_uuid in self._UnlockedGetInstanceList()])
1878
    return my_dict
1879

    
1880
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstancesInfoByFilter(self, filter_fn):
    """Get instance configuration with a filter.

    @type filter_fn: callable
    @param filter_fn: Filter function receiving instance object as parameter,
      returning boolean. Important: this function is called while the
      configuration locks is held. It must not do any complex work or call
      functions potentially leading to a deadlock. Ideally it doesn't call any
      other functions and just compares instance attributes.

    """
    selected = {}
    for (inst_uuid, inst) in self._config_data.instances.items():
      if filter_fn(inst):
        selected[inst_uuid] = inst
    return selected
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceInfoByName(self, inst_name):
    """Get the L{objects.Instance} object for a named instance.

    @param inst_name: name of the instance to get information for
    @type inst_name: string
    @return: the corresponding L{objects.Instance} instance or None if no
          information is available

    """
    instance = self._UnlockedGetInstanceInfoByName(inst_name)
    return instance
  def _UnlockedGetInstanceInfoByName(self, inst_name):
1909
    for inst in self._UnlockedGetAllInstancesInfo().values():
1910
      if inst.name == inst_name:
1911
        return inst
1912
    return None
1913

    
1914
  def _UnlockedGetInstanceName(self, inst_uuid):
1915
    inst_info = self._UnlockedGetInstanceInfo(inst_uuid)
1916
    if inst_info is None:
1917
      raise errors.OpExecError("Unknown instance: %s" % inst_uuid)
1918
    return inst_info.name
1919

    
1920
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceName(self, inst_uuid):
    """Gets the instance name for the passed instance.

    @param inst_uuid: instance UUID to get name for
    @type inst_uuid: string
    @rtype: string
    @return: instance name

    """
    name = self._UnlockedGetInstanceName(inst_uuid)
    return name
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceNames(self, inst_uuids):
    """Gets the instance names for the passed list of instances.

    @param inst_uuids: list of instance UUIDs to get names for
    @type inst_uuids: list of strings
    @rtype: list of strings
    @return: list of instance names

    """
    names = self._UnlockedGetInstanceNames(inst_uuids)
    return names
  def _UnlockedGetInstanceNames(self, inst_uuids):
1945
    return [self._UnlockedGetInstanceName(uuid) for uuid in inst_uuids]
1946

    
1947
  @locking.ssynchronized(_config_lock)
  def AddNode(self, node, ec_id):
    """Add a node to the configuration.

    @type node: L{objects.Node}
    @param node: a Node instance
    @param ec_id: execution context id, used if the node needs a new UUID

    """
    logging.info("Adding node %s to configuration", node.name)

    self._EnsureUUID(node, ec_id)

    now = time.time()
    node.serial_no = 1
    node.ctime = now
    node.mtime = now
    self._UnlockedAddNodeToGroup(node.uuid, node.group)
    self._config_data.nodes[node.uuid] = node
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  @locking.ssynchronized(_config_lock)
  def RemoveNode(self, node_uuid):
    """Remove a node from the configuration.

    @param node_uuid: UUID of the node to remove

    """
    logging.info("Removing node %s from configuration", node_uuid)

    nodes = self._config_data.nodes
    if node_uuid not in nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_uuid)

    self._UnlockedRemoveNodeFromGroup(nodes[node_uuid])
    del nodes[node_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name into a node UUID.

    @return: tuple of (UUID, full name), or (None, None) when there is
        no unambiguous match

    """
    # Locking is done in L{ConfigWriter.GetAllNodesInfo}
    all_nodes = self.GetAllNodesInfo().values()
    expanded_name = _MatchNameComponentIgnoreCase(
                      short_name, [node.name for node in all_nodes])

    if expanded_name is None:
      return (None, None)

    # there has to be exactly one node with that name
    node = [n for n in all_nodes if n.name == expanded_name][0]
    return (node.uuid, node.name)
  def _UnlockedGetNodeInfo(self, node_uuid):
1998
    """Get the configuration of a node, as stored in the config.
1999

2000
    This function is for internal use, when the config lock is already
2001
    held.
2002

2003
    @param node_uuid: the node UUID
2004

2005
    @rtype: L{objects.Node}
2006
    @return: the node object
2007

2008
    """
2009
    if node_uuid not in self._config_data.nodes:
2010
      return None
2011

    
2012
    return self._config_data.nodes[node_uuid]
2013

    
2014
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInfo(self, node_uuid):
    """Get the configuration of a node, as stored in the config.

    This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

    @param node_uuid: the node UUID

    @rtype: L{objects.Node}
    @return: the node object

    """
    node = self._UnlockedGetNodeInfo(node_uuid)
    return node
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInstances(self, node_uuid):
    """Get the instances of a node, as stored in the config.

    @param node_uuid: the node UUID

    @rtype: (list, list)
    @return: a tuple with two lists: the primary and the secondary instances

    """
    primary = []
    secondary = []
    for inst in self._config_data.instances.values():
      if inst.primary_node == node_uuid:
        primary.append(inst.uuid)
      if node_uuid in self._UnlockedGetInstanceSecondaryNodes(inst):
        secondary.append(inst.uuid)
    return (primary, secondary)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupInstances(self, uuid, primary_only=False):
    """Get the instances of a node group.

    @param uuid: Node group UUID
    @param primary_only: Whether to only consider primary nodes
    @rtype: frozenset
    @return: List of instance UUIDs in node group

    """
    if primary_only:
      nodes_fn = lambda inst: [inst.primary_node]
    else:
      nodes_fn = self._UnlockedGetInstanceNodes

    # Collect every instance that has at least one relevant node in the group;
    # the set naturally removes duplicates.
    result = set()
    for inst in self._config_data.instances.values():
      for node_uuid in nodes_fn(inst):
        if self._UnlockedGetNodeInfo(node_uuid).group == uuid:
          result.add(inst.uuid)
    return frozenset(result)
  def _UnlockedGetHvparamsString(self, hvname):
2068
    """Return the string representation of the list of hyervisor parameters of
2069
    the given hypervisor.
2070

2071
    @see: C{GetHvparams}
2072

2073
    """
2074
    result = ""
2075
    hvparams = self._config_data.cluster.hvparams[hvname]
2076
    for key in hvparams:
2077
      result += "%s=%s\n" % (key, hvparams[key])
2078
    return result
2079

    
2080
  @locking.ssynchronized(_config_lock, shared=1)
  def GetHvparamsString(self, hvname):
    """Return the hypervisor parameters of the given hypervisor.

    @type hvname: string
    @param hvname: name of a hypervisor
    @rtype: string
    @return: string containing key-value-pairs, one pair on each line;
      format: KEY=VALUE

    """
    result = self._UnlockedGetHvparamsString(hvname)
    return result
  def _UnlockedGetNodeList(self):
2094
    """Return the list of nodes which are in the configuration.
2095

2096
    This function is for internal use, when the config lock is already
2097
    held.
2098

2099
    @rtype: list
2100

2101
    """
2102
    return self._config_data.nodes.keys()
2103

    
2104
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    """
    node_list = self._UnlockedGetNodeList()
    return node_list
  def _UnlockedGetOnlineNodeList(self):
2112
    """Return the list of nodes which are online.
2113

2114
    """
2115
    all_nodes = [self._UnlockedGetNodeInfo(node)
2116
                 for node in self._UnlockedGetNodeList()]
2117
    return [node.uuid for node in all_nodes if not node.offline]
2118

    
2119
  @locking.ssynchronized(_config_lock, shared=1)
  def GetOnlineNodeList(self):
    """Return the list of nodes which are online.

    """
    online = self._UnlockedGetOnlineNodeList()
    return online
  @locking.ssynchronized(_config_lock, shared=1)
  def GetVmCapableNodeList(self):
    """Return the list of nodes which are vm capable.

    @rtype: list
    @return: list of UUIDs of the nodes whose C{vm_capable} flag is set

    """
    all_nodes = [self._UnlockedGetNodeInfo(node)
                 for node in self._UnlockedGetNodeList()]
    return [node.uuid for node in all_nodes if node.vm_capable]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNonVmCapableNodeList(self):
    """Return the list of nodes which are not vm capable.

    @rtype: list
    @return: list of UUIDs of the nodes whose C{vm_capable} flag is unset

    """
    return [node.uuid
            for node in [self._UnlockedGetNodeInfo(node_uuid)
                         for node_uuid in self._UnlockedGetNodeList()]
            if not node.vm_capable]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMultiNodeInfo(self, node_uuids):
    """Get the configuration of multiple nodes.

    @param node_uuids: list of node UUIDs
    @rtype: list
    @return: list of tuples of (node, node_info), where node_info is
        what would GetNodeInfo return for the node, in the original
        order

    """
    result = []
    for node_uuid in node_uuids:
      result.append((node_uuid, self._UnlockedGetNodeInfo(node_uuid)))
    return result
  def _UnlockedGetAllNodesInfo(self):
2158
    """Gets configuration of all nodes.
2159

2160
    @note: See L{GetAllNodesInfo}
2161

2162
    """
2163
    return dict([(node_uuid, self._UnlockedGetNodeInfo(node_uuid))
2164
                 for node_uuid in self._UnlockedGetNodeList()])
2165

    
2166
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNodesInfo(self):
    """Get the configuration of all nodes.

    @rtype: dict
    @return: dict of (node, node_info), where node_info is what
              would GetNodeInfo return for the node

    """
    all_nodes = self._UnlockedGetAllNodesInfo()
    return all_nodes
  def _UnlockedGetNodeInfoByName(self, node_name):
2178
    for node in self._UnlockedGetAllNodesInfo().values():
2179
      if node.name == node_name:
2180
        return node
2181
    return None
2182

    
2183
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInfoByName(self, node_name):
    """Get the L{objects.Node} object for a named node.

    @param node_name: name of the node to get information for
    @type node_name: string
    @return: the corresponding L{objects.Node} instance or None if no
          information is available

    """
    node = self._UnlockedGetNodeInfoByName(node_name)
    return node
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupInfoByName(self, nodegroup_name):
    """Get the L{objects.NodeGroup} object for a named node group.

    @param nodegroup_name: name of the node group to get information for
    @type nodegroup_name: string
    @return: the corresponding L{objects.NodeGroup} instance or None if no
          information is available

    """
    # Linear scan; group names are unique, so the first hit is the answer.
    for group in self._UnlockedGetAllNodeGroupsInfo().values():
      if group.name == nodegroup_name:
        return group
    return None
  def _UnlockedGetNodeName(self, node_spec):
    # Accepts either a node object or a node UUID string.
    if isinstance(node_spec, objects.Node):
      return node_spec.name
    if isinstance(node_spec, basestring):
      node_info = self._UnlockedGetNodeInfo(node_spec)
      if node_info is None:
        raise errors.OpExecError("Unknown node: %s" % node_spec)
      return node_info.name
    raise errors.ProgrammerError("Can't handle node spec '%s'" % node_spec)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeName(self, node_spec):
    """Gets the node name for the passed node.

    @param node_spec: node to get names for
    @type node_spec: either node UUID or a L{objects.Node} object
    @rtype: string
    @return: node name

    """
    name = self._UnlockedGetNodeName(node_spec)
    return name
  def _UnlockedGetNodeNames(self, node_specs):
2234
    return [self._UnlockedGetNodeName(node_spec) for node_spec in node_specs]
2235

    
2236
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeNames(self, node_specs):
    """Gets the node names for the passed list of nodes.

    @param node_specs: list of nodes to get names for
    @type node_specs: list of either node UUIDs or L{objects.Node} objects
    @rtype: list of strings
    @return: list of node names

    """
    names = self._UnlockedGetNodeNames(node_specs)
    return names
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupsFromNodes(self, node_uuids):
    """Returns groups for a list of nodes.

    @type node_uuids: list of string
    @param node_uuids: List of node UUIDs
    @rtype: frozenset

    """
    groups = set()
    for node_uuid in node_uuids:
      groups.add(self._UnlockedGetNodeInfo(node_uuid).group)
    return frozenset(groups)
  def _UnlockedGetMasterCandidateStats(self, exceptions=None):
2261
    """Get the number of current and maximum desired and possible candidates.
2262

2263
    @type exceptions: list
2264
    @param exceptions: if passed, list of nodes that should be ignored
2265
    @rtype: tuple
2266
    @return: tuple of (current, desired and possible, possible)
2267

2268
    """
2269
    mc_now = mc_should = mc_max = 0
2270
    for node in self._config_data.nodes.values():
2271
      if exceptions and node.uuid in exceptions:
2272
        continue
2273
      if not (node.offline or node.drained) and node.master_capable:
2274
        mc_max += 1
2275
      if node.master_candidate:
2276
        mc_now += 1
2277
    mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
2278
    return (mc_now, mc_should, mc_max)
2279

    
2280
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterCandidateStats(self, exceptions=None):
    """Get the number of current and maximum possible candidates.

    This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
    @rtype: tuple
    @return: tuple of (current, max)

    """
    stats = self._UnlockedGetMasterCandidateStats(exceptions)
    return stats
  @locking.ssynchronized(_config_lock)
  def MaintainCandidatePool(self, exception_node_uuids):
    """Try to grow the candidate pool to the desired size.

    @type exception_node_uuids: list
    @param exception_node_uuids: if passed, list of nodes that should be ignored
    @rtype: list
    @return: list with the adjusted nodes (L{objects.Node} instances)

    """
    # The second element of the stats tuple is the desired pool size.
    mc_now, mc_should, _ = self._UnlockedGetMasterCandidateStats(
                             exception_node_uuids)
    mod_list = []
    if mc_now < mc_should:
      node_list = self._config_data.nodes.keys()
      # Promote random eligible nodes until the target is reached.
      random.shuffle(node_list)
      for node_uuid in node_list:
        if mc_now >= mc_should:
          break
        node = self._config_data.nodes[node_uuid]
        if (node.master_candidate or node.offline or node.drained or
            node.uuid in exception_node_uuids or not node.master_capable):
          continue
        mod_list.append(node)
        node.master_candidate = True
        node.serial_no += 1
        mc_now += 1
      if mc_now != mc_should:
        # this should not happen
        logging.warning("Warning: MaintainCandidatePool didn't manage to"
                        " fill the candidate pool (%d/%d)", mc_now, mc_should)
      if mod_list:
        self._config_data.cluster.serial_no += 1
        self._WriteConfig()

    return mod_list
  def _UnlockedAddNodeToGroup(self, node_uuid, nodegroup_uuid):
2332
    """Add a given node to the specified group.
2333

2334
    """
2335
    if nodegroup_uuid not in self._config_data.nodegroups:
2336
      # This can happen if a node group gets deleted between its lookup and
2337
      # when we're adding the first node to it, since we don't keep a lock in
2338
      # the meantime. It's ok though, as we'll fail cleanly if the node group
2339
      # is not found anymore.
2340
      raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)
2341
    if node_uuid not in self._config_data.nodegroups[nodegroup_uuid].members:
2342
      self._config_data.nodegroups[nodegroup_uuid].members.append(node_uuid)
2343

    
2344
  def _UnlockedRemoveNodeFromGroup(self, node):
2345
    """Remove a given node from its group.
2346

2347
    """
2348
    nodegroup = node.group
2349
    if nodegroup not in self._config_data.nodegroups:
2350
      logging.warning("Warning: node '%s' has unknown node group '%s'"
2351
                      " (while being removed from it)", node.uuid, nodegroup)
2352
    nodegroup_obj = self._config_data.nodegroups[nodegroup]
2353
    if node.uuid not in nodegroup_obj.members:
2354
      logging.warning("Warning: node '%s' not a member of its node group '%s'"
2355
                      " (while being removed from it)", node.uuid, nodegroup)
2356
    else:
2357
      nodegroup_obj.members.remove(node.uuid)
2358

    
2359
  @locking.ssynchronized(_config_lock)
  def AssignGroupNodes(self, mods):
    """Changes the group of a number of nodes.

    Works in two phases: first all modifications are validated and the
    (node, old group, new group) triples collected, then all changes are
    applied together, so either the whole request is consistent or a
    ConfigurationError is raised before anything is mutated.

    @type mods: list of tuples; (node name, new group UUID)
    @param mods: Node membership modifications
    @raise errors.ConfigurationError: if a node or a group cannot be found

    """
    groups = self._config_data.nodegroups
    nodes = self._config_data.nodes

    # Collected (node, old_group, new_group) triples to apply in phase two
    resmod = []

    # Try to resolve UUIDs first
    for (node_uuid, new_group_uuid) in mods:
      try:
        node = nodes[node_uuid]
      except KeyError:
        raise errors.ConfigurationError("Unable to find node '%s'" % node_uuid)

      if node.group == new_group_uuid:
        # Node is being assigned to its current group
        logging.debug("Node '%s' was assigned to its current group (%s)",
                      node_uuid, node.group)
        continue

      # Try to find current group of node
      try:
        old_group = groups[node.group]
      except KeyError:
        raise errors.ConfigurationError("Unable to find old group '%s'" %
                                        node.group)

      # Try to find new group for node
      try:
        new_group = groups[new_group_uuid]
      except KeyError:
        raise errors.ConfigurationError("Unable to find new group '%s'" %
                                        new_group_uuid)

      assert node.uuid in old_group.members, \
        ("Inconsistent configuration: node '%s' not listed in members for its"
         " old group '%s'" % (node.uuid, old_group.uuid))
      assert node.uuid not in new_group.members, \
        ("Inconsistent configuration: node '%s' already listed in members for"
         " its new group '%s'" % (node.uuid, new_group.uuid))

      resmod.append((node, old_group, new_group))

    # Apply changes
    for (node, old_group, new_group) in resmod:
      assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
        "Assigning to current group is not possible"

      node.group = new_group.uuid

      # Update members of involved groups
      if node.uuid in old_group.members:
        old_group.members.remove(node.uuid)
      if node.uuid not in new_group.members:
        new_group.members.append(node.uuid)

    # Update timestamps and serials (only once per node/group object)
    now = time.time()
    for obj in frozenset(itertools.chain(*resmod)): # pylint: disable=W0142
      obj.serial_no += 1
      obj.mtime = now

    # Force ssconf update
    self._config_data.cluster.serial_no += 1

    self._WriteConfig()
  def _BumpSerialNo(self):
2433
    """Bump up the serial number of the config.
2434

2435
    """
2436
    self._config_data.serial_no += 1
2437
    self._config_data.mtime = time.time()
2438

    
2439
  def _AllUUIDObjects(self):
    """Returns all objects with uuid attributes.

    @rtype: list
    @return: flat list of instances, nodes, node groups, networks, disks,
        NICs and the cluster object itself

    """
    # NOTE: relies on dict.values() returning lists (Python 2), which are
    # then concatenated with "+"; the cluster object is appended last as a
    # one-element list.
    return (self._config_data.instances.values() +
            self._config_data.nodes.values() +
            self._config_data.nodegroups.values() +
            self._config_data.networks.values() +
            self._AllDisks() +
            self._AllNICs() +
            [self._config_data.cluster])
  def _OpenConfig(self, accept_foreign):
    """Read the config data from disk.

    Loads and deserializes the configuration file, validates it, performs
    sanity checks on the master node information and finally runs the
    configuration upgrade steps.

    @type accept_foreign: bool
    @param accept_foreign: whether to accept a configuration whose recorded
        master is not the local host (e.g. when merging clusters)
    @raise errors.ConfigurationError: if the data cannot be parsed, fails
        validation, misses required fields, or denotes a foreign master
        while C{accept_foreign} is False

    """
    raw_data = utils.ReadFile(self._cfg_file)

    try:
      data = objects.ConfigData.FromDict(serializer.Load(raw_data))
    except Exception, err:
      # Any parse/deserialization failure is reported as a config error
      raise errors.ConfigurationError(err)

    # Make sure the configuration has the right version
    _ValidateConfig(data)

    if (not hasattr(data, "cluster") or
        not hasattr(data.cluster, "rsahostkeypub")):
      raise errors.ConfigurationError("Incomplete configuration"
                                      " (missing cluster.rsahostkeypub)")

    if not data.cluster.master_node in data.nodes:
      msg = ("The configuration denotes node %s as master, but does not"
             " contain information about this node" %
             data.cluster.master_node)
      raise errors.ConfigurationError(msg)

    master_info = data.nodes[data.cluster.master_node]
    if master_info.name != self._my_hostname and not accept_foreign:
      msg = ("The configuration denotes node %s as master, while my"
             " hostname is %s; opening a foreign configuration is only"
             " possible in accept_foreign mode" %
             (master_info.name, self._my_hostname))
      raise errors.ConfigurationError(msg)

    self._config_data = data
    # reset the last serial as -1 so that the next write will cause
    # ssconf update
    self._last_cluster_serial = -1

    # Upgrade configuration if needed
    self._UpgradeConfig()

    # Remember the file identity so later writes can detect outside changes
    self._cfg_id = utils.GetFileID(path=self._cfg_file)
  def _UpgradeConfig(self):
    """Run any upgrade steps.

    This method performs both in-object upgrades and also update some data
    elements that need uniqueness across the whole configuration or interact
    with other objects.

    @warning: this function will call L{_WriteConfig()}, but also
        L{DropECReservations} so it needs to be called only from a
        "safe" place (the constructor). If one wanted to call it with
        the lock held, a DropECReservationUnlocked would need to be
        created first, to avoid causing deadlock.

    """
    # Keep a copy of the persistent part of _config_data to check for changes
    # Serialization doesn't guarantee order in dictionaries
    oldconf = copy.deepcopy(self._config_data.ToDict())

    # In-object upgrades
    self._config_data.UpgradeConfig()

    # Ensure every configuration object carries a UUID
    for item in self._AllUUIDObjects():
      if item.uuid is None:
        item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
    # Pre-2.x configurations had no node groups; create the default one
    if not self._config_data.nodegroups:
      default_nodegroup_name = constants.INITIAL_NODE_GROUP_NAME
      default_nodegroup = objects.NodeGroup(name=default_nodegroup_name,
                                            members=[])
      self._UnlockedAddNodeGroup(default_nodegroup, _UPGRADE_CONFIG_JID, True)
    for node in self._config_data.nodes.values():
      if not node.group:
        node.group = self.LookupNodeGroup(None)
      # This is technically *not* an upgrade, but needs to be done both when
      # nodegroups are being added, and upon normally loading the config,
      # because the members list of a node group is discarded upon
      # serializing/deserializing the object.
      self._UnlockedAddNodeToGroup(node.uuid, node.group)

    # Only persist if the upgrade actually changed anything
    modified = (oldconf != self._config_data.ToDict())
    if modified:
      self._WriteConfig()
      # This is ok even if it acquires the internal lock, as _UpgradeConfig is
      # only called at config init time, without the lock held
      self.DropECReservations(_UPGRADE_CONFIG_JID)
    else:
      config_errors = self._UnlockedVerifyConfig()
      if config_errors:
        errmsg = ("Loaded configuration data is not consistent: %s" %
                  (utils.CommaJoin(config_errors)))
        logging.critical(errmsg)
  def _DistributeConfig(self, feedback_fn):
    """Distribute the configuration to the other nodes.

    Currently, this only copies the configuration file. In the future,
    it could be used to encapsulate the 2/3-phase update mechanism.

    @param feedback_fn: Callable feedback function to report copy
        failures to, or None
    @rtype: boolean
    @return: True if the copy succeeded on all master candidates (or
        the writer is offline), False otherwise

    """
    if self._offline:
      return True

    bad = False

    node_list = []
    addr_list = []
    myhostname = self._my_hostname
    # we can skip checking whether _UnlockedGetNodeInfo returns None
    # since the node list comes from _UnlockedGetNodeList, and we are
    # called with the lock held, so no modifications should take place
    # in between
    for node_uuid in self._UnlockedGetNodeList():
      node_info = self._UnlockedGetNodeInfo(node_uuid)
      # only master candidates (other than ourselves) get the file
      if node_info.name == myhostname or not node_info.master_candidate:
        continue
      node_list.append(node_info.name)
      addr_list.append(node_info.primary_ip)

    # TODO: Use dedicated resolver talking to config writer for name resolution
    result = \
      self._GetRpc(addr_list).call_upload_file(node_list, self._cfg_file)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (self._cfg_file, to_node, msg))
        logging.error(msg)

        if feedback_fn:
          feedback_fn(msg)

        bad = True

    return not bad
  def _WriteConfig(self, destination=None, feedback_fn=None):
2589
    """Write the configuration data to persistent storage.
2590

2591
    """
2592
    assert feedback_fn is None or callable(feedback_fn)
2593

    
2594
    # Warn on config errors, but don't abort the save - the
2595
    # configuration has already been modified, and we can't revert;
2596
    # the best we can do is to warn the user and save as is, leaving
2597
    # recovery to the user
2598
    config_errors = self._UnlockedVerifyConfig()
2599
    if config_errors:
2600
      errmsg = ("Configuration data is not consistent: %s" %
2601
                (utils.CommaJoin(config_errors)))
2602
      logging.critical(errmsg)
2603
      if feedback_fn:
2604
        feedback_fn(errmsg)
2605

    
2606
    if destination is None:
2607
      destination = self._cfg_file
2608
    self._BumpSerialNo()
2609
    txt = serializer.DumpJson(
2610
      self._config_data.ToDict(_with_private=True),
2611
      private_encoder=serializer.EncodeWithPrivateFields
2612
    )
2613

    
2614
    getents = self._getents()
2615
    try:
2616
      fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
2617
                               close=False, gid=getents.confd_gid, mode=0640)
2618
    except errors.LockError:
2619
      raise errors.ConfigurationError("The configuration file has been"
2620
                                      " modified since the last write, cannot"
2621
                                      " update")
2622
    try:
2623
      self._cfg_id = utils.GetFileID(fd=fd)
2624
    finally:
2625
      os.close(fd)
2626

    
2627
    self.write_count += 1
2628

    
2629
    # and redistribute the config file to master candidates
2630
    self._DistributeConfig(feedback_fn)
2631

    
2632
    # Write ssconf files on all nodes (including locally)
2633
    if self._last_cluster_serial < self._config_data.cluster.serial_no:
2634
      if not self._offline:
2635
        result = self._GetRpc(None).call_write_ssconf_files(
2636
          self._UnlockedGetNodeNames(self._UnlockedGetOnlineNodeList()),
2637
          self._UnlockedGetSsconfValues())
2638

    
2639
        for nname, nresu in result.items():
2640
          msg = nresu.fail_msg
2641
          if msg:
2642
            errmsg = ("Error while uploading ssconf files to"
2643
                      " node %s: %s" % (nname, msg))
2644
            logging.warning(errmsg)
2645

    
2646
            if feedback_fn:
2647
              feedback_fn(errmsg)
2648

    
2649
      self._last_cluster_serial = self._config_data.cluster.serial_no
2650

    
2651
  def _GetAllHvparamsStrings(self, hypervisors):
2652
    """Get the hvparams of all given hypervisors from the config.
2653

2654
    @type hypervisors: list of string
2655
    @param hypervisors: list of hypervisor names
2656
    @rtype: dict of strings
2657
    @returns: dictionary mapping the hypervisor name to a string representation
2658
      of the hypervisor's hvparams
2659

2660
    """
2661
    hvparams = {}
2662
    for hv in hypervisors:
2663
      hvparams[hv] = self._UnlockedGetHvparamsString(hv)
2664
    return hvparams
2665

    
2666
  @staticmethod
  def _ExtendByAllHvparamsStrings(ssconf_values, all_hvparams):
    """Extends the ssconf_values dictionary by hvparams.

    @type ssconf_values: dict of strings
    @param ssconf_values: dictionary mapping ssconf_keys to strings
      representing the content of ssconf files
    @type all_hvparams: dict of strings
    @param all_hvparams: dictionary mapping hypervisor names to a string
      representation of their hvparams
    @rtype: same as ssconf_values
    @returns: the ssconf_values dictionary extended by hvparams (the
      input dictionary is modified in place)

    """
    for hv in all_hvparams:
      # one ssconf entry per hypervisor, keyed by the common prefix
      ssconf_key = constants.SS_HVPARAMS_PREF + hv
      ssconf_values[ssconf_key] = all_hvparams[hv]
    return ssconf_values
  def _UnlockedGetSsconfValues(self):
    """Return the values needed by ssconf.

    @rtype: dict
    @return: a dictionary with keys the ssconf names and values their
        associated value
    @raise errors.ConfigurationError: if any computed ssconf value is
        not a string

    """
    fn = "\n".join
    instance_names = utils.NiceSort(
                       [inst.name for inst in
                        self._UnlockedGetAllInstancesInfo().values()])
    node_infos = self._UnlockedGetAllNodesInfo().values()
    node_names = [node.name for node in node_infos]
    node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
                    for ninfo in node_infos]
    node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
                    for ninfo in node_infos]

    instance_data = fn(instance_names)
    off_data = fn(node.name for node in node_infos if node.offline)
    on_data = fn(node.name for node in node_infos if not node.offline)
    mc_data = fn(node.name for node in node_infos if node.master_candidate)
    mc_ips_data = fn(node.primary_ip for node in node_infos
                     if node.master_candidate)
    node_data = fn(node_names)
    node_pri_ips_data = fn(node_pri_ips)
    node_snd_ips_data = fn(node_snd_ips)

    cluster = self._config_data.cluster
    cluster_tags = fn(cluster.GetTags())

    master_candidates_certs = fn("%s=%s" % (mc_uuid, mc_cert)
                                 for mc_uuid, mc_cert
                                 in cluster.candidate_certs.items())

    hypervisor_list = fn(cluster.enabled_hypervisors)
    all_hvparams = self._GetAllHvparamsStrings(constants.HYPER_TYPES)

    uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")

    nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in
                  self._config_data.nodegroups.values()]
    nodegroups_data = fn(utils.NiceSort(nodegroups))
    networks = ["%s %s" % (net.uuid, net.name) for net in
                self._config_data.networks.values()]
    networks_data = fn(utils.NiceSort(networks))

    ssconf_values = {
      constants.SS_CLUSTER_NAME: cluster.cluster_name,
      constants.SS_CLUSTER_TAGS: cluster_tags,
      constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
      constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
      constants.SS_GLUSTER_STORAGE_DIR: cluster.gluster_storage_dir,
      constants.SS_MASTER_CANDIDATES: mc_data,
      constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
      constants.SS_MASTER_CANDIDATES_CERTS: master_candidates_certs,
      constants.SS_MASTER_IP: cluster.master_ip,
      constants.SS_MASTER_NETDEV: cluster.master_netdev,
      constants.SS_MASTER_NETMASK: str(cluster.master_netmask),
      constants.SS_MASTER_NODE: self._UnlockedGetNodeName(cluster.master_node),
      constants.SS_NODE_LIST: node_data,
      constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
      constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
      constants.SS_OFFLINE_NODES: off_data,
      constants.SS_ONLINE_NODES: on_data,
      constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
      constants.SS_INSTANCE_LIST: instance_data,
      constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
      constants.SS_HYPERVISOR_LIST: hypervisor_list,
      constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
      constants.SS_UID_POOL: uid_pool,
      constants.SS_NODEGROUPS: nodegroups_data,
      constants.SS_NETWORKS: networks_data,
      }
    ssconf_values = self._ExtendByAllHvparamsStrings(ssconf_values,
                                                     all_hvparams)
    # in Python 2 str is a subclass of basestring, so checking basestring
    # alone covers both str and unicode values
    bad_values = [(k, v) for k, v in ssconf_values.items()
                  if not isinstance(v, basestring)]
    if bad_values:
      err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
      raise errors.ConfigurationError("Some ssconf key(s) have non-string"
                                      " values: %s" % err)
    return ssconf_values
  @locking.ssynchronized(_config_lock, shared=1)
  def GetSsconfValues(self):
    """Wrapper using lock around _UnlockedGetSsconfValues().

    @rtype: dict
    @return: a dictionary with keys the ssconf names and values their
        associated value

    """
    return self._UnlockedGetSsconfValues()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetVGName(self):
    """Return the volume group name.

    @rtype: string
    @return: the cluster's LVM volume group name

    """
    return self._config_data.cluster.volume_group_name
  @locking.ssynchronized(_config_lock)
  def SetVGName(self, vg_name):
    """Set the volume group name.

    @type vg_name: string
    @param vg_name: the new LVM volume group name

    """
    self._config_data.cluster.volume_group_name = vg_name
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetDRBDHelper(self):
    """Return DRBD usermode helper.

    @rtype: string
    @return: the configured DRBD usermode helper

    """
    return self._config_data.cluster.drbd_usermode_helper
  @locking.ssynchronized(_config_lock)
  def SetDRBDHelper(self, drbd_helper):
    """Set DRBD usermode helper.

    @type drbd_helper: string
    @param drbd_helper: the new DRBD usermode helper

    """
    self._config_data.cluster.drbd_usermode_helper = drbd_helper
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMACPrefix(self):
    """Return the mac prefix.

    @rtype: string
    @return: the cluster-wide MAC address prefix

    """
    return self._config_data.cluster.mac_prefix
  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterInfo(self):
    """Returns information about the cluster

    @rtype: L{objects.Cluster}
    @return: the cluster object

    """
    return self._config_data.cluster
  @locking.ssynchronized(_config_lock, shared=1)
  def HasAnyDiskOfType(self, dev_type):
    """Check if there is at least one disk of the given type in the
    configuration.

    @type dev_type: L{constants.DTS_BLOCK}
    @param dev_type: the disk template to check for
    @rtype: boolean
    @return: True if a disk of the given type was found

    """
    return self._config_data.HasAnyDiskOfType(dev_type)
  @locking.ssynchronized(_config_lock)
  def Update(self, target, feedback_fn, ec_id=None):
    """Notify function to be called after updates.

    This function must be called when an object (as returned by
    GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
    caller wants the modifications saved to the backing store. Note
    that all modified objects will be saved, but the target argument
    is the one the caller wants to ensure that it's saved.

    @param target: an instance of either L{objects.Cluster},
        L{objects.Node} or L{objects.Instance} which is existing in
        the cluster
    @param feedback_fn: Callable feedback function
    @param ec_id: Execution context id, used to commit IPs reserved in
        that context, or None
    @raise errors.ProgrammerError: if the configuration was not loaded
        or the target object's type is not supported
    @raise errors.ConfigurationError: if the target object is not part
        of the current configuration

    """
    if self._config_data is None:
      raise errors.ProgrammerError("Configuration file not read,"
                                   " cannot save.")
    update_serial = False
    if isinstance(target, objects.Cluster):
      test = target == self._config_data.cluster
    elif isinstance(target, objects.Node):
      test = target in self._config_data.nodes.values()
      update_serial = True
    elif isinstance(target, objects.Instance):
      test = target in self._config_data.instances.values()
    elif isinstance(target, objects.NodeGroup):
      test = target in self._config_data.nodegroups.values()
    elif isinstance(target, objects.Network):
      test = target in self._config_data.networks.values()
    else:
      raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                   " ConfigWriter.Update" % type(target))
    if not test:
      raise errors.ConfigurationError("Configuration updated since object"
                                      " has been read or unknown object")
    target.serial_no += 1
    target.mtime = now = time.time()

    if update_serial:
      # for node updates, we need to increase the cluster serial too
      self._config_data.cluster.serial_no += 1
      self._config_data.cluster.mtime = now

    if isinstance(target, objects.Instance):
      self._UnlockedReleaseDRBDMinors(target.uuid)

    if ec_id is not None:
      # Commit all ips reserved by OpInstanceSetParams and OpGroupSetParams
      self._UnlockedCommitTemporaryIps(ec_id)

    self._WriteConfig(feedback_fn=feedback_fn)
  @locking.ssynchronized(_config_lock)
  def DropECReservations(self, ec_id):
    """Drop per-execution-context reservations

    @type ec_id: string
    @param ec_id: the execution context whose reservations (across all
        reservation managers) should be released

    """
    for rm in self._all_rms:
      rm.DropECReservations(ec_id)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNetworksInfo(self):
    """Get configuration info of all the networks.

    @rtype: dict
    @return: a shallow copy of the uuid-to-network mapping

    """
    return dict(self._config_data.networks)
  def _UnlockedGetNetworkList(self):
2903
    """Get the list of networks.
2904

2905
    This function is for internal use, when the config lock is already held.
2906

2907
    """
2908
    return self._config_data.networks.keys()
2909

    
2910
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNetworkList(self):
    """Get the list of networks.

    @return: array of network UUIDs, ex. ["main", "vlan100", "200"]

    """
    return self._UnlockedGetNetworkList()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNetworkNames(self):
    """Get a list of network names

    @rtype: list of string
    @return: the names of all configured networks

    """
    names = [net.name
             for net in self._config_data.networks.values()]
    return names
  def _UnlockedGetNetwork(self, uuid):
2929
    """Returns information about a network.
2930

2931
    This function is for internal use, when the config lock is already held.
2932

2933
    """
2934
    if uuid not in self._config_data.networks:
2935
      return None
2936

    
2937
    return self._config_data.networks[uuid]
2938

    
2939
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNetwork(self, uuid):
    """Returns information about a network.

    It takes the information from the configuration file.

    @param uuid: UUID of the network

    @rtype: L{objects.Network}
    @return: the network object

    """
    return self._UnlockedGetNetwork(uuid)
  @locking.ssynchronized(_config_lock)
  def AddNetwork(self, net, ec_id, check_uuid=True):
    """Add a network to the configuration.

    @type net: L{objects.Network}
    @param net: the Network object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: boolean
    @param check_uuid: whether to ensure the network has a (generated)
        UUID before it is added

    """
    self._UnlockedAddNetwork(net, ec_id, check_uuid)
    self._WriteConfig()
  def _UnlockedAddNetwork(self, net, ec_id, check_uuid):
2967
    """Add a network to the configuration.
2968

2969
    """
2970
    logging.info("Adding network %s to configuration", net.name)
2971

    
2972
    if check_uuid:
2973
      self._EnsureUUID(net, ec_id)
2974

    
2975
    net.serial_no = 1
2976
    net.ctime = net.mtime = time.time()
2977
    self._config_data.networks[net.uuid] = net
2978
    self._config_data.cluster.serial_no += 1
2979

    
2980
  def _UnlockedLookupNetwork(self, target):
2981
    """Lookup a network's UUID.
2982

2983
    @type target: string
2984
    @param target: network name or UUID
2985
    @rtype: string
2986
    @return: network UUID
2987
    @raises errors.OpPrereqError: when the target network cannot be found
2988

2989
    """
2990
    if target is None:
2991
      return None
2992
    if target in self._config_data.networks:
2993
      return target
2994
    for net in self._config_data.networks.values():
2995
      if net.name == target:
2996
        return net.uuid
2997
    raise errors.OpPrereqError("Network '%s' not found" % target,
2998
                               errors.ECODE_NOENT)
2999

    
3000
  @locking.ssynchronized(_config_lock, shared=1)
  def LookupNetwork(self, target):
    """Lookup a network's UUID.

    This function is just a wrapper over L{_UnlockedLookupNetwork}.

    @type target: string
    @param target: network name or UUID
    @rtype: string
    @return: network UUID

    """
    return self._UnlockedLookupNetwork(target)
  @locking.ssynchronized(_config_lock)
  def RemoveNetwork(self, network_uuid):
    """Remove a network from the configuration.

    @type network_uuid: string
    @param network_uuid: the UUID of the network to remove
    @raise errors.ConfigurationError: if the network is unknown

    """
    logging.info("Removing network %s from configuration", network_uuid)

    if network_uuid not in self._config_data.networks:
      raise errors.ConfigurationError("Unknown network '%s'" % network_uuid)

    del self._config_data.networks[network_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  def _UnlockedGetGroupNetParams(self, net_uuid, node_uuid):
3032
    """Get the netparams (mode, link) of a network.
3033

3034
    Get a network's netparams for a given node.
3035

3036
    @type net_uuid: string
3037
    @param net_uuid: network uuid
3038
    @type node_uuid: string
3039
    @param node_uuid: node UUID
3040
    @rtype: dict or None
3041
    @return: netparams
3042

3043
    """
3044
    node_info = self._UnlockedGetNodeInfo(node_uuid)
3045
    nodegroup_info = self._UnlockedGetNodeGroup(node_info.group)
3046
    netparams = nodegroup_info.networks.get(net_uuid, None)
3047

    
3048
    return netparams
3049

    
3050
  @locking.ssynchronized(_config_lock, shared=1)
  def GetGroupNetParams(self, net_uuid, node_uuid):
    """Locking wrapper of _UnlockedGetGroupNetParams()

    @type net_uuid: string
    @param net_uuid: network uuid
    @type node_uuid: string
    @param node_uuid: node UUID
    @rtype: dict or None
    @return: netparams

    """
    return self._UnlockedGetGroupNetParams(net_uuid, node_uuid)
  @locking.ssynchronized(_config_lock, shared=1)
  def CheckIPInNodeGroup(self, ip, node_uuid):
    """Check IP uniqueness in nodegroup.

    Check networks that are connected in the node's node group
    if ip is contained in any of them. Used when creating/adding
    a NIC to ensure uniqueness among nodegroups.

    @type ip: string
    @param ip: ip address
    @type node_uuid: string
    @param node_uuid: node UUID
    @rtype: (string, dict) or (None, None)
    @return: (network name, netparams)

    """
    if ip is None:
      return (None, None)
    node_info = self._UnlockedGetNodeInfo(node_uuid)
    nodegroup_info = self._UnlockedGetNodeGroup(node_info.group)
    # scan all networks connected to the node's group for the address
    for net_uuid in nodegroup_info.networks.keys():
      net_info = self._UnlockedGetNetwork(net_uuid)
      pool = network.AddressPool(net_info)
      if pool.Contains(ip):
        return (net_info.name, nodegroup_info.networks[net_uuid])

    return (None, None)