Statistics
| Branch: | Tag: | Revision:

root / lib / config.py @ 25cf4130

History | View | Annotate | Download (99.1 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Configuration management for Ganeti
23

24
This module provides the interface to the Ganeti cluster configuration.
25

26
The configuration data is stored on every node but is updated on the master
27
only. After each update, the master distributes the data to the other nodes.
28

29
Currently, the data storage format is JSON. YAML was slow and consuming too
30
much memory.
31

32
"""
33

    
34
# pylint: disable=R0904
35
# R0904: Too many public methods
36

    
37
import copy
38
import os
39
import random
40
import logging
41
import time
42
import itertools
43

    
44
from ganeti import errors
45
from ganeti import locking
46
from ganeti import utils
47
from ganeti import constants
48
import ganeti.rpc.node as rpc
49
import ganeti.wconfd as wc
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import uidpool
53
from ganeti import netutils
54
from ganeti import runtime
55
from ganeti import pathutils
56
from ganeti import network
57

    
58

    
59
_config_lock = locking.SharedLock("ConfigWriter")
60

    
61
# job id used for resource management at config upgrade time
62
_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
63

    
64

    
65
def _ValidateConfig(data):
  """Sanity-check a configuration object.

  Only the configuration version is verified here, nothing else.

  @raise errors.ConfigurationError: if the version differs from what
      we expect

  """
  expected = constants.CONFIG_VERSION
  if data.version != expected:
    raise errors.ConfigVersionMismatch(expected, data.version)
76

    
77

    
78
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  """
  def __init__(self):
    # maps execution-context id -> set of resources reserved by that context
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Check whether a resource is reserved by any execution context."""
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve a resource on behalf of the given execution context.

    @raise errors.ReservationError: if the resource is already reserved

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    if ec_id not in self._ec_reserved:
      self._ec_reserved[ec_id] = set([resource])
    else:
      self._ec_reserved[ec_id].add(resource)

  def DropECReservations(self, ec_id):
    """Drop all reservations held by one execution context."""
    if ec_id in self._ec_reserved:
      del self._ec_reserved[ec_id]

  def GetReserved(self):
    """Return the set of resources reserved by all execution contexts."""
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def GetECReserved(self, ec_id):
    """ Used when you want to retrieve all reservations for a specific
        execution context. E.g when commiting reserved IPs for a specific
        network.

    """
    ec_reserved = set()
    if ec_id in self._ec_reserved:
      ec_reserved.update(self._ec_reserved[ec_id])
    return ec_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type

    @param existing: collection of resources already in use; candidates in
        it are rejected
    @param generate_one_fn: callable producing one candidate (or None)
    @param ec_id: execution context to register the reservation under
    @raise errors.ConfigurationError: if no acceptable resource was produced
        within the bounded number of attempts

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    retries = 64
    while retries > 0:
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
      # BUGFIX: the retry counter was never decremented before, so a
      # generator that keeps returning duplicates (or None) would make
      # this loop spin forever instead of failing after 64 attempts
      retries -= 1
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource
142

    
143

    
144
def _MatchNameComponentIgnoreCase(short_name, names):
  """Case-insensitive wrapper around L{utils.text.MatchNameComponent}.

  """
  return utils.MatchNameComponent(short_name, names,
                                  case_sensitive=False)
149

    
150

    
151
def _CheckInstanceDiskIvNames(disks):
152
  """Checks if instance's disks' C{iv_name} attributes are in order.
153

154
  @type disks: list of L{objects.Disk}
155
  @param disks: List of disks
156
  @rtype: list of tuples; (int, string, string)
157
  @return: List of wrongly named disks, each tuple contains disk index,
158
    expected and actual name
159

160
  """
161
  result = []
162

    
163
  for (idx, disk) in enumerate(disks):
164
    exp_iv_name = "disk/%s" % idx
165
    if disk.iv_name != exp_iv_name:
166
      result.append((idx, exp_iv_name, disk.iv_name))
167

    
168
  return result
169

    
170

    
171
class ConfigWriter(object):
172
  """The interface to the cluster configuration.
173

174
  @ivar _temporary_lvs: reservation manager for temporary LVs
175
  @ivar _all_rms: a list of all temporary reservation managers
176

177
  """
178
  def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
               accept_foreign=False):
    """Initialize the writer and open the configuration.

    @param cfg_file: path to the configuration file; defaults to the
        cluster-wide configuration file
    @param offline: whether the configuration should not be distributed
    @param _getents: entities-resolution callable, overridable for tests
    @param accept_foreign: whether a configuration claimed by another
        master is acceptable

    """
    self.write_count = 0
    self._lock = _config_lock
    self._config_data = None
    self._offline = offline
    if cfg_file is None:
      cfg_file = pathutils.CLUSTER_CONF_FILE
    self._cfg_file = cfg_file
    self._getents = _getents
    self._temporary_ids = TemporaryReservationManager()
    self._temporary_drbds = {}
    self._temporary_macs = TemporaryReservationManager()
    self._temporary_secrets = TemporaryReservationManager()
    self._temporary_lvs = TemporaryReservationManager()
    self._temporary_ips = TemporaryReservationManager()
    self._all_rms = [self._temporary_ids, self._temporary_macs,
                     self._temporary_secrets, self._temporary_lvs,
                     self._temporary_ips]
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = netutils.Hostname.GetSysName()
    self._last_cluster_serial = -1
    self._cfg_id = None
    self._context = None
    self._wconfd = None
    self._OpenConfig(accept_foreign)
208

    
209
  def _GetRpc(self, address_list):
    """Build an RPC runner bound to the current Ganeti context.

    @param address_list: addresses to restrict the runner to (may be None)
    @return: a L{rpc.ConfigRunner} instance

    """
    runner = rpc.ConfigRunner(self._context, address_list)
    return runner
214

    
215
  def SetContext(self, context):
216
    """Sets Ganeti context.
217

218
    """
219
    self._context = context
220

    
221
  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Check if the cluster is configured.

    @return: True if the cluster configuration file exists on disk

    """
    config_present = os.path.exists(pathutils.CLUSTER_CONF_FILE)
    return config_present
228

    
229
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNdParams(self, node):
    """Get the node params populated with cluster defaults.

    @type node: L{objects.Node}
    @param node: The node we want to know the params for
    @return: A dict with the filled in node params

    """
    group = self._UnlockedGetNodeGroup(node.group)
    return self._config_data.cluster.FillND(node, group)
240

    
241
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNdGroupParams(self, nodegroup):
    """Get the node groups params populated with cluster defaults.

    @type nodegroup: L{objects.NodeGroup}
    @param nodegroup: The node group we want to know the params for
    @return: A dict with the filled in node group params

    """
    cluster = self._config_data.cluster
    return cluster.FillNDGroup(nodegroup)
251

    
252
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceDiskParams(self, instance):
    """Get the disk params populated with inherit chain.

    @type instance: L{objects.Instance}
    @param instance: The instance we want to know the params for
    @return: A dict with the filled in disk params

    """
    pnode = self._UnlockedGetNodeInfo(instance.primary_node)
    group = self._UnlockedGetNodeGroup(pnode.group)
    return self._UnlockedGetGroupDiskParams(group)
264

    
265
  @locking.ssynchronized(_config_lock, shared=1)
  def GetGroupDiskParams(self, group):
    """Get the disk params populated with inherit chain.

    This is the locked wrapper over L{_UnlockedGetGroupDiskParams}.

    @type group: L{objects.NodeGroup}
    @param group: The group we want to know the params for
    @return: A dict with the filled in disk params

    """
    return self._UnlockedGetGroupDiskParams(group)
275

    
276
  def _UnlockedGetGroupDiskParams(self, group):
277
    """Get the disk params populated with inherit chain down to node-group.
278

279
    @type group: L{objects.NodeGroup}
280
    @param group: The group we want to know the params for
281
    @return: A dict with the filled in disk params
282

283
    """
284
    return self._config_data.cluster.SimpleFillDP(group.diskparams)
285

    
286
  def _UnlockedGetNetworkMACPrefix(self, net_uuid):
287
    """Return the network mac prefix if it exists or the cluster level default.
288

289
    """
290
    prefix = None
291
    if net_uuid:
292
      nobj = self._UnlockedGetNetwork(net_uuid)
293
      if nobj.mac_prefix:
294
        prefix = nobj.mac_prefix
295

    
296
    return prefix
297

    
298
  def _GenerateOneMAC(self, prefix=None):
299
    """Return a function that randomly generates a MAC suffic
300
       and appends it to the given prefix. If prefix is not given get
301
       the cluster level default.
302

303
    """
304
    if not prefix:
305
      prefix = self._config_data.cluster.mac_prefix
306

    
307
    def GenMac():
308
      byte1 = random.randrange(0, 256)
309
      byte2 = random.randrange(0, 256)
310
      byte3 = random.randrange(0, 256)
311
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
312
      return mac
313

    
314
    return GenMac
315

    
316
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateMAC(self, net_uuid, ec_id):
    """Generate a MAC for an instance.

    This should check the current instances for duplicates.

    @param net_uuid: network whose MAC prefix should be used; may be None
        for the cluster-wide default prefix
    @param ec_id: execution context id to register the reservation under
    @return: the newly generated MAC address

    """
    existing = self._AllMACs()
    prefix = self._UnlockedGetNetworkMACPrefix(net_uuid)
    gen_mac = self._GenerateOneMAC(prefix)
    # BUGFIX: reserve the generated MAC in the MAC reservation manager
    # (as ReserveMAC does), not in _temporary_ids (the UUID manager);
    # otherwise generated MACs don't conflict with explicitly reserved
    # ones and pollute the set returned by _AllIDs
    return self._temporary_macs.Generate(existing, gen_mac, ec_id)
327

    
328
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveMAC(self, mac, ec_id):
    """Reserve a MAC for an instance.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    @raise errors.ReservationError: if the MAC is already used by a NIC
        in the configuration

    """
    if mac in self._AllMACs():
      raise errors.ReservationError("mac already in use")
    self._temporary_macs.Reserve(ec_id, mac)
341

    
342
  def _UnlockedCommitTemporaryIps(self, ec_id):
    """Commit all reserved IP address to their respective pools

    @param ec_id: execution context whose pending IP operations to apply

    """
    pending = self._temporary_ips.GetECReserved(ec_id)
    for (action, address, net_uuid) in pending:
      self._UnlockedCommitIp(action, net_uuid, address)
348

    
349
  def _UnlockedCommitIp(self, action, net_uuid, address):
    """Commit a reserved IP address to an IP pool.

    The IP address is taken from the network's IP pool and marked as reserved.

    @param action: either constants.RESERVE_ACTION or
        constants.RELEASE_ACTION; anything else is silently ignored

    """
    pool = network.AddressPool(self._UnlockedGetNetwork(net_uuid))
    if action == constants.RESERVE_ACTION:
      pool.Reserve(address)
    elif action == constants.RELEASE_ACTION:
      pool.Release(address)
361

    
362
  def _UnlockedReleaseIp(self, net_uuid, address, ec_id):
    """Give a specific IP address back to an IP pool.

    The release is only recorded as a temporary reservation here; it is
    applied to the pool when the execution context's IPs are committed.

    """
    entry = (constants.RELEASE_ACTION, address, net_uuid)
    self._temporary_ips.Reserve(ec_id, entry)
371

    
372
  @locking.ssynchronized(_config_lock, shared=1)
  def ReleaseIp(self, net_uuid, address, ec_id):
    """Give a specified IP address back to an IP pool.

    This is just a wrapper around _UnlockedReleaseIp; a missing network
    uuid makes it a no-op.

    """
    if not net_uuid:
      return
    self._UnlockedReleaseIp(net_uuid, address, ec_id)
381

    
382
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateIp(self, net_uuid, ec_id):
    """Find a free IPv4 address for an instance.

    @raise errors.ReservationError: if the network has no free addresses

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)

    def _NextFreeAddress():
      # candidate tuple in the same shape used by the IP commit code
      try:
        free_ip = pool.GenerateFree()
      except errors.AddressPoolError:
        raise errors.ReservationError("Cannot generate IP. Network is full")
      return (constants.RESERVE_ACTION, free_ip, net_uuid)

    (_, address, _) = self._temporary_ips.Generate([], _NextFreeAddress, ec_id)
    return address
399

    
400
  def _UnlockedReserveIp(self, net_uuid, address, ec_id, check=True):
    """Reserve a given IPv4 address for use by an instance.

    @param check: whether an externally reserved address should be rejected
    @raise errors.ReservationError: if the address is outside the network,
        already in use, or (with check=True) externally reserved

    """
    pool = network.AddressPool(self._UnlockedGetNetwork(net_uuid))
    try:
      in_use = pool.IsReserved(address)
      ext_reserved = pool.IsReserved(address, external=True)
    except errors.AddressPoolError:
      raise errors.ReservationError("IP address not in network")
    if in_use:
      raise errors.ReservationError("IP address already in use")
    if check and ext_reserved:
      raise errors.ReservationError("IP is externally reserved")

    entry = (constants.RESERVE_ACTION, address, net_uuid)
    return self._temporary_ips.Reserve(ec_id, entry)
419

    
420
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveIp(self, net_uuid, address, ec_id, check=True):
    """Reserve a given IPv4 address for use by an instance.

    A missing network uuid makes this a no-op returning None.

    """
    if not net_uuid:
      return None
    return self._UnlockedReserveIp(net_uuid, address, ec_id, check)
427

    
428
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveLV(self, lv_name, ec_id):
    """Reserve an VG/LV pair for an instance.

    @type lv_name: string
    @param lv_name: the logical volume name to reserve
    @raise errors.ReservationError: if the LV is already present in the
        configuration

    """
    if lv_name in self._AllLVs():
      raise errors.ReservationError("LV already in use")
    self._temporary_lvs.Reserve(ec_id, lv_name)
441

    
442
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateDRBDSecret(self, ec_id):
    """Generate a DRBD secret.

    This checks the current disks for duplicates.

    """
    used_secrets = self._AllDRBDSecrets()
    return self._temporary_secrets.Generate(used_secrets,
                                            utils.GenerateSecret,
                                            ec_id)
452

    
453
  def _AllLVs(self):
454
    """Compute the list of all LVs.
455

456
    """
457
    lvnames = set()
458
    for instance in self._config_data.instances.values():
459
      node_data = instance.MapLVsByNode()
460
      for lv_list in node_data.values():
461
        lvnames.update(lv_list)
462
    return lvnames
463

    
464
  def _AllDisks(self):
465
    """Compute the list of all Disks (recursively, including children).
466

467
    """
468
    def DiskAndAllChildren(disk):
469
      """Returns a list containing the given disk and all of his children.
470

471
      """
472
      disks = [disk]
473
      if disk.children:
474
        for child_disk in disk.children:
475
          disks.extend(DiskAndAllChildren(child_disk))
476
      return disks
477

    
478
    disks = []
479
    for instance in self._config_data.instances.values():
480
      for disk in instance.disks:
481
        disks.extend(DiskAndAllChildren(disk))
482
    return disks
483

    
484
  def _AllNICs(self):
485
    """Compute the list of all NICs.
486

487
    """
488
    nics = []
489
    for instance in self._config_data.instances.values():
490
      nics.extend(instance.nics)
491
    return nics
492

    
493
  def _AllIDs(self, include_temporary):
    """Compute the list of all UUIDs and names we have.

    @type include_temporary: boolean
    @param include_temporary: whether to include the _temporary_ids set
    @rtype: set
    @return: a set of IDs

    """
    data = self._config_data
    existing = set(self._AllLVs())
    existing.update(data.instances.keys())
    existing.update(data.nodes.keys())
    existing.update(obj.uuid for obj in self._AllUUIDObjects() if obj.uuid)
    if include_temporary:
      existing.update(self._temporary_ids.GetReserved())
    return existing
510

    
511
  def _GenerateUniqueID(self, ec_id):
    """Generate an unique UUID.

    This checks the current node, instances and disk names for
    duplicates.

    @rtype: string
    @return: the unique id

    """
    taken = self._AllIDs(include_temporary=False)
    return self._temporary_ids.Generate(taken, utils.NewUUID, ec_id)
523

    
524
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateUniqueID(self, ec_id):
    """Generate an unique ID.

    This is just a wrapper over the unlocked version.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the id to

    """
    return self._GenerateUniqueID(ec_id)
535

    
536
  def _AllMACs(self):
537
    """Return all MACs present in the config.
538

539
    @rtype: list
540
    @return: the list of all MACs
541

542
    """
543
    result = []
544
    for instance in self._config_data.instances.values():
545
      for nic in instance.nics:
546
        result.append(nic.mac)
547

    
548
    return result
549

    
550
  def _AllDRBDSecrets(self):
551
    """Return all DRBD secrets present in the config.
552

553
    @rtype: list
554
    @return: the list of all DRBD secrets
555

556
    """
557
    def helper(disk, result):
558
      """Recursively gather secrets from this disk."""
559
      if disk.dev_type == constants.DT_DRBD8:
560
        result.append(disk.logical_id[5])
561
      if disk.children:
562
        for child in disk.children:
563
          helper(child, result)
564

    
565
    result = []
566
    for instance in self._config_data.instances.values():
567
      for disk in instance.disks:
568
        helper(disk, result)
569

    
570
    return result
571

    
572
  def _CheckDiskIDs(self, disk, l_ids):
573
    """Compute duplicate disk IDs
574

575
    @type disk: L{objects.Disk}
576
    @param disk: the disk at which to start searching
577
    @type l_ids: list
578
    @param l_ids: list of current logical ids
579
    @rtype: list
580
    @return: a list of error messages
581

582
    """
583
    result = []
584
    if disk.logical_id is not None:
585
      if disk.logical_id in l_ids:
586
        result.append("duplicate logical id %s" % str(disk.logical_id))
587
      else:
588
        l_ids.append(disk.logical_id)
589

    
590
    if disk.children:
591
      for child in disk.children:
592
        result.extend(self._CheckDiskIDs(child, l_ids))
593
    return result
594

    
595
  def _UnlockedVerifyConfig(self):
596
    """Verify function.
597

598
    @rtype: list
599
    @return: a list of error messages; a non-empty list signifies
600
        configuration errors
601

602
    """
603
    # pylint: disable=R0914
604
    result = []
605
    seen_macs = []
606
    ports = {}
607
    data = self._config_data
608
    cluster = data.cluster
609
    seen_lids = []
610

    
611
    # global cluster checks
612
    if not cluster.enabled_hypervisors:
613
      result.append("enabled hypervisors list doesn't have any entries")
614
    invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
615
    if invalid_hvs:
616
      result.append("enabled hypervisors contains invalid entries: %s" %
617
                    utils.CommaJoin(invalid_hvs))
618
    missing_hvp = (set(cluster.enabled_hypervisors) -
619
                   set(cluster.hvparams.keys()))
620
    if missing_hvp:
621
      result.append("hypervisor parameters missing for the enabled"
622
                    " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
623

    
624
    if not cluster.enabled_disk_templates:
625
      result.append("enabled disk templates list doesn't have any entries")
626
    invalid_disk_templates = set(cluster.enabled_disk_templates) \
627
                               - constants.DISK_TEMPLATES
628
    if invalid_disk_templates:
629
      result.append("enabled disk templates list contains invalid entries:"
630
                    " %s" % utils.CommaJoin(invalid_disk_templates))
631

    
632
    if cluster.master_node not in data.nodes:
633
      result.append("cluster has invalid primary node '%s'" %
634
                    cluster.master_node)
635

    
636
    def _helper(owner, attr, value, template):
637
      try:
638
        utils.ForceDictType(value, template)
639
      except errors.GenericError, err:
640
        result.append("%s has invalid %s: %s" % (owner, attr, err))
641

    
642
    def _helper_nic(owner, params):
643
      try:
644
        objects.NIC.CheckParameterSyntax(params)
645
      except errors.ConfigurationError, err:
646
        result.append("%s has invalid nicparams: %s" % (owner, err))
647

    
648
    def _helper_ipolicy(owner, ipolicy, iscluster):
649
      try:
650
        objects.InstancePolicy.CheckParameterSyntax(ipolicy, iscluster)
651
      except errors.ConfigurationError, err:
652
        result.append("%s has invalid instance policy: %s" % (owner, err))
653
      for key, value in ipolicy.items():
654
        if key == constants.ISPECS_MINMAX:
655
          for k in range(len(value)):
656
            _helper_ispecs(owner, "ipolicy/%s[%s]" % (key, k), value[k])
657
        elif key == constants.ISPECS_STD:
658
          _helper(owner, "ipolicy/" + key, value,
659
                  constants.ISPECS_PARAMETER_TYPES)
660
        else:
661
          # FIXME: assuming list type
662
          if key in constants.IPOLICY_PARAMETERS:
663
            exp_type = float
664
            # if the value is int, it can be converted into float
665
            convertible_types = [int]
666
          else:
667
            exp_type = list
668
            convertible_types = []
669
          # Try to convert from allowed types, if necessary.
670
          if any(isinstance(value, ct) for ct in convertible_types):
671
            try:
672
              value = exp_type(value)
673
              ipolicy[key] = value
674
            except ValueError:
675
              pass
676
          if not isinstance(value, exp_type):
677
            result.append("%s has invalid instance policy: for %s,"
678
                          " expecting %s, got %s" %
679
                          (owner, key, exp_type.__name__, type(value)))
680

    
681
    def _helper_ispecs(owner, parentkey, params):
682
      for (key, value) in params.items():
683
        fullkey = "/".join([parentkey, key])
684
        _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)
685

    
686
    # check cluster parameters
687
    _helper("cluster", "beparams", cluster.SimpleFillBE({}),
688
            constants.BES_PARAMETER_TYPES)
689
    _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
690
            constants.NICS_PARAMETER_TYPES)
691
    _helper_nic("cluster", cluster.SimpleFillNIC({}))
692
    _helper("cluster", "ndparams", cluster.SimpleFillND({}),
693
            constants.NDS_PARAMETER_TYPES)
694
    _helper_ipolicy("cluster", cluster.ipolicy, True)
695

    
696
    for disk_template in cluster.diskparams:
697
      if disk_template not in constants.DTS_HAVE_ACCESS:
698
        continue
699

    
700
      access = cluster.diskparams[disk_template].get(constants.LDP_ACCESS,
701
                                                     constants.DISK_KERNELSPACE)
702
      if access not in constants.DISK_VALID_ACCESS_MODES:
703
        result.append(
704
          "Invalid value of '%s:%s': '%s' (expected one of %s)" % (
705
            disk_template, constants.LDP_ACCESS, access,
706
            utils.CommaJoin(constants.DISK_VALID_ACCESS_MODES)
707
          )
708
        )
709

    
710
    # per-instance checks
711
    for instance_uuid in data.instances:
712
      instance = data.instances[instance_uuid]
713
      if instance.uuid != instance_uuid:
714
        result.append("instance '%s' is indexed by wrong UUID '%s'" %
715
                      (instance.name, instance_uuid))
716
      if instance.primary_node not in data.nodes:
717
        result.append("instance '%s' has invalid primary node '%s'" %
718
                      (instance.name, instance.primary_node))
719
      for snode in instance.secondary_nodes:
720
        if snode not in data.nodes:
721
          result.append("instance '%s' has invalid secondary node '%s'" %
722
                        (instance.name, snode))
723
      for idx, nic in enumerate(instance.nics):
724
        if nic.mac in seen_macs:
725
          result.append("instance '%s' has NIC %d mac %s duplicate" %
726
                        (instance.name, idx, nic.mac))
727
        else:
728
          seen_macs.append(nic.mac)
729
        if nic.nicparams:
730
          filled = cluster.SimpleFillNIC(nic.nicparams)
731
          owner = "instance %s nic %d" % (instance.name, idx)
732
          _helper(owner, "nicparams",
733
                  filled, constants.NICS_PARAMETER_TYPES)
734
          _helper_nic(owner, filled)
735

    
736
      # disk template checks
737
      if not instance.disk_template in data.cluster.enabled_disk_templates:
738
        result.append("instance '%s' uses the disabled disk template '%s'." %
739
                      (instance.name, instance.disk_template))
740

    
741
      # parameter checks
742
      if instance.beparams:
743
        _helper("instance %s" % instance.name, "beparams",
744
                cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)
745

    
746
      # gather the drbd ports for duplicate checks
747
      for (idx, dsk) in enumerate(instance.disks):
748
        if dsk.dev_type in constants.DTS_DRBD:
749
          tcp_port = dsk.logical_id[2]
750
          if tcp_port not in ports:
751
            ports[tcp_port] = []
752
          ports[tcp_port].append((instance.name, "drbd disk %s" % idx))
753
      # gather network port reservation
754
      net_port = getattr(instance, "network_port", None)
755
      if net_port is not None:
756
        if net_port not in ports:
757
          ports[net_port] = []
758
        ports[net_port].append((instance.name, "network port"))
759

    
760
      # instance disk verify
761
      for idx, disk in enumerate(instance.disks):
762
        result.extend(["instance '%s' disk %d error: %s" %
763
                       (instance.name, idx, msg) for msg in disk.Verify()])
764
        result.extend(self._CheckDiskIDs(disk, seen_lids))
765

    
766
      wrong_names = _CheckInstanceDiskIvNames(instance.disks)
767
      if wrong_names:
768
        tmp = "; ".join(("name of disk %s should be '%s', but is '%s'" %
769
                         (idx, exp_name, actual_name))
770
                        for (idx, exp_name, actual_name) in wrong_names)
771

    
772
        result.append("Instance '%s' has wrongly named disks: %s" %
773
                      (instance.name, tmp))
774

    
775
    # cluster-wide pool of free ports
776
    for free_port in cluster.tcpudp_port_pool:
777
      if free_port not in ports:
778
        ports[free_port] = []
779
      ports[free_port].append(("cluster", "port marked as free"))
780

    
781
    # compute tcp/udp duplicate ports
782
    keys = ports.keys()
783
    keys.sort()
784
    for pnum in keys:
785
      pdata = ports[pnum]
786
      if len(pdata) > 1:
787
        txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
788
        result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
789

    
790
    # highest used tcp port check
791
    if keys:
792
      if keys[-1] > cluster.highest_used_port:
793
        result.append("Highest used port mismatch, saved %s, computed %s" %
794
                      (cluster.highest_used_port, keys[-1]))
795

    
796
    if not data.nodes[cluster.master_node].master_candidate:
797
      result.append("Master node is not a master candidate")
798

    
799
    # master candidate checks
800
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
801
    if mc_now < mc_max:
802
      result.append("Not enough master candidates: actual %d, target %d" %
803
                    (mc_now, mc_max))
804

    
805
    # node checks
806
    for node_uuid, node in data.nodes.items():
807
      if node.uuid != node_uuid:
808
        result.append("Node '%s' is indexed by wrong UUID '%s'" %
809
                      (node.name, node_uuid))
810
      if [node.master_candidate, node.drained, node.offline].count(True) > 1:
811
        result.append("Node %s state is invalid: master_candidate=%s,"
812
                      " drain=%s, offline=%s" %
813
                      (node.name, node.master_candidate, node.drained,
814
                       node.offline))
815
      if node.group not in data.nodegroups:
816
        result.append("Node '%s' has invalid group '%s'" %
817
                      (node.name, node.group))
818
      else:
819
        _helper("node %s" % node.name, "ndparams",
820
                cluster.FillND(node, data.nodegroups[node.group]),
821
                constants.NDS_PARAMETER_TYPES)
822
      used_globals = constants.NDC_GLOBALS.intersection(node.ndparams)
823
      if used_globals:
824
        result.append("Node '%s' has some global parameters set: %s" %
825
                      (node.name, utils.CommaJoin(used_globals)))
826

    
827
    # nodegroups checks
828
    nodegroups_names = set()
829
    for nodegroup_uuid in data.nodegroups:
830
      nodegroup = data.nodegroups[nodegroup_uuid]
831
      if nodegroup.uuid != nodegroup_uuid:
832
        result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
833
                      % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
834
      if utils.UUID_RE.match(nodegroup.name.lower()):
835
        result.append("node group '%s' (uuid: '%s') has uuid-like name" %
836
                      (nodegroup.name, nodegroup.uuid))
837
      if nodegroup.name in nodegroups_names:
838
        result.append("duplicate node group name '%s'" % nodegroup.name)
839
      else:
840
        nodegroups_names.add(nodegroup.name)
841
      group_name = "group %s" % nodegroup.name
842
      _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy),
843
                      False)
844
      if nodegroup.ndparams:
845
        _helper(group_name, "ndparams",
846
                cluster.SimpleFillND(nodegroup.ndparams),
847
                constants.NDS_PARAMETER_TYPES)
848

    
849
    # drbd minors check
850
    _, duplicates = self._UnlockedComputeDRBDMap()
851
    for node, minor, instance_a, instance_b in duplicates:
852
      result.append("DRBD minor %d on node %s is assigned twice to instances"
853
                    " %s and %s" % (minor, node, instance_a, instance_b))
854

    
855
    # IP checks
856
    default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
857
    ips = {}
858

    
859
    def _AddIpAddress(ip, name):
860
      ips.setdefault(ip, []).append(name)
861

    
862
    _AddIpAddress(cluster.master_ip, "cluster_ip")
863

    
864
    for node in data.nodes.values():
865
      _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
866
      if node.secondary_ip != node.primary_ip:
867
        _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
868

    
869
    for instance in data.instances.values():
870
      for idx, nic in enumerate(instance.nics):
871
        if nic.ip is None:
872
          continue
873

    
874
        nicparams = objects.FillDict(default_nicparams, nic.nicparams)
875
        nic_mode = nicparams[constants.NIC_MODE]
876
        nic_link = nicparams[constants.NIC_LINK]
877

    
878
        if nic_mode == constants.NIC_MODE_BRIDGED:
879
          link = "bridge:%s" % nic_link
880
        elif nic_mode == constants.NIC_MODE_ROUTED:
881
          link = "route:%s" % nic_link
882
        else:
883
          raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
884

    
885
        _AddIpAddress("%s/%s/%s" % (link, nic.ip, nic.network),
886
                      "instance:%s/nic:%d" % (instance.name, idx))
887

    
888
    for ip, owners in ips.items():
889
      if len(owners) > 1:
890
        result.append("IP address %s is used by multiple owners: %s" %
891
                      (ip, utils.CommaJoin(owners)))
892

    
893
    return result
894

    
895
  @locking.ssynchronized(_config_lock, shared=1)
  def VerifyConfig(self):
    """Verify function.

    This is just a locked wrapper over L{_UnlockedVerifyConfig}.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    return self._UnlockedVerifyConfig()
  @locking.ssynchronized(_config_lock)
  def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    @type port: int
    @param port: the port number to return to the pool

    @warning: this method does not "flush" the configuration (via
        L{_WriteConfig}); callers should do that themselves once the
        configuration is stable

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._config_data.cluster.tcpudp_port_pool.add(port)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetPortList(self):
    """Returns a copy of the current port list.

    @rtype: set
    @return: a copy of the TCP/UDP port pool

    """
    # Return a copy so callers cannot mutate the pool behind our back.
    return self._config_data.cluster.tcpudp_port_pool.copy()
  @locking.ssynchronized(_config_lock)
  def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    @rtype: int
    @return: the allocated port number
    @raise errors.ConfigurationError: if the default port range is
        exhausted

    """
    # If there are TCP/IP ports configured, we use them first.
    if self._config_data.cluster.tcpudp_port_pool:
      port = self._config_data.cluster.tcpudp_port_pool.pop()
    else:
      port = self._config_data.cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      self._config_data.cluster.highest_used_port = port

    # Persist immediately so the port cannot be handed out twice.
    self._WriteConfig()
    return port
  def _UnlockedComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    @rtype: (dict, list)
    @return: dictionary of node_uuid: dict of minor: instance_uuid;
        the returned dict will have all the nodes in it (even if with
        an empty list), and a list of duplicates; if the duplicates
        list is not empty, the configuration is corrupted and its caller
        should raise an exception

    """
    def _AppendUsedMinors(get_node_name_fn, instance, disk, used):
      # Recursively collect the minors used by a disk (and its children)
      # into "used", returning any duplicate assignments found.
      duplicates = []
      if disk.dev_type == constants.DT_DRBD8 and len(disk.logical_id) >= 5:
        node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
        for node_uuid, minor in ((node_a, minor_a), (node_b, minor_b)):
          assert node_uuid in used, \
            ("Node '%s' of instance '%s' not found in node list" %
             (get_node_name_fn(node_uuid), instance.name))
          if minor in used[node_uuid]:
            duplicates.append((node_uuid, minor, instance.uuid,
                               used[node_uuid][minor]))
          else:
            used[node_uuid][minor] = instance.uuid
      if disk.children:
        for child in disk.children:
          duplicates.extend(_AppendUsedMinors(get_node_name_fn, instance, child,
                                              used))
      return duplicates

    duplicates = []
    my_dict = dict((node_uuid, {}) for node_uuid in self._config_data.nodes)
    for instance in self._config_data.instances.itervalues():
      for disk in instance.disks:
        duplicates.extend(_AppendUsedMinors(self._UnlockedGetNodeName,
                                            instance, disk, my_dict))
    # Temporarily reserved minors also count as used; clashes with the
    # persisted configuration are reported as duplicates as well.
    for (node_uuid, minor), inst_uuid in self._temporary_drbds.iteritems():
      if minor in my_dict[node_uuid] and my_dict[node_uuid][minor] != inst_uuid:
        duplicates.append((node_uuid, minor, inst_uuid,
                           my_dict[node_uuid][minor]))
      else:
        my_dict[node_uuid][minor] = inst_uuid
    return my_dict, duplicates
  @locking.ssynchronized(_config_lock)
  def ComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    This is just a wrapper over L{_UnlockedComputeDRBDMap}.

    @return: dictionary of node_uuid: dict of minor: instance_uuid;
        the returned dict will have all the nodes in it (even if with
        an empty list).
    @raise errors.ConfigurationError: if duplicate minor assignments
        are detected

    """
    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(duplicates))
    return d_map
  @locking.ssynchronized(_config_lock)
  def AllocateDRBDMinor(self, node_uuids, inst_uuid):
    """Allocate a drbd minor.

    The free minor will be automatically computed from the existing
    devices. A node can be given multiple times in order to allocate
    multiple minors. The result is the list of minors, in the same
    order as the passed nodes.

    @type node_uuids: list of strings
    @param node_uuids: the nodes on which to allocate minors
    @type inst_uuid: string
    @param inst_uuid: the instance for which we allocate minors
    @rtype: list of ints
    @return: the allocated minors, one per entry of node_uuids

    """
    assert isinstance(inst_uuid, basestring), \
           "Invalid argument '%s' passed to AllocateDRBDMinor" % inst_uuid

    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(duplicates))
    result = []
    for nuuid in node_uuids:
      ndata = d_map[nuuid]
      if not ndata:
        # no minors used, we can start at 0
        result.append(0)
        ndata[0] = inst_uuid
        self._temporary_drbds[(nuuid, 0)] = inst_uuid
        continue
      keys = ndata.keys()
      keys.sort()
      ffree = utils.FirstFree(keys)
      if ffree is None:
        # return the next minor
        # TODO: implement high-limit check
        minor = keys[-1] + 1
      else:
        minor = ffree
      # double-check minor against current instances
      assert minor not in d_map[nuuid], \
             ("Attempt to reuse allocated DRBD minor %d on node %s,"
              " already allocated to instance %s" %
              (minor, nuuid, d_map[nuuid][minor]))
      ndata[minor] = inst_uuid
      # double-check minor against reservation
      r_key = (nuuid, minor)
      assert r_key not in self._temporary_drbds, \
             ("Attempt to reuse reserved DRBD minor %d on node %s,"
              " reserved for instance %s" %
              (minor, nuuid, self._temporary_drbds[r_key]))
      self._temporary_drbds[r_key] = inst_uuid
      result.append(minor)
    logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                  node_uuids, result)
    return result
  def _UnlockedReleaseDRBDMinors(self, inst_uuid):
    """Release temporary drbd minors allocated for a given instance.

    @type inst_uuid: string
    @param inst_uuid: the instance for which temporary minors should be
                      released

    """
    assert isinstance(inst_uuid, basestring), \
           "Invalid argument passed to ReleaseDRBDMinors"
    # items() returns a copy in Python 2, so deleting while iterating is safe
    for key, uuid in self._temporary_drbds.items():
      if uuid == inst_uuid:
        del self._temporary_drbds[key]
  @locking.ssynchronized(_config_lock)
  def ReleaseDRBDMinors(self, inst_uuid):
    """Release temporary drbd minors allocated for a given instance.

    This should be called on the error paths, on the success paths
    it's automatically called by the ConfigWriter add and update
    functions.

    This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.

    @type inst_uuid: string
    @param inst_uuid: the instance for which temporary minors should be
                      released

    """
    self._UnlockedReleaseDRBDMinors(inst_uuid)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetConfigVersion(self):
    """Get the configuration version.

    @return: Config version

    """
    return self._config_data.version

  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterName(self):
    """Get cluster name.

    @return: Cluster name

    """
    return self._config_data.cluster.cluster_name

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNode(self):
    """Get the UUID of the master node for this cluster.

    @return: Master node UUID

    """
    return self._config_data.cluster.master_node

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNodeName(self):
    """Get the hostname of the master node for this cluster.

    @return: Master node hostname

    """
    return self._UnlockedGetNodeName(self._config_data.cluster.master_node)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNodeInfo(self):
    """Get the master node information for this cluster.

    @rtype: objects.Node
    @return: Master node L{objects.Node} object

    """
    return self._UnlockedGetNodeInfo(self._config_data.cluster.master_node)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterIP(self):
    """Get the IP of the master node for this cluster.

    @return: Master IP

    """
    return self._config_data.cluster.master_ip

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetdev(self):
    """Get the master network device for this cluster.

    """
    return self._config_data.cluster.master_netdev

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetmask(self):
    """Get the netmask of the master node for this cluster.

    """
    return self._config_data.cluster.master_netmask

  @locking.ssynchronized(_config_lock, shared=1)
  def GetUseExternalMipScript(self):
    """Get flag representing whether to use the external master IP setup script.

    """
    return self._config_data.cluster.use_external_mip_script

  @locking.ssynchronized(_config_lock, shared=1)
  def GetFileStorageDir(self):
    """Get the file storage dir for this cluster.

    """
    return self._config_data.cluster.file_storage_dir

  @locking.ssynchronized(_config_lock, shared=1)
  def GetSharedFileStorageDir(self):
    """Get the shared file storage dir for this cluster.

    """
    return self._config_data.cluster.shared_file_storage_dir

  @locking.ssynchronized(_config_lock, shared=1)
  def GetGlusterStorageDir(self):
    """Get the Gluster storage dir for this cluster.

    """
    return self._config_data.cluster.gluster_storage_dir

  @locking.ssynchronized(_config_lock, shared=1)
  def GetHypervisorType(self):
    """Get the hypervisor type for this cluster.

    """
    # By convention the first enabled hypervisor is the default one
    return self._config_data.cluster.enabled_hypervisors[0]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetRsaHostKey(self):
    """Return the rsa hostkey from the config.

    @rtype: string
    @return: the rsa hostkey

    """
    return self._config_data.cluster.rsahostkeypub

  @locking.ssynchronized(_config_lock, shared=1)
  def GetDsaHostKey(self):
    """Return the dsa hostkey from the config.

    @rtype: string
    @return: the dsa hostkey

    """
    return self._config_data.cluster.dsahostkeypub

  @locking.ssynchronized(_config_lock, shared=1)
  def GetDefaultIAllocator(self):
    """Get the default instance allocator for this cluster.

    """
    return self._config_data.cluster.default_iallocator

  @locking.ssynchronized(_config_lock, shared=1)
  def GetDefaultIAllocatorParameters(self):
    """Get the default instance allocator parameters for this cluster.

    @rtype: dict
    @return: dict of iallocator parameters

    """
    return self._config_data.cluster.default_iallocator_params

  @locking.ssynchronized(_config_lock, shared=1)
  def GetPrimaryIPFamily(self):
    """Get cluster primary ip family.

    @return: primary ip family

    """
    return self._config_data.cluster.primary_ip_family

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetworkParameters(self):
    """Get network parameters of the master node.

    @rtype: L{object.MasterNetworkParameters}
    @return: network parameters of the master node

    """
    cluster = self._config_data.cluster
    result = objects.MasterNetworkParameters(
      uuid=cluster.master_node, ip=cluster.master_ip,
      netmask=cluster.master_netmask, netdev=cluster.master_netdev,
      ip_family=cluster.primary_ip_family)

    return result

  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceCommunicationNetwork(self):
    """Get cluster instance communication network

    @rtype: string
    @return: instance communication network, which is the name of the
             network used for instance communication

    """
    return self._config_data.cluster.instance_communication_network
  @locking.ssynchronized(_config_lock)
  def AddNodeGroup(self, group, ec_id, check_uuid=True):
    """Add a node group to the configuration.

    This method calls group.UpgradeConfig() to fill any missing attributes
    according to their default values.

    @type group: L{objects.NodeGroup}
    @param group: the NodeGroup object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: bool
    @param check_uuid: add an UUID to the group if it doesn't have one or, if
                       it does, ensure that it does not exist in the
                       configuration already

    """
    self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
    self._WriteConfig()
  def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
    """Add a node group to the configuration.

    @type group: L{objects.NodeGroup}
    @param group: the NodeGroup object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: bool
    @param check_uuid: whether to validate/generate the group's UUID
    @raise errors.OpPrereqError: if a group with the same name already exists

    """
    logging.info("Adding node group %s to configuration", group.name)

    # Some code might need to add a node group with a pre-populated UUID
    # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
    # the "does this UUID" exist already check.
    if check_uuid:
      self._EnsureUUID(group, ec_id)

    try:
      existing_uuid = self._UnlockedLookupNodeGroup(group.name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (group.name, existing_uuid),
                                 errors.ECODE_EXISTS)

    group.serial_no = 1
    group.ctime = group.mtime = time.time()
    group.UpgradeConfig()

    self._config_data.nodegroups[group.uuid] = group
    self._config_data.cluster.serial_no += 1
  @locking.ssynchronized(_config_lock)
  def RemoveNodeGroup(self, group_uuid):
    """Remove a node group from the configuration.

    @type group_uuid: string
    @param group_uuid: the UUID of the node group to remove
    @raise errors.ConfigurationError: if the group does not exist

    """
    logging.info("Removing node group %s from configuration", group_uuid)

    if group_uuid not in self._config_data.nodegroups:
      raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)

    # A cluster always needs at least one node group
    assert len(self._config_data.nodegroups) != 1, \
            "Group '%s' is the only group, cannot be removed" % group_uuid

    del self._config_data.nodegroups[group_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  def _UnlockedLookupNodeGroup(self, target):
    """Lookup a node group's UUID.

    @type target: string or None
    @param target: group name or UUID or None to look for the default
    @rtype: string
    @return: nodegroup UUID
    @raises errors.OpPrereqError: when the target group cannot be found

    """
    if target is None:
      # No target given: only valid when exactly one group exists
      if len(self._config_data.nodegroups) != 1:
        raise errors.OpPrereqError("More than one node group exists. Target"
                                   " group must be specified explicitly.")
      else:
        return self._config_data.nodegroups.keys()[0]
    # Try a direct UUID match first, then fall back to matching by name
    if target in self._config_data.nodegroups:
      return target
    for nodegroup in self._config_data.nodegroups.values():
      if nodegroup.name == target:
        return nodegroup.uuid
    raise errors.OpPrereqError("Node group '%s' not found" % target,
                               errors.ECODE_NOENT)
  @locking.ssynchronized(_config_lock, shared=1)
  def LookupNodeGroup(self, target):
    """Lookup a node group's UUID.

    This function is just a wrapper over L{_UnlockedLookupNodeGroup}.

    @type target: string or None
    @param target: group name or UUID or None to look for the default
    @rtype: string
    @return: nodegroup UUID

    """
    return self._UnlockedLookupNodeGroup(target)
  def _UnlockedGetNodeGroup(self, uuid):
    """Lookup a node group.

    @type uuid: string
    @param uuid: group UUID
    @rtype: L{objects.NodeGroup} or None
    @return: nodegroup object, or None if not found

    """
    if uuid not in self._config_data.nodegroups:
      return None

    return self._config_data.nodegroups[uuid]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroup(self, uuid):
    """Lookup a node group.

    This is just a locked wrapper over L{_UnlockedGetNodeGroup}.

    @type uuid: string
    @param uuid: group UUID
    @rtype: L{objects.NodeGroup} or None
    @return: nodegroup object, or None if not found

    """
    return self._UnlockedGetNodeGroup(uuid)
  def _UnlockedGetAllNodeGroupsInfo(self):
1411
    """Get the configuration of all node groups.
1412

1413
    """
1414
    return dict(self._config_data.nodegroups)
1415

    
1416
  @locking.ssynchronized(_config_lock, shared=1)
1417
  def GetAllNodeGroupsInfo(self):
1418
    """Get the configuration of all node groups.
1419

1420
    """
1421
    return self._UnlockedGetAllNodeGroupsInfo()
1422

    
1423
  @locking.ssynchronized(_config_lock, shared=1)
1424
  def GetAllNodeGroupsInfoDict(self):
1425
    """Get the configuration of all node groups expressed as a dictionary of
1426
    dictionaries.
1427

1428
    """
1429
    return dict(map(lambda (uuid, ng): (uuid, ng.ToDict()),
1430
                    self._UnlockedGetAllNodeGroupsInfo().items()))
1431

    
1432
  @locking.ssynchronized(_config_lock, shared=1)
1433
  def GetNodeGroupList(self):
1434
    """Get a list of node groups.
1435

1436
    """
1437
    return self._config_data.nodegroups.keys()
1438

    
1439
  @locking.ssynchronized(_config_lock, shared=1)
1440
  def GetNodeGroupMembersByNodes(self, nodes):
1441
    """Get nodes which are member in the same nodegroups as the given nodes.
1442

1443
    """
1444
    ngfn = lambda node_uuid: self._UnlockedGetNodeInfo(node_uuid).group
1445
    return frozenset(member_uuid
1446
                     for node_uuid in nodes
1447
                     for member_uuid in
1448
                       self._UnlockedGetNodeGroup(ngfn(node_uuid)).members)
1449

    
1450
  @locking.ssynchronized(_config_lock, shared=1)
1451
  def GetMultiNodeGroupInfo(self, group_uuids):
1452
    """Get the configuration of multiple node groups.
1453

1454
    @param group_uuids: List of node group UUIDs
1455
    @rtype: list
1456
    @return: List of tuples of (group_uuid, group_info)
1457

1458
    """
1459
    return [(uuid, self._UnlockedGetNodeGroup(uuid)) for uuid in group_uuids]
1460

    
1461
  @locking.ssynchronized(_config_lock)
  def AddInstance(self, instance, ec_id):
    """Add an instance to the config.

    This should be used after creating a new instance.

    @type instance: L{objects.Instance}
    @param instance: the instance object
    @type ec_id: string
    @param ec_id: execution context id used to commit reserved IPs
    @raise errors.ConfigurationError: if a NIC's MAC address is already
        in use

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
      all_lvs = instance.MapLVsByNode()
      logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)

    # Reject MAC addresses already used by other instances
    all_macs = self._AllMACs()
    for nic in instance.nics:
      if nic.mac in all_macs:
        raise errors.ConfigurationError("Cannot add instance %s:"
                                        " MAC address '%s' already in use." %
                                        (instance.name, nic.mac))

    self._CheckUniqueUUID(instance, include_temporary=False)

    instance.serial_no = 1
    instance.ctime = instance.mtime = time.time()
    self._config_data.instances[instance.uuid] = instance
    self._config_data.cluster.serial_no += 1
    # Drop temporary reservations now that the instance is committed
    self._UnlockedReleaseDRBDMinors(instance.uuid)
    self._UnlockedCommitTemporaryIps(ec_id)
    self._WriteConfig()
  def _EnsureUUID(self, item, ec_id):
    """Ensures a given object has a valid UUID.

    @param item: the instance or node to be checked
    @param ec_id: the execution context id for the uuid reservation

    """
    if not item.uuid:
      item.uuid = self._GenerateUniqueID(ec_id)
    else:
      # A caller-supplied UUID must not clash with any existing or
      # temporarily reserved one
      self._CheckUniqueUUID(item, include_temporary=True)
  def _CheckUniqueUUID(self, item, include_temporary):
    """Checks that the UUID of the given object is unique.

    @param item: the instance or node to be checked
    @param include_temporary: whether temporarily generated UUID's should be
              included in the check. If the UUID of the item to be checked is
              a temporarily generated one, this has to be C{False}.
    @raise errors.ConfigurationError: if the UUID is missing or already used

    """
    if not item.uuid:
      raise errors.ConfigurationError("'%s' must have an UUID" % (item.name,))
    if item.uuid in self._AllIDs(include_temporary=include_temporary):
      raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
                                      " in use" % (item.name, item.uuid))
  def _SetInstanceStatus(self, inst_uuid, status, disks_active):
    """Set the instance's status to a given value.

    @type inst_uuid: string
    @param inst_uuid: the instance whose state to change
    @param status: one of constants.ADMINST_ALL, or None to keep the
        current admin state
    @param disks_active: new disks_active flag, or None to keep the
        current value
    @raise errors.ConfigurationError: if the instance is unknown

    """
    if inst_uuid not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      inst_uuid)
    instance = self._config_data.instances[inst_uuid]

    # None means "keep the current value"
    if status is None:
      status = instance.admin_state
    if disks_active is None:
      disks_active = instance.disks_active

    assert status in constants.ADMINST_ALL, \
           "Invalid status '%s' passed to SetInstanceStatus" % (status,)

    # Only write the configuration if something actually changed
    if instance.admin_state != status or \
       instance.disks_active != disks_active:
      instance.admin_state = status
      instance.disks_active = disks_active
      instance.serial_no += 1
      instance.mtime = time.time()
      self._WriteConfig()
  @locking.ssynchronized(_config_lock)
  def MarkInstanceUp(self, inst_uuid):
    """Mark the instance status to up in the config.

    This also sets the instance disks active flag.

    """
    self._SetInstanceStatus(inst_uuid, constants.ADMINST_UP, True)

  @locking.ssynchronized(_config_lock)
  def MarkInstanceOffline(self, inst_uuid):
    """Mark the instance status to down in the config.

    This also clears the instance disks active flag.

    """
    self._SetInstanceStatus(inst_uuid, constants.ADMINST_OFFLINE, False)
  @locking.ssynchronized(_config_lock)
  def RemoveInstance(self, inst_uuid):
    """Remove the instance from the configuration.

    @type inst_uuid: string
    @param inst_uuid: the instance to remove
    @raise errors.ConfigurationError: if the instance is unknown

    """
    if inst_uuid not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    # If a network port has been allocated to the instance,
    # return it to the pool of free ports.
    inst = self._config_data.instances[inst_uuid]
    network_port = getattr(inst, "network_port", None)
    if network_port is not None:
      self._config_data.cluster.tcpudp_port_pool.add(network_port)

    instance = self._UnlockedGetInstanceInfo(inst_uuid)

    for nic in instance.nics:
      if nic.network and nic.ip:
        # Return all IP addresses to the respective address pools
        self._UnlockedCommitIp(constants.RELEASE_ACTION, nic.network, nic.ip)

    del self._config_data.instances[inst_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  @locking.ssynchronized(_config_lock)
  def RenameInstance(self, inst_uuid, new_name):
    """Rename an instance.

    This needs to be done in ConfigWriter and not by RemoveInstance
    combined with AddInstance as only we can guarantee an atomic
    rename.

    @type inst_uuid: string
    @param inst_uuid: the instance to rename
    @type new_name: string
    @param new_name: the new instance name
    @raise errors.ConfigurationError: if the instance is unknown

    """
    if inst_uuid not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    inst = self._config_data.instances[inst_uuid]
    inst.name = new_name

    # The enumerate() index was unused in the original loop; iterate directly
    for disk in inst.disks:
      if disk.dev_type in [constants.DT_FILE, constants.DT_SHARED_FILE]:
        # rename the file paths in logical and physical id
        file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
        disk.logical_id = (disk.logical_id[0],
                           utils.PathJoin(file_storage_dir, inst.name,
                                          os.path.basename(disk.logical_id[1])))

    # Force update of ssconf files
    self._config_data.cluster.serial_no += 1

    self._WriteConfig()
  @locking.ssynchronized(_config_lock)
  def MarkInstanceDown(self, inst_uuid):
    """Mark the status of an instance to down in the configuration.

    This does not touch the instance disks active flag, as shut down instances
    can still have active disks.

    """
    self._SetInstanceStatus(inst_uuid, constants.ADMINST_DOWN, None)

  @locking.ssynchronized(_config_lock)
  def MarkInstanceDisksActive(self, inst_uuid):
    """Mark the status of instance disks active.

    """
    self._SetInstanceStatus(inst_uuid, None, True)

  @locking.ssynchronized(_config_lock)
  def MarkInstanceDisksInactive(self, inst_uuid):
    """Mark the status of instance disks inactive.

    """
    self._SetInstanceStatus(inst_uuid, None, False)
1643
  def _UnlockedGetInstanceList(self):
1644
    """Get the list of instances.
1645

1646
    This function is for internal use, when the config lock is already held.
1647

1648
    """
1649
    return self._config_data.instances.keys()
1650

    
1651
  @locking.ssynchronized(_config_lock, shared=1)
1652
  def GetInstanceList(self):
1653
    """Get the list of instances.
1654

1655
    @return: array of instances, ex. ['instance2-uuid', 'instance1-uuid']
1656

1657
    """
1658
    return self._UnlockedGetInstanceList()
1659

    
1660
  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    @type short_name: string
    @param short_name: the (possibly partial) instance name to expand
    @rtype: tuple
    @return: (uuid, full name) of the matching instance, or (None, None)
        if no unique match was found

    """
    # Locking is done in L{ConfigWriter.GetAllInstancesInfo}
    all_insts = self.GetAllInstancesInfo().values()
    expanded_name = _MatchNameComponentIgnoreCase(
                      short_name, [inst.name for inst in all_insts])

    if expanded_name is not None:
      # there has to be exactly one instance with that name
      inst = (filter(lambda n: n.name == expanded_name, all_insts)[0])
      return (inst.uuid, inst.name)
    else:
      return (None, None)
  def _UnlockedGetInstanceInfo(self, inst_uuid):
    """Look up an instance object by UUID.

    For internal use only; the caller must already hold the config lock.

    @param inst_uuid: UUID of the instance
    @return: the L{objects.Instance} object, or None if unknown

    """
    return self._config_data.instances.get(inst_uuid, None)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceInfo(self, inst_uuid):
    """Return the configuration of an instance.

    Only configuration-file data is returned; live information about an
    instance has to be obtained from the running systems.

    @param inst_uuid: UUID of the instance

    @rtype: L{objects.Instance}
    @return: the instance object (or None if unknown)

    """
    return self._UnlockedGetInstanceInfo(inst_uuid)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceNodeGroups(self, inst_uuid, primary_only=False):
    """Return the set of node group UUIDs covering an instance's nodes.

    @param inst_uuid: UUID of the instance
    @param primary_only: if True, only consider the primary node
    @rtype: frozenset

    """
    instance = self._UnlockedGetInstanceInfo(inst_uuid)
    if not instance:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    nodes = [instance.primary_node] if primary_only else instance.all_nodes

    return frozenset(self._UnlockedGetNodeInfo(node_uuid).group
                     for node_uuid in nodes)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceNetworks(self, inst_uuid):
    """Return the set of network UUIDs used by an instance's NICs.

    @param inst_uuid: UUID of the instance
    @rtype: frozenset

    """
    instance = self._UnlockedGetInstanceInfo(inst_uuid)
    if not instance:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    # NICs without a network (nic.network falsy) are skipped
    return frozenset(nic.network for nic in instance.nics if nic.network)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMultiInstanceInfo(self, inst_uuids):
    """Fetch the configuration of several instances at once.

    @param inst_uuids: list of instance UUIDs
    @rtype: list
    @return: list of (instance UUID, instance_info) tuples, in the
        original order; instance_info is what L{GetInstanceInfo}
        would return (None entries for unknown UUIDs)

    """
    lookup = self._UnlockedGetInstanceInfo
    return [(uuid, lookup(uuid)) for uuid in inst_uuids]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMultiInstanceInfoByName(self, inst_names):
    """Fetch the configuration of several instances, addressed by name.

    @param inst_names: list of instance names
    @rtype: list
    @return: list of (instance UUID, instance_info) tuples, in the
        original order; instance_info is what L{GetInstanceInfo}
        would return

    """
    return [(inst.uuid, inst)
            for inst in map(self._UnlockedGetInstanceInfoByName, inst_names)]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllInstancesInfo(self):
    """Return the configuration of every instance.

    @rtype: dict
    @return: mapping of instance UUID to the corresponding
        L{objects.Instance} object

    """
    return self._UnlockedGetAllInstancesInfo()
  def _UnlockedGetAllInstancesInfo(self):
    """Build the UUID-to-instance mapping; config lock must be held.

    """
    return dict((inst_uuid, self._UnlockedGetInstanceInfo(inst_uuid))
                for inst_uuid in self._UnlockedGetInstanceList())
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstancesInfoByFilter(self, filter_fn):
    """Return the instances selected by a filter function.

    @type filter_fn: callable
    @param filter_fn: Filter function receiving instance object as parameter,
      returning boolean. Important: this function is called while the
      configuration locks is held. It must not do any complex work or call
      functions potentially leading to a deadlock. Ideally it doesn't call any
      other functions and just compares instance attributes.

    """
    selected = {}
    for (uuid, inst) in self._config_data.instances.items():
      if filter_fn(inst):
        selected[uuid] = inst
    return selected
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceInfoByName(self, inst_name):
    """Look up an instance object by its name.

    @param inst_name: name of the instance to get information for
    @type inst_name: string
    @return: the matching L{objects.Instance} instance, or None when the
          name is unknown

    """
    return self._UnlockedGetInstanceInfoByName(inst_name)
  def _UnlockedGetInstanceInfoByName(self, inst_name):
    """Linear search for an instance by name; None if not found.

    """
    return next((inst
                 for inst in self._UnlockedGetAllInstancesInfo().values()
                 if inst.name == inst_name), None)
  def _UnlockedGetInstanceName(self, inst_uuid):
    """Resolve an instance UUID to its name; raises on unknown UUID.

    """
    info = self._UnlockedGetInstanceInfo(inst_uuid)
    if info is None:
      raise errors.OpExecError("Unknown instance: %s" % inst_uuid)
    return info.name
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceName(self, inst_uuid):
    """Resolve an instance UUID to the instance's name.

    @param inst_uuid: instance UUID to get name for
    @type inst_uuid: string
    @rtype: string
    @return: instance name

    """
    return self._UnlockedGetInstanceName(inst_uuid)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceNames(self, inst_uuids):
    """Resolve a list of instance UUIDs to instance names.

    @param inst_uuids: list of instance UUIDs to get names for
    @type inst_uuids: list of strings
    @rtype: list of strings
    @return: list of instance names

    """
    return self._UnlockedGetInstanceNames(inst_uuids)
  def _UnlockedGetInstanceNames(self, inst_uuids):
    """Map each UUID to its instance name; config lock must be held.

    """
    return map(self._UnlockedGetInstanceName, inst_uuids)
  @locking.ssynchronized(_config_lock)
  def AddNode(self, node, ec_id):
    """Add a node to the configuration.

    @type node: L{objects.Node}
    @param node: a Node instance
    @param ec_id: execution context id, used when a UUID has to be
        generated for the node

    """
    logging.info("Adding node %s to configuration", node.name)

    # generate a UUID for the node if it does not have one yet
    self._EnsureUUID(node, ec_id)

    # fresh node: initialize serial number and timestamps
    node.serial_no = 1
    node.ctime = node.mtime = time.time()
    # register the node in its node group's member list first
    self._UnlockedAddNodeToGroup(node.uuid, node.group)
    self._config_data.nodes[node.uuid] = node
    # bump the cluster serial so ssconf files get redistributed
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  @locking.ssynchronized(_config_lock)
  def RemoveNode(self, node_uuid):
    """Remove a node from the configuration.

    @param node_uuid: UUID of the node to remove
    @raise errors.ConfigurationError: if the node is not known

    """
    logging.info("Removing node %s from configuration", node_uuid)

    if node_uuid not in self._config_data.nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_uuid)

    # drop the node from its group's member list before deleting it
    self._UnlockedRemoveNodeFromGroup(self._config_data.nodes[node_uuid])
    del self._config_data.nodes[node_uuid]
    # bump the cluster serial so ssconf files get redistributed
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name into a node UUID.

    @param short_name: partial (or full) node name to expand
    @rtype: tuple
    @return: (uuid, name) of the uniquely matching node, or
      (None, None) when no unique match exists

    """
    # Locking is done in L{ConfigWriter.GetAllNodesInfo}
    all_nodes = self.GetAllNodesInfo().values()
    expanded_name = _MatchNameComponentIgnoreCase(
                      short_name, [node.name for node in all_nodes])

    if expanded_name is None:
      return (None, None)

    # exactly one node carries the expanded name
    match = next(node for node in all_nodes if node.name == expanded_name)
    return (match.uuid, match.name)
  def _UnlockedGetNodeInfo(self, node_uuid):
    """Look up a node object by UUID.

    For internal use only; the caller must already hold the config lock.

    @param node_uuid: the node UUID

    @rtype: L{objects.Node}
    @return: the node object, or None if unknown

    """
    return self._config_data.nodes.get(node_uuid, None)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInfo(self, node_uuid):
    """Return the configuration of a node.

    This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

    @param node_uuid: the node UUID

    @rtype: L{objects.Node}
    @return: the node object (or None if unknown)

    """
    return self._UnlockedGetNodeInfo(node_uuid)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInstances(self, node_uuid):
    """Return the instances hosted on a node, as stored in the config.

    @param node_uuid: the node UUID

    @rtype: (list, list)
    @return: a tuple with two lists: the primary and the secondary instances

    """
    # instances.values() is a list, so iterating it twice is fine
    insts = self._config_data.instances.values()
    pri = [inst.uuid for inst in insts if inst.primary_node == node_uuid]
    sec = [inst.uuid for inst in insts if node_uuid in inst.secondary_nodes]
    return (pri, sec)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupInstances(self, uuid, primary_only=False):
    """Return the instances of a node group.

    @param uuid: Node group UUID
    @param primary_only: Whether to only consider primary nodes
    @rtype: frozenset
    @return: List of instance UUIDs in node group

    """
    def _RelevantNodes(inst):
      # which of the instance's nodes count for group membership
      if primary_only:
        return [inst.primary_node]
      return inst.all_nodes

    return frozenset(inst.uuid
                     for inst in self._config_data.instances.values()
                     for node_uuid in _RelevantNodes(inst)
                     if self._UnlockedGetNodeInfo(node_uuid).group == uuid)
  def _UnlockedGetHvparamsString(self, hvname):
    """Return the string representation of the list of hypervisor parameters
    of the given hypervisor.

    The caller must already hold the config lock.

    @see: C{GetHvparams}

    @param hvname: name of a hypervisor known to the cluster configuration
    @rtype: string
    @return: one "KEY=VALUE\n" line per parameter (same iteration order as
      the underlying dict, hence identical to the previous implementation)

    """
    hvparams = self._config_data.cluster.hvparams[hvname]
    # str.join builds the result in one pass instead of quadratic "+="
    return "".join("%s=%s\n" % (key, hvparams[key]) for key in hvparams)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetHvparamsString(self, hvname):
    """Return the hypervisor parameters of the given hypervisor.

    @type hvname: string
    @param hvname: name of a hypervisor
    @rtype: string
    @return: string containing key-value-pairs, one pair on each line;
      format: KEY=VALUE

    """
    return self._UnlockedGetHvparamsString(hvname)
  def _UnlockedGetNodeList(self):
    """Return the UUIDs of all nodes in the configuration.

    For internal use only; the caller must already hold the config lock.

    @rtype: list

    """
    # iterating the dict yields its keys; list() matches py2 .keys()
    return list(self._config_data.nodes)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeList(self):
    """Return the UUIDs of all nodes in the configuration (locked wrapper).

    """
    return self._UnlockedGetNodeList()
  def _UnlockedGetOnlineNodeList(self):
    """Return the UUIDs of nodes not marked offline.

    The caller must already hold the config lock.

    """
    return [node.uuid
            for node in map(self._UnlockedGetNodeInfo,
                            self._UnlockedGetNodeList())
            if not node.offline]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetOnlineNodeList(self):
    """Return the UUIDs of nodes not marked offline (locked wrapper).

    """
    return self._UnlockedGetOnlineNodeList()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetVmCapableNodeList(self):
    """Return the list of nodes which are vm capable.

    @rtype: list
    @return: UUIDs of the nodes whose C{vm_capable} flag is set

    """
    all_nodes = [self._UnlockedGetNodeInfo(node)
                 for node in self._UnlockedGetNodeList()]
    return [node.uuid for node in all_nodes if node.vm_capable]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNonVmCapableNodeList(self):
    """Return the list of nodes which are not vm capable.

    @rtype: list
    @return: UUIDs of the nodes whose C{vm_capable} flag is unset

    """
    return [node.uuid
            for node in map(self._UnlockedGetNodeInfo,
                            self._UnlockedGetNodeList())
            if not node.vm_capable]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMultiNodeInfo(self, node_uuids):
    """Fetch the configuration of several nodes at once.

    @param node_uuids: list of node UUIDs
    @rtype: list
    @return: list of (node UUID, node_info) tuples in the original order,
        where node_info is what L{GetNodeInfo} would return (None for
        unknown UUIDs)

    """
    lookup = self._UnlockedGetNodeInfo
    return [(uuid, lookup(uuid)) for uuid in node_uuids]
  def _UnlockedGetAllNodesInfo(self):
    """Build the UUID-to-node mapping; config lock must be held.

    @note: See L{GetAllNodesInfo}

    """
    return dict((node_uuid, self._UnlockedGetNodeInfo(node_uuid))
                for node_uuid in self._UnlockedGetNodeList())
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNodesInfo(self):
    """Return the configuration of every node.

    @rtype: dict
    @return: mapping of node UUID to the corresponding L{objects.Node}
        object

    """
    return self._UnlockedGetAllNodesInfo()
  def _UnlockedGetNodeInfoByName(self, node_name):
    """Linear search for a node by name; None if not found.

    """
    return next((node
                 for node in self._UnlockedGetAllNodesInfo().values()
                 if node.name == node_name), None)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInfoByName(self, node_name):
    """Look up a node object by its name.

    @param node_name: name of the node to get information for
    @type node_name: string
    @return: the matching L{objects.Node} instance, or None when the
          name is unknown

    """
    return self._UnlockedGetNodeInfoByName(node_name)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupInfoByName(self, nodegroup_name):
    """Look up a node group object by its name.

    @param nodegroup_name: name of the node group to get information for
    @type nodegroup_name: string
    @return: the matching L{objects.NodeGroup} instance, or None when the
          name is unknown

    """
    return next((group
                 for group in self._UnlockedGetAllNodeGroupsInfo().values()
                 if group.name == nodegroup_name), None)
  def _UnlockedGetNodeName(self, node_spec):
    """Resolve a node spec (UUID string or Node object) to the node name.

    """
    if isinstance(node_spec, objects.Node):
      return node_spec.name
    if not isinstance(node_spec, basestring):
      raise errors.ProgrammerError("Can't handle node spec '%s'" % node_spec)
    # node_spec is a UUID string: look it up in the configuration
    node_info = self._UnlockedGetNodeInfo(node_spec)
    if node_info is None:
      raise errors.OpExecError("Unknown node: %s" % node_spec)
    return node_info.name
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeName(self, node_spec):
    """Resolve the passed node spec to a node name.

    @param node_spec: node to get names for
    @type node_spec: either node UUID or a L{objects.Node} object
    @rtype: string
    @return: node name

    """
    return self._UnlockedGetNodeName(node_spec)
  def _UnlockedGetNodeNames(self, node_specs):
    """Map each node spec to its node name; config lock must be held.

    """
    return map(self._UnlockedGetNodeName, node_specs)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeNames(self, node_specs):
    """Resolve a list of node specs to node names.

    @param node_specs: list of nodes to get names for
    @type node_specs: list of either node UUIDs or L{objects.Node} objects
    @rtype: list of strings
    @return: list of node names

    """
    return self._UnlockedGetNodeNames(node_specs)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupsFromNodes(self, node_uuids):
    """Return the set of groups the given nodes belong to.

    @type node_uuids: list of string
    @param node_uuids: List of node UUIDs
    @rtype: frozenset

    """
    return frozenset(info.group
                     for info in map(self._UnlockedGetNodeInfo, node_uuids))
  def _UnlockedGetMasterCandidateStats(self, exceptions=None):
    """Get the number of current and maximum desired and possible candidates.

    The caller must already hold the config lock.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
    @rtype: tuple
    @return: tuple of (current, desired and possible, possible), i.e.
        (mc_now, mc_should, mc_max)

    """
    mc_now = mc_should = mc_max = 0
    for node in self._config_data.nodes.values():
      if exceptions and node.uuid in exceptions:
        continue
      # mc_max counts nodes eligible to become candidates:
      # master-capable and neither offline nor drained
      if not (node.offline or node.drained) and node.master_capable:
        mc_max += 1
      if node.master_candidate:
        mc_now += 1
    # the desired count is capped by how many nodes can actually serve
    mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
    return (mc_now, mc_should, mc_max)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterCandidateStats(self, exceptions=None):
    """Get the number of current and maximum possible candidates.

    This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
    @rtype: tuple
    @return: tuple of (current, max)

    """
    return self._UnlockedGetMasterCandidateStats(exceptions)
  @locking.ssynchronized(_config_lock)
  def MaintainCandidatePool(self, exception_node_uuids):
    """Try to grow the candidate pool to the desired size.

    @type exception_node_uuids: list
    @param exception_node_uuids: if passed, list of nodes that should be ignored
    @rtype: list
    @return: list with the adjusted nodes (L{objects.Node} instances)

    """
    # NOTE: the stats tuple is (current, desired-and-possible, possible);
    # the middle element is deliberately used as the target size here
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(
                          exception_node_uuids)
    mod_list = []
    if mc_now < mc_max:
      node_list = self._config_data.nodes.keys()
      # shuffle so promotion is spread randomly over eligible nodes
      random.shuffle(node_list)
      for uuid in node_list:
        if mc_now >= mc_max:
          break
        node = self._config_data.nodes[uuid]
        # skip nodes that are already candidates or not eligible
        if (node.master_candidate or node.offline or node.drained or
            node.uuid in exception_node_uuids or not node.master_capable):
          continue
        mod_list.append(node)
        node.master_candidate = True
        node.serial_no += 1
        mc_now += 1
      if mc_now != mc_max:
        # this should not happen
        logging.warning("Warning: MaintainCandidatePool didn't manage to"
                        " fill the candidate pool (%d/%d)", mc_now, mc_max)
      if mod_list:
        # bump the cluster serial to force ssconf redistribution
        self._config_data.cluster.serial_no += 1
        self._WriteConfig()

    return mod_list
  def _UnlockedAddNodeToGroup(self, node_uuid, nodegroup_uuid):
    """Add a given node to the specified group.

    The caller must already hold the config lock.

    @param node_uuid: UUID of the node to add
    @param nodegroup_uuid: UUID of the target node group
    @raise errors.OpExecError: if the node group does not exist

    """
    if nodegroup_uuid not in self._config_data.nodegroups:
      # This can happen if a node group gets deleted between its lookup and
      # when we're adding the first node to it, since we don't keep a lock in
      # the meantime. It's ok though, as we'll fail cleanly if the node group
      # is not found anymore.
      raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)
    # idempotent: only append if not already a member
    if node_uuid not in self._config_data.nodegroups[nodegroup_uuid].members:
      self._config_data.nodegroups[nodegroup_uuid].members.append(node_uuid)
  def _UnlockedRemoveNodeFromGroup(self, node):
    """Remove a given node from its group.

    The caller must already hold the config lock. Inconsistencies (unknown
    group, node not in its group's member list) are logged but tolerated.

    @type node: L{objects.Node}
    @param node: the node to remove from its node group

    """
    nodegroup = node.group
    if nodegroup not in self._config_data.nodegroups:
      logging.warning("Warning: node '%s' has unknown node group '%s'"
                      " (while being removed from it)", node.uuid, nodegroup)
      # The group is gone; without this early return the lookup below
      # would raise KeyError right after the best-effort warning.
      return
    nodegroup_obj = self._config_data.nodegroups[nodegroup]
    if node.uuid not in nodegroup_obj.members:
      logging.warning("Warning: node '%s' not a member of its node group '%s'"
                      " (while being removed from it)", node.uuid, nodegroup)
    else:
      nodegroup_obj.members.remove(node.uuid)
  @locking.ssynchronized(_config_lock)
  def AssignGroupNodes(self, mods):
    """Changes the group of a number of nodes.

    All requested moves are first validated (resolving node, old group and
    new group), then applied in a second pass, so either the whole request
    is consistent or nothing is modified before the error is raised.

    @type mods: list of tuples; (node name, new group UUID)
    @param mods: Node membership modifications
    @raise errors.ConfigurationError: if a node or group cannot be resolved

    """
    groups = self._config_data.nodegroups
    nodes = self._config_data.nodes

    # list of (node, old_group, new_group) tuples to apply
    resmod = []

    # Try to resolve UUIDs first
    for (node_uuid, new_group_uuid) in mods:
      try:
        node = nodes[node_uuid]
      except KeyError:
        raise errors.ConfigurationError("Unable to find node '%s'" % node_uuid)

      if node.group == new_group_uuid:
        # Node is being assigned to its current group
        logging.debug("Node '%s' was assigned to its current group (%s)",
                      node_uuid, node.group)
        continue

      # Try to find current group of node
      try:
        old_group = groups[node.group]
      except KeyError:
        raise errors.ConfigurationError("Unable to find old group '%s'" %
                                        node.group)

      # Try to find new group for node
      try:
        new_group = groups[new_group_uuid]
      except KeyError:
        raise errors.ConfigurationError("Unable to find new group '%s'" %
                                        new_group_uuid)

      assert node.uuid in old_group.members, \
        ("Inconsistent configuration: node '%s' not listed in members for its"
         " old group '%s'" % (node.uuid, old_group.uuid))
      assert node.uuid not in new_group.members, \
        ("Inconsistent configuration: node '%s' already listed in members for"
         " its new group '%s'" % (node.uuid, new_group.uuid))

      resmod.append((node, old_group, new_group))

    # Apply changes
    for (node, old_group, new_group) in resmod:
      assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
        "Assigning to current group is not possible"

      node.group = new_group.uuid

      # Update members of involved groups
      if node.uuid in old_group.members:
        old_group.members.remove(node.uuid)
      if node.uuid not in new_group.members:
        new_group.members.append(node.uuid)

    # Update timestamps and serials (only once per node/group object,
    # even if an object appears in several resmod entries)
    now = time.time()
    for obj in frozenset(itertools.chain(*resmod)): # pylint: disable=W0142
      obj.serial_no += 1
      obj.mtime = now

    # Force ssconf update
    self._config_data.cluster.serial_no += 1

    self._WriteConfig()
  def _BumpSerialNo(self):
    """Bump up the serial number of the config.

    Also refreshes the configuration's modification timestamp.

    """
    self._config_data.serial_no += 1
    self._config_data.mtime = time.time()
  def _AllUUIDObjects(self):
    """Return every configuration object that carries a uuid attribute.

    Covers instances, nodes, node groups, networks, disks, NICs and the
    cluster object itself.

    """
    cfg = self._config_data
    return (cfg.instances.values() +
            cfg.nodes.values() +
            cfg.nodegroups.values() +
            cfg.networks.values() +
            self._AllDisks() +
            self._AllNICs() +
            [cfg.cluster])
  def _OpenConfig(self, accept_foreign):
    """Read the config data from disk.

    Loads the configuration either from the local file (offline mode) or
    via WConfd (online mode), validates it, installs it as
    C{self._config_data} and runs any pending upgrades.

    @param accept_foreign: if True, allow opening a configuration whose
        master node is not this host
    @raise errors.ConfigurationError: on unparseable, invalid or foreign
        configuration data

    """
    # Read the configuration data. If offline, read the file directly.
    # If online, call WConfd.
    if self._offline:
      raw_data = utils.ReadFile(self._cfg_file)
      try:
        dict_data = serializer.Load(raw_data)
      except Exception, err:
        raise errors.ConfigurationError(err)
    else:
      self._wconfd = wc.Client()
      dict_data = self._wconfd.ReadConfig()

    try:
      data = objects.ConfigData.FromDict(dict_data)
    except Exception, err:
      raise errors.ConfigurationError(err)

    # Make sure the configuration has the right version
    _ValidateConfig(data)

    if (not hasattr(data, "cluster") or
        not hasattr(data.cluster, "rsahostkeypub")):
      raise errors.ConfigurationError("Incomplete configuration"
                                      " (missing cluster.rsahostkeypub)")

    if not data.cluster.master_node in data.nodes:
      msg = ("The configuration denotes node %s as master, but does not"
             " contain information about this node" %
             data.cluster.master_node)
      raise errors.ConfigurationError(msg)

    master_info = data.nodes[data.cluster.master_node]
    if master_info.name != self._my_hostname and not accept_foreign:
      msg = ("The configuration denotes node %s as master, while my"
             " hostname is %s; opening a foreign configuration is only"
             " possible in accept_foreign mode" %
             (master_info.name, self._my_hostname))
      raise errors.ConfigurationError(msg)

    self._config_data = data
    # reset the last serial as -1 so that the next write will cause
    # ssconf update
    self._last_cluster_serial = -1

    # Upgrade configuration if needed
    self._UpgradeConfig()

    self._cfg_id = utils.GetFileID(path=self._cfg_file)
  def _UpgradeConfig(self):
    """Run any upgrade steps.

    This method performs both in-object upgrades and also update some data
    elements that need uniqueness across the whole configuration or interact
    with other objects.

    @warning: this function will call L{_WriteConfig()}, but also
        L{DropECReservations} so it needs to be called only from a
        "safe" place (the constructor). If one wanted to call it with
        the lock held, a DropECReservationUnlocked would need to be
        created first, to avoid causing deadlock.

    """
    # Keep a copy of the persistent part of _config_data to check for changes
    # Serialization doesn't guarantee order in dictionaries
    oldconf = copy.deepcopy(self._config_data.ToDict())

    # In-object upgrades
    self._config_data.UpgradeConfig()

    # fill in UUIDs for any objects that are still missing one
    for item in self._AllUUIDObjects():
      if item.uuid is None:
        item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
    if not self._config_data.nodegroups:
      # pre-nodegroup configurations: create the initial default group
      default_nodegroup_name = constants.INITIAL_NODE_GROUP_NAME
      default_nodegroup = objects.NodeGroup(name=default_nodegroup_name,
                                            members=[])
      self._UnlockedAddNodeGroup(default_nodegroup, _UPGRADE_CONFIG_JID, True)
    for node in self._config_data.nodes.values():
      if not node.group:
        node.group = self.LookupNodeGroup(None)
      # This is technically *not* an upgrade, but needs to be done both when
      # nodegroups are being added, and upon normally loading the config,
      # because the members list of a node group is discarded upon
      # serializing/deserializing the object.
      self._UnlockedAddNodeToGroup(node.uuid, node.group)

    # only write back (and drop the reservations made above) if the
    # upgrade actually changed anything
    modified = (oldconf != self._config_data.ToDict())
    if modified:
      self._WriteConfig()
      # This is ok even if it acquires the internal lock, as _UpgradeConfig is
      # only called at config init time, without the lock held
      self.DropECReservations(_UPGRADE_CONFIG_JID)
    else:
      config_errors = self._UnlockedVerifyConfig()
      if config_errors:
        errmsg = ("Loaded configuration data is not consistent: %s" %
                  (utils.CommaJoin(config_errors)))
        logging.critical(errmsg)
  def _DistributeConfig(self, feedback_fn):
    """Distribute the configuration to the other nodes.

    Currently, this only copies the configuration file. In the future,
    it could be used to encapsulate the 2/3-phase update mechanism.

    @param feedback_fn: optional callable invoked with an error message
        for every node the copy failed on
    @rtype: bool
    @return: True if the config reached every (master candidate) node,
        False if at least one copy failed

    """
    if self._offline:
      # nothing to distribute in offline mode
      return True

    bad = False

    node_list = []
    addr_list = []
    myhostname = self._my_hostname
    # we can skip checking whether _UnlockedGetNodeInfo returns None
    # since the node list comes from _UnlocketGetNodeList, and we are
    # called with the lock held, so no modifications should take place
    # in between
    for node_uuid in self._UnlockedGetNodeList():
      node_info = self._UnlockedGetNodeInfo(node_uuid)
      # only master candidates (other than ourselves) hold config copies
      if node_info.name == myhostname or not node_info.master_candidate:
        continue
      node_list.append(node_info.name)
      addr_list.append(node_info.primary_ip)

    # TODO: Use dedicated resolver talking to config writer for name resolution
    result = \
      self._GetRpc(addr_list).call_upload_file(node_list, self._cfg_file)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (self._cfg_file, to_node, msg))
        logging.error(msg)

        if feedback_fn:
          feedback_fn(msg)

        bad = True

    return not bad
  def _WriteConfig(self, destination=None, feedback_fn=None):
    """Write the configuration data to persistent storage.

    Bumps the serial number, writes the data (directly to file when
    offline, via WConfd otherwise), redistributes the file to master
    candidates and, if the cluster serial changed, pushes ssconf files
    to all online nodes.

    @type destination: string or None
    @param destination: alternative path to write the configuration to;
        defaults to the standard configuration file path
    @param feedback_fn: optional callable used to report non-fatal
        problems (inconsistent config, failed ssconf uploads)
    @raises errors.ConfigurationError: if the configuration file was
        modified since it was last read

    """
    assert feedback_fn is None or callable(feedback_fn)

    # Warn on config errors, but don't abort the save - the
    # configuration has already been modified, and we can't revert;
    # the best we can do is to warn the user and save as is, leaving
    # recovery to the user
    config_errors = self._UnlockedVerifyConfig()
    if config_errors:
      errmsg = ("Configuration data is not consistent: %s" %
                (utils.CommaJoin(config_errors)))
      logging.critical(errmsg)
      if feedback_fn:
        feedback_fn(errmsg)

    if destination is None:
      destination = self._cfg_file

    self._BumpSerialNo()
    # Save the configuration data. If offline, write the file directly.
    # If online, call WConfd.
    if self._offline:
      txt = serializer.DumpJson(
        self._config_data.ToDict(_with_private=True),
        private_encoder=serializer.EncodeWithPrivateFields
      )

      getents = self._getents()
      try:
        # SafeWriteFile checks the current file ID against the one
        # remembered at load time (self._cfg_id); a mismatch means the
        # file was changed behind our back and raises LockError.
        # Mode 0640 keeps the file group-readable for the confd group.
        fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
                                 close=False, gid=getents.confd_gid, mode=0640)
      except errors.LockError:
        raise errors.ConfigurationError("The configuration file has been"
                                        " modified since the last write, cannot"
                                        " update")
      try:
        # Remember the new file ID so the next write can again detect
        # outside modifications.
        self._cfg_id = utils.GetFileID(fd=fd)
      finally:
        os.close(fd)
    else:
      try:
        self._wconfd.WriteConfig(self._config_data.ToDict())
      except errors.LockError:
        raise errors.ConfigurationError("The configuration file has been"
                                        " modified since the last write, cannot"
                                        " update")

    self.write_count += 1

    # and redistribute the config file to master candidates
    self._DistributeConfig(feedback_fn)

    # Write ssconf files on all nodes (including locally)
    if self._last_cluster_serial < self._config_data.cluster.serial_no:
      if not self._offline:
        result = self._GetRpc(None).call_write_ssconf_files(
          self._UnlockedGetNodeNames(self._UnlockedGetOnlineNodeList()),
          self._UnlockedGetSsconfValues())

        for nname, nresu in result.items():
          msg = nresu.fail_msg
          if msg:
            errmsg = ("Error while uploading ssconf files to"
                      " node %s: %s" % (nname, msg))
            logging.warning(errmsg)

            if feedback_fn:
              feedback_fn(errmsg)

      # Record the serial we distributed, so unchanged cluster data does
      # not trigger another ssconf push on the next write.
      self._last_cluster_serial = self._config_data.cluster.serial_no
  def _GetAllHvparamsStrings(self, hypervisors):
2578
    """Get the hvparams of all given hypervisors from the config.
2579

2580
    @type hypervisors: list of string
2581
    @param hypervisors: list of hypervisor names
2582
    @rtype: dict of strings
2583
    @returns: dictionary mapping the hypervisor name to a string representation
2584
      of the hypervisor's hvparams
2585

2586
    """
2587
    hvparams = {}
2588
    for hv in hypervisors:
2589
      hvparams[hv] = self._UnlockedGetHvparamsString(hv)
2590
    return hvparams
2591

    
2592
  @staticmethod
  def _ExtendByAllHvparamsStrings(ssconf_values, all_hvparams):
    """Merge the serialized hvparams into the ssconf values dictionary.

    @type ssconf_values: dict of strings
    @param ssconf_values: dictionary mapping ssconf_keys to strings
      representing the content of ssconf files
    @type all_hvparams: dict of strings
    @param all_hvparams: dictionary mapping hypervisor names to a string
      representation of their hvparams
    @rtype: same as ssconf_values
    @returns: the (mutated) ssconf_values dictionary extended by one
      C{SS_HVPARAMS_PREF}-prefixed key per hypervisor

    """
    for name, params in all_hvparams.items():
      ssconf_values[constants.SS_HVPARAMS_PREF + name] = params
    return ssconf_values
  def _UnlockedGetSsconfValues(self):
    """Return the values needed by ssconf.

    @rtype: dict
    @return: a dictionary with keys the ssconf names and values their
        associated value
    @raises errors.ConfigurationError: if any assembled value is not a
        string

    """
    # every ssconf file is a newline-joined list of lines
    fn = "\n".join
    instance_names = utils.NiceSort(
                       [inst.name for inst in
                        self._UnlockedGetAllInstancesInfo().values()])
    node_infos = self._UnlockedGetAllNodesInfo().values()
    node_names = [node.name for node in node_infos]
    # "name ip" pairs, one per node
    node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
                    for ninfo in node_infos]
    node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
                    for ninfo in node_infos]

    instance_data = fn(instance_names)
    off_data = fn(node.name for node in node_infos if node.offline)
    on_data = fn(node.name for node in node_infos if not node.offline)
    mc_data = fn(node.name for node in node_infos if node.master_candidate)
    mc_ips_data = fn(node.primary_ip for node in node_infos
                     if node.master_candidate)
    node_data = fn(node_names)
    node_pri_ips_data = fn(node_pri_ips)
    node_snd_ips_data = fn(node_snd_ips)

    cluster = self._config_data.cluster
    cluster_tags = fn(cluster.GetTags())

    # "uuid=cert" pairs, one per master candidate
    master_candidates_certs = fn("%s=%s" % (mc_uuid, mc_cert)
                                 for mc_uuid, mc_cert
                                 in cluster.candidate_certs.items())

    hypervisor_list = fn(cluster.enabled_hypervisors)
    # hvparams are exported for all known hypervisor types, not only the
    # enabled ones
    all_hvparams = self._GetAllHvparamsStrings(constants.HYPER_TYPES)

    uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")

    # "uuid name" pairs for nodegroups and networks
    nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in
                  self._config_data.nodegroups.values()]
    nodegroups_data = fn(utils.NiceSort(nodegroups))
    networks = ["%s %s" % (net.uuid, net.name) for net in
                self._config_data.networks.values()]
    networks_data = fn(utils.NiceSort(networks))

    ssconf_values = {
      constants.SS_CLUSTER_NAME: cluster.cluster_name,
      constants.SS_CLUSTER_TAGS: cluster_tags,
      constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
      constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
      constants.SS_GLUSTER_STORAGE_DIR: cluster.gluster_storage_dir,
      constants.SS_MASTER_CANDIDATES: mc_data,
      constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
      constants.SS_MASTER_CANDIDATES_CERTS: master_candidates_certs,
      constants.SS_MASTER_IP: cluster.master_ip,
      constants.SS_MASTER_NETDEV: cluster.master_netdev,
      constants.SS_MASTER_NETMASK: str(cluster.master_netmask),
      constants.SS_MASTER_NODE: self._UnlockedGetNodeName(cluster.master_node),
      constants.SS_NODE_LIST: node_data,
      constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
      constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
      constants.SS_OFFLINE_NODES: off_data,
      constants.SS_ONLINE_NODES: on_data,
      constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
      constants.SS_INSTANCE_LIST: instance_data,
      constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
      constants.SS_HYPERVISOR_LIST: hypervisor_list,
      constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
      constants.SS_UID_POOL: uid_pool,
      constants.SS_NODEGROUPS: nodegroups_data,
      constants.SS_NETWORKS: networks_data,
      }
    ssconf_values = self._ExtendByAllHvparamsStrings(ssconf_values,
                                                     all_hvparams)
    # ssconf files can only hold strings; fail loudly on anything else.
    # NOTE(review): in Python 2 `str` is a subclass of `basestring`, so
    # the `str` entry in the tuple below is redundant.
    bad_values = [(k, v) for k, v in ssconf_values.items()
                  if not isinstance(v, (str, basestring))]
    if bad_values:
      err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
      raise errors.ConfigurationError("Some ssconf key(s) have non-string"
                                      " values: %s" % err)
    return ssconf_values
  @locking.ssynchronized(_config_lock, shared=1)
  def GetSsconfValues(self):
    """Locked wrapper around L{_UnlockedGetSsconfValues}.

    """
    return self._UnlockedGetSsconfValues()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetVGName(self):
    """Get the name of the cluster's volume group.

    """
    cluster = self._config_data.cluster
    return cluster.volume_group_name
  @locking.ssynchronized(_config_lock)
  def SetVGName(self, vg_name):
    """Update the cluster's volume group name and persist the change.

    """
    cluster = self._config_data.cluster
    cluster.volume_group_name = vg_name
    cluster.serial_no += 1
    self._WriteConfig()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetDRBDHelper(self):
    """Get the configured DRBD usermode helper.

    """
    cluster = self._config_data.cluster
    return cluster.drbd_usermode_helper
  @locking.ssynchronized(_config_lock)
  def SetDRBDHelper(self, drbd_helper):
    """Update the DRBD usermode helper and persist the change.

    """
    cluster = self._config_data.cluster
    cluster.drbd_usermode_helper = drbd_helper
    cluster.serial_no += 1
    self._WriteConfig()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMACPrefix(self):
    """Get the cluster-wide MAC address prefix.

    """
    cluster = self._config_data.cluster
    return cluster.mac_prefix
  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterInfo(self):
    """Get the cluster configuration object.

    @rtype: L{objects.Cluster}
    @return: the cluster object

    """
    return self._config_data.cluster
  @locking.ssynchronized(_config_lock, shared=1)
  def HasAnyDiskOfType(self, dev_type):
    """Tell whether at least one disk of the given type exists in the
    configuration.

    """
    return self._config_data.HasAnyDiskOfType(dev_type)
  @locking.ssynchronized(_config_lock)
  def Update(self, target, feedback_fn, ec_id=None):
    """Notify function to be called after updates.

    This function must be called when an object (as returned by
    GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
    caller wants the modifications saved to the backing store. Note
    that all modified objects will be saved, but the target argument
    is the one the caller wants to ensure that it's saved.

    @param target: an instance of either L{objects.Cluster},
        L{objects.Node} or L{objects.Instance} which is existing in
        the cluster
    @param feedback_fn: Callable feedback function
    @param ec_id: optional execution-context id; when given, IPs
        temporarily reserved under it are committed to the config
    @raises errors.ProgrammerError: if no configuration is loaded or the
        target's type is not supported
    @raises errors.ConfigurationError: if the target is not the object
        currently held in the configuration

    """
    if self._config_data is None:
      raise errors.ProgrammerError("Configuration file not read,"
                                   " cannot save.")
    update_serial = False
    # Verify the caller passed the very object held by this config, not
    # a stale copy read before some other update.
    if isinstance(target, objects.Cluster):
      test = target == self._config_data.cluster
    elif isinstance(target, objects.Node):
      test = target in self._config_data.nodes.values()
      update_serial = True
    elif isinstance(target, objects.Instance):
      test = target in self._config_data.instances.values()
    elif isinstance(target, objects.NodeGroup):
      test = target in self._config_data.nodegroups.values()
    elif isinstance(target, objects.Network):
      test = target in self._config_data.networks.values()
    else:
      raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                   " ConfigWriter.Update" % type(target))
    if not test:
      raise errors.ConfigurationError("Configuration updated since object"
                                      " has been read or unknown object")
    target.serial_no += 1
    target.mtime = now = time.time()

    if update_serial:
      # for node updates, we need to increase the cluster serial too
      self._config_data.cluster.serial_no += 1
      self._config_data.cluster.mtime = now

    if isinstance(target, objects.Instance):
      # instance updates also release any DRBD minors still reserved for
      # the instance (see _UnlockedReleaseDRBDMinors)
      self._UnlockedReleaseDRBDMinors(target.uuid)

    if ec_id is not None:
      # Commit all ips reserved by OpInstanceSetParams and OpGroupSetParams
      self._UnlockedCommitTemporaryIps(ec_id)

    self._WriteConfig(feedback_fn=feedback_fn)
  @locking.ssynchronized(_config_lock)
  def DropECReservations(self, ec_id):
    """Release every reservation held for one execution context.

    """
    for reservation_mgr in self._all_rms:
      reservation_mgr.DropECReservations(ec_id)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNetworksInfo(self):
    """Get configuration info of all the networks.

    @return: a shallow copy of the uuid-to-network mapping

    """
    networks = self._config_data.networks
    return dict(networks)
  def _UnlockedGetNetworkList(self):
2829
    """Get the list of networks.
2830

2831
    This function is for internal use, when the config lock is already held.
2832

2833
    """
2834
    return self._config_data.networks.keys()
2835

    
2836
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNetworkList(self):
    """Get the list of networks.

    @return: array of networks, ex. ["main", "vlan100", "200"]

    """
    return self._UnlockedGetNetworkList()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNetworkNames(self):
    """Get a list of network names

    """
    return [net.name for net in self._config_data.networks.values()]
  def _UnlockedGetNetwork(self, uuid):
2855
    """Returns information about a network.
2856

2857
    This function is for internal use, when the config lock is already held.
2858

2859
    """
2860
    if uuid not in self._config_data.networks:
2861
      return None
2862

    
2863
    return self._config_data.networks[uuid]
2864

    
2865
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNetwork(self, uuid):
    """Returns information about a network.

    It takes the information from the configuration file.

    @param uuid: UUID of the network

    @rtype: L{objects.Network}
    @return: the network object

    """
    return self._UnlockedGetNetwork(uuid)
  @locking.ssynchronized(_config_lock)
  def AddNetwork(self, net, ec_id, check_uuid=True):
    """Add a network to the configuration and persist the change.

    @type net: L{objects.Network}
    @param net: the Network object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID

    """
    self._UnlockedAddNetwork(net, ec_id, check_uuid)
    self._WriteConfig()
  def _UnlockedAddNetwork(self, net, ec_id, check_uuid):
2893
    """Add a network to the configuration.
2894

2895
    """
2896
    logging.info("Adding network %s to configuration", net.name)
2897

    
2898
    if check_uuid:
2899
      self._EnsureUUID(net, ec_id)
2900

    
2901
    net.serial_no = 1
2902
    net.ctime = net.mtime = time.time()
2903
    self._config_data.networks[net.uuid] = net
2904
    self._config_data.cluster.serial_no += 1
2905

    
2906
  def _UnlockedLookupNetwork(self, target):
2907
    """Lookup a network's UUID.
2908

2909
    @type target: string
2910
    @param target: network name or UUID
2911
    @rtype: string
2912
    @return: network UUID
2913
    @raises errors.OpPrereqError: when the target network cannot be found
2914

2915
    """
2916
    if target is None:
2917
      return None
2918
    if target in self._config_data.networks:
2919
      return target
2920
    for net in self._config_data.networks.values():
2921
      if net.name == target:
2922
        return net.uuid
2923
    raise errors.OpPrereqError("Network '%s' not found" % target,
2924
                               errors.ECODE_NOENT)
2925

    
2926
  @locking.ssynchronized(_config_lock, shared=1)
  def LookupNetwork(self, target):
    """Lookup a network's UUID.

    This function is just a wrapper over L{_UnlockedLookupNetwork}.

    @type target: string
    @param target: network name or UUID
    @rtype: string
    @return: network UUID

    """
    return self._UnlockedLookupNetwork(target)
  @locking.ssynchronized(_config_lock)
  def RemoveNetwork(self, network_uuid):
    """Remove a network from the configuration.

    @type network_uuid: string
    @param network_uuid: the UUID of the network to remove
    @raises errors.ConfigurationError: if no network with that UUID exists

    """
    logging.info("Removing network %s from configuration", network_uuid)

    networks = self._config_data.networks
    if network_uuid not in networks:
      raise errors.ConfigurationError("Unknown network '%s'" % network_uuid)

    del networks[network_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  def _UnlockedGetGroupNetParams(self, net_uuid, node_uuid):
2958
    """Get the netparams (mode, link) of a network.
2959

2960
    Get a network's netparams for a given node.
2961

2962
    @type net_uuid: string
2963
    @param net_uuid: network uuid
2964
    @type node_uuid: string
2965
    @param node_uuid: node UUID
2966
    @rtype: dict or None
2967
    @return: netparams
2968

2969
    """
2970
    node_info = self._UnlockedGetNodeInfo(node_uuid)
2971
    nodegroup_info = self._UnlockedGetNodeGroup(node_info.group)
2972
    netparams = nodegroup_info.networks.get(net_uuid, None)
2973

    
2974
    return netparams
2975

    
2976
  @locking.ssynchronized(_config_lock, shared=1)
  def GetGroupNetParams(self, net_uuid, node_uuid):
    """Locking wrapper of _UnlockedGetGroupNetParams()

    """
    return self._UnlockedGetGroupNetParams(net_uuid, node_uuid)
  @locking.ssynchronized(_config_lock, shared=1)
  def CheckIPInNodeGroup(self, ip, node_uuid):
    """Check IP uniqueness in nodegroup.

    Check networks that are connected in the node's node group
    if ip is contained in any of them. Used when creating/adding
    a NIC to ensure uniqueness among nodegroups.

    @type ip: string
    @param ip: ip address
    @type node_uuid: string
    @param node_uuid: node UUID
    @rtype: (string, dict) or (None, None)
    @return: (network name, netparams)

    """
    if ip is None:
      return (None, None)

    node_info = self._UnlockedGetNodeInfo(node_uuid)
    group_info = self._UnlockedGetNodeGroup(node_info.group)
    for net_uuid, netparams in group_info.networks.items():
      net_info = self._UnlockedGetNetwork(net_uuid)
      if network.AddressPool(net_info).Contains(ip):
        return (net_info.name, netparams)

    return (None, None)