#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Configuration management for Ganeti

This module provides the interface to the Ganeti cluster configuration.

The configuration data is stored on every node but is updated on the master
only. After each update, the master distributes the data to the other nodes.

Currently, the data storage format is JSON. YAML was slow and consumed too
much memory.

"""

# pylint: disable-msg=R0904
# R0904: Too many public methods

import os
import random
import logging
import time

from ganeti import errors
from ganeti import locking
from ganeti import utils
from ganeti import constants
from ganeti import rpc
from ganeti import objects
from ganeti import serializer
from ganeti import uidpool
from ganeti import netutils
from ganeti import runtime
from ganeti import network


_config_lock = locking.SharedLock("ConfigWriter")

# job id used for resource management at config upgrade time
_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"


def _ValidateConfig(data):
  """Verifies that a configuration object looks valid.

  This only verifies the version of the configuration.

  @raise errors.ConfigurationError: if the version differs from what
      we expect

  """
  if data.version != constants.CONFIG_VERSION:
    raise errors.ConfigVersionMismatch(constants.CONFIG_VERSION, data.version)


class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  """
  def __init__(self):
    self._ec_reserved = {}

  def Reserved(self, resource):
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    if ec_id not in self._ec_reserved:
      self._ec_reserved[ec_id] = set([resource])
    else:
      self._ec_reserved[ec_id].add(resource)

  def DropECReservations(self, ec_id):
    if ec_id in self._ec_reserved:
      del self._ec_reserved[ec_id]

  def GetReserved(self):
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def GetECReserved(self, ec_id):
    ec_reserved = set()
    if ec_id in self._ec_reserved:
      ec_reserved.update(self._ec_reserved[ec_id])
    return ec_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    retries = 64
    while retries > 0:
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
      # decrement, otherwise the loop (and the else branch) could never end
      retries -= 1
    else:
      raise errors.ConfigurationError("Not able to generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource
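
# Usage sketch (illustrative comments only, not executed; the execution
# context id "job-1" and the generator are hypothetical): a job reserves or
# generates values, then drops its reservations when it finishes.
#
#   trm = TemporaryReservationManager()
#   trm.Reserve("job-1", "value-0")                # explicit reservation
#   gen = lambda: "value-%d" % random.randint(0, 99)
#   new = trm.Generate(["value-0"], gen, "job-1")  # skips existing/reserved
#   trm.DropECReservations("job-1")                # release at end of job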

    
class ConfigWriter:
  """The interface to the cluster configuration.

  @ivar _temporary_lvs: reservation manager for temporary LVs
  @ivar _all_rms: a list of all temporary reservation managers

  """
  def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
               accept_foreign=False):
    self.write_count = 0
    self._lock = _config_lock
    self._config_data = None
    self._offline = offline
    if cfg_file is None:
      self._cfg_file = constants.CLUSTER_CONF_FILE
    else:
      self._cfg_file = cfg_file
    self._getents = _getents
    self._temporary_ids = TemporaryReservationManager()
    self._temporary_drbds = {}
    self._temporary_macs = TemporaryReservationManager()
    self._temporary_secrets = TemporaryReservationManager()
    self._temporary_lvs = TemporaryReservationManager()
    self._temporary_ips = TemporaryReservationManager()
    self._all_rms = [self._temporary_ids, self._temporary_macs,
                     self._temporary_secrets, self._temporary_lvs,
                     self._temporary_ips]
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = netutils.Hostname.GetSysName()
    self._last_cluster_serial = -1
    self._cfg_id = None
    self._OpenConfig(accept_foreign)

  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Check if the cluster is configured.

    """
    return os.path.exists(constants.CLUSTER_CONF_FILE)

  def _GenerateOneMAC(self):
    """Generate one MAC address.

    """
    prefix = self._config_data.cluster.mac_prefix
    byte1 = random.randrange(0, 256)
    byte2 = random.randrange(0, 256)
    byte3 = random.randrange(0, 256)
    mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
    return mac

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNdParams(self, node):
    """Get the node params populated with cluster defaults.

    @type node: L{objects.Node}
    @param node: The node we want to know the params for
    @return: A dict with the filled in node params

    """
    nodegroup = self._UnlockedGetNodeGroup(node.group)
    return self._config_data.cluster.FillND(node, nodegroup)

  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateMAC(self, ec_id):
    """Generate a MAC for an instance.

    This should check the current instances for duplicates.

    """
    existing = self._AllMACs()
    return self._temporary_ids.Generate(existing, self._GenerateOneMAC, ec_id)

  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveMAC(self, mac, ec_id):
    """Reserve a MAC for an instance.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    """
    all_macs = self._AllMACs()
    if mac in all_macs:
      raise errors.ReservationError("mac already in use")
    else:
      self._temporary_macs.Reserve(ec_id, mac)
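
  # Usage sketch (illustrative comments only; cfg is assumed to be a
  # ConfigWriter instance, the job id and MAC value are hypothetical):
  #
  #   mac = cfg.GenerateMAC("job-1")           # unique vs. config + reservations
  #   cfg.ReserveMAC("aa:00:00:12:34:56", "job-1")  # raises ReservationError
  #                                                 # if an instance uses it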

    
  def _UnlockedCommitReservedIps(self, ec_id):
    """Commit all reserved IP addresses to their respective pools.

    """
    for address, net_uuid in self._temporary_ips.GetECReserved(ec_id):
      self._UnlockedCommitIp(net_uuid, address)

  def _UnlockedCommitIp(self, net_uuid, address):
    """Commit a reserved IP address to an IP pool.

    The IP address is taken from the IP pool designated by net_uuid and
    marked as reserved.

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)
    pool.Reserve(address)

  @locking.ssynchronized(_config_lock)
  def CommitIp(self, net_uuid, address):
    """Commit a reserved IP to an IP pool.

    This is just a wrapper around _UnlockedCommitIp.

    @param net_uuid: UUID of the network to commit the IP to
    @param address: The IP address

    """
    self._UnlockedCommitIp(net_uuid, address)
    self._WriteConfig()

  @locking.ssynchronized(_config_lock)
  def CommitGroupInstanceIps(self, group_uuid, net_uuid,
                             link, feedback_fn=None):
    """Commit all IPs of instances on a given node group's link to the pools.

    This is used when mapping networks to node groups, and is a separate
    method to ensure atomicity (i.e. all or none committed).

    @param group_uuid: the uuid of the node group
    @param net_uuid: the uuid of the network to use
    @param link: the link on which the relevant instances reside

    """
    affected_nodes = []
    for node, ni in self._config_data.nodes.items():
      if ni.group == group_uuid:
        affected_nodes.append(node)

    for instance in self._config_data.instances.values():
      if instance.primary_node not in affected_nodes:
        continue

      for nic in instance.nics:
        nic_link = nic.nicparams.get(constants.NIC_LINK, None)
        if nic_link == link:
          if feedback_fn:
            feedback_fn("Committing instance %s IP %s" %
                        (instance.name, nic.ip))
          self._UnlockedCommitIp(net_uuid, nic.ip)

    self._WriteConfig()

  def _UnlockedReleaseIp(self, net_uuid, address):
    """Give a specific IP address back to an IP pool.

    The IP address is returned to the IP pool designated by net_uuid and
    marked as free.

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)
    pool.Release(address)

  @locking.ssynchronized(_config_lock)
  def ReleaseIp(self, net_uuid, address):
    """Give a specified IP address back to an IP pool.

    This is just a wrapper around _UnlockedReleaseIp.

    """
    self._UnlockedReleaseIp(net_uuid, address)
    self._WriteConfig()

  @locking.ssynchronized(_config_lock)
  def ReleaseGroupInstanceIps(self, group_uuid, net_uuid,
                              link, feedback_fn=None):
    """Release all IPs of instances on a given node group's link to the pools.

    This is used when unmapping networks from node groups and
    is a separate method to ensure atomicity (i.e. all or none released).

    @param group_uuid: the uuid of the node group
    @param net_uuid: the uuid of the network to use
    @param link: the link on which the relevant instances reside

    """
    affected_nodes = []
    for node, ni in self._config_data.nodes.items():
      if ni.group == group_uuid:
        affected_nodes.append(node)

    for instance in self._config_data.instances.values():
      if instance.primary_node not in affected_nodes:
        continue

      for nic in instance.nics:
        nic_link = nic.nicparams.get(constants.NIC_LINK, None)
        if nic_link == link:
          if feedback_fn:
            feedback_fn("Releasing instance %s IP %s" %
                        (instance.name, nic.ip))
          self._UnlockedReleaseIp(net_uuid, nic.ip)

    self._WriteConfig()

  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateIp(self, node_name, link, ec_id):
    """Find a free IPv4 address for an instance.

    """
    net_uuid = self._UnlockedGetNetworkFromNodeLink(node_name, link)
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)
    gen_free = pool.GenerateFree()
    gen_one = lambda: (gen_free(), net_uuid)
    address, _ = self._temporary_ips.Generate([], gen_one, ec_id)
    return address

  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveIp(self, node_name, link, address, ec_id):
    """Reserve a given IPv4 address for use by an instance.

    """
    net_uuid = self._UnlockedGetNetworkFromNodeLink(node_name, link)
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)
    try:
      pool.Reserve(address)
    except errors.AddressPoolError:
      raise errors.ReservationError("IP address already in use")
    # note: Reserve() takes the execution context id first, then the resource
    return self._temporary_ips.Reserve(ec_id, (address, net_uuid))
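
  # Usage sketch (illustrative comments only; cfg is assumed to be a
  # ConfigWriter instance, node name, link and address are hypothetical):
  # the IP is only reserved here and committed to its pool later, e.g. from
  # AddInstance().
  #
  #   ip = cfg.GenerateIp("node1.example.com", "br0", "job-1")
  #   cfg.ReserveIp("node1.example.com", "br0", "192.0.2.10", "job-1")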

    
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveLV(self, lv_name, ec_id):
    """Reserve a VG/LV pair for an instance.

    @type lv_name: string
    @param lv_name: the logical volume name to reserve

    """
    all_lvs = self._AllLVs()
    if lv_name in all_lvs:
      raise errors.ReservationError("LV already in use")
    else:
      self._temporary_lvs.Reserve(ec_id, lv_name)

  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateDRBDSecret(self, ec_id):
    """Generate a DRBD secret.

    This checks the current disks for duplicates.

    """
    return self._temporary_secrets.Generate(self._AllDRBDSecrets(),
                                            utils.GenerateSecret,
                                            ec_id)

  def _AllLVs(self):
    """Compute the list of all LVs.

    """
    lvnames = set()
    for instance in self._config_data.instances.values():
      node_data = instance.MapLVsByNode()
      for lv_list in node_data.values():
        lvnames.update(lv_list)
    return lvnames

  def _AllIDs(self, include_temporary):
    """Compute the list of all UUIDs and names we have.

    @type include_temporary: boolean
    @param include_temporary: whether to include the _temporary_ids set
    @rtype: set
    @return: a set of IDs

    """
    existing = set()
    if include_temporary:
      existing.update(self._temporary_ids.GetReserved())
    existing.update(self._AllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
    return existing

  def _GenerateUniqueID(self, ec_id):
    """Generate a unique UUID.

    This checks the current node, instances and disk names for
    duplicates.

    @rtype: string
    @return: the unique id

    """
    existing = self._AllIDs(include_temporary=False)
    return self._temporary_ids.Generate(existing, utils.NewUUID, ec_id)

  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateUniqueID(self, ec_id):
    """Generate a unique ID.

    This is just a wrapper over the unlocked version.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the id to

    """
    return self._GenerateUniqueID(ec_id)

  def _AllMACs(self):
    """Return all MACs present in the config.

    @rtype: list
    @return: the list of all MACs

    """
    result = []
    for instance in self._config_data.instances.values():
      for nic in instance.nics:
        result.append(nic.mac)

    return result

  def _AllDRBDSecrets(self):
    """Return all DRBD secrets present in the config.

    @rtype: list
    @return: the list of all DRBD secrets

    """
    def helper(disk, result):
      """Recursively gather secrets from this disk."""
      if disk.dev_type == constants.DT_DRBD8:
        result.append(disk.logical_id[5])
      if disk.children:
        for child in disk.children:
          helper(child, result)

    result = []
    for instance in self._config_data.instances.values():
      for disk in instance.disks:
        helper(disk, result)

    return result

  def _CheckDiskIDs(self, disk, l_ids, p_ids):
    """Compute duplicate disk IDs.

    @type disk: L{objects.Disk}
    @param disk: the disk at which to start searching
    @type l_ids: list
    @param l_ids: list of current logical ids
    @type p_ids: list
    @param p_ids: list of current physical ids
    @rtype: list
    @return: a list of error messages

    """
    result = []
    if disk.logical_id is not None:
      if disk.logical_id in l_ids:
        result.append("duplicate logical id %s" % str(disk.logical_id))
      else:
        l_ids.append(disk.logical_id)
    if disk.physical_id is not None:
      if disk.physical_id in p_ids:
        result.append("duplicate physical id %s" % str(disk.physical_id))
      else:
        p_ids.append(disk.physical_id)

    if disk.children:
      for child in disk.children:
        result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
    return result

  def _UnlockedVerifyConfig(self):
    """Verify function.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    # pylint: disable-msg=R0914
    result = []
    seen_macs = []
    ports = {}
    data = self._config_data
    cluster = data.cluster
    seen_lids = []
    seen_pids = []

    # global cluster checks
    if not cluster.enabled_hypervisors:
      result.append("enabled hypervisors list doesn't have any entries")
    invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
    if invalid_hvs:
      result.append("enabled hypervisors contains invalid entries: %s" %
                    invalid_hvs)
    missing_hvp = (set(cluster.enabled_hypervisors) -
                   set(cluster.hvparams.keys()))
    if missing_hvp:
      result.append("hypervisor parameters missing for the enabled"
                    " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))

    if cluster.master_node not in data.nodes:
      result.append("cluster has invalid primary node '%s'" %
                    cluster.master_node)

    def _helper(owner, attr, value, template):
      try:
        utils.ForceDictType(value, template)
      except errors.GenericError, err:
        result.append("%s has invalid %s: %s" % (owner, attr, err))

    def _helper_nic(owner, params):
      try:
        objects.NIC.CheckParameterSyntax(params)
      except errors.ConfigurationError, err:
        result.append("%s has invalid nicparams: %s" % (owner, err))

    # check cluster parameters
    _helper("cluster", "beparams", cluster.SimpleFillBE({}),
            constants.BES_PARAMETER_TYPES)
    _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
            constants.NICS_PARAMETER_TYPES)
    _helper_nic("cluster", cluster.SimpleFillNIC({}))
    _helper("cluster", "ndparams", cluster.SimpleFillND({}),
            constants.NDS_PARAMETER_TYPES)

    # per-instance checks
    for instance_name in data.instances:
      instance = data.instances[instance_name]
      if instance.name != instance_name:
        result.append("instance '%s' is indexed by wrong name '%s'" %
                      (instance.name, instance_name))
      if instance.primary_node not in data.nodes:
        result.append("instance '%s' has invalid primary node '%s'" %
                      (instance_name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("instance '%s' has invalid secondary node '%s'" %
                        (instance_name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("instance '%s' has NIC %d mac %s duplicate" %
                        (instance_name, idx, nic.mac))
        else:
          seen_macs.append(nic.mac)
        if nic.nicparams:
          filled = cluster.SimpleFillNIC(nic.nicparams)
          owner = "instance %s nic %d" % (instance.name, idx)
          _helper(owner, "nicparams",
                  filled, constants.NICS_PARAMETER_TYPES)
          _helper_nic(owner, filled)

      # parameter checks
      if instance.beparams:
        _helper("instance %s" % instance.name, "beparams",
                cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)

      # gather the drbd ports for duplicate checks
      for dsk in instance.disks:
        if dsk.dev_type in constants.LDS_DRBD:
          tcp_port = dsk.logical_id[2]
          if tcp_port not in ports:
            ports[tcp_port] = []
          ports[tcp_port].append((instance.name, "drbd disk %s" % dsk.iv_name))
      # gather network port reservation
      net_port = getattr(instance, "network_port", None)
      if net_port is not None:
        if net_port not in ports:
          ports[net_port] = []
        ports[net_port].append((instance.name, "network port"))

      # instance disk verify
      for idx, disk in enumerate(instance.disks):
        result.extend(["instance '%s' disk %d error: %s" %
                       (instance.name, idx, msg) for msg in disk.Verify()])
        result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))

    # cluster-wide pool of free ports
    for free_port in cluster.tcpudp_port_pool:
      if free_port not in ports:
        ports[free_port] = []
      ports[free_port].append(("cluster", "port marked as free"))

    # compute tcp/udp duplicate ports
    keys = ports.keys()
    keys.sort()
    for pnum in keys:
      pdata = ports[pnum]
      if len(pdata) > 1:
        txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
        result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))

    # highest used tcp port check
    if keys:
      if keys[-1] > cluster.highest_used_port:
        result.append("Highest used port mismatch, saved %s, computed %s" %
                      (cluster.highest_used_port, keys[-1]))

    if not data.nodes[cluster.master_node].master_candidate:
      result.append("Master node is not a master candidate")

    # master candidate checks
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
    if mc_now < mc_max:
      result.append("Not enough master candidates: actual %d, target %d" %
                    (mc_now, mc_max))

    # node checks
    for node_name, node in data.nodes.items():
      if node.name != node_name:
        result.append("Node '%s' is indexed by wrong name '%s'" %
                      (node.name, node_name))
      if [node.master_candidate, node.drained, node.offline].count(True) > 1:
        result.append("Node %s state is invalid: master_candidate=%s,"
                      " drain=%s, offline=%s" %
                      (node.name, node.master_candidate, node.drained,
                       node.offline))
      if node.group not in data.nodegroups:
        result.append("Node '%s' has invalid group '%s'" %
                      (node.name, node.group))
      else:
        _helper("node %s" % node.name, "ndparams",
                cluster.FillND(node, data.nodegroups[node.group]),
                constants.NDS_PARAMETER_TYPES)

    # nodegroups checks
    nodegroups_names = set()
    for nodegroup_uuid in data.nodegroups:
      nodegroup = data.nodegroups[nodegroup_uuid]
      if nodegroup.uuid != nodegroup_uuid:
        result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
                      % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
      if utils.UUID_RE.match(nodegroup.name.lower()):
        result.append("node group '%s' (uuid: '%s') has uuid-like name" %
                      (nodegroup.name, nodegroup.uuid))
      if nodegroup.name in nodegroups_names:
        result.append("duplicate node group name '%s'" % nodegroup.name)
      else:
        nodegroups_names.add(nodegroup.name)
      if nodegroup.ndparams:
        _helper("group %s" % nodegroup.name, "ndparams",
                cluster.SimpleFillND(nodegroup.ndparams),
                constants.NDS_PARAMETER_TYPES)

    # drbd minors check
    _, duplicates = self._UnlockedComputeDRBDMap()
    for node, minor, instance_a, instance_b in duplicates:
      result.append("DRBD minor %d on node %s is assigned twice to instances"
                    " %s and %s" % (minor, node, instance_a, instance_b))

    # IP checks
    default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
    ips = {}

    def _AddIpAddress(ip, name):
      ips.setdefault(ip, []).append(name)

    _AddIpAddress(cluster.master_ip, "cluster_ip")

    for node in data.nodes.values():
      _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
      if node.secondary_ip != node.primary_ip:
        _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)

    for instance in data.instances.values():
      for idx, nic in enumerate(instance.nics):
        if nic.ip is None:
          continue

        nicparams = objects.FillDict(default_nicparams, nic.nicparams)
        nic_mode = nicparams[constants.NIC_MODE]
        nic_link = nicparams[constants.NIC_LINK]

        if nic_mode == constants.NIC_MODE_BRIDGED:
          link = "bridge:%s" % nic_link
        elif nic_mode == constants.NIC_MODE_ROUTED:
          link = "route:%s" % nic_link
        else:
          raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)

        _AddIpAddress("%s/%s" % (link, nic.ip),
                      "instance:%s/nic:%d" % (instance.name, idx))

    for ip, owners in ips.items():
      if len(owners) > 1:
        result.append("IP address %s is used by multiple owners: %s" %
                      (ip, utils.CommaJoin(owners)))

    return result

  @locking.ssynchronized(_config_lock, shared=1)
  def VerifyConfig(self):
    """Verify function.

    This is just a wrapper over L{_UnlockedVerifyConfig}.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    return self._UnlockedVerifyConfig()
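
  # Usage sketch (illustrative comments only; cfg is assumed to be a
  # ConfigWriter instance): VerifyConfig() returns messages rather than
  # raising, so callers choose how to react; logging is one option.
  #
  #   for msg in cfg.VerifyConfig():
  #     logging.error("configuration inconsistency: %s", msg)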

    
  def _UnlockedSetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote
    node.

    This function is for internal use, when the config lock is already held.

    """
    if disk.children:
      for child in disk.children:
        self._UnlockedSetDiskID(child, node_name)

    if disk.logical_id is None and disk.physical_id is not None:
      return
    if disk.dev_type == constants.LD_DRBD8:
      pnode, snode, port, pminor, sminor, secret = disk.logical_id
      if node_name not in (pnode, snode):
        raise errors.ConfigurationError("DRBD device does not know node %s" %
                                        node_name)
      pnode_info = self._UnlockedGetNodeInfo(pnode)
      snode_info = self._UnlockedGetNodeInfo(snode)
      if pnode_info is None or snode_info is None:
        raise errors.ConfigurationError("Can't find primary or secondary node"
                                        " for %s" % str(disk))
      p_data = (pnode_info.secondary_ip, port)
      s_data = (snode_info.secondary_ip, port)
      if pnode == node_name:
        disk.physical_id = p_data + s_data + (pminor, secret)
      else: # it must be secondary, we tested above
        disk.physical_id = s_data + p_data + (sminor, secret)
    else:
      disk.physical_id = disk.logical_id
    return

  @locking.ssynchronized(_config_lock)
  def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote
    node.

    """
    return self._UnlockedSetDiskID(disk, node_name)

  @locking.ssynchronized(_config_lock)
  def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._config_data.cluster.tcpudp_port_pool.add(port)
    self._WriteConfig()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetPortList(self):
    """Returns a copy of the current port list.

    """
    return self._config_data.cluster.tcpudp_port_pool.copy()

  @locking.ssynchronized(_config_lock)
  def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    """
    # If there are TCP/IP ports configured, we use them first.
    if self._config_data.cluster.tcpudp_port_pool:
      port = self._config_data.cluster.tcpudp_port_pool.pop()
    else:
      port = self._config_data.cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      self._config_data.cluster.highest_used_port = port

    self._WriteConfig()
    return port
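
  # Usage sketch (illustrative comments only; cfg is assumed to be a
  # ConfigWriter instance): a port taken with AllocatePort() should be handed
  # back with AddTcpUdpPort() once its user (e.g. a DRBD disk) is gone, so the
  # pool is reused before highest_used_port grows.
  #
  #   port = cfg.AllocatePort()
  #   ...
  #   cfg.AddTcpUdpPort(port)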

    
  def _UnlockedComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    @rtype: (dict, list)
    @return: dictionary of node_name: dict of minor: instance_name;
        the returned dict will have all the nodes in it (even if with
        an empty dict), and a list of duplicates; if the duplicates
        list is not empty, the configuration is corrupted and its caller
        should raise an exception

    """
    def _AppendUsedPorts(instance_name, disk, used):
      duplicates = []
      if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
        node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
        for node, port in ((node_a, minor_a), (node_b, minor_b)):
          assert node in used, ("Node '%s' of instance '%s' not found"
                                " in node list" % (node, instance_name))
          if port in used[node]:
            duplicates.append((node, port, instance_name, used[node][port]))
          else:
            used[node][port] = instance_name
      if disk.children:
        for child in disk.children:
          duplicates.extend(_AppendUsedPorts(instance_name, child, used))
      return duplicates

    duplicates = []
    my_dict = dict((node, {}) for node in self._config_data.nodes)
    for instance in self._config_data.instances.itervalues():
      for disk in instance.disks:
        duplicates.extend(_AppendUsedPorts(instance.name, disk, my_dict))
    for (node, minor), instance in self._temporary_drbds.iteritems():
      if minor in my_dict[node] and my_dict[node][minor] != instance:
        duplicates.append((node, minor, instance, my_dict[node][minor]))
      else:
        my_dict[node][minor] = instance
    return my_dict, duplicates

  @locking.ssynchronized(_config_lock)
  def ComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    This is just a wrapper over L{_UnlockedComputeDRBDMap}.

    @return: dictionary of node_name: dict of minor: instance_name;
        the returned dict will have all the nodes in it (even if with
        an empty dict).

    """
    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(duplicates))
    return d_map

  @locking.ssynchronized(_config_lock)
  def AllocateDRBDMinor(self, nodes, instance):
    """Allocate a drbd minor.

    The free minor will be automatically computed from the existing
    devices. A node can be given multiple times in order to allocate
    multiple minors. The result is the list of minors, in the same
    order as the passed nodes.

    @type instance: string
    @param instance: the instance for which we allocate minors

    """
    assert isinstance(instance, basestring), \
           "Invalid argument '%s' passed to AllocateDRBDMinor" % instance

    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(duplicates))
    result = []
    for nname in nodes:
      ndata = d_map[nname]
      if not ndata:
        # no minors used, we can start at 0
        result.append(0)
        ndata[0] = instance
        self._temporary_drbds[(nname, 0)] = instance
        continue
      keys = ndata.keys()
      keys.sort()
      ffree = utils.FirstFree(keys)
      if ffree is None:
        # return the next minor
        # TODO: implement high-limit check
        minor = keys[-1] + 1
      else:
        minor = ffree
      # double-check minor against current instances
      assert minor not in d_map[nname], \
             ("Attempt to reuse allocated DRBD minor %d on node %s,"
              " already allocated to instance %s" %
              (minor, nname, d_map[nname][minor]))
      ndata[minor] = instance
      # double-check minor against reservation
      r_key = (nname, minor)
      assert r_key not in self._temporary_drbds, \
             ("Attempt to reuse reserved DRBD minor %d on node %s,"
              " reserved for instance %s" %
              (minor, nname, self._temporary_drbds[r_key]))
      self._temporary_drbds[r_key] = instance
      result.append(minor)
    logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                  nodes, result)
    return result

  def _UnlockedReleaseDRBDMinors(self, instance):
    """Release temporary drbd minors allocated for a given instance.

    @type instance: string
    @param instance: the instance for which temporary minors should be
                     released

    """
    assert isinstance(instance, basestring), \
           "Invalid argument passed to ReleaseDRBDMinors"
    for key, name in self._temporary_drbds.items():
      if name == instance:
        del self._temporary_drbds[key]

  @locking.ssynchronized(_config_lock)
  def ReleaseDRBDMinors(self, instance):
    """Release temporary drbd minors allocated for a given instance.

    This should be called on the error paths, on the success paths
    it's automatically called by the ConfigWriter add and update
    functions.

    This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.

    @type instance: string
    @param instance: the instance for which temporary minors should be
                     released

    """
    self._UnlockedReleaseDRBDMinors(instance)
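
  # Usage sketch (illustrative comments only; cfg is assumed to be a
  # ConfigWriter instance, names are hypothetical): minors are only reserved
  # here; the add and update functions commit them on success, so error paths
  # must release them explicitly.
  #
  #   minors = cfg.AllocateDRBDMinor(["node1", "node2"], "inst1.example.com")
  #   try:
  #     ...                                          # build the disk objects
  #   except Exception:
  #     cfg.ReleaseDRBDMinors("inst1.example.com")   # error path only
  #     raise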

    
  @locking.ssynchronized(_config_lock, shared=1)
  def GetConfigVersion(self):
    """Get the configuration version.

    @return: Config version

    """
    return self._config_data.version

  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterName(self):
    """Get cluster name.

    @return: Cluster name

    """
    return self._config_data.cluster.cluster_name

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNode(self):
    """Get the hostname of the master node for this cluster.

    @return: Master hostname

    """
    return self._config_data.cluster.master_node

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterIP(self):
    """Get the IP of the master node for this cluster.

    @return: Master IP

    """
    return self._config_data.cluster.master_ip

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetdev(self):
    """Get the master network device for this cluster.

    """
    return self._config_data.cluster.master_netdev

  @locking.ssynchronized(_config_lock, shared=1)
  def GetFileStorageDir(self):
    """Get the file storage dir for this cluster.

    """
    return self._config_data.cluster.file_storage_dir

  @locking.ssynchronized(_config_lock, shared=1)
  def GetSharedFileStorageDir(self):
    """Get the shared file storage dir for this cluster.

    """
    return self._config_data.cluster.shared_file_storage_dir

  @locking.ssynchronized(_config_lock, shared=1)
  def GetHypervisorType(self):
    """Get the hypervisor type for this cluster.

    """
    return self._config_data.cluster.enabled_hypervisors[0]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetHostKey(self):
    """Return the rsa hostkey from the config.

    @rtype: string
    @return: the rsa hostkey

    """
    return self._config_data.cluster.rsahostkeypub

  @locking.ssynchronized(_config_lock, shared=1)
  def GetDefaultIAllocator(self):
    """Get the default instance allocator for this cluster.

    """
    return self._config_data.cluster.default_iallocator

  @locking.ssynchronized(_config_lock, shared=1)
  def GetPrimaryIPFamily(self):
    """Get cluster primary ip family.

    @return: primary ip family

    """
    return self._config_data.cluster.primary_ip_family

  @locking.ssynchronized(_config_lock)
  def AddNodeGroup(self, group, ec_id, check_uuid=True):
    """Add a node group to the configuration.

    This method calls group.UpgradeConfig() to fill any missing attributes
    according to their default values.

    @type group: L{objects.NodeGroup}
    @param group: the NodeGroup object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: bool
    @param check_uuid: add a UUID to the group if it doesn't have one or, if
                       it does, ensure that it does not exist in the
                       configuration already

    """
    self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
    self._WriteConfig()

  def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
    """Add a node group to the configuration.

    """
    logging.info("Adding node group %s to configuration", group.name)

    # Some code might need to add a node group with a pre-populated UUID
    # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
    # the "does this UUID exist already" check.
    if check_uuid:
      self._EnsureUUID(group, ec_id)

    try:
      existing_uuid = self._UnlockedLookupNodeGroup(group.name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (group.name, existing_uuid),
                                 errors.ECODE_EXISTS)

    group.serial_no = 1
    group.ctime = group.mtime = time.time()
    group.UpgradeConfig()

    self._config_data.nodegroups[group.uuid] = group
    self._config_data.cluster.serial_no += 1

  @locking.ssynchronized(_config_lock)
  def RemoveNodeGroup(self, group_uuid):
    """Remove a node group from the configuration.

    @type group_uuid: string
    @param group_uuid: the UUID of the node group to remove

    """
    logging.info("Removing node group %s from configuration", group_uuid)

    if group_uuid not in self._config_data.nodegroups:
      raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)

    assert len(self._config_data.nodegroups) != 1, \
            "Group '%s' is the only group, cannot be removed" % group_uuid

    del self._config_data.nodegroups[group_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  def _UnlockedLookupNodeGroup(self, target):
    """Lookup a node group's UUID.

    @type target: string or None
    @param target: group name or UUID or None to look for the default
    @rtype: string
    @return: nodegroup UUID
    @raises errors.OpPrereqError: when the target group cannot be found

    """
    if target is None:
      if len(self._config_data.nodegroups) != 1:
        raise errors.OpPrereqError("More than one node group exists. Target"
                                   " group must be specified explicitly.")
      else:
        return self._config_data.nodegroups.keys()[0]
    if target in self._config_data.nodegroups:
      return target
    for nodegroup in self._config_data.nodegroups.values():
      if nodegroup.name == target:
        return nodegroup.uuid
    raise errors.OpPrereqError("Node group '%s' not found" % target,
                               errors.ECODE_NOENT)

  @locking.ssynchronized(_config_lock, shared=1)
  def LookupNodeGroup(self, target):
    """Lookup a node group's UUID.

    This function is just a wrapper over L{_UnlockedLookupNodeGroup}.

    @type target: string or None
    @param target: group name or UUID or None to look for the default
    @rtype: string
    @return: nodegroup UUID

    """
    return self._UnlockedLookupNodeGroup(target)
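
  # Usage sketch (illustrative comments only; cfg is assumed to be a
  # ConfigWriter instance, the group name is hypothetical): LookupNodeGroup()
  # resolves a name or UUID, and None selects the default group, which is
  # only valid while exactly one group exists.
  #
  #   uuid = cfg.LookupNodeGroup("group1")
  #   default_uuid = cfg.LookupNodeGroup(None)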

    
  def _UnlockedGetNodeGroup(self, uuid):
    """Lookup a node group.

    @type uuid: string
    @param uuid: group UUID
    @rtype: L{objects.NodeGroup} or None
    @return: nodegroup object, or None if not found

    """
    if uuid not in self._config_data.nodegroups:
      return None

    return self._config_data.nodegroups[uuid]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroup(self, uuid):
    """Lookup a node group.

    @type uuid: string
    @param uuid: group UUID
    @rtype: L{objects.NodeGroup} or None
    @return: nodegroup object, or None if not found

    """
    return self._UnlockedGetNodeGroup(uuid)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNodeGroupsInfo(self):
    """Get the configuration of all node groups.

    """
    return dict(self._config_data.nodegroups)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupList(self):
    """Get a list of node groups.

    """
    return self._config_data.nodegroups.keys()

  def _UnlockedGetNetworkFromNodeLink(self, node_name, node_link):
    node = self._config_data.nodes[node_name]
    nodegroup = self._UnlockedGetNodeGroup(node.group)
    for uuid, link in nodegroup.networks.items():
      if link == node_link:
        return uuid

    return None

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNetworkFromNodeLink(self, node_name, node_link):
    return self._UnlockedGetNetworkFromNodeLink(node_name, node_link)

  @locking.ssynchronized(_config_lock)
  def AddInstance(self, instance, ec_id):
    """Add an instance to the config.

    This should be used after creating a new instance.

    @type instance: L{objects.Instance}
    @param instance: the instance object

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
      all_lvs = instance.MapLVsByNode()
      logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)

    all_macs = self._AllMACs()
    for nic in instance.nics:
      if nic.mac in all_macs:
        raise errors.ConfigurationError("Cannot add instance %s:"
                                        " MAC address '%s' already in use." %
                                        (instance.name, nic.mac))

    self._EnsureUUID(instance, ec_id)

    instance.serial_no = 1
    instance.ctime = instance.mtime = time.time()
    self._config_data.instances[instance.name] = instance
    self._config_data.cluster.serial_no += 1
    self._UnlockedReleaseDRBDMinors(instance.name)
    self._UnlockedCommitReservedIps(ec_id)
    self._WriteConfig()
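
  # Usage sketch (illustrative comments only; cfg is assumed to be a
  # ConfigWriter instance and the instance object is hypothetical):
  # AddInstance() finalizes reservations made earlier under the same ec_id,
  # releasing temporary DRBD minors and committing reserved IPs before the
  # config is written.
  #
  #   inst = objects.Instance(name="inst1.example.com", nics=[], disks=[],
  #                           disk_template=constants.DT_DISKLESS,
  #                           primary_node="node1.example.com")
  #   cfg.AddInstance(inst, "job-1")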

    
1264
  def _EnsureUUID(self, item, ec_id):
1265
    """Ensures a given object has a valid UUID.
1266

1267
    @param item: the instance or node to be checked
1268
    @param ec_id: the execution context id for the uuid reservation
1269

1270
    """
1271
    if not item.uuid:
1272
      item.uuid = self._GenerateUniqueID(ec_id)
1273
    elif item.uuid in self._AllIDs(include_temporary=True):
1274
      raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
1275
                                      " in use" % (item.name, item.uuid))
1276

    
1277
  def _SetInstanceStatus(self, instance_name, status):
1278
    """Set the instance's status to a given value.
1279

1280
    """
1281
    assert isinstance(status, bool), \
1282
           "Invalid status '%s' passed to SetInstanceStatus" % (status,)
1283

    
1284
    if instance_name not in self._config_data.instances:
1285
      raise errors.ConfigurationError("Unknown instance '%s'" %
1286
                                      instance_name)
1287
    instance = self._config_data.instances[instance_name]
1288
    if instance.admin_up != status:
1289
      instance.admin_up = status
1290
      instance.serial_no += 1
1291
      instance.mtime = time.time()
1292
      self._WriteConfig()
1293

    
1294
  @locking.ssynchronized(_config_lock)
1295
  def MarkInstanceUp(self, instance_name):
1296
    """Mark the instance status to up in the config.
1297

1298
    """
1299
    self._SetInstanceStatus(instance_name, True)
1300

    
1301
  @locking.ssynchronized(_config_lock)
1302
  def RemoveInstance(self, instance_name):
1303
    """Remove the instance from the configuration.
1304

1305
    """
1306
    if instance_name not in self._config_data.instances:
1307
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
1308

    
1309
    instance = self._UnlockedGetInstanceInfo(instance_name)
1310

    
1311
    for nic in instance.nics:
1312
      link = nic.nicparams.get(constants.NIC_LINK, None)
1313
      net_uuid = self._UnlockedGetNetworkFromNodeLink(instance.primary_node,
1314
                                                      link)
1315
      if net_uuid:
1316
        # Return all IP addresses to the respective address pools
1317
        self._UnlockedReleaseIp(net_uuid, nic.ip)
1318

    
1319
    del self._config_data.instances[instance_name]
1320
    self._config_data.cluster.serial_no += 1
1321
    self._WriteConfig()
1322

    
1323
  @locking.ssynchronized(_config_lock)
1324
  def RenameInstance(self, old_name, new_name):
1325
    """Rename an instance.
1326

1327
    This needs to be done in ConfigWriter and not by RemoveInstance
1328
    combined with AddInstance as only we can guarantee an atomic
1329
    rename.
1330

1331
    """
1332
    if old_name not in self._config_data.instances:
1333
      raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
1334
    inst = self._config_data.instances[old_name]
1335
    del self._config_data.instances[old_name]
1336
    inst.name = new_name
1337

    
1338
    for disk in inst.disks:
1339
      if disk.dev_type == constants.LD_FILE:
1340
        # rename the file paths in logical and physical id
1341
        file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
1342
        disk_fname = "disk%s" % disk.iv_name.split("/")[1]
1343
        disk.physical_id = disk.logical_id = (disk.logical_id[0],
1344
                                              utils.PathJoin(file_storage_dir,
1345
                                                             inst.name,
1346
                                                             disk_fname))
1347

    
1348
    # Force update of ssconf files
1349
    self._config_data.cluster.serial_no += 1
1350

    
1351
    self._config_data.instances[inst.name] = inst
1352
    self._WriteConfig()
1353

    
1354
  @locking.ssynchronized(_config_lock)
1355
  def MarkInstanceDown(self, instance_name):
1356
    """Mark the status of an instance to down in the configuration.
1357

1358
    """
1359
    self._SetInstanceStatus(instance_name, False)
1360

    
1361
  def _UnlockedGetInstanceList(self):
1362
    """Get the list of instances.
1363

1364
    This function is for internal use, when the config lock is already held.
1365

1366
    """
1367
    return self._config_data.instances.keys()
1368

    
1369
  @locking.ssynchronized(_config_lock, shared=1)
1370
  def GetInstanceList(self):
1371
    """Get the list of instances.
1372

1373
    @return: array of instances, ex. ['instance2.example.com',
1374
        'instance1.example.com']
1375

1376
    """
1377
    return self._UnlockedGetInstanceList()
1378

    
1379
  @locking.ssynchronized(_config_lock, shared=1)
1380
  def ExpandInstanceName(self, short_name):
1381
    """Attempt to expand an incomplete instance name.
1382

1383
    """
1384
    return utils.MatchNameComponent(short_name,
1385
                                    self._config_data.instances.keys(),
1386
                                    case_sensitive=False)
1387

    
1388
  def _UnlockedGetInstanceInfo(self, instance_name):
1389
    """Returns information about an instance.
1390

1391
    This function is for internal use, when the config lock is already held.
1392

1393
    """
1394
    if instance_name not in self._config_data.instances:
1395
      return None
1396

    
1397
    return self._config_data.instances[instance_name]
1398

    
1399
  @locking.ssynchronized(_config_lock, shared=1)
1400
  def GetInstanceInfo(self, instance_name):
1401
    """Returns information about an instance.
1402

1403
    It takes the information from the configuration file. Other information of
1404
    an instance are taken from the live systems.
1405

1406
    @param instance_name: name of the instance, e.g.
1407
        I{instance1.example.com}
1408

1409
    @rtype: L{objects.Instance}
1410
    @return: the instance object
1411

1412
    """
1413
    return self._UnlockedGetInstanceInfo(instance_name)
1414

    
1415
  @locking.ssynchronized(_config_lock, shared=1)
1416
  def GetAllInstancesInfo(self):
1417
    """Get the configuration of all instances.
1418

1419
    @rtype: dict
1420
    @return: dict of (instance, instance_info), where instance_info is what
1421
              would GetInstanceInfo return for the node
1422

1423
    """
1424
    my_dict = dict([(instance, self._UnlockedGetInstanceInfo(instance))
1425
                    for instance in self._UnlockedGetInstanceList()])
1426
    return my_dict
1427

    
1428
  @locking.ssynchronized(_config_lock)
1429
  def AddNode(self, node, ec_id):
1430
    """Add a node to the configuration.
1431

1432
    @type node: L{objects.Node}
1433
    @param node: a Node instance
1434

1435
    """
1436
    logging.info("Adding node %s to configuration", node.name)
1437

    
1438
    self._EnsureUUID(node, ec_id)
1439

    
1440
    node.serial_no = 1
1441
    node.ctime = node.mtime = time.time()
1442
    self._UnlockedAddNodeToGroup(node.name, node.group)
1443
    self._config_data.nodes[node.name] = node
1444
    self._config_data.cluster.serial_no += 1
1445
    self._WriteConfig()
1446

    
1447
  @locking.ssynchronized(_config_lock)
1448
  def RemoveNode(self, node_name):
1449
    """Remove a node from the configuration.
1450

1451
    """
1452
    logging.info("Removing node %s from configuration", node_name)
1453

    
1454
    if node_name not in self._config_data.nodes:
1455
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)
1456

    
1457
    self._UnlockedRemoveNodeFromGroup(self._config_data.nodes[node_name])
1458
    del self._config_data.nodes[node_name]
1459
    self._config_data.cluster.serial_no += 1
1460
    self._WriteConfig()
1461

    
1462
  @locking.ssynchronized(_config_lock, shared=1)
1463
  def ExpandNodeName(self, short_name):
1464
    """Attempt to expand an incomplete instance name.
1465

1466
    """
1467
    return utils.MatchNameComponent(short_name,
1468
                                    self._config_data.nodes.keys(),
1469
                                    case_sensitive=False)
1470

    
1471
  def _UnlockedGetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    This function is for internal use, when the config lock is already
    held.

    @param node_name: the node name, e.g. I{node1.example.com}

    @rtype: L{objects.Node}
    @return: the node object

    """
    if node_name not in self._config_data.nodes:
      return None

    return self._config_data.nodes[node_name]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

    @param node_name: the node name, e.g. I{node1.example.com}

    @rtype: L{objects.Node}
    @return: the node object

    """
    return self._UnlockedGetNodeInfo(node_name)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInstances(self, node_name):
    """Get the instances of a node, as stored in the config.

    @param node_name: the node name, e.g. I{node1.example.com}

    @rtype: (list, list)
    @return: a tuple with two lists: the primary and the secondary instances

    """
    pri = []
    sec = []
    for inst in self._config_data.instances.values():
      if inst.primary_node == node_name:
        pri.append(inst.name)
      if node_name in inst.secondary_nodes:
        sec.append(inst.name)
    return (pri, sec)

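  # Illustrative usage sketch (not part of the module): listing what would
  # be affected before taking a node offline; the node name is an example.
  #
  #   pri, sec = cfg.GetNodeInstances("node1.example.com")
  #   if pri:
  #     logging.warning("Node still hosts primary instances: %s",
  #                     utils.CommaJoin(pri))
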
  def _UnlockedGetNodeList(self):
    """Return the list of nodes which are in the configuration.

    This function is for internal use, when the config lock is already
    held.

    @rtype: list

    """
    return self._config_data.nodes.keys()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    """
    return self._UnlockedGetNodeList()

  def _UnlockedGetOnlineNodeList(self):
    """Return the list of nodes which are online.

    """
    all_nodes = [self._UnlockedGetNodeInfo(node)
                 for node in self._UnlockedGetNodeList()]
    return [node.name for node in all_nodes if not node.offline]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetOnlineNodeList(self):
    """Return the list of nodes which are online.

    """
    return self._UnlockedGetOnlineNodeList()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetVmCapableNodeList(self):
    """Return the list of nodes which are vm capable.

    """
    all_nodes = [self._UnlockedGetNodeInfo(node)
                 for node in self._UnlockedGetNodeList()]
    return [node.name for node in all_nodes if node.vm_capable]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNonVmCapableNodeList(self):
    """Return the list of nodes which are not vm capable.

    """
    all_nodes = [self._UnlockedGetNodeInfo(node)
                 for node in self._UnlockedGetNodeList()]
    return [node.name for node in all_nodes if not node.vm_capable]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNodesInfo(self):
    """Get the configuration of all nodes.

    @rtype: dict
    @return: dict of (node, node_info), where node_info is what
        GetNodeInfo would return for the node

    """
    my_dict = dict([(node, self._UnlockedGetNodeInfo(node))
                    for node in self._UnlockedGetNodeList()])
    return my_dict

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupsFromNodes(self, nodes):
    """Returns groups for a list of nodes.

    @type nodes: list of string
    @param nodes: List of node names
    @rtype: frozenset

    """
    return frozenset(self._UnlockedGetNodeInfo(name).group for name in nodes)

  def _UnlockedGetMasterCandidateStats(self, exceptions=None):
    """Get the current, desired and possible number of master candidates.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
    @rtype: tuple
    @return: tuple of (current, desired, possible); the desired value is
        capped at the number of possible candidates

    """
    mc_now = mc_should = mc_max = 0
    for node in self._config_data.nodes.values():
      if exceptions and node.name in exceptions:
        continue
      if not (node.offline or node.drained) and node.master_capable:
        mc_max += 1
      if node.master_candidate:
        mc_now += 1
    mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
    return (mc_now, mc_should, mc_max)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterCandidateStats(self, exceptions=None):
    """Get the current, desired and possible number of master candidates.

    This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
    @rtype: tuple
    @return: tuple of (current, desired, possible)

    """
    return self._UnlockedGetMasterCandidateStats(exceptions)

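  # Illustrative usage sketch (not part of the module): checking whether the
  # candidate pool is undersized before warning the operator.
  #
  #   mc_now, mc_should, mc_max = cfg.GetMasterCandidateStats()
  #   if mc_now < mc_should:
  #     logging.info("Only %d of %d desired master candidates present"
  #                  " (%d possible)", mc_now, mc_should, mc_max)
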
  @locking.ssynchronized(_config_lock)
  def MaintainCandidatePool(self, exceptions):
    """Try to grow the candidate pool to the desired size.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
    @rtype: list
    @return: list with the adjusted nodes (L{objects.Node} instances)

    """
    # the middle element of the stats tuple is the desired pool size,
    # which is used as the target below
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions)
    mod_list = []
    if mc_now < mc_max:
      node_list = self._config_data.nodes.keys()
      random.shuffle(node_list)
      for name in node_list:
        if mc_now >= mc_max:
          break
        node = self._config_data.nodes[name]
        if (node.master_candidate or node.offline or node.drained or
            node.name in exceptions or not node.master_capable):
          continue
        mod_list.append(node)
        node.master_candidate = True
        node.serial_no += 1
        mc_now += 1
      if mc_now != mc_max:
        # this should not happen
        logging.warning("Warning: MaintainCandidatePool didn't manage to"
                        " fill the candidate pool (%d/%d)", mc_now, mc_max)
      if mod_list:
        self._config_data.cluster.serial_no += 1
        self._WriteConfig()

    return mod_list

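  # Illustrative usage sketch (not part of the module): topping the pool up
  # after a candidate was removed; nodes are promoted in random order (hence
  # the shuffle above), so no fixed node is favoured.
  #
  #   adjusted = cfg.MaintainCandidatePool([])
  #   for node in adjusted:
  #     logging.info("Promoted %s to master candidate", node.name)
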
  def _UnlockedAddNodeToGroup(self, node_name, nodegroup_uuid):
    """Add a given node to the specified group.

    """
    if nodegroup_uuid not in self._config_data.nodegroups:
      # This can happen if a node group gets deleted between its lookup and
      # when we're adding the first node to it, since we don't keep a lock in
      # the meantime. It's ok though, as we'll fail cleanly if the node group
      # is not found anymore.
      raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)
    if node_name not in self._config_data.nodegroups[nodegroup_uuid].members:
      self._config_data.nodegroups[nodegroup_uuid].members.append(node_name)

  def _UnlockedRemoveNodeFromGroup(self, node):
    """Remove a given node from its group.

    """
    nodegroup = node.group
    if nodegroup not in self._config_data.nodegroups:
      logging.warning("Warning: node '%s' has unknown node group '%s'"
                      " (while being removed from it)", node.name, nodegroup)
      # bail out early, otherwise the lookup below would raise a KeyError
      return
    nodegroup_obj = self._config_data.nodegroups[nodegroup]
    if node.name not in nodegroup_obj.members:
      logging.warning("Warning: node '%s' not a member of its node group '%s'"
                      " (while being removed from it)", node.name, nodegroup)
    else:
      nodegroup_obj.members.remove(node.name)

  def _BumpSerialNo(self):
    """Bump up the serial number of the config.

    """
    self._config_data.serial_no += 1
    self._config_data.mtime = time.time()

  def _AllUUIDObjects(self):
    """Returns all objects with uuid attributes.

    """
    return (self._config_data.instances.values() +
            self._config_data.nodes.values() +
            self._config_data.nodegroups.values() +
            [self._config_data.cluster])

  def _OpenConfig(self, accept_foreign):
    """Read the config data from disk.

    """
    raw_data = utils.ReadFile(self._cfg_file)

    try:
      data = objects.ConfigData.FromDict(serializer.Load(raw_data))
    except Exception, err:
      raise errors.ConfigurationError(err)

    # Make sure the configuration has the right version
    _ValidateConfig(data)

    if (not hasattr(data, 'cluster') or
        not hasattr(data.cluster, 'rsahostkeypub')):
      raise errors.ConfigurationError("Incomplete configuration"
                                      " (missing cluster.rsahostkeypub)")

    if data.cluster.master_node != self._my_hostname and not accept_foreign:
      msg = ("The configuration denotes node %s as master, while my"
             " hostname is %s; opening a foreign configuration is only"
             " possible in accept_foreign mode" %
             (data.cluster.master_node, self._my_hostname))
      raise errors.ConfigurationError(msg)

    # Upgrade configuration if needed
    data.UpgradeConfig()

    self._config_data = data
    # reset the last serial as -1 so that the next write will cause
    # ssconf update
    self._last_cluster_serial = -1

    # And finally run our (custom) config upgrade sequence
    self._UpgradeConfig()

    self._cfg_id = utils.GetFileID(path=self._cfg_file)

  def _UpgradeConfig(self):
    """Run upgrade steps that cannot be done purely in the objects.

    This is because some data elements need uniqueness across the
    whole configuration, etc.

    @warning: this function will call L{_WriteConfig()}, but also
        L{DropECReservations} so it needs to be called only from a
        "safe" place (the constructor). If one wanted to call it with
        the lock held, a DropECReservationUnlocked would need to be
        created first, to avoid causing deadlock.

    """
    modified = False
    for item in self._AllUUIDObjects():
      if item.uuid is None:
        item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
        modified = True
    if not self._config_data.nodegroups:
      default_nodegroup_name = constants.INITIAL_NODE_GROUP_NAME
      default_nodegroup = objects.NodeGroup(name=default_nodegroup_name,
                                            members=[])
      self._UnlockedAddNodeGroup(default_nodegroup, _UPGRADE_CONFIG_JID, True)
      modified = True
    for node in self._config_data.nodes.values():
      if not node.group:
        node.group = self.LookupNodeGroup(None)
        modified = True
      # This is technically *not* an upgrade, but needs to be done both when
      # nodegroups are being added, and upon normally loading the config,
      # because the members list of a node group is discarded upon
      # serializing/deserializing the object.
      self._UnlockedAddNodeToGroup(node.name, node.group)
    if modified:
      self._WriteConfig()
      # This is ok even if it acquires the internal lock, as _UpgradeConfig is
      # only called at config init time, without the lock held
      self.DropECReservations(_UPGRADE_CONFIG_JID)

  def _DistributeConfig(self, feedback_fn):
    """Distribute the configuration to the other nodes.

    Currently, this only copies the configuration file. In the future,
    it could be used to encapsulate the 2/3-phase update mechanism.

    """
    if self._offline:
      return True

    bad = False

    node_list = []
    addr_list = []
    myhostname = self._my_hostname
    # we can skip checking whether _UnlockedGetNodeInfo returns None
    # since the node list comes from _UnlockedGetNodeList, and we are
    # called with the lock held, so no modifications should take place
    # in between
    for node_name in self._UnlockedGetNodeList():
      if node_name == myhostname:
        continue
      node_info = self._UnlockedGetNodeInfo(node_name)
      if not node_info.master_candidate:
        continue
      node_list.append(node_info.name)
      addr_list.append(node_info.primary_ip)

    result = rpc.RpcRunner.call_upload_file(node_list, self._cfg_file,
                                            address_list=addr_list)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (self._cfg_file, to_node, msg))
        logging.error(msg)

        if feedback_fn:
          feedback_fn(msg)

        bad = True

    return not bad

  def _WriteConfig(self, destination=None, feedback_fn=None):
    """Write the configuration data to persistent storage.

    """
    assert feedback_fn is None or callable(feedback_fn)

    # Warn on config errors, but don't abort the save - the
    # configuration has already been modified, and we can't revert;
    # the best we can do is to warn the user and save as is, leaving
    # recovery to the user
    config_errors = self._UnlockedVerifyConfig()
    if config_errors:
      errmsg = ("Configuration data is not consistent: %s" %
                (utils.CommaJoin(config_errors)))
      logging.critical(errmsg)
      if feedback_fn:
        feedback_fn(errmsg)

    if destination is None:
      destination = self._cfg_file
    self._BumpSerialNo()
    txt = serializer.Dump(self._config_data.ToDict())

    getents = self._getents()
    try:
      fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
                               close=False, gid=getents.confd_gid, mode=0640)
    except errors.LockError:
      raise errors.ConfigurationError("The configuration file has been"
                                      " modified since the last write, cannot"
                                      " update")
    try:
      self._cfg_id = utils.GetFileID(fd=fd)
    finally:
      os.close(fd)

    self.write_count += 1

    # and redistribute the config file to master candidates
    self._DistributeConfig(feedback_fn)

    # Write ssconf files on all nodes (including locally)
    if self._last_cluster_serial < self._config_data.cluster.serial_no:
      if not self._offline:
        result = rpc.RpcRunner.call_write_ssconf_files(
          self._UnlockedGetOnlineNodeList(),
          self._UnlockedGetSsconfValues())

        for nname, nresu in result.items():
          msg = nresu.fail_msg
          if msg:
            errmsg = ("Error while uploading ssconf files to"
                      " node %s: %s" % (nname, msg))
            logging.warning(errmsg)

            if feedback_fn:
              feedback_fn(errmsg)

      self._last_cluster_serial = self._config_data.cluster.serial_no

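  # Note on the write path: every save bumps the config serial number and is
  # guarded by the file ID recorded at the previous read/write (see the
  # utils.SafeWriteFile call above), so a concurrent out-of-band modification
  # of the file surfaces as an errors.ConfigurationError instead of being
  # silently overwritten.
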
  def _UnlockedGetSsconfValues(self):
    """Return the values needed by ssconf.

    @rtype: dict
    @return: a dictionary with keys the ssconf names and values their
        associated value

    """
    fn = "\n".join
    instance_names = utils.NiceSort(self._UnlockedGetInstanceList())
    node_names = utils.NiceSort(self._UnlockedGetNodeList())
    node_info = [self._UnlockedGetNodeInfo(name) for name in node_names]
    node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
                    for ninfo in node_info]
    node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
                    for ninfo in node_info]

    instance_data = fn(instance_names)
    off_data = fn(node.name for node in node_info if node.offline)
    on_data = fn(node.name for node in node_info if not node.offline)
    mc_data = fn(node.name for node in node_info if node.master_candidate)
    mc_ips_data = fn(node.primary_ip for node in node_info
                     if node.master_candidate)
    node_data = fn(node_names)
    node_pri_ips_data = fn(node_pri_ips)
    node_snd_ips_data = fn(node_snd_ips)

    cluster = self._config_data.cluster
    cluster_tags = fn(cluster.GetTags())

    hypervisor_list = fn(cluster.enabled_hypervisors)

    uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")

    nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in
                  self._config_data.nodegroups.values()]
    nodegroups_data = fn(utils.NiceSort(nodegroups))
    networks = ["%s %s" % (net.uuid, net.name) for net in
                self._config_data.networks.values()]
    networks_data = fn(utils.NiceSort(networks))

    ssconf_values = {
      constants.SS_CLUSTER_NAME: cluster.cluster_name,
      constants.SS_CLUSTER_TAGS: cluster_tags,
      constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
      constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
      constants.SS_MASTER_CANDIDATES: mc_data,
      constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
      constants.SS_MASTER_IP: cluster.master_ip,
      constants.SS_MASTER_NETDEV: cluster.master_netdev,
      constants.SS_MASTER_NODE: cluster.master_node,
      constants.SS_NODE_LIST: node_data,
      constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
      constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
      constants.SS_OFFLINE_NODES: off_data,
      constants.SS_ONLINE_NODES: on_data,
      constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
      constants.SS_INSTANCE_LIST: instance_data,
      constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
      constants.SS_HYPERVISOR_LIST: hypervisor_list,
      constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
      constants.SS_UID_POOL: uid_pool,
      constants.SS_NODEGROUPS: nodegroups_data,
      constants.SS_NETWORKS: networks_data,
      }
    bad_values = [(k, v) for k, v in ssconf_values.items()
                  if not isinstance(v, (str, basestring))]
    if bad_values:
      err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
      raise errors.ConfigurationError("Some ssconf key(s) have non-string"
                                      " values: %s" % err)
    return ssconf_values

  @locking.ssynchronized(_config_lock, shared=1)
  def GetSsconfValues(self):
    """Wrapper using lock around L{_UnlockedGetSsconfValues}.

    """
    return self._UnlockedGetSsconfValues()

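  # Illustrative usage sketch (not part of the module): each entry maps an
  # ssconf key to a newline-separated string, e.g. the online node list.
  #
  #   values = cfg.GetSsconfValues()
  #   online = values[constants.SS_ONLINE_NODES].split("\n")
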
  @locking.ssynchronized(_config_lock, shared=1)
  def GetVGName(self):
    """Return the volume group name.

    """
    return self._config_data.cluster.volume_group_name

  @locking.ssynchronized(_config_lock)
  def SetVGName(self, vg_name):
    """Set the volume group name.

    """
    self._config_data.cluster.volume_group_name = vg_name
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetDRBDHelper(self):
    """Return DRBD usermode helper.

    """
    return self._config_data.cluster.drbd_usermode_helper

  @locking.ssynchronized(_config_lock)
  def SetDRBDHelper(self, drbd_helper):
    """Set DRBD usermode helper.

    """
    self._config_data.cluster.drbd_usermode_helper = drbd_helper
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMACPrefix(self):
    """Return the MAC prefix.

    """
    return self._config_data.cluster.mac_prefix

  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterInfo(self):
    """Returns information about the cluster.

    @rtype: L{objects.Cluster}
    @return: the cluster object

    """
    return self._config_data.cluster

  @locking.ssynchronized(_config_lock, shared=1)
  def HasAnyDiskOfType(self, dev_type):
    """Check if any disk of the given type exists in the configuration.

    """
    return self._config_data.HasAnyDiskOfType(dev_type)

  @locking.ssynchronized(_config_lock)
  def Update(self, target, feedback_fn, ec_id=None):
    """Notify function to be called after updates.

    This function must be called when an object (as returned by
    GetInstanceInfo, GetNodeInfo, GetClusterInfo) has been updated and the
    caller wants the modifications saved to the backing store. Note
    that all modified objects will be saved, but the target argument
    is the one the caller wants to ensure is saved.

    @param target: an instance of either L{objects.Cluster},
        L{objects.Node} or L{objects.Instance} which is existing in
        the cluster
    @param feedback_fn: Callable feedback function

    """
    if self._config_data is None:
      raise errors.ProgrammerError("Configuration file not read,"
                                   " cannot save.")
    update_serial = False
    if isinstance(target, objects.Cluster):
      test = target == self._config_data.cluster
    elif isinstance(target, objects.Node):
      test = target in self._config_data.nodes.values()
      update_serial = True
    elif isinstance(target, objects.Instance):
      test = target in self._config_data.instances.values()
    elif isinstance(target, objects.NodeGroup):
      test = target in self._config_data.nodegroups.values()
    elif isinstance(target, objects.Network):
      test = target in self._config_data.networks.values()
    else:
      raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                   " ConfigWriter.Update" % type(target))
    if not test:
      raise errors.ConfigurationError("Configuration updated since object"
                                      " has been read or unknown object")
    target.serial_no += 1
    target.mtime = now = time.time()

    if update_serial:
      # for node updates, we need to increase the cluster serial too
      self._config_data.cluster.serial_no += 1
      self._config_data.cluster.mtime = now

    if isinstance(target, objects.Instance):
      self._UnlockedReleaseDRBDMinors(target.name)
      if ec_id is not None:
        # Commit all reserved IPs
        self._UnlockedCommitReservedIps(ec_id)
        # Drop the IP reservations so that we can call Update() multiple times
        # from within the same LU
        self._temporary_ips.DropECReservations(ec_id)

    self._WriteConfig(feedback_fn=feedback_fn)

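  # Illustrative usage sketch (not part of the module): the read-modify-write
  # cycle a caller typically goes through; feedback_fn may be None.
  #
  #   node = cfg.GetNodeInfo("node1.example.com")
  #   node.offline = True
  #   cfg.Update(node, feedback_fn)
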
  @locking.ssynchronized(_config_lock)
  def DropECReservations(self, ec_id):
    """Drop per-execution-context reservations.

    """
    for rm in self._all_rms:
      rm.DropECReservations(ec_id)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNetworksInfo(self):
    """Get the configuration of all networks.

    """
    return dict(self._config_data.networks)

  def _UnlockedGetNetworkList(self):
    """Get the list of networks.

    This function is for internal use, when the config lock is already held.

    """
    return self._config_data.networks.keys()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNetworkList(self):
    """Get the list of networks.

    @return: a list of network UUIDs

    """
    return self._UnlockedGetNetworkList()

  def _UnlockedGetNetwork(self, uuid):
    """Returns information about a network.

    This function is for internal use, when the config lock is already held.

    """
    if uuid not in self._config_data.networks:
      return None

    return self._config_data.networks[uuid]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNetwork(self, uuid):
    """Returns information about a network.

    It takes the information from the configuration file.

    @param uuid: UUID of the network

    @rtype: L{objects.Network}
    @return: the network object

    """
    return self._UnlockedGetNetwork(uuid)

  @locking.ssynchronized(_config_lock)
  def AddNetwork(self, net, ec_id):
    """Add a network to the configuration.

    @type net: L{objects.Network}
    @param net: the Network object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID

    """
    self._UnlockedAddNetwork(net, ec_id)
    self._WriteConfig()

  def _UnlockedAddNetwork(self, net, ec_id):
    """Add a network to the configuration.

    """
    logging.info("Adding network %s to configuration", net.name)

    self._EnsureUUID(net, ec_id)

    try:
      existing_uuid = self._UnlockedLookupNetwork(net.name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired network name '%s' already exists as"
                                 " a network (UUID: %s)" %
                                 (net.name, existing_uuid),
                                 errors.ECODE_EXISTS)

    self._config_data.networks[net.uuid] = net
    self._config_data.cluster.serial_no += 1

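  # Illustrative usage sketch (not part of the module): registering a new
  # network; the network attributes are made-up examples and the exact
  # objects.Network fields are an assumption.
  #
  #   net = objects.Network(name="vlan100", network="192.0.2.0/24",
  #                         gateway="192.0.2.1")
  #   cfg.AddNetwork(net, "my-ec-id")
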
  def _UnlockedLookupNetwork(self, target):
    """Lookup a network's UUID.

    @type target: string
    @param target: network name or UUID
    @rtype: string
    @return: network UUID
    @raises errors.OpPrereqError: when the target network cannot be found

    """
    if target in self._config_data.networks:
      return target
    for net in self._config_data.networks.values():
      if net.name == target:
        return net.uuid
    raise errors.OpPrereqError("Network '%s' not found" % target,
                               errors.ECODE_NOENT)

  @locking.ssynchronized(_config_lock, shared=1)
  def LookupNetwork(self, target):
    """Lookup a network's UUID.

    This function is just a wrapper over L{_UnlockedLookupNetwork}.

    @type target: string
    @param target: network name or UUID
    @rtype: string
    @return: network UUID

    """
    return self._UnlockedLookupNetwork(target)
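
  # Illustrative usage sketch (not part of the module): resolving a
  # user-supplied name or UUID before operating on the network.
  #
  #   uuid = cfg.LookupNetwork("vlan100")
  #   net = cfg.GetNetwork(uuid)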