4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
"""Configuration management for Ganeti

This module provides the interface to the Ganeti cluster configuration.

The configuration data is stored on every node but is updated on the master
only. After each update, the master distributes the data to the other nodes.

Currently, the data storage format is JSON. YAML was slow and consuming too
much memory.

"""

# pylint: disable=R0904
# R0904: Too many public methods
import logging
import os
import random

from ganeti import errors
from ganeti import locking
from ganeti import utils
from ganeti import constants
from ganeti import rpc
from ganeti import objects
from ganeti import serializer
from ganeti import uidpool
from ganeti import netutils
from ganeti import runtime
from ganeti import pathutils
from ganeti import network
# Single lock serializing all accesses to the configuration data; readers
# take it shared, writers exclusive (see the ssynchronized decorators below)
_config_lock = locking.SharedLock("ConfigWriter")

# job id used for resource management at config upgrade time
_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
64 def _ValidateConfig(data):
65 """Verifies that a configuration objects looks valid.
67 This only verifies the version of the configuration.
69 @raise errors.ConfigurationError: if the version differs from what
73 if data.version != constants.CONFIG_VERSION:
74 raise errors.ConfigVersionMismatch(constants.CONFIG_VERSION, data.version)
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  """
  def __init__(self):
    # map of execution-context id -> set of resources reserved by it
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Tell whether the resource is reserved by any execution context."""
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve a resource for the given execution context.

    @raise errors.ReservationError: if the resource is already reserved

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    if ec_id not in self._ec_reserved:
      self._ec_reserved[ec_id] = set([resource])
    else:
      self._ec_reserved[ec_id].add(resource)

  def DropECReservations(self, ec_id):
    """Drop all reservations made by the given execution context."""
    if ec_id in self._ec_reserved:
      del self._ec_reserved[ec_id]

  def GetReserved(self):
    """Return the set of all currently reserved resources."""
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def GetECReserved(self, ec_id):
    """ Used when you want to retrieve all reservations for a specific
        execution context. E.g when commiting reserved IPs for a specific
        network.

    """
    ec_reserved = set()
    if ec_id in self._ec_reserved:
      ec_reserved.update(self._ec_reserved[ec_id])
    return ec_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type

    Calls generate_one_fn repeatedly (bounded number of retries) until it
    produces a value not in C{existing} and not already reserved, then
    reserves and returns it.

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    retries = 64
    while retries > 0:
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
      retries -= 1
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource
143 def _MatchNameComponentIgnoreCase(short_name, names):
144 """Wrapper around L{utils.text.MatchNameComponent}.
147 return utils.MatchNameComponent(short_name, names, case_sensitive=False)
150 def _CheckInstanceDiskIvNames(disks):
151 """Checks if instance's disks' C{iv_name} attributes are in order.
153 @type disks: list of L{objects.Disk}
154 @param disks: List of disks
155 @rtype: list of tuples; (int, string, string)
156 @return: List of wrongly named disks, each tuple contains disk index,
157 expected and actual name
162 for (idx, disk) in enumerate(disks):
163 exp_iv_name = "disk/%s" % idx
164 if disk.iv_name != exp_iv_name:
165 result.append((idx, exp_iv_name, disk.iv_name))
class ConfigWriter(object):
  """The interface to the cluster configuration.

  @ivar _temporary_lvs: reservation manager for temporary LVs
  @ivar _all_rms: a list of all temporary reservation managers

  """
177 def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
178 accept_foreign=False):
180 self._lock = _config_lock
181 self._config_data = None
182 self._offline = offline
184 self._cfg_file = pathutils.CLUSTER_CONF_FILE
186 self._cfg_file = cfg_file
187 self._getents = _getents
188 self._temporary_ids = TemporaryReservationManager()
189 self._temporary_drbds = {}
190 self._temporary_macs = TemporaryReservationManager()
191 self._temporary_secrets = TemporaryReservationManager()
192 self._temporary_lvs = TemporaryReservationManager()
193 self._temporary_ips = TemporaryReservationManager()
194 self._all_rms = [self._temporary_ids, self._temporary_macs,
195 self._temporary_secrets, self._temporary_lvs,
197 # Note: in order to prevent errors when resolving our name in
198 # _DistributeConfig, we compute it here once and reuse it; it's
199 # better to raise an error before starting to modify the config
200 # file than after it was modified
201 self._my_hostname = netutils.Hostname.GetSysName()
202 self._last_cluster_serial = -1
205 self._OpenConfig(accept_foreign)
207 def _GetRpc(self, address_list):
208 """Returns RPC runner for configuration.
211 return rpc.ConfigRunner(self._context, address_list)
213 def SetContext(self, context):
214 """Sets Ganeti context.
217 self._context = context
219 # this method needs to be static, so that we can call it on the class
222 """Check if the cluster is configured.
225 return os.path.exists(pathutils.CLUSTER_CONF_FILE)
227 @locking.ssynchronized(_config_lock, shared=1)
228 def GetNdParams(self, node):
229 """Get the node params populated with cluster defaults.
231 @type node: L{objects.Node}
232 @param node: The node we want to know the params for
233 @return: A dict with the filled in node params
236 nodegroup = self._UnlockedGetNodeGroup(node.group)
237 return self._config_data.cluster.FillND(node, nodegroup)
239 @locking.ssynchronized(_config_lock, shared=1)
240 def GetInstanceDiskParams(self, instance):
241 """Get the disk params populated with inherit chain.
243 @type instance: L{objects.Instance}
244 @param instance: The instance we want to know the params for
245 @return: A dict with the filled in disk params
248 node = self._UnlockedGetNodeInfo(instance.primary_node)
249 nodegroup = self._UnlockedGetNodeGroup(node.group)
250 return self._UnlockedGetGroupDiskParams(nodegroup)
252 @locking.ssynchronized(_config_lock, shared=1)
253 def GetGroupDiskParams(self, group):
254 """Get the disk params populated with inherit chain.
256 @type group: L{objects.NodeGroup}
257 @param group: The group we want to know the params for
258 @return: A dict with the filled in disk params
261 return self._UnlockedGetGroupDiskParams(group)
263 def _UnlockedGetGroupDiskParams(self, group):
264 """Get the disk params populated with inherit chain down to node-group.
266 @type group: L{objects.NodeGroup}
267 @param group: The group we want to know the params for
268 @return: A dict with the filled in disk params
271 return self._config_data.cluster.SimpleFillDP(group.diskparams)
273 def _UnlockedGetNetworkMACPrefix(self, net_uuid):
274 """Return the network mac prefix if it exists or the cluster level default.
279 nobj = self._UnlockedGetNetwork(net_uuid)
281 prefix = nobj.mac_prefix
285 def _GenerateOneMAC(self, prefix=None):
286 """Return a function that randomly generates a MAC suffic
287 and appends it to the given prefix. If prefix is not given get
288 the cluster level default.
292 prefix = self._config_data.cluster.mac_prefix
295 byte1 = random.randrange(0, 256)
296 byte2 = random.randrange(0, 256)
297 byte3 = random.randrange(0, 256)
298 mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
303 @locking.ssynchronized(_config_lock, shared=1)
304 def GenerateMAC(self, net_uuid, ec_id):
305 """Generate a MAC for an instance.
307 This should check the current instances for duplicates.
310 existing = self._AllMACs()
311 prefix = self._UnlockedGetNetworkMACPrefix(net_uuid)
312 gen_mac = self._GenerateOneMAC(prefix)
313 return self._temporary_ids.Generate(existing, gen_mac, ec_id)
315 @locking.ssynchronized(_config_lock, shared=1)
316 def ReserveMAC(self, mac, ec_id):
317 """Reserve a MAC for an instance.
319 This only checks instances managed by this cluster, it does not
320 check for potential collisions elsewhere.
323 all_macs = self._AllMACs()
325 raise errors.ReservationError("mac already in use")
327 self._temporary_macs.Reserve(ec_id, mac)
329 def _UnlockedCommitTemporaryIps(self, ec_id):
330 """Commit all reserved IP address to their respective pools
333 for action, address, net_uuid in self._temporary_ips.GetECReserved(ec_id):
334 self._UnlockedCommitIp(action, net_uuid, address)
336 def _UnlockedCommitIp(self, action, net_uuid, address):
337 """Commit a reserved IP address to an IP pool.
339 The IP address is taken from the network's IP pool and marked as reserved.
342 nobj = self._UnlockedGetNetwork(net_uuid)
343 pool = network.AddressPool(nobj)
344 if action == constants.RESERVE_ACTION:
345 pool.Reserve(address)
346 elif action == constants.RELEASE_ACTION:
347 pool.Release(address)
349 def _UnlockedReleaseIp(self, net_uuid, address, ec_id):
350 """Give a specific IP address back to an IP pool.
352 The IP address is returned to the IP pool designated by pool_id and marked
356 self._temporary_ips.Reserve(ec_id,
357 (constants.RELEASE_ACTION, address, net_uuid))
359 @locking.ssynchronized(_config_lock, shared=1)
360 def ReleaseIp(self, net_uuid, address, ec_id):
361 """Give a specified IP address back to an IP pool.
363 This is just a wrapper around _UnlockedReleaseIp.
367 self._UnlockedReleaseIp(net_uuid, address, ec_id)
369 @locking.ssynchronized(_config_lock, shared=1)
370 def GenerateIp(self, net_uuid, ec_id):
371 """Find a free IPv4 address for an instance.
374 nobj = self._UnlockedGetNetwork(net_uuid)
375 pool = network.AddressPool(nobj)
379 ip = pool.GenerateFree()
380 except errors.AddressPoolError:
381 raise errors.ReservationError("Cannot generate IP. Network is full")
382 return (constants.RESERVE_ACTION, ip, net_uuid)
384 _, address, _ = self._temporary_ips.Generate([], gen_one, ec_id)
387 def _UnlockedReserveIp(self, net_uuid, address, ec_id):
388 """Reserve a given IPv4 address for use by an instance.
391 nobj = self._UnlockedGetNetwork(net_uuid)
392 pool = network.AddressPool(nobj)
394 isreserved = pool.IsReserved(address)
395 except errors.AddressPoolError:
396 raise errors.ReservationError("IP address not in network")
398 raise errors.ReservationError("IP address already in use")
400 return self._temporary_ips.Reserve(ec_id,
401 (constants.RESERVE_ACTION,
404 @locking.ssynchronized(_config_lock, shared=1)
405 def ReserveIp(self, net_uuid, address, ec_id):
406 """Reserve a given IPv4 address for use by an instance.
410 return self._UnlockedReserveIp(net_uuid, address, ec_id)
412 @locking.ssynchronized(_config_lock, shared=1)
413 def ReserveLV(self, lv_name, ec_id):
414 """Reserve an VG/LV pair for an instance.
416 @type lv_name: string
417 @param lv_name: the logical volume name to reserve
420 all_lvs = self._AllLVs()
421 if lv_name in all_lvs:
422 raise errors.ReservationError("LV already in use")
424 self._temporary_lvs.Reserve(ec_id, lv_name)
426 @locking.ssynchronized(_config_lock, shared=1)
427 def GenerateDRBDSecret(self, ec_id):
428 """Generate a DRBD secret.
430 This checks the current disks for duplicates.
433 return self._temporary_secrets.Generate(self._AllDRBDSecrets(),
434 utils.GenerateSecret,
438 """Compute the list of all LVs.
442 for instance in self._config_data.instances.values():
443 node_data = instance.MapLVsByNode()
444 for lv_list in node_data.values():
445 lvnames.update(lv_list)
449 """Compute the list of all Disks (recursively, including children).
452 def DiskAndAllChildren(disk):
453 """Returns a list containing the given disk and all of his children.
458 for child_disk in disk.children:
459 disks.extend(DiskAndAllChildren(child_disk))
463 for instance in self._config_data.instances.values():
464 for disk in instance.disks:
465 disks.extend(DiskAndAllChildren(disk))
469 """Compute the list of all NICs.
473 for instance in self._config_data.instances.values():
474 nics.extend(instance.nics)
477 def _AllIDs(self, include_temporary):
478 """Compute the list of all UUIDs and names we have.
480 @type include_temporary: boolean
481 @param include_temporary: whether to include the _temporary_ids set
483 @return: a set of IDs
487 if include_temporary:
488 existing.update(self._temporary_ids.GetReserved())
489 existing.update(self._AllLVs())
490 existing.update(self._config_data.instances.keys())
491 existing.update(self._config_data.nodes.keys())
492 existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
495 def _GenerateUniqueID(self, ec_id):
496 """Generate an unique UUID.
498 This checks the current node, instances and disk names for
502 @return: the unique id
505 existing = self._AllIDs(include_temporary=False)
506 return self._temporary_ids.Generate(existing, utils.NewUUID, ec_id)
508 @locking.ssynchronized(_config_lock, shared=1)
509 def GenerateUniqueID(self, ec_id):
510 """Generate an unique ID.
512 This is just a wrapper over the unlocked version.
515 @param ec_id: unique id for the job to reserve the id to
518 return self._GenerateUniqueID(ec_id)
521 """Return all MACs present in the config.
524 @return: the list of all MACs
528 for instance in self._config_data.instances.values():
529 for nic in instance.nics:
530 result.append(nic.mac)
534 def _AllDRBDSecrets(self):
535 """Return all DRBD secrets present in the config.
538 @return: the list of all DRBD secrets
541 def helper(disk, result):
542 """Recursively gather secrets from this disk."""
543 if disk.dev_type == constants.DT_DRBD8:
544 result.append(disk.logical_id[5])
546 for child in disk.children:
547 helper(child, result)
550 for instance in self._config_data.instances.values():
551 for disk in instance.disks:
556 def _CheckDiskIDs(self, disk, l_ids):
557 """Compute duplicate disk IDs
559 @type disk: L{objects.Disk}
560 @param disk: the disk at which to start searching
562 @param l_ids: list of current logical ids
564 @return: a list of error messages
568 if disk.logical_id is not None:
569 if disk.logical_id in l_ids:
570 result.append("duplicate logical id %s" % str(disk.logical_id))
572 l_ids.append(disk.logical_id)
575 for child in disk.children:
576 result.extend(self._CheckDiskIDs(child, l_ids))
579 def _UnlockedVerifyConfig(self):
583 @return: a list of error messages; a non-empty list signifies
587 # pylint: disable=R0914
591 data = self._config_data
592 cluster = data.cluster
595 # global cluster checks
596 if not cluster.enabled_hypervisors:
597 result.append("enabled hypervisors list doesn't have any entries")
598 invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
600 result.append("enabled hypervisors contains invalid entries: %s" %
601 utils.CommaJoin(invalid_hvs))
602 missing_hvp = (set(cluster.enabled_hypervisors) -
603 set(cluster.hvparams.keys()))
605 result.append("hypervisor parameters missing for the enabled"
606 " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
608 if not cluster.enabled_disk_templates:
609 result.append("enabled disk templates list doesn't have any entries")
610 invalid_disk_templates = set(cluster.enabled_disk_templates) \
611 - constants.DISK_TEMPLATES
612 if invalid_disk_templates:
613 result.append("enabled disk templates list contains invalid entries:"
614 " %s" % utils.CommaJoin(invalid_disk_templates))
616 if cluster.master_node not in data.nodes:
617 result.append("cluster has invalid primary node '%s'" %
620 def _helper(owner, attr, value, template):
622 utils.ForceDictType(value, template)
623 except errors.GenericError, err:
624 result.append("%s has invalid %s: %s" % (owner, attr, err))
626 def _helper_nic(owner, params):
628 objects.NIC.CheckParameterSyntax(params)
629 except errors.ConfigurationError, err:
630 result.append("%s has invalid nicparams: %s" % (owner, err))
632 def _helper_ipolicy(owner, ipolicy, iscluster):
634 objects.InstancePolicy.CheckParameterSyntax(ipolicy, iscluster)
635 except errors.ConfigurationError, err:
636 result.append("%s has invalid instance policy: %s" % (owner, err))
637 for key, value in ipolicy.items():
638 if key == constants.ISPECS_MINMAX:
639 for k in range(len(value)):
640 _helper_ispecs(owner, "ipolicy/%s[%s]" % (key, k), value[k])
641 elif key == constants.ISPECS_STD:
642 _helper(owner, "ipolicy/" + key, value,
643 constants.ISPECS_PARAMETER_TYPES)
645 # FIXME: assuming list type
646 if key in constants.IPOLICY_PARAMETERS:
650 if not isinstance(value, exp_type):
651 result.append("%s has invalid instance policy: for %s,"
652 " expecting %s, got %s" %
653 (owner, key, exp_type.__name__, type(value)))
655 def _helper_ispecs(owner, parentkey, params):
656 for (key, value) in params.items():
657 fullkey = "/".join([parentkey, key])
658 _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)
660 # check cluster parameters
661 _helper("cluster", "beparams", cluster.SimpleFillBE({}),
662 constants.BES_PARAMETER_TYPES)
663 _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
664 constants.NICS_PARAMETER_TYPES)
665 _helper_nic("cluster", cluster.SimpleFillNIC({}))
666 _helper("cluster", "ndparams", cluster.SimpleFillND({}),
667 constants.NDS_PARAMETER_TYPES)
668 _helper_ipolicy("cluster", cluster.ipolicy, True)
670 if constants.DT_RBD in cluster.diskparams:
671 access = cluster.diskparams[constants.DT_RBD][constants.RBD_ACCESS]
672 if access not in constants.DISK_VALID_ACCESS_MODES:
674 "Invalid value of '%s:%s': '%s' (expected one of %s)" % (
675 constants.DT_RBD, constants.RBD_ACCESS, access,
676 utils.CommaJoin(constants.DISK_VALID_ACCESS_MODES)
680 # per-instance checks
681 for instance_uuid in data.instances:
682 instance = data.instances[instance_uuid]
683 if instance.uuid != instance_uuid:
684 result.append("instance '%s' is indexed by wrong UUID '%s'" %
685 (instance.name, instance_uuid))
686 if instance.primary_node not in data.nodes:
687 result.append("instance '%s' has invalid primary node '%s'" %
688 (instance.name, instance.primary_node))
689 for snode in instance.secondary_nodes:
690 if snode not in data.nodes:
691 result.append("instance '%s' has invalid secondary node '%s'" %
692 (instance.name, snode))
693 for idx, nic in enumerate(instance.nics):
694 if nic.mac in seen_macs:
695 result.append("instance '%s' has NIC %d mac %s duplicate" %
696 (instance.name, idx, nic.mac))
698 seen_macs.append(nic.mac)
700 filled = cluster.SimpleFillNIC(nic.nicparams)
701 owner = "instance %s nic %d" % (instance.name, idx)
702 _helper(owner, "nicparams",
703 filled, constants.NICS_PARAMETER_TYPES)
704 _helper_nic(owner, filled)
706 # disk template checks
707 if not instance.disk_template in data.cluster.enabled_disk_templates:
708 result.append("instance '%s' uses the disabled disk template '%s'." %
709 (instance.name, instance.disk_template))
712 if instance.beparams:
713 _helper("instance %s" % instance.name, "beparams",
714 cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)
716 # gather the drbd ports for duplicate checks
717 for (idx, dsk) in enumerate(instance.disks):
718 if dsk.dev_type in constants.DTS_DRBD:
719 tcp_port = dsk.logical_id[2]
720 if tcp_port not in ports:
722 ports[tcp_port].append((instance.name, "drbd disk %s" % idx))
723 # gather network port reservation
724 net_port = getattr(instance, "network_port", None)
725 if net_port is not None:
726 if net_port not in ports:
728 ports[net_port].append((instance.name, "network port"))
730 # instance disk verify
731 for idx, disk in enumerate(instance.disks):
732 result.extend(["instance '%s' disk %d error: %s" %
733 (instance.name, idx, msg) for msg in disk.Verify()])
734 result.extend(self._CheckDiskIDs(disk, seen_lids))
736 wrong_names = _CheckInstanceDiskIvNames(instance.disks)
738 tmp = "; ".join(("name of disk %s should be '%s', but is '%s'" %
739 (idx, exp_name, actual_name))
740 for (idx, exp_name, actual_name) in wrong_names)
742 result.append("Instance '%s' has wrongly named disks: %s" %
743 (instance.name, tmp))
745 # cluster-wide pool of free ports
746 for free_port in cluster.tcpudp_port_pool:
747 if free_port not in ports:
748 ports[free_port] = []
749 ports[free_port].append(("cluster", "port marked as free"))
751 # compute tcp/udp duplicate ports
757 txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
758 result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
760 # highest used tcp port check
762 if keys[-1] > cluster.highest_used_port:
763 result.append("Highest used port mismatch, saved %s, computed %s" %
764 (cluster.highest_used_port, keys[-1]))
766 if not data.nodes[cluster.master_node].master_candidate:
767 result.append("Master node is not a master candidate")
769 # master candidate checks
770 mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
772 result.append("Not enough master candidates: actual %d, target %d" %
776 for node_uuid, node in data.nodes.items():
777 if node.uuid != node_uuid:
778 result.append("Node '%s' is indexed by wrong UUID '%s'" %
779 (node.name, node_uuid))
780 if [node.master_candidate, node.drained, node.offline].count(True) > 1:
781 result.append("Node %s state is invalid: master_candidate=%s,"
782 " drain=%s, offline=%s" %
783 (node.name, node.master_candidate, node.drained,
785 if node.group not in data.nodegroups:
786 result.append("Node '%s' has invalid group '%s'" %
787 (node.name, node.group))
789 _helper("node %s" % node.name, "ndparams",
790 cluster.FillND(node, data.nodegroups[node.group]),
791 constants.NDS_PARAMETER_TYPES)
792 used_globals = constants.NDC_GLOBALS.intersection(node.ndparams)
794 result.append("Node '%s' has some global parameters set: %s" %
795 (node.name, utils.CommaJoin(used_globals)))
798 nodegroups_names = set()
799 for nodegroup_uuid in data.nodegroups:
800 nodegroup = data.nodegroups[nodegroup_uuid]
801 if nodegroup.uuid != nodegroup_uuid:
802 result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
803 % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
804 if utils.UUID_RE.match(nodegroup.name.lower()):
805 result.append("node group '%s' (uuid: '%s') has uuid-like name" %
806 (nodegroup.name, nodegroup.uuid))
807 if nodegroup.name in nodegroups_names:
808 result.append("duplicate node group name '%s'" % nodegroup.name)
810 nodegroups_names.add(nodegroup.name)
811 group_name = "group %s" % nodegroup.name
812 _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy),
814 if nodegroup.ndparams:
815 _helper(group_name, "ndparams",
816 cluster.SimpleFillND(nodegroup.ndparams),
817 constants.NDS_PARAMETER_TYPES)
820 _, duplicates = self._UnlockedComputeDRBDMap()
821 for node, minor, instance_a, instance_b in duplicates:
822 result.append("DRBD minor %d on node %s is assigned twice to instances"
823 " %s and %s" % (minor, node, instance_a, instance_b))
826 default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
829 def _AddIpAddress(ip, name):
830 ips.setdefault(ip, []).append(name)
832 _AddIpAddress(cluster.master_ip, "cluster_ip")
834 for node in data.nodes.values():
835 _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
836 if node.secondary_ip != node.primary_ip:
837 _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
839 for instance in data.instances.values():
840 for idx, nic in enumerate(instance.nics):
844 nicparams = objects.FillDict(default_nicparams, nic.nicparams)
845 nic_mode = nicparams[constants.NIC_MODE]
846 nic_link = nicparams[constants.NIC_LINK]
848 if nic_mode == constants.NIC_MODE_BRIDGED:
849 link = "bridge:%s" % nic_link
850 elif nic_mode == constants.NIC_MODE_ROUTED:
851 link = "route:%s" % nic_link
853 raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
855 _AddIpAddress("%s/%s/%s" % (link, nic.ip, nic.network),
856 "instance:%s/nic:%d" % (instance.name, idx))
858 for ip, owners in ips.items():
860 result.append("IP address %s is used by multiple owners: %s" %
861 (ip, utils.CommaJoin(owners)))
865 @locking.ssynchronized(_config_lock, shared=1)
866 def VerifyConfig(self):
869 This is just a wrapper over L{_UnlockedVerifyConfig}.
872 @return: a list of error messages; a non-empty list signifies
876 return self._UnlockedVerifyConfig()
878 @locking.ssynchronized(_config_lock)
879 def AddTcpUdpPort(self, port):
880 """Adds a new port to the available port pool.
882 @warning: this method does not "flush" the configuration (via
883 L{_WriteConfig}); callers should do that themselves once the
884 configuration is stable
887 if not isinstance(port, int):
888 raise errors.ProgrammerError("Invalid type passed for port")
890 self._config_data.cluster.tcpudp_port_pool.add(port)
892 @locking.ssynchronized(_config_lock, shared=1)
893 def GetPortList(self):
894 """Returns a copy of the current port list.
897 return self._config_data.cluster.tcpudp_port_pool.copy()
899 @locking.ssynchronized(_config_lock)
900 def AllocatePort(self):
903 The port will be taken from the available port pool or from the
904 default port range (and in this case we increase
908 # If there are TCP/IP ports configured, we use them first.
909 if self._config_data.cluster.tcpudp_port_pool:
910 port = self._config_data.cluster.tcpudp_port_pool.pop()
912 port = self._config_data.cluster.highest_used_port + 1
913 if port >= constants.LAST_DRBD_PORT:
914 raise errors.ConfigurationError("The highest used port is greater"
915 " than %s. Aborting." %
916 constants.LAST_DRBD_PORT)
917 self._config_data.cluster.highest_used_port = port
922 def _UnlockedComputeDRBDMap(self):
923 """Compute the used DRBD minor/nodes.
926 @return: dictionary of node_uuid: dict of minor: instance_uuid;
927 the returned dict will have all the nodes in it (even if with
928 an empty list), and a list of duplicates; if the duplicates
929 list is not empty, the configuration is corrupted and its caller
930 should raise an exception
933 def _AppendUsedMinors(get_node_name_fn, instance, disk, used):
935 if disk.dev_type == constants.DT_DRBD8 and len(disk.logical_id) >= 5:
936 node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
937 for node_uuid, minor in ((node_a, minor_a), (node_b, minor_b)):
938 assert node_uuid in used, \
939 ("Node '%s' of instance '%s' not found in node list" %
940 (get_node_name_fn(node_uuid), instance.name))
941 if minor in used[node_uuid]:
942 duplicates.append((node_uuid, minor, instance.uuid,
943 used[node_uuid][minor]))
945 used[node_uuid][minor] = instance.uuid
947 for child in disk.children:
948 duplicates.extend(_AppendUsedMinors(get_node_name_fn, instance, child,
953 my_dict = dict((node_uuid, {}) for node_uuid in self._config_data.nodes)
954 for instance in self._config_data.instances.itervalues():
955 for disk in instance.disks:
956 duplicates.extend(_AppendUsedMinors(self._UnlockedGetNodeName,
957 instance, disk, my_dict))
958 for (node_uuid, minor), inst_uuid in self._temporary_drbds.iteritems():
959 if minor in my_dict[node_uuid] and my_dict[node_uuid][minor] != inst_uuid:
960 duplicates.append((node_uuid, minor, inst_uuid,
961 my_dict[node_uuid][minor]))
963 my_dict[node_uuid][minor] = inst_uuid
964 return my_dict, duplicates
966 @locking.ssynchronized(_config_lock)
967 def ComputeDRBDMap(self):
968 """Compute the used DRBD minor/nodes.
970 This is just a wrapper over L{_UnlockedComputeDRBDMap}.
972 @return: dictionary of node_uuid: dict of minor: instance_uuid;
973 the returned dict will have all the nodes in it (even if with
977 d_map, duplicates = self._UnlockedComputeDRBDMap()
979 raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
983 @locking.ssynchronized(_config_lock)
984 def AllocateDRBDMinor(self, node_uuids, inst_uuid):
985 """Allocate a drbd minor.
987 The free minor will be automatically computed from the existing
988 devices. A node can be given multiple times in order to allocate
989 multiple minors. The result is the list of minors, in the same
990 order as the passed nodes.
992 @type inst_uuid: string
993 @param inst_uuid: the instance for which we allocate minors
996 assert isinstance(inst_uuid, basestring), \
997 "Invalid argument '%s' passed to AllocateDRBDMinor" % inst_uuid
999 d_map, duplicates = self._UnlockedComputeDRBDMap()
1001 raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
1004 for nuuid in node_uuids:
1005 ndata = d_map[nuuid]
1007 # no minors used, we can start at 0
1009 ndata[0] = inst_uuid
1010 self._temporary_drbds[(nuuid, 0)] = inst_uuid
1014 ffree = utils.FirstFree(keys)
1016 # return the next minor
1017 # TODO: implement high-limit check
1018 minor = keys[-1] + 1
1021 # double-check minor against current instances
1022 assert minor not in d_map[nuuid], \
1023 ("Attempt to reuse allocated DRBD minor %d on node %s,"
1024 " already allocated to instance %s" %
1025 (minor, nuuid, d_map[nuuid][minor]))
1026 ndata[minor] = inst_uuid
1027 # double-check minor against reservation
1028 r_key = (nuuid, minor)
1029 assert r_key not in self._temporary_drbds, \
1030 ("Attempt to reuse reserved DRBD minor %d on node %s,"
1031 " reserved for instance %s" %
1032 (minor, nuuid, self._temporary_drbds[r_key]))
1033 self._temporary_drbds[r_key] = inst_uuid
1034 result.append(minor)
1035 logging.debug("Request to allocate drbd minors, input: %s, returning %s",
1039 def _UnlockedReleaseDRBDMinors(self, inst_uuid):
1040 """Release temporary drbd minors allocated for a given instance.
1042 @type inst_uuid: string
1043 @param inst_uuid: the instance for which temporary minors should be
1047 assert isinstance(inst_uuid, basestring), \
1048 "Invalid argument passed to ReleaseDRBDMinors"
1049 for key, uuid in self._temporary_drbds.items():
1050 if uuid == inst_uuid:
1051 del self._temporary_drbds[key]
1053 @locking.ssynchronized(_config_lock)
1054 def ReleaseDRBDMinors(self, inst_uuid):
1055 """Release temporary drbd minors allocated for a given instance.
1057 This should be called on the error paths, on the success paths
1058 it's automatically called by the ConfigWriter add and update
1061 This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.
1063 @type inst_uuid: string
1064 @param inst_uuid: the instance for which temporary minors should be
1068 self._UnlockedReleaseDRBDMinors(inst_uuid)
1070 @locking.ssynchronized(_config_lock, shared=1)
1071 def GetConfigVersion(self):
1072 """Get the configuration version.
1074 @return: Config version
1077 return self._config_data.version
1079 @locking.ssynchronized(_config_lock, shared=1)
1080 def GetClusterName(self):
1081 """Get cluster name.
1083 @return: Cluster name
1086 return self._config_data.cluster.cluster_name
1088 @locking.ssynchronized(_config_lock, shared=1)
1089 def GetMasterNode(self):
1090 """Get the UUID of the master node for this cluster.
1092 @return: Master node UUID
1095 return self._config_data.cluster.master_node
1097 @locking.ssynchronized(_config_lock, shared=1)
1098 def GetMasterNodeName(self):
1099 """Get the hostname of the master node for this cluster.
1101 @return: Master node hostname
1104 return self._UnlockedGetNodeName(self._config_data.cluster.master_node)
1106 @locking.ssynchronized(_config_lock, shared=1)
1107 def GetMasterNodeInfo(self):
1108 """Get the master node information for this cluster.
1110 @rtype: objects.Node
1111 @return: Master node L{objects.Node} object
1114 return self._UnlockedGetNodeInfo(self._config_data.cluster.master_node)
1116 @locking.ssynchronized(_config_lock, shared=1)
1117 def GetMasterIP(self):
1118 """Get the IP of the master node for this cluster.
1123 return self._config_data.cluster.master_ip
1125 @locking.ssynchronized(_config_lock, shared=1)
1126 def GetMasterNetdev(self):
1127 """Get the master network device for this cluster.
1130 return self._config_data.cluster.master_netdev
1132 @locking.ssynchronized(_config_lock, shared=1)
1133 def GetMasterNetmask(self):
1134 """Get the netmask of the master node for this cluster.
1137 return self._config_data.cluster.master_netmask
1139 @locking.ssynchronized(_config_lock, shared=1)
1140 def GetUseExternalMipScript(self):
1141 """Get flag representing whether to use the external master IP setup script.
1144 return self._config_data.cluster.use_external_mip_script
1146 @locking.ssynchronized(_config_lock, shared=1)
1147 def GetFileStorageDir(self):
1148 """Get the file storage dir for this cluster.
1151 return self._config_data.cluster.file_storage_dir
1153 @locking.ssynchronized(_config_lock, shared=1)
1154 def GetSharedFileStorageDir(self):
1155 """Get the shared file storage dir for this cluster.
1158 return self._config_data.cluster.shared_file_storage_dir
1160 @locking.ssynchronized(_config_lock, shared=1)
1161 def GetHypervisorType(self):
1162 """Get the hypervisor type for this cluster.
1165 return self._config_data.cluster.enabled_hypervisors[0]
1167 @locking.ssynchronized(_config_lock, shared=1)
1168 def GetRsaHostKey(self):
1169 """Return the rsa hostkey from the config.
1172 @return: the rsa hostkey
1175 return self._config_data.cluster.rsahostkeypub
1177 @locking.ssynchronized(_config_lock, shared=1)
1178 def GetDsaHostKey(self):
1179 """Return the dsa hostkey from the config.
1182 @return: the dsa hostkey
1185 return self._config_data.cluster.dsahostkeypub
1187 @locking.ssynchronized(_config_lock, shared=1)
1188 def GetDefaultIAllocator(self):
1189 """Get the default instance allocator for this cluster.
1192 return self._config_data.cluster.default_iallocator
1194 @locking.ssynchronized(_config_lock, shared=1)
1195 def GetPrimaryIPFamily(self):
1196 """Get cluster primary ip family.
1198 @return: primary ip family
1201 return self._config_data.cluster.primary_ip_family
1203 @locking.ssynchronized(_config_lock, shared=1)
1204 def GetMasterNetworkParameters(self):
1205 """Get network parameters of the master node.
1207 @rtype: L{object.MasterNetworkParameters}
1208 @return: network parameters of the master node
1211 cluster = self._config_data.cluster
1212 result = objects.MasterNetworkParameters(
1213 uuid=cluster.master_node, ip=cluster.master_ip,
1214 netmask=cluster.master_netmask, netdev=cluster.master_netdev,
1215 ip_family=cluster.primary_ip_family)
  @locking.ssynchronized(_config_lock)
  def AddNodeGroup(self, group, ec_id, check_uuid=True):
    """Add a node group to the configuration.

    This method calls group.UpgradeConfig() to fill any missing attributes
    according to their default values.

    @type group: L{objects.NodeGroup}
    @param group: the NodeGroup object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: bool
    @param check_uuid: add an UUID to the group if it doesn't have one or, if
                       it does, ensure that it does not exist in the
                       configuration already

    """
    self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
1239 def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
1240 """Add a node group to the configuration.
1243 logging.info("Adding node group %s to configuration", group.name)
1245 # Some code might need to add a node group with a pre-populated UUID
1246 # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
1247 # the "does this UUID" exist already check.
1249 self._EnsureUUID(group, ec_id)
1252 existing_uuid = self._UnlockedLookupNodeGroup(group.name)
1253 except errors.OpPrereqError:
1256 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
1257 " node group (UUID: %s)" %
1258 (group.name, existing_uuid),
1259 errors.ECODE_EXISTS)
1262 group.ctime = group.mtime = time.time()
1263 group.UpgradeConfig()
1265 self._config_data.nodegroups[group.uuid] = group
1266 self._config_data.cluster.serial_no += 1
1268 @locking.ssynchronized(_config_lock)
1269 def RemoveNodeGroup(self, group_uuid):
1270 """Remove a node group from the configuration.
1272 @type group_uuid: string
1273 @param group_uuid: the UUID of the node group to remove
1276 logging.info("Removing node group %s from configuration", group_uuid)
1278 if group_uuid not in self._config_data.nodegroups:
1279 raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)
1281 assert len(self._config_data.nodegroups) != 1, \
1282 "Group '%s' is the only group, cannot be removed" % group_uuid
1284 del self._config_data.nodegroups[group_uuid]
1285 self._config_data.cluster.serial_no += 1
1288 def _UnlockedLookupNodeGroup(self, target):
1289 """Lookup a node group's UUID.
1291 @type target: string or None
1292 @param target: group name or UUID or None to look for the default
1294 @return: nodegroup UUID
1295 @raises errors.OpPrereqError: when the target group cannot be found
1299 if len(self._config_data.nodegroups) != 1:
1300 raise errors.OpPrereqError("More than one node group exists. Target"
1301 " group must be specified explicitly.")
1303 return self._config_data.nodegroups.keys()[0]
1304 if target in self._config_data.nodegroups:
1306 for nodegroup in self._config_data.nodegroups.values():
1307 if nodegroup.name == target:
1308 return nodegroup.uuid
1309 raise errors.OpPrereqError("Node group '%s' not found" % target,
  @locking.ssynchronized(_config_lock, shared=1)
  def LookupNodeGroup(self, target):
    """Lookup a node group's UUID.

    This function is just a wrapper over L{_UnlockedLookupNodeGroup}.

    @type target: string or None
    @param target: group name or UUID or None to look for the default

    @rtype: string
    @return: nodegroup UUID

    """
    return self._UnlockedLookupNodeGroup(target)
1326 def _UnlockedGetNodeGroup(self, uuid):
1327 """Lookup a node group.
1330 @param uuid: group UUID
1331 @rtype: L{objects.NodeGroup} or None
1332 @return: nodegroup object, or None if not found
1335 if uuid not in self._config_data.nodegroups:
1338 return self._config_data.nodegroups[uuid]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroup(self, uuid):
    """Lookup a node group.

    This is just a locked wrapper over L{_UnlockedGetNodeGroup}.

    @type uuid: string
    @param uuid: group UUID
    @rtype: L{objects.NodeGroup} or None
    @return: nodegroup object, or None if not found

    """
    return self._UnlockedGetNodeGroup(uuid)
1352 @locking.ssynchronized(_config_lock, shared=1)
1353 def GetAllNodeGroupsInfo(self):
1354 """Get the configuration of all node groups.
1357 return dict(self._config_data.nodegroups)
1359 @locking.ssynchronized(_config_lock, shared=1)
1360 def GetNodeGroupList(self):
1361 """Get a list of node groups.
1364 return self._config_data.nodegroups.keys()
1366 @locking.ssynchronized(_config_lock, shared=1)
1367 def GetNodeGroupMembersByNodes(self, nodes):
1368 """Get nodes which are member in the same nodegroups as the given nodes.
1371 ngfn = lambda node_uuid: self._UnlockedGetNodeInfo(node_uuid).group
1372 return frozenset(member_uuid
1373 for node_uuid in nodes
1375 self._UnlockedGetNodeGroup(ngfn(node_uuid)).members)
1377 @locking.ssynchronized(_config_lock, shared=1)
1378 def GetMultiNodeGroupInfo(self, group_uuids):
1379 """Get the configuration of multiple node groups.
1381 @param group_uuids: List of node group UUIDs
1383 @return: List of tuples of (group_uuid, group_info)
1386 return [(uuid, self._UnlockedGetNodeGroup(uuid)) for uuid in group_uuids]
  @locking.ssynchronized(_config_lock)
  def AddInstance(self, instance, ec_id):
    """Add an instance to the config.

    This should be used after creating a new instance.

    @type instance: L{objects.Instance}
    @param instance: the instance object
    @type ec_id: string
    @param ec_id: execution context id under which IPs were reserved

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
      all_lvs = instance.MapLVsByNode()
      logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)

    # MAC addresses must be cluster-unique; check against all known NICs.
    all_macs = self._AllMACs()
    for nic in instance.nics:
      if nic.mac in all_macs:
        raise errors.ConfigurationError("Cannot add instance %s:"
                                        " MAC address '%s' already in use." %
                                        (instance.name, nic.mac))

    # The UUID was reserved earlier (include_temporary=False means a
    # temporarily generated UUID is acceptable here).
    self._CheckUniqueUUID(instance, include_temporary=False)

    instance.serial_no = 1
    instance.ctime = instance.mtime = time.time()
    self._config_data.instances[instance.uuid] = instance
    self._config_data.cluster.serial_no += 1
    # Adding succeeded: drop temporary DRBD minor reservations and commit
    # the IP addresses reserved under this execution context.
    self._UnlockedReleaseDRBDMinors(instance.uuid)
    self._UnlockedCommitTemporaryIps(ec_id)
1422 def _EnsureUUID(self, item, ec_id):
1423 """Ensures a given object has a valid UUID.
1425 @param item: the instance or node to be checked
1426 @param ec_id: the execution context id for the uuid reservation
1430 item.uuid = self._GenerateUniqueID(ec_id)
1432 self._CheckUniqueUUID(item, include_temporary=True)
1434 def _CheckUniqueUUID(self, item, include_temporary):
1435 """Checks that the UUID of the given object is unique.
1437 @param item: the instance or node to be checked
1438 @param include_temporary: whether temporarily generated UUID's should be
1439 included in the check. If the UUID of the item to be checked is
1440 a temporarily generated one, this has to be C{False}.
1444 raise errors.ConfigurationError("'%s' must have an UUID" % (item.name,))
1445 if item.uuid in self._AllIDs(include_temporary=include_temporary):
1446 raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
1447 " in use" % (item.name, item.uuid))
1449 def _SetInstanceStatus(self, inst_uuid, status, disks_active):
1450 """Set the instance's status to a given value.
1453 if inst_uuid not in self._config_data.instances:
1454 raise errors.ConfigurationError("Unknown instance '%s'" %
1456 instance = self._config_data.instances[inst_uuid]
1459 status = instance.admin_state
1460 if disks_active is None:
1461 disks_active = instance.disks_active
1463 assert status in constants.ADMINST_ALL, \
1464 "Invalid status '%s' passed to SetInstanceStatus" % (status,)
1466 if instance.admin_state != status or \
1467 instance.disks_active != disks_active:
1468 instance.admin_state = status
1469 instance.disks_active = disks_active
1470 instance.serial_no += 1
1471 instance.mtime = time.time()
  @locking.ssynchronized(_config_lock)
  def MarkInstanceUp(self, inst_uuid):
    """Mark the instance status to up in the config.

    This also sets the instance disks active flag.

    @type inst_uuid: string
    @param inst_uuid: the UUID of the instance to mark as running

    """
    self._SetInstanceStatus(inst_uuid, constants.ADMINST_UP, True)
  @locking.ssynchronized(_config_lock)
  def MarkInstanceOffline(self, inst_uuid):
    """Mark the instance status to down in the config.

    This also clears the instance disks active flag.

    @type inst_uuid: string
    @param inst_uuid: the UUID of the instance to mark as offline

    """
    self._SetInstanceStatus(inst_uuid, constants.ADMINST_OFFLINE, False)
  @locking.ssynchronized(_config_lock)
  def RemoveInstance(self, inst_uuid):
    """Remove the instance from the configuration.

    @type inst_uuid: string
    @param inst_uuid: the UUID of the instance to remove

    """
    if inst_uuid not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    # If a network port has been allocated to the instance,
    # return it to the pool of free ports.
    inst = self._config_data.instances[inst_uuid]
    network_port = getattr(inst, "network_port", None)
    if network_port is not None:
      self._config_data.cluster.tcpudp_port_pool.add(network_port)

    instance = self._UnlockedGetInstanceInfo(inst_uuid)

    for nic in instance.nics:
      if nic.network and nic.ip:
        # Return all IP addresses to the respective address pools
        self._UnlockedCommitIp(constants.RELEASE_ACTION, nic.network, nic.ip)

    del self._config_data.instances[inst_uuid]
    self._config_data.cluster.serial_no += 1
1518 @locking.ssynchronized(_config_lock)
1519 def RenameInstance(self, inst_uuid, new_name):
1520 """Rename an instance.
1522 This needs to be done in ConfigWriter and not by RemoveInstance
1523 combined with AddInstance as only we can guarantee an atomic
1527 if inst_uuid not in self._config_data.instances:
1528 raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)
1530 inst = self._config_data.instances[inst_uuid]
1531 inst.name = new_name
1533 for (idx, disk) in enumerate(inst.disks):
1534 if disk.dev_type in [constants.DT_FILE, constants.DT_SHARED_FILE]:
1535 # rename the file paths in logical and physical id
1536 file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
1537 disk.logical_id = (disk.logical_id[0],
1538 utils.PathJoin(file_storage_dir, inst.name,
1541 # Force update of ssconf files
1542 self._config_data.cluster.serial_no += 1
  @locking.ssynchronized(_config_lock)
  def MarkInstanceDown(self, inst_uuid):
    """Mark the status of an instance to down in the configuration.

    This does not touch the instance disks active flag, as shut down instances
    can still have active disks.

    @type inst_uuid: string
    @param inst_uuid: the UUID of the instance to mark as down

    """
    self._SetInstanceStatus(inst_uuid, constants.ADMINST_DOWN, None)
  @locking.ssynchronized(_config_lock)
  def MarkInstanceDisksActive(self, inst_uuid):
    """Mark the status of instance disks active.

    @type inst_uuid: string
    @param inst_uuid: the UUID of the instance whose disks became active

    """
    self._SetInstanceStatus(inst_uuid, None, True)
  @locking.ssynchronized(_config_lock)
  def MarkInstanceDisksInactive(self, inst_uuid):
    """Mark the status of instance disks inactive.

    @type inst_uuid: string
    @param inst_uuid: the UUID of the instance whose disks became inactive

    """
    self._SetInstanceStatus(inst_uuid, None, False)
1570 def _UnlockedGetInstanceList(self):
1571 """Get the list of instances.
1573 This function is for internal use, when the config lock is already held.
1576 return self._config_data.instances.keys()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceList(self):
    """Get the list of instances.

    @return: array of instances, ex. ['instance2-uuid', 'instance1-uuid']

    """
    return self._UnlockedGetInstanceList()
1587 def ExpandInstanceName(self, short_name):
1588 """Attempt to expand an incomplete instance name.
1591 # Locking is done in L{ConfigWriter.GetAllInstancesInfo}
1592 all_insts = self.GetAllInstancesInfo().values()
1593 expanded_name = _MatchNameComponentIgnoreCase(
1594 short_name, [inst.name for inst in all_insts])
1596 if expanded_name is not None:
1597 # there has to be exactly one instance with that name
1598 inst = (filter(lambda n: n.name == expanded_name, all_insts)[0])
1599 return (inst.uuid, inst.name)
1603 def _UnlockedGetInstanceInfo(self, inst_uuid):
1604 """Returns information about an instance.
1606 This function is for internal use, when the config lock is already held.
1609 if inst_uuid not in self._config_data.instances:
1612 return self._config_data.instances[inst_uuid]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceInfo(self, inst_uuid):
    """Returns information about an instance.

    It takes the information from the configuration file. Other information of
    an instance are taken from the live systems.

    @type inst_uuid: string
    @param inst_uuid: UUID of the instance

    @rtype: L{objects.Instance}
    @return: the instance object

    """
    return self._UnlockedGetInstanceInfo(inst_uuid)
1629 @locking.ssynchronized(_config_lock, shared=1)
1630 def GetInstanceNodeGroups(self, inst_uuid, primary_only=False):
1631 """Returns set of node group UUIDs for instance's nodes.
1636 instance = self._UnlockedGetInstanceInfo(inst_uuid)
1638 raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)
1641 nodes = [instance.primary_node]
1643 nodes = instance.all_nodes
1645 return frozenset(self._UnlockedGetNodeInfo(node_uuid).group
1646 for node_uuid in nodes)
1648 @locking.ssynchronized(_config_lock, shared=1)
1649 def GetInstanceNetworks(self, inst_uuid):
1650 """Returns set of network UUIDs for instance's nics.
1655 instance = self._UnlockedGetInstanceInfo(inst_uuid)
1657 raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)
1660 for nic in instance.nics:
1662 networks.add(nic.network)
1664 return frozenset(networks)
1666 @locking.ssynchronized(_config_lock, shared=1)
1667 def GetMultiInstanceInfo(self, inst_uuids):
1668 """Get the configuration of multiple instances.
1670 @param inst_uuids: list of instance UUIDs
1672 @return: list of tuples (instance UUID, instance_info), where
1673 instance_info is what would GetInstanceInfo return for the
1674 node, while keeping the original order
1677 return [(uuid, self._UnlockedGetInstanceInfo(uuid)) for uuid in inst_uuids]
1679 @locking.ssynchronized(_config_lock, shared=1)
1680 def GetMultiInstanceInfoByName(self, inst_names):
1681 """Get the configuration of multiple instances.
1683 @param inst_names: list of instance names
1685 @return: list of tuples (instance, instance_info), where
1686 instance_info is what would GetInstanceInfo return for the
1687 node, while keeping the original order
1691 for name in inst_names:
1692 instance = self._UnlockedGetInstanceInfoByName(name)
1693 result.append((instance.uuid, instance))
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllInstancesInfo(self):
    """Get the configuration of all instances.

    @rtype: dict
    @return: dict of (instance, instance_info), where instance_info is what
              would GetInstanceInfo return for the node

    """
    return self._UnlockedGetAllInstancesInfo()
1707 def _UnlockedGetAllInstancesInfo(self):
1708 my_dict = dict([(inst_uuid, self._UnlockedGetInstanceInfo(inst_uuid))
1709 for inst_uuid in self._UnlockedGetInstanceList()])
1712 @locking.ssynchronized(_config_lock, shared=1)
1713 def GetInstancesInfoByFilter(self, filter_fn):
1714 """Get instance configuration with a filter.
1716 @type filter_fn: callable
1717 @param filter_fn: Filter function receiving instance object as parameter,
1718 returning boolean. Important: this function is called while the
1719 configuration locks is held. It must not do any complex work or call
1720 functions potentially leading to a deadlock. Ideally it doesn't call any
1721 other functions and just compares instance attributes.
1724 return dict((uuid, inst)
1725 for (uuid, inst) in self._config_data.instances.items()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceInfoByName(self, inst_name):
    """Get the L{objects.Instance} object for a named instance.

    @param inst_name: name of the instance to get information for
    @type inst_name: string
    @return: the corresponding L{objects.Instance} instance or None if no
          information is available

    """
    return self._UnlockedGetInstanceInfoByName(inst_name)
1740 def _UnlockedGetInstanceInfoByName(self, inst_name):
1741 for inst in self._UnlockedGetAllInstancesInfo().values():
1742 if inst.name == inst_name:
1746 def _UnlockedGetInstanceName(self, inst_uuid):
1747 inst_info = self._UnlockedGetInstanceInfo(inst_uuid)
1748 if inst_info is None:
1749 raise errors.OpExecError("Unknown instance: %s" % inst_uuid)
1750 return inst_info.name
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceName(self, inst_uuid):
    """Gets the instance name for the passed instance.

    @param inst_uuid: instance UUID to get name for
    @type inst_uuid: string
    @rtype: string
    @return: instance name

    """
    return self._UnlockedGetInstanceName(inst_uuid)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceNames(self, inst_uuids):
    """Gets the instance names for the passed list of nodes.

    @param inst_uuids: list of instance UUIDs to get names for
    @type inst_uuids: list of strings
    @rtype: list of strings
    @return: list of instance names

    """
    return self._UnlockedGetInstanceNames(inst_uuids)
1776 def _UnlockedGetInstanceNames(self, inst_uuids):
1777 return [self._UnlockedGetInstanceName(uuid) for uuid in inst_uuids]
1779 @locking.ssynchronized(_config_lock)
1780 def AddNode(self, node, ec_id):
1781 """Add a node to the configuration.
1783 @type node: L{objects.Node}
1784 @param node: a Node instance
1787 logging.info("Adding node %s to configuration", node.name)
1789 self._EnsureUUID(node, ec_id)
1792 node.ctime = node.mtime = time.time()
1793 self._UnlockedAddNodeToGroup(node.uuid, node.group)
1794 self._config_data.nodes[node.uuid] = node
1795 self._config_data.cluster.serial_no += 1
  @locking.ssynchronized(_config_lock)
  def RemoveNode(self, node_uuid):
    """Remove a node from the configuration.

    @type node_uuid: string
    @param node_uuid: the UUID of the node to remove

    """
    logging.info("Removing node %s from configuration", node_uuid)

    if node_uuid not in self._config_data.nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_uuid)

    # Drop the group membership first, then the node object itself
    self._UnlockedRemoveNodeFromGroup(self._config_data.nodes[node_uuid])
    del self._config_data.nodes[node_uuid]
    self._config_data.cluster.serial_no += 1
1813 def ExpandNodeName(self, short_name):
1814 """Attempt to expand an incomplete node name into a node UUID.
1817 # Locking is done in L{ConfigWriter.GetAllNodesInfo}
1818 all_nodes = self.GetAllNodesInfo().values()
1819 expanded_name = _MatchNameComponentIgnoreCase(
1820 short_name, [node.name for node in all_nodes])
1822 if expanded_name is not None:
1823 # there has to be exactly one node with that name
1824 node = (filter(lambda n: n.name == expanded_name, all_nodes)[0])
1825 return (node.uuid, node.name)
1829 def _UnlockedGetNodeInfo(self, node_uuid):
1830 """Get the configuration of a node, as stored in the config.
1832 This function is for internal use, when the config lock is already
1835 @param node_uuid: the node UUID
1837 @rtype: L{objects.Node}
1838 @return: the node object
1841 if node_uuid not in self._config_data.nodes:
1844 return self._config_data.nodes[node_uuid]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInfo(self, node_uuid):
    """Get the configuration of a node, as stored in the config.

    This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

    @param node_uuid: the node UUID

    @rtype: L{objects.Node}
    @return: the node object

    """
    return self._UnlockedGetNodeInfo(node_uuid)
1860 @locking.ssynchronized(_config_lock, shared=1)
1861 def GetNodeInstances(self, node_uuid):
1862 """Get the instances of a node, as stored in the config.
1864 @param node_uuid: the node UUID
1866 @rtype: (list, list)
1867 @return: a tuple with two lists: the primary and the secondary instances
1872 for inst in self._config_data.instances.values():
1873 if inst.primary_node == node_uuid:
1874 pri.append(inst.uuid)
1875 if node_uuid in inst.secondary_nodes:
1876 sec.append(inst.uuid)
1879 @locking.ssynchronized(_config_lock, shared=1)
1880 def GetNodeGroupInstances(self, uuid, primary_only=False):
1881 """Get the instances of a node group.
1883 @param uuid: Node group UUID
1884 @param primary_only: Whether to only consider primary nodes
1886 @return: List of instance UUIDs in node group
1890 nodes_fn = lambda inst: [inst.primary_node]
1892 nodes_fn = lambda inst: inst.all_nodes
1894 return frozenset(inst.uuid
1895 for inst in self._config_data.instances.values()
1896 for node_uuid in nodes_fn(inst)
1897 if self._UnlockedGetNodeInfo(node_uuid).group == uuid)
1899 def _UnlockedGetHvparamsString(self, hvname):
1900 """Return the string representation of the list of hyervisor parameters of
1901 the given hypervisor.
1903 @see: C{GetHvparams}
1907 hvparams = self._config_data.cluster.hvparams[hvname]
1908 for key in hvparams:
1909 result += "%s=%s\n" % (key, hvparams[key])
  @locking.ssynchronized(_config_lock, shared=1)
  def GetHvparamsString(self, hvname):
    """Return the hypervisor parameters of the given hypervisor.

    @type hvname: string
    @param hvname: name of a hypervisor
    @rtype: string
    @return: string containing key-value-pairs, one pair on each line;
        format: 'key=value'

    """
    return self._UnlockedGetHvparamsString(hvname)
1925 def _UnlockedGetNodeList(self):
1926 """Return the list of nodes which are in the configuration.
1928 This function is for internal use, when the config lock is already
1934 return self._config_data.nodes.keys()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    """
    return self._UnlockedGetNodeList()
1943 def _UnlockedGetOnlineNodeList(self):
1944 """Return the list of nodes which are online.
1947 all_nodes = [self._UnlockedGetNodeInfo(node)
1948 for node in self._UnlockedGetNodeList()]
1949 return [node.uuid for node in all_nodes if not node.offline]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetOnlineNodeList(self):
    """Return the list of nodes which are online.

    """
    return self._UnlockedGetOnlineNodeList()
  @locking.ssynchronized(_config_lock, shared=1)
  def GetVmCapableNodeList(self):
    """Return the list of nodes which are vm capable.

    """
    all_nodes = [self._UnlockedGetNodeInfo(node)
                 for node in self._UnlockedGetNodeList()]
    # Keep only nodes with the vm_capable flag set
    return [node.uuid for node in all_nodes if node.vm_capable]
1967 @locking.ssynchronized(_config_lock, shared=1)
1968 def GetNonVmCapableNodeList(self):
1969 """Return the list of nodes which are not vm capable.
1972 all_nodes = [self._UnlockedGetNodeInfo(node)
1973 for node in self._UnlockedGetNodeList()]
1974 return [node.uuid for node in all_nodes if not node.vm_capable]
1976 @locking.ssynchronized(_config_lock, shared=1)
1977 def GetMultiNodeInfo(self, node_uuids):
1978 """Get the configuration of multiple nodes.
1980 @param node_uuids: list of node UUIDs
1982 @return: list of tuples of (node, node_info), where node_info is
1983 what would GetNodeInfo return for the node, in the original
1987 return [(uuid, self._UnlockedGetNodeInfo(uuid)) for uuid in node_uuids]
1989 def _UnlockedGetAllNodesInfo(self):
1990 """Gets configuration of all nodes.
1992 @note: See L{GetAllNodesInfo}
1995 return dict([(node_uuid, self._UnlockedGetNodeInfo(node_uuid))
1996 for node_uuid in self._UnlockedGetNodeList()])
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNodesInfo(self):
    """Get the configuration of all nodes.

    @rtype: dict
    @return: dict of (node, node_info), where node_info is what
              would GetNodeInfo return for the node

    """
    return self._UnlockedGetAllNodesInfo()
2009 def _UnlockedGetNodeInfoByName(self, node_name):
2010 for node in self._UnlockedGetAllNodesInfo().values():
2011 if node.name == node_name:
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInfoByName(self, node_name):
    """Get the L{objects.Node} object for a named node.

    @param node_name: name of the node to get information for
    @type node_name: string
    @return: the corresponding L{objects.Node} instance or None if no
          information is available

    """
    return self._UnlockedGetNodeInfoByName(node_name)
2027 def _UnlockedGetNodeName(self, node_spec):
2028 if isinstance(node_spec, objects.Node):
2029 return node_spec.name
2030 elif isinstance(node_spec, basestring):
2031 node_info = self._UnlockedGetNodeInfo(node_spec)
2032 if node_info is None:
2033 raise errors.OpExecError("Unknown node: %s" % node_spec)
2034 return node_info.name
2036 raise errors.ProgrammerError("Can't handle node spec '%s'" % node_spec)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeName(self, node_spec):
    """Gets the node name for the passed node.

    @param node_spec: node to get names for
    @type node_spec: either node UUID or a L{objects.Node} object
    @rtype: string
    @return: node name

    """
    return self._UnlockedGetNodeName(node_spec)
2050 def _UnlockedGetNodeNames(self, node_specs):
2051 return [self._UnlockedGetNodeName(node_spec) for node_spec in node_specs]
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeNames(self, node_specs):
    """Gets the node names for the passed list of nodes.

    @param node_specs: list of nodes to get names for
    @type node_specs: list of either node UUIDs or L{objects.Node} objects
    @rtype: list of strings
    @return: list of node names

    """
    return self._UnlockedGetNodeNames(node_specs)
2065 @locking.ssynchronized(_config_lock, shared=1)
2066 def GetNodeGroupsFromNodes(self, node_uuids):
2067 """Returns groups for a list of nodes.
2069 @type node_uuids: list of string
2070 @param node_uuids: List of node UUIDs
2074 return frozenset(self._UnlockedGetNodeInfo(uuid).group
2075 for uuid in node_uuids)
2077 def _UnlockedGetMasterCandidateStats(self, exceptions=None):
2078 """Get the number of current and maximum desired and possible candidates.
2080 @type exceptions: list
2081 @param exceptions: if passed, list of nodes that should be ignored
2083 @return: tuple of (current, desired and possible, possible)
2086 mc_now = mc_should = mc_max = 0
2087 for node in self._config_data.nodes.values():
2088 if exceptions and node.uuid in exceptions:
2090 if not (node.offline or node.drained) and node.master_capable:
2092 if node.master_candidate:
2094 mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
2095 return (mc_now, mc_should, mc_max)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterCandidateStats(self, exceptions=None):
    """Get the number of current and maximum possible candidates.

    This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored

    @rtype: tuple
    @return: tuple of (current, max)

    """
    return self._UnlockedGetMasterCandidateStats(exceptions)
2111 @locking.ssynchronized(_config_lock)
2112 def MaintainCandidatePool(self, exception_node_uuids):
2113 """Try to grow the candidate pool to the desired size.
2115 @type exception_node_uuids: list
2116 @param exception_node_uuids: if passed, list of nodes that should be ignored
2118 @return: list with the adjusted nodes (L{objects.Node} instances)
2121 mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(
2122 exception_node_uuids)
2125 node_list = self._config_data.nodes.keys()
2126 random.shuffle(node_list)
2127 for uuid in node_list:
2128 if mc_now >= mc_max:
2130 node = self._config_data.nodes[uuid]
2131 if (node.master_candidate or node.offline or node.drained or
2132 node.uuid in exception_node_uuids or not node.master_capable):
2134 mod_list.append(node)
2135 node.master_candidate = True
2138 if mc_now != mc_max:
2139 # this should not happen
2140 logging.warning("Warning: MaintainCandidatePool didn't manage to"
2141 " fill the candidate pool (%d/%d)", mc_now, mc_max)
2143 self._config_data.cluster.serial_no += 1
2148 def _UnlockedAddNodeToGroup(self, node_uuid, nodegroup_uuid):
2149 """Add a given node to the specified group.
2152 if nodegroup_uuid not in self._config_data.nodegroups:
2153 # This can happen if a node group gets deleted between its lookup and
2154 # when we're adding the first node to it, since we don't keep a lock in
2155 # the meantime. It's ok though, as we'll fail cleanly if the node group
2156 # is not found anymore.
2157 raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)
2158 if node_uuid not in self._config_data.nodegroups[nodegroup_uuid].members:
2159 self._config_data.nodegroups[nodegroup_uuid].members.append(node_uuid)
2161 def _UnlockedRemoveNodeFromGroup(self, node):
2162 """Remove a given node from its group.
2165 nodegroup = node.group
2166 if nodegroup not in self._config_data.nodegroups:
2167 logging.warning("Warning: node '%s' has unknown node group '%s'"
2168 " (while being removed from it)", node.uuid, nodegroup)
2169 nodegroup_obj = self._config_data.nodegroups[nodegroup]
2170 if node.uuid not in nodegroup_obj.members:
2171 logging.warning("Warning: node '%s' not a member of its node group '%s'"
2172 " (while being removed from it)", node.uuid, nodegroup)
2174 nodegroup_obj.members.remove(node.uuid)
# NOTE(review): this listing is missing source lines (the embedded numbering
# has gaps): the try/except KeyError wrappers around the three dict lookups,
# the initialization of the `resmod` accumulator, the `continue` after the
# "current group" debug message, the continuation lines of two raise
# statements, and the per-object serial_no/mtime bump inside the final loop.
# The method cannot run as shown; restore the missing lines before use.
2176 @locking.ssynchronized(_config_lock)
2177 def AssignGroupNodes(self, mods):
2178 """Changes the group of a number of nodes.
2180 @type mods: list of tuples; (node name, new group UUID)
2181 @param mods: Node membership modifications
# Phase 1: resolve every (node, old group, new group) triple and validate,
# before mutating anything, so a bad entry aborts the whole operation.
2184 groups = self._config_data.nodegroups
2185 nodes = self._config_data.nodes
2189 # Try to resolve UUIDs first
2190 for (node_uuid, new_group_uuid) in mods:
2192 node = nodes[node_uuid]
2194 raise errors.ConfigurationError("Unable to find node '%s'" % node_uuid)
2196 if node.group == new_group_uuid:
2197 # Node is being assigned to its current group
2198 logging.debug("Node '%s' was assigned to its current group (%s)",
2199 node_uuid, node.group)
2202 # Try to find current group of node
2204 old_group = groups[node.group]
2206 raise errors.ConfigurationError("Unable to find old group '%s'" %
2209 # Try to find new group for node
2211 new_group = groups[new_group_uuid]
2213 raise errors.ConfigurationError("Unable to find new group '%s'" %
2216 assert node.uuid in old_group.members, \
2217 ("Inconsistent configuration: node '%s' not listed in members for its"
2218 " old group '%s'" % (node.uuid, old_group.uuid))
2219 assert node.uuid not in new_group.members, \
2220 ("Inconsistent configuration: node '%s' already listed in members for"
2221 " its new group '%s'" % (node.uuid, new_group.uuid))
2223 resmod.append((node, old_group, new_group))
# Phase 2: apply all resolved modifications.
2226 for (node, old_group, new_group) in resmod:
2227 assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
2228 "Assigning to current group is not possible"
2230 node.group = new_group.uuid
2232 # Update members of involved groups
2233 if node.uuid in old_group.members:
2234 old_group.members.remove(node.uuid)
2235 if node.uuid not in new_group.members:
2236 new_group.members.append(node.uuid)
2238 # Update timestamps and serials (only once per node/group object)
# frozenset de-duplicates objects that appear in several triples, so each
# node/group gets its serial/mtime bumped at most once.
2240 for obj in frozenset(itertools.chain(*resmod)): # pylint: disable=W0142
2244 # Force ssconf update
2245 self._config_data.cluster.serial_no += 1
2249 def _BumpSerialNo(self):
2250 """Bump up the serial number of the config.
2253 self._config_data.serial_no += 1
2254 self._config_data.mtime = time.time()
2256 def _AllUUIDObjects(self):
2257 """Returns all objects with uuid attributes.
# Concatenates the .values() lists of every top-level object collection plus
# the cluster object itself (Python 2: dict.values() returns a list).
# NOTE(review): the numbering jumps 2263 -> 2266 here; one or two more
# collections may have been elided from this listing — TODO confirm against
# the original file before relying on completeness.
2260 return (self._config_data.instances.values() +
2261 self._config_data.nodes.values() +
2262 self._config_data.nodegroups.values() +
2263 self._config_data.networks.values() +
2266 [self._config_data.cluster])
# NOTE(review): lines are elided from this listing (numbering gaps): the
# `try:` that pairs with the `except Exception, err:` below, blank/docstring
# lines, and possibly part of the "reset serial" comment. Restore before use.
# Also note `except Exception, err:` is Python-2-only syntax.
2268 def _OpenConfig(self, accept_foreign):
2269 """Read the config data from disk.
# Read and deserialize the JSON config file; any parse/convert failure is
# re-raised as ConfigurationError.
2272 raw_data = utils.ReadFile(self._cfg_file)
2275 data = objects.ConfigData.FromDict(serializer.Load(raw_data))
2276 except Exception, err:
2277 raise errors.ConfigurationError(err)
2279 # Make sure the configuration has the right version
2280 _ValidateConfig(data)
# Sanity checks: the cluster section and its public SSH key must exist
2282 if (not hasattr(data, "cluster") or
2283 not hasattr(data.cluster, "rsahostkeypub")):
2284 raise errors.ConfigurationError("Incomplete configuration"
2285 " (missing cluster.rsahostkeypub)")
2287 if not data.cluster.master_node in data.nodes:
2288 msg = ("The configuration denotes node %s as master, but does not"
2289 " contain information about this node" %
2290 data.cluster.master_node)
2291 raise errors.ConfigurationError(msg)
# Refuse to open a config whose master is another host, unless the caller
# explicitly asked for a foreign config (accept_foreign).
2293 master_info = data.nodes[data.cluster.master_node]
2294 if master_info.name != self._my_hostname and not accept_foreign:
2295 msg = ("The configuration denotes node %s as master, while my"
2296 " hostname is %s; opening a foreign configuration is only"
2297 " possible in accept_foreign mode" %
2298 (master_info.name, self._my_hostname))
2299 raise errors.ConfigurationError(msg)
2301 self._config_data = data
2302 # reset the last serial as -1 so that the next write will cause
2304 self._last_cluster_serial = -1
2306 # Upgrade configuration if needed
2307 self._UpgradeConfig()
# Remember the on-disk file identity for later conflict detection
2309 self._cfg_id = utils.GetFileID(path=self._cfg_file)
# NOTE(review): this listing has elided lines: the `members=[]`-style
# continuation of the NodeGroup constructor call, the `if not node.group:`
# guard before the LookupNodeGroup call, the `if modified:` branch that
# triggers the actual write, and the `if config_errors:` guard before the
# critical log. Restore those lines before relying on this method.
2311 def _UpgradeConfig(self):
2312 """Run any upgrade steps.
2314 This method performs both in-object upgrades and also update some data
2315 elements that need uniqueness across the whole configuration or interact
2318 @warning: this function will call L{_WriteConfig()}, but also
2319 L{DropECReservations} so it needs to be called only from a
2320 "safe" place (the constructor). If one wanted to call it with
2321 the lock held, a DropECReservationUnlocked would need to be
2322 created first, to avoid causing deadlock.
2325 # Keep a copy of the persistent part of _config_data to check for changes
2326 # Serialization doesn't guarantee order in dictionaries
2327 oldconf = copy.deepcopy(self._config_data.ToDict())
2329 # In-object upgrades
2330 self._config_data.UpgradeConfig()
# Give a UUID to any object that lacks one, using the dedicated upgrade job id
2332 for item in self._AllUUIDObjects():
2333 if item.uuid is None:
2334 item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
# Pre-nodegroup configs: create the initial default group and put every
# node into it
2335 if not self._config_data.nodegroups:
2336 default_nodegroup_name = constants.INITIAL_NODE_GROUP_NAME
2337 default_nodegroup = objects.NodeGroup(name=default_nodegroup_name,
2339 self._UnlockedAddNodeGroup(default_nodegroup, _UPGRADE_CONFIG_JID, True)
2340 for node in self._config_data.nodes.values():
2342 node.group = self.LookupNodeGroup(None)
2343 # This is technically *not* an upgrade, but needs to be done both when
2344 # nodegroups are being added, and upon normally loading the config,
2345 # because the members list of a node group is discarded upon
2346 # serializing/deserializing the object.
2347 self._UnlockedAddNodeToGroup(node.uuid, node.group)
# Compare serialized forms to detect whether anything actually changed
2349 modified = (oldconf != self._config_data.ToDict())
2352 # This is ok even if it acquires the internal lock, as _UpgradeConfig is
2353 # only called at config init time, without the lock held
2354 self.DropECReservations(_UPGRADE_CONFIG_JID)
# Verification result is only logged; an inconsistent config is still loaded
2356 config_errors = self._UnlockedVerifyConfig()
2358 errmsg = ("Loaded configuration data is not consistent: %s" %
2359 (utils.CommaJoin(config_errors)))
2360 logging.critical(errmsg)
# NOTE(review): elided lines here include the offline short-circuit, the
# initialization of node_list/addr_list, the `continue` inside the loop, the
# `result =` assignment that the call below belongs to, the `if msg:` guard
# and error logging, and the final return value. Restore before use.
2362 def _DistributeConfig(self, feedback_fn):
2363 """Distribute the configuration to the other nodes.
2365 Currently, this only copies the configuration file. In the future,
2366 it could be used to encapsulate the 2/3-phase update mechanism.
2376 myhostname = self._my_hostname
2377 # we can skip checking whether _UnlockedGetNodeInfo returns None
2378 # since the node list comes from _UnlocketGetNodeList, and we are
2379 # called with the lock held, so no modifications should take place
# Collect names/addresses of all master candidates other than ourselves
2381 for node_uuid in self._UnlockedGetNodeList():
2382 node_info = self._UnlockedGetNodeInfo(node_uuid)
2383 if node_info.name == myhostname or not node_info.master_candidate:
2385 node_list.append(node_info.name)
2386 addr_list.append(node_info.primary_ip)
2388 # TODO: Use dedicated resolver talking to config writer for name resolution
# Upload the config file via RPC and report per-node failures
2390 self._GetRpc(addr_list).call_upload_file(node_list, self._cfg_file)
2391 for to_node, to_result in result.items():
2392 msg = to_result.fail_msg
2394 msg = ("Copy of file %s to node %s failed: %s" %
2395 (self._cfg_file, to_node, msg))
# NOTE(review): elided lines include the `if config_errors:` guard with its
# feedback_fn call, the `try:` pairing with `except errors.LockError:`, the
# continuation/close of that raise, the try/finally around the fd handling
# (os.close), the `if msg:`/feedback around ssconf upload errors, and the
# `else:` for the offline case. Restore before relying on this method.
2405 def _WriteConfig(self, destination=None, feedback_fn=None):
2406 """Write the configuration data to persistent storage.
2409 assert feedback_fn is None or callable(feedback_fn)
2411 # Warn on config errors, but don't abort the save - the
2412 # configuration has already been modified, and we can't revert;
2413 # the best we can do is to warn the user and save as is, leaving
2414 # recovery to the user
2415 config_errors = self._UnlockedVerifyConfig()
2417 errmsg = ("Configuration data is not consistent: %s" %
2418 (utils.CommaJoin(config_errors)))
2419 logging.critical(errmsg)
2423 if destination is None:
2424 destination = self._cfg_file
# Every write bumps the config serial number before serializing
2425 self._BumpSerialNo()
2426 txt = serializer.Dump(self._config_data.ToDict())
2428 getents = self._getents()
# SafeWriteFile checks the previously recorded file id (_cfg_id): a
# LockError here means the file changed on disk since we last read/wrote it
2430 fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
2431 close=False, gid=getents.confd_gid, mode=0640)
2432 except errors.LockError:
2433 raise errors.ConfigurationError("The configuration file has been"
2434 " modified since the last write, cannot"
2437 self._cfg_id = utils.GetFileID(fd=fd)
2441 self.write_count += 1
2443 # and redistribute the config file to master candidates
2444 self._DistributeConfig(feedback_fn)
2446 # Write ssconf files on all nodes (including locally)
2447 if self._last_cluster_serial < self._config_data.cluster.serial_no:
2448 if not self._offline:
2449 result = self._GetRpc(None).call_write_ssconf_files(
2450 self._UnlockedGetNodeNames(self._UnlockedGetOnlineNodeList()),
2451 self._UnlockedGetSsconfValues())
2453 for nname, nresu in result.items():
2454 msg = nresu.fail_msg
2456 errmsg = ("Error while uploading ssconf files to"
2457 " node %s: %s" % (nname, msg))
2458 logging.warning(errmsg)
2463 self._last_cluster_serial = self._config_data.cluster.serial_no
2465 def _GetAllHvparamsStrings(self, hypervisors):
2466 """Get the hvparams of all given hypervisors from the config.
2468 @type hypervisors: list of string
2469 @param hypervisors: list of hypervisor names
2470 @rtype: dict of strings
2471 @returns: dictionary mapping the hypervisor name to a string representation
2472 of the hypervisor's hvparams
2476 for hv in hypervisors:
2477 hvparams[hv] = self._UnlockedGetHvparamsString(hv)
def _ExtendByAllHvparamsStrings(ssconf_values, all_hvparams):
  """Extends the ssconf_values dictionary by hvparams.

  @type ssconf_values: dict of strings
  @param ssconf_values: dictionary mapping ssconf_keys to strings
    representing the content of ssconf files
  @type all_hvparams: dict of strings
  @param all_hvparams: dictionary mapping hypervisor names to a string
    representation of their hvparams
  @rtype: same as ssconf_values
  @returns: the ssconf_values dictionary extended by hvparams

  """
  # One ssconf entry per hypervisor, keyed by the hvparams prefix + name;
  # the input dictionary is updated in place and returned.
  for hv_name, hv_text in all_hvparams.items():
    ssconf_values[constants.SS_HVPARAMS_PREF + hv_name] = hv_text
  return ssconf_values
# NOTE(review): elided lines here include the definition of `fn` (used below
# to join sequences into file contents), the `mc_max` computation if any, the
# opening `ssconf_values = {` of the literal whose entries follow, the second
# argument of the _ExtendByAllHvparamsStrings call, and the `if bad_values:`
# guard before the raise. Restore those before relying on this method.
2499 def _UnlockedGetSsconfValues(self):
2500 """Return the values needed by ssconf.
2503 @return: a dictionary with keys the ssconf names and values their
# Gather per-instance and per-node data first
2508 instance_names = utils.NiceSort(
2509 [inst.name for inst in
2510 self._UnlockedGetAllInstancesInfo().values()])
2511 node_infos = self._UnlockedGetAllNodesInfo().values()
2512 node_names = [node.name for node in node_infos]
2513 node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
2514 for ninfo in node_infos]
2515 node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
2516 for ninfo in node_infos]
# `fn` renders a sequence into ssconf file content (definition elided above)
2518 instance_data = fn(instance_names)
2519 off_data = fn(node.name for node in node_infos if node.offline)
2520 on_data = fn(node.name for node in node_infos if not node.offline)
2521 mc_data = fn(node.name for node in node_infos if node.master_candidate)
2522 mc_ips_data = fn(node.primary_ip for node in node_infos
2523 if node.master_candidate)
2524 node_data = fn(node_names)
2525 node_pri_ips_data = fn(node_pri_ips)
2526 node_snd_ips_data = fn(node_snd_ips)
2528 cluster = self._config_data.cluster
2529 cluster_tags = fn(cluster.GetTags())
2531 hypervisor_list = fn(cluster.enabled_hypervisors)
2532 all_hvparams = self._GetAllHvparamsStrings(constants.HYPER_TYPES)
2534 uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")
2536 nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in
2537 self._config_data.nodegroups.values()]
2538 nodegroups_data = fn(utils.NiceSort(nodegroups))
2539 networks = ["%s %s" % (net.uuid, net.name) for net in
2540 self._config_data.networks.values()]
2541 networks_data = fn(utils.NiceSort(networks))
# Entries of the ssconf_values dict (its opening line is elided above)
2544 constants.SS_CLUSTER_NAME: cluster.cluster_name,
2545 constants.SS_CLUSTER_TAGS: cluster_tags,
2546 constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
2547 constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
2548 constants.SS_MASTER_CANDIDATES: mc_data,
2549 constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
2550 constants.SS_MASTER_IP: cluster.master_ip,
2551 constants.SS_MASTER_NETDEV: cluster.master_netdev,
2552 constants.SS_MASTER_NETMASK: str(cluster.master_netmask),
2553 constants.SS_MASTER_NODE: self._UnlockedGetNodeName(cluster.master_node),
2554 constants.SS_NODE_LIST: node_data,
2555 constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
2556 constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
2557 constants.SS_OFFLINE_NODES: off_data,
2558 constants.SS_ONLINE_NODES: on_data,
2559 constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
2560 constants.SS_INSTANCE_LIST: instance_data,
2561 constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
2562 constants.SS_HYPERVISOR_LIST: hypervisor_list,
2563 constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
2564 constants.SS_UID_POOL: uid_pool,
2565 constants.SS_NODEGROUPS: nodegroups_data,
2566 constants.SS_NETWORKS: networks_data,
2568 ssconf_values = self._ExtendByAllHvparamsStrings(ssconf_values,
# Final sanity check: every ssconf value must be a string (Python 2:
# `basestring` covers both str and unicode here)
2570 bad_values = [(k, v) for k, v in ssconf_values.items()
2571 if not isinstance(v, (str, basestring))]
2573 err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
2574 raise errors.ConfigurationError("Some ssconf key(s) have non-string"
2575 " values: %s" % err)
2576 return ssconf_values
@locking.ssynchronized(_config_lock, shared=1)
def GetSsconfValues(self):
  """Read-locked wrapper around L{_UnlockedGetSsconfValues}.

  @return: see L{_UnlockedGetSsconfValues}

  """
  values = self._UnlockedGetSsconfValues()
  return values
@locking.ssynchronized(_config_lock, shared=1)
def GetVGName(self):
  """Return the LVM volume group name stored in the cluster config.

  @rtype: string

  """
  cluster = self._config_data.cluster
  return cluster.volume_group_name
@locking.ssynchronized(_config_lock)
def SetVGName(self, vg_name):
  """Set the volume group name and bump the cluster serial number.

  @type vg_name: string
  @param vg_name: the new volume group name

  """
  cluster = self._config_data.cluster
  cluster.volume_group_name = vg_name
  cluster.serial_no += 1
@locking.ssynchronized(_config_lock, shared=1)
def GetDRBDHelper(self):
  """Return the configured DRBD usermode helper.

  @rtype: string

  """
  cluster = self._config_data.cluster
  return cluster.drbd_usermode_helper
@locking.ssynchronized(_config_lock)
def SetDRBDHelper(self, drbd_helper):
  """Set the DRBD usermode helper and bump the cluster serial number.

  @type drbd_helper: string
  @param drbd_helper: the new DRBD usermode helper

  """
  cluster = self._config_data.cluster
  cluster.drbd_usermode_helper = drbd_helper
  cluster.serial_no += 1
@locking.ssynchronized(_config_lock, shared=1)
def GetMACPrefix(self):
  """Return the cluster's MAC address prefix.

  @rtype: string

  """
  cluster = self._config_data.cluster
  return cluster.mac_prefix
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterInfo(self):
  """Returns the cluster object from the configuration.

  @rtype: L{objects.Cluster}
  @return: the cluster object

  """
  return self._config_data.cluster
@locking.ssynchronized(_config_lock, shared=1)
def HasAnyDiskOfType(self, dev_type):
  """Check whether any disk of the given type exists in the configuration.

  @param dev_type: the disk type to look for
  @rtype: boolean

  """
  data = self._config_data
  return data.HasAnyDiskOfType(dev_type)
# NOTE(review): elided lines here include the continuation/close of the first
# raise, the `else:` branch before the "Invalid object type" raise, the
# `if not test:` guard before the "Configuration updated" raise, and the
# `if update_serial:` guard before the cluster serial bump. Restore before
# relying on this method.
2641 @locking.ssynchronized(_config_lock)
2642 def Update(self, target, feedback_fn, ec_id=None):
2643 """Notify function to be called after updates.
2645 This function must be called when an object (as returned by
2646 GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
2647 caller wants the modifications saved to the backing store. Note
2648 that all modified objects will be saved, but the target argument
2649 is the one the caller wants to ensure that it's saved.
2651 @param target: an instance of either L{objects.Cluster},
2652 L{objects.Node} or L{objects.Instance} which is existing in
2654 @param feedback_fn: Callable feedback function
2657 if self._config_data is None:
2658 raise errors.ProgrammerError("Configuration file not read,"
# Determine whether `target` is the very object held in the in-memory
# config (identity/containment test per object type); node updates also
# require bumping the cluster serial number.
2660 update_serial = False
2661 if isinstance(target, objects.Cluster):
2662 test = target == self._config_data.cluster
2663 elif isinstance(target, objects.Node):
2664 test = target in self._config_data.nodes.values()
2665 update_serial = True
2666 elif isinstance(target, objects.Instance):
2667 test = target in self._config_data.instances.values()
2668 elif isinstance(target, objects.NodeGroup):
2669 test = target in self._config_data.nodegroups.values()
2670 elif isinstance(target, objects.Network):
2671 test = target in self._config_data.networks.values()
2673 raise errors.ProgrammerError("Invalid object type (%s) passed to"
2674 " ConfigWriter.Update" % type(target))
2676 raise errors.ConfigurationError("Configuration updated since object"
2677 " has been read or unknown object")
2678 target.serial_no += 1
2679 target.mtime = now = time.time()
2682 # for node updates, we need to increase the cluster serial too
2683 self._config_data.cluster.serial_no += 1
2684 self._config_data.cluster.mtime = now
# Instance updates release any temporarily reserved DRBD minors
2686 if isinstance(target, objects.Instance):
2687 self._UnlockedReleaseDRBDMinors(target.uuid)
2689 if ec_id is not None:
2690 # Commit all ips reserved by OpInstanceSetParams and OpGroupSetParams
2691 self._UnlockedCommitTemporaryIps(ec_id)
# Persist (and distribute) the whole modified configuration
2693 self._WriteConfig(feedback_fn=feedback_fn)
@locking.ssynchronized(_config_lock)
def DropECReservations(self, ec_id):
  """Drop per-execution-context reservations.

  @param ec_id: the execution context whose reservations are dropped
    from every reservation manager

  """
  for manager in self._all_rms:
    manager.DropECReservations(ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNetworksInfo(self):
  """Get configuration info of all the networks.

  @rtype: dict
  @return: a new dict mapping network UUIDs to network objects

  """
  # Return a shallow copy so callers cannot mutate the internal mapping
  return dict(self._config_data.networks)
2710 def _UnlockedGetNetworkList(self):
2711 """Get the list of networks.
2713 This function is for internal use, when the config lock is already held.
2716 return self._config_data.networks.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNetworkList(self):
  """Get the list of networks.

  @return: array of network UUIDs, ex. ["main", "vlan100", "200"]

  """
  return self._UnlockedGetNetworkList()
@locking.ssynchronized(_config_lock, shared=1)
def GetNetworkNames(self):
  """Get a list of network names.

  @rtype: list of strings
  @return: the names of all defined networks

  """
  # The head of this list comprehension (`return [net.name`) was missing
  # from the block, leaving it syntactically broken; rebuilt here.
  return [net.name
          for net in self._config_data.networks.values()]
2736 def _UnlockedGetNetwork(self, uuid):
2737 """Returns information about a network.
2739 This function is for internal use, when the config lock is already held.
2742 if uuid not in self._config_data.networks:
2745 return self._config_data.networks[uuid]
@locking.ssynchronized(_config_lock, shared=1)
def GetNetwork(self, uuid):
  """Returns information about a network.

  It takes the information from the configuration file.

  @param uuid: UUID of the network
  @rtype: L{objects.Network}
  @return: the network object

  """
  net = self._UnlockedGetNetwork(uuid)
  return net
@locking.ssynchronized(_config_lock)
def AddNetwork(self, net, ec_id, check_uuid=True):
  """Add a network to the configuration.

  @type net: L{objects.Network}
  @param net: the Network object to add
  @type ec_id: string
  @param ec_id: unique id for the job to use when creating a missing UUID

  """
  self._UnlockedAddNetwork(net, ec_id, check_uuid)
2774 def _UnlockedAddNetwork(self, net, ec_id, check_uuid):
2775 """Add a network to the configuration.
2778 logging.info("Adding network %s to configuration", net.name)
2781 self._EnsureUUID(net, ec_id)
2784 net.ctime = net.mtime = time.time()
2785 self._config_data.networks[net.uuid] = net
2786 self._config_data.cluster.serial_no += 1
2788 def _UnlockedLookupNetwork(self, target):
2789 """Lookup a network's UUID.
2791 @type target: string
2792 @param target: network name or UUID
2794 @return: network UUID
2795 @raises errors.OpPrereqError: when the target network cannot be found
2800 if target in self._config_data.networks:
2802 for net in self._config_data.networks.values():
2803 if net.name == target:
2805 raise errors.OpPrereqError("Network '%s' not found" % target,
@locking.ssynchronized(_config_lock, shared=1)
def LookupNetwork(self, target):
  """Lookup a network's UUID.

  This function is just a wrapper over L{_UnlockedLookupNetwork}.

  @type target: string
  @param target: network name or UUID
  @return: network UUID

  """
  uuid = self._UnlockedLookupNetwork(target)
  return uuid
@locking.ssynchronized(_config_lock)
def RemoveNetwork(self, network_uuid):
  """Remove a network from the configuration.

  @type network_uuid: string
  @param network_uuid: the UUID of the network to remove
  @raise errors.ConfigurationError: if the UUID is unknown

  """
  logging.info("Removing network %s from configuration", network_uuid)

  networks = self._config_data.networks
  if network_uuid not in networks:
    raise errors.ConfigurationError("Unknown network '%s'" % network_uuid)

  del networks[network_uuid]
  self._config_data.cluster.serial_no += 1
2839 def _UnlockedGetGroupNetParams(self, net_uuid, node_uuid):
2840 """Get the netparams (mode, link) of a network.
2842 Get a network's netparams for a given node.
2844 @type net_uuid: string
2845 @param net_uuid: network uuid
2846 @type node_uuid: string
2847 @param node_uuid: node UUID
2848 @rtype: dict or None
2852 node_info = self._UnlockedGetNodeInfo(node_uuid)
2853 nodegroup_info = self._UnlockedGetNodeGroup(node_info.group)
2854 netparams = nodegroup_info.networks.get(net_uuid, None)
@locking.ssynchronized(_config_lock, shared=1)
def GetGroupNetParams(self, net_uuid, node_uuid):
  """Locking wrapper of _UnlockedGetGroupNetParams()

  @type net_uuid: string
  @param net_uuid: network uuid
  @type node_uuid: string
  @param node_uuid: node UUID

  """
  params = self._UnlockedGetGroupNetParams(net_uuid, node_uuid)
  return params
2865 @locking.ssynchronized(_config_lock, shared=1)
2866 def CheckIPInNodeGroup(self, ip, node_uuid):
2867 """Check IP uniqueness in nodegroup.
2869 Check networks that are connected in the node's node group
2870 if ip is contained in any of them. Used when creating/adding
2871 a NIC to ensure uniqueness among nodegroups.
2874 @param ip: ip address
2875 @type node_uuid: string
2876 @param node_uuid: node UUID
2877 @rtype: (string, dict) or (None, None)
2878 @return: (network name, netparams)
2883 node_info = self._UnlockedGetNodeInfo(node_uuid)
2884 nodegroup_info = self._UnlockedGetNodeGroup(node_info.group)
2885 for net_uuid in nodegroup_info.networks.keys():
2886 net_info = self._UnlockedGetNetwork(net_uuid)
2887 pool = network.AddressPool(net_info)
2888 if pool.Contains(ip):
2889 return (net_info.name, nodegroup_info.networks[net_uuid])