4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
29 Currently, the data storage format is JSON. YAML was slow and consuming too
34 # pylint: disable=R0904
35 # R0904: Too many public methods
44 from ganeti import errors
45 from ganeti import locking
46 from ganeti import utils
47 from ganeti import constants
48 from ganeti import rpc
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import uidpool
52 from ganeti import netutils
53 from ganeti import runtime
54 from ganeti import pathutils
55 from ganeti import network
# Module-level lock protecting the configuration; public ConfigWriter
# methods acquire it (shared or exclusive) via the ssynchronized decorators.
_config_lock = locking.SharedLock("ConfigWriter")

# job id used for resource management at config upgrade time
_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
def _ValidateConfig(data):
  """Verifies that a configuration object looks valid.

  This only verifies the version of the configuration.

  @raise errors.ConfigurationError: if the version differs from what
      we expect

  """
  # Guard-clause form: a matching version is the common case
  if data.version == constants.CONFIG_VERSION:
    return
  raise errors.ConfigVersionMismatch(constants.CONFIG_VERSION, data.version)
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  """
    # maps execution-context (job) id -> set of resources reserved by it
    self._ec_reserved = {}

  def Reserved(self, resource):
    # True iff any execution context currently holds this resource
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:

  def Reserve(self, ec_id, resource):
    # refuse double reservation regardless of which context holds it
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
    if ec_id not in self._ec_reserved:
      self._ec_reserved[ec_id] = set([resource])
      self._ec_reserved[ec_id].add(resource)

  def DropECReservations(self, ec_id):
    # forget everything reserved by the given execution context
    if ec_id in self._ec_reserved:
      del self._ec_reserved[ec_id]

  def GetReserved(self):
    # union of reservations across all execution contexts
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)

  def GetECReserved(self, ec_id):
    """Used when you want to retrieve all reservations for a specific
    execution context. E.g when committing reserved IPs for a specific
    network.

    """
    if ec_id in self._ec_reserved:
      ec_reserved.update(self._ec_reserved[ec_id])

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type

    @param existing: resources already in use, never returned
    @param generate_one_fn: zero-argument callable producing one candidate
    @param ec_id: execution context id under which to reserve the result

    """
    assert callable(generate_one_fn)
    # candidates must avoid both reserved and pre-existing resources
    all_elems = self.GetReserved()
    all_elems.update(existing)
    new_resource = generate_one_fn()
    if new_resource is not None and new_resource not in all_elems:
    raise errors.ConfigurationError("Not able generate new resource"
                                    " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
def _MatchNameComponentIgnoreCase(short_name, names):
  """Wrapper around L{utils.text.MatchNameComponent}.

  Forwards to the utility function with case-insensitive matching enabled.

  """
  return utils.MatchNameComponent(short_name, names, case_sensitive=False)
150 def _CheckInstanceDiskIvNames(disks):
151 """Checks if instance's disks' C{iv_name} attributes are in order.
153 @type disks: list of L{objects.Disk}
154 @param disks: List of disks
155 @rtype: list of tuples; (int, string, string)
156 @return: List of wrongly named disks, each tuple contains disk index,
157 expected and actual name
162 for (idx, disk) in enumerate(disks):
163 exp_iv_name = "disk/%s" % idx
164 if disk.iv_name != exp_iv_name:
165 result.append((idx, exp_iv_name, disk.iv_name))
  """The interface to the cluster configuration.

  @ivar _temporary_lvs: reservation manager for temporary LVs
  @ivar _all_rms: a list of all temporary reservation managers

  """
  def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
               accept_foreign=False):
    # shared module-level lock; methods synchronize on it via decorators
    self._lock = _config_lock
    self._config_data = None
    self._offline = offline
      # NOTE(review): the next two assignments look like branches of an
      # elided "if cfg_file is None / else"; default is the cluster path
      self._cfg_file = pathutils.CLUSTER_CONF_FILE
      self._cfg_file = cfg_file
    self._getents = _getents
    # reservation managers for the various temporary resources
    self._temporary_ids = TemporaryReservationManager()
    self._temporary_drbds = {}
    self._temporary_macs = TemporaryReservationManager()
    self._temporary_secrets = TemporaryReservationManager()
    self._temporary_lvs = TemporaryReservationManager()
    self._temporary_ips = TemporaryReservationManager()
    self._all_rms = [self._temporary_ids, self._temporary_macs,
                     self._temporary_secrets, self._temporary_lvs,
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = netutils.Hostname.GetSysName()
    self._last_cluster_serial = -1
    self._OpenConfig(accept_foreign)
  def _GetRpc(self, address_list):
    """Returns RPC runner for configuration.

    @param address_list: node address list, passed through to
        L{rpc.ConfigRunner}

    """
    return rpc.ConfigRunner(self._context, address_list)
  def SetContext(self, context):
    """Sets Ganeti context.

    @param context: the Ganeti context object; stored and later used when
        building the RPC runner (see L{_GetRpc})

    """
    self._context = context
  # this method needs to be static, so that we can call it on the class
    """Check if the cluster is configured.

    @rtype: boolean
    @return: True if the cluster configuration file exists on this node

    """
    return os.path.exists(pathutils.CLUSTER_CONF_FILE)
227 @locking.ssynchronized(_config_lock, shared=1)
228 def GetNdParams(self, node):
229 """Get the node params populated with cluster defaults.
231 @type node: L{objects.Node}
232 @param node: The node we want to know the params for
233 @return: A dict with the filled in node params
236 nodegroup = self._UnlockedGetNodeGroup(node.group)
237 return self._config_data.cluster.FillND(node, nodegroup)
239 @locking.ssynchronized(_config_lock, shared=1)
240 def GetInstanceDiskParams(self, instance):
241 """Get the disk params populated with inherit chain.
243 @type instance: L{objects.Instance}
244 @param instance: The instance we want to know the params for
245 @return: A dict with the filled in disk params
248 node = self._UnlockedGetNodeInfo(instance.primary_node)
249 nodegroup = self._UnlockedGetNodeGroup(node.group)
250 return self._UnlockedGetGroupDiskParams(nodegroup)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetGroupDiskParams(self, group):
    """Get the disk params populated with inherit chain.

    Locked wrapper over L{_UnlockedGetGroupDiskParams}.

    @type group: L{objects.NodeGroup}
    @param group: The group we want to know the params for
    @return: A dict with the filled in disk params

    """
    return self._UnlockedGetGroupDiskParams(group)
  def _UnlockedGetGroupDiskParams(self, group):
    """Get the disk params populated with inherit chain down to node-group.

    @type group: L{objects.NodeGroup}
    @param group: The group we want to know the params for
    @return: A dict with the filled in disk params

    """
    # cluster-level defaults overlaid with the group's own diskparams
    return self._config_data.cluster.SimpleFillDP(group.diskparams)
  def _UnlockedGetNetworkMACPrefix(self, net_uuid):
    """Return the network mac prefix if it exists or the cluster level default.

    """
      # the network object's own MAC prefix takes precedence
      nobj = self._UnlockedGetNetwork(net_uuid)
      prefix = nobj.mac_prefix
  def _GenerateOneMAC(self, prefix=None):
    """Return a function that randomly generates a MAC suffix
    and appends it to the given prefix. If prefix is not given get
    the cluster level default.

    """
      # fall back to the cluster-wide MAC prefix
      prefix = self._config_data.cluster.mac_prefix
      # three random bytes form the suffix, rendered as hex pairs
      byte1 = random.randrange(0, 256)
      byte2 = random.randrange(0, 256)
      byte3 = random.randrange(0, 256)
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
303 @locking.ssynchronized(_config_lock, shared=1)
304 def GenerateMAC(self, net_uuid, ec_id):
305 """Generate a MAC for an instance.
307 This should check the current instances for duplicates.
310 existing = self._AllMACs()
311 prefix = self._UnlockedGetNetworkMACPrefix(net_uuid)
312 gen_mac = self._GenerateOneMAC(prefix)
313 return self._temporary_ids.Generate(existing, gen_mac, ec_id)
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveMAC(self, mac, ec_id):
    """Reserve a MAC for an instance.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    @raise errors.ReservationError: if the MAC is already used by a
        configured instance

    """
    all_macs = self._AllMACs()
      raise errors.ReservationError("mac already in use")
      # record the reservation under the given execution context
      self._temporary_macs.Reserve(ec_id, mac)
329 def _UnlockedCommitTemporaryIps(self, ec_id):
330 """Commit all reserved IP address to their respective pools
333 for action, address, net_uuid in self._temporary_ips.GetECReserved(ec_id):
334 self._UnlockedCommitIp(action, net_uuid, address)
336 def _UnlockedCommitIp(self, action, net_uuid, address):
337 """Commit a reserved IP address to an IP pool.
339 The IP address is taken from the network's IP pool and marked as reserved.
342 nobj = self._UnlockedGetNetwork(net_uuid)
343 pool = network.AddressPool(nobj)
344 if action == constants.RESERVE_ACTION:
345 pool.Reserve(address)
346 elif action == constants.RELEASE_ACTION:
347 pool.Release(address)
349 def _UnlockedReleaseIp(self, net_uuid, address, ec_id):
350 """Give a specific IP address back to an IP pool.
352 The IP address is returned to the IP pool designated by pool_id and marked
356 self._temporary_ips.Reserve(ec_id,
357 (constants.RELEASE_ACTION, address, net_uuid))
  @locking.ssynchronized(_config_lock, shared=1)
  def ReleaseIp(self, net_uuid, address, ec_id):
    """Give a specified IP address back to an IP pool.

    This is just a wrapper around _UnlockedReleaseIp.

    """
    self._UnlockedReleaseIp(net_uuid, address, ec_id)
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateIp(self, net_uuid, ec_id):
    """Find a free IPv4 address for an instance.

    @raise errors.ReservationError: if the network's pool has no free
        addresses left

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)
        ip = pool.GenerateFree()
      except errors.AddressPoolError:
        # translate the pool-level error into a reservation error
        raise errors.ReservationError("Cannot generate IP. Network is full")
      return (constants.RESERVE_ACTION, ip, net_uuid)

    # reserve a (action, address, network) triple; only the address is
    # returned to the caller
    _, address, _ = self._temporary_ips.Generate([], gen_one, ec_id)
  def _UnlockedReserveIp(self, net_uuid, address, ec_id):
    """Reserve a given IPv4 address for use by an instance.

    @raise errors.ReservationError: if the address is outside the network
        or already in use

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)
      isreserved = pool.IsReserved(address)
    except errors.AddressPoolError:
      # the pool raises when the address does not belong to the network
      raise errors.ReservationError("IP address not in network")
      raise errors.ReservationError("IP address already in use")

    return self._temporary_ips.Reserve(ec_id,
                                       (constants.RESERVE_ACTION,
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveIp(self, net_uuid, address, ec_id):
    """Reserve a given IPv4 address for use by an instance.

    Locked wrapper over L{_UnlockedReserveIp}.

    """
    return self._UnlockedReserveIp(net_uuid, address, ec_id)
412 @locking.ssynchronized(_config_lock, shared=1)
413 def ReserveLV(self, lv_name, ec_id):
414 """Reserve an VG/LV pair for an instance.
416 @type lv_name: string
417 @param lv_name: the logical volume name to reserve
420 all_lvs = self._AllLVs()
421 if lv_name in all_lvs:
422 raise errors.ReservationError("LV already in use")
424 self._temporary_lvs.Reserve(ec_id, lv_name)
426 @locking.ssynchronized(_config_lock, shared=1)
427 def GenerateDRBDSecret(self, ec_id):
428 """Generate a DRBD secret.
430 This checks the current disks for duplicates.
433 return self._temporary_secrets.Generate(self._AllDRBDSecrets(),
434 utils.GenerateSecret,
    """Compute the list of all LVs.

    """
    for instance in self._config_data.instances.values():
      # LV names are collected per-node, then flattened into one set
      node_data = instance.MapLVsByNode()
      for lv_list in node_data.values():
        lvnames.update(lv_list)
    """Compute the list of all Disks.

    """
    # flat list of every disk of every configured instance
    for instance in self._config_data.instances.values():
      disks.extend(instance.disks)
    """Compute the list of all NICs.

    """
    # flat list of every NIC of every configured instance
    for instance in self._config_data.instances.values():
      nics.extend(instance.nics)
  def _AllIDs(self, include_temporary):
    """Compute the list of all UUIDs and names we have.

    @type include_temporary: boolean
    @param include_temporary: whether to include the _temporary_ids set
    @rtype: set
    @return: a set of IDs

    """
    if include_temporary:
      existing.update(self._temporary_ids.GetReserved())
    # gather LV names, instance/node keys and all object UUIDs
    existing.update(self._AllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
484 def _GenerateUniqueID(self, ec_id):
485 """Generate an unique UUID.
487 This checks the current node, instances and disk names for
491 @return: the unique id
494 existing = self._AllIDs(include_temporary=False)
495 return self._temporary_ids.Generate(existing, utils.NewUUID, ec_id)
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateUniqueID(self, ec_id):
    """Generate an unique ID.

    This is just a wrapper over the unlocked version.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the id to

    """
    return self._GenerateUniqueID(ec_id)
    """Return all MACs present in the config.

    @rtype: list
    @return: the list of all MACs

    """
    for instance in self._config_data.instances.values():
      for nic in instance.nics:
        result.append(nic.mac)
  def _AllDRBDSecrets(self):
    """Return all DRBD secrets present in the config.

    @rtype: list
    @return: the list of all DRBD secrets

    """
    def helper(disk, result):
      """Recursively gather secrets from this disk."""
      if disk.dev_type == constants.DT_DRBD8:
        # the secret is the 6th element of a DRBD logical_id tuple
        result.append(disk.logical_id[5])
      for child in disk.children:
        helper(child, result)

    for instance in self._config_data.instances.values():
      for disk in instance.disks:
  def _CheckDiskIDs(self, disk, l_ids, p_ids):
    """Compute duplicate disk IDs

    @type disk: L{objects.Disk}
    @param disk: the disk at which to start searching
    @type l_ids: list
    @param l_ids: list of current logical ids
    @type p_ids: list
    @param p_ids: list of current physical ids
    @rtype: list
    @return: a list of error messages

    """
    if disk.logical_id is not None:
      if disk.logical_id in l_ids:
        result.append("duplicate logical id %s" % str(disk.logical_id))
        # first occurrence: remember the id for subsequent checks
        l_ids.append(disk.logical_id)
    if disk.physical_id is not None:
      if disk.physical_id in p_ids:
        result.append("duplicate physical id %s" % str(disk.physical_id))
        p_ids.append(disk.physical_id)

      # recurse into child disks, accumulating their errors too
      for child in disk.children:
        result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
  def _UnlockedVerifyConfig(self):
    """Verify the configuration for internal consistency.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    # pylint: disable=R0914
    data = self._config_data
    cluster = data.cluster

    # global cluster checks
    if not cluster.enabled_hypervisors:
      result.append("enabled hypervisors list doesn't have any entries")
    invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
      result.append("enabled hypervisors contains invalid entries: %s" %
                    utils.CommaJoin(invalid_hvs))
    missing_hvp = (set(cluster.enabled_hypervisors) -
                   set(cluster.hvparams.keys()))
      result.append("hypervisor parameters missing for the enabled"
                    " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))

    if not cluster.enabled_disk_templates:
      result.append("enabled disk templates list doesn't have any entries")
    invalid_disk_templates = set(cluster.enabled_disk_templates) \
                               - constants.DISK_TEMPLATES
    if invalid_disk_templates:
      result.append("enabled disk templates list contains invalid entries:"
                    " %s" % utils.CommaJoin(invalid_disk_templates))

    if cluster.master_node not in data.nodes:
      result.append("cluster has invalid primary node '%s'" %

    # local helper: type-check a parameter dict against a template
    def _helper(owner, attr, value, template):
        utils.ForceDictType(value, template)
      except errors.GenericError, err:
        result.append("%s has invalid %s: %s" % (owner, attr, err))

    # local helper: syntax-check nic parameters
    def _helper_nic(owner, params):
        objects.NIC.CheckParameterSyntax(params)
      except errors.ConfigurationError, err:
        result.append("%s has invalid nicparams: %s" % (owner, err))

    # local helper: validate an instance policy dict
    def _helper_ipolicy(owner, ipolicy, iscluster):
        objects.InstancePolicy.CheckParameterSyntax(ipolicy, iscluster)
      except errors.ConfigurationError, err:
        result.append("%s has invalid instance policy: %s" % (owner, err))
      for key, value in ipolicy.items():
        if key == constants.ISPECS_MINMAX:
          for k in range(len(value)):
            _helper_ispecs(owner, "ipolicy/%s[%s]" % (key, k), value[k])
        elif key == constants.ISPECS_STD:
          _helper(owner, "ipolicy/" + key, value,
                  constants.ISPECS_PARAMETER_TYPES)
          # FIXME: assuming list type
          if key in constants.IPOLICY_PARAMETERS:
          if not isinstance(value, exp_type):
            result.append("%s has invalid instance policy: for %s,"
                          " expecting %s, got %s" %
                          (owner, key, exp_type.__name__, type(value)))

    # local helper: validate one ispecs sub-dict of an instance policy
    def _helper_ispecs(owner, parentkey, params):
      for (key, value) in params.items():
        fullkey = "/".join([parentkey, key])
        _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)

    # check cluster parameters
    _helper("cluster", "beparams", cluster.SimpleFillBE({}),
            constants.BES_PARAMETER_TYPES)
    _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
            constants.NICS_PARAMETER_TYPES)
    _helper_nic("cluster", cluster.SimpleFillNIC({}))
    _helper("cluster", "ndparams", cluster.SimpleFillND({}),
            constants.NDS_PARAMETER_TYPES)
    _helper_ipolicy("cluster", cluster.ipolicy, True)

    # per-instance checks
    for instance_uuid in data.instances:
      instance = data.instances[instance_uuid]
      if instance.uuid != instance_uuid:
        result.append("instance '%s' is indexed by wrong UUID '%s'" %
                      (instance.name, instance_uuid))
      if instance.primary_node not in data.nodes:
        result.append("instance '%s' has invalid primary node '%s'" %
                      (instance.name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("instance '%s' has invalid secondary node '%s'" %
                        (instance.name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("instance '%s' has NIC %d mac %s duplicate" %
                        (instance.name, idx, nic.mac))
          seen_macs.append(nic.mac)
          filled = cluster.SimpleFillNIC(nic.nicparams)
          owner = "instance %s nic %d" % (instance.name, idx)
          _helper(owner, "nicparams",
                  filled, constants.NICS_PARAMETER_TYPES)
          _helper_nic(owner, filled)

      # disk template checks
      if not instance.disk_template in data.cluster.enabled_disk_templates:
        result.append("instance '%s' uses the disabled disk template '%s'." %
                      (instance.name, instance.disk_template))

      if instance.beparams:
        _helper("instance %s" % instance.name, "beparams",
                cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)

      # gather the drbd ports for duplicate checks
      for (idx, dsk) in enumerate(instance.disks):
        if dsk.dev_type in constants.LDS_DRBD:
          tcp_port = dsk.logical_id[2]
          if tcp_port not in ports:
          ports[tcp_port].append((instance.name, "drbd disk %s" % idx))
      # gather network port reservation
      net_port = getattr(instance, "network_port", None)
      if net_port is not None:
        if net_port not in ports:
        ports[net_port].append((instance.name, "network port"))

      # instance disk verify
      for idx, disk in enumerate(instance.disks):
        result.extend(["instance '%s' disk %d error: %s" %
                       (instance.name, idx, msg) for msg in disk.Verify()])
        result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))

      wrong_names = _CheckInstanceDiskIvNames(instance.disks)
        tmp = "; ".join(("name of disk %s should be '%s', but is '%s'" %
                         (idx, exp_name, actual_name))
                        for (idx, exp_name, actual_name) in wrong_names)

        result.append("Instance '%s' has wrongly named disks: %s" %
                      (instance.name, tmp))

    # cluster-wide pool of free ports
    for free_port in cluster.tcpudp_port_pool:
      if free_port not in ports:
        ports[free_port] = []
      ports[free_port].append(("cluster", "port marked as free"))

    # compute tcp/udp duplicate ports
        txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
        result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))

    # highest used tcp port check
      if keys[-1] > cluster.highest_used_port:
        result.append("Highest used port mismatch, saved %s, computed %s" %
                      (cluster.highest_used_port, keys[-1]))

    if not data.nodes[cluster.master_node].master_candidate:
      result.append("Master node is not a master candidate")

    # master candidate checks
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
      result.append("Not enough master candidates: actual %d, target %d" %

    # per-node checks
    for node_uuid, node in data.nodes.items():
      if node.uuid != node_uuid:
        result.append("Node '%s' is indexed by wrong UUID '%s'" %
                      (node.name, node_uuid))
      if [node.master_candidate, node.drained, node.offline].count(True) > 1:
        result.append("Node %s state is invalid: master_candidate=%s,"
                      " drain=%s, offline=%s" %
                      (node.name, node.master_candidate, node.drained,
      if node.group not in data.nodegroups:
        result.append("Node '%s' has invalid group '%s'" %
                      (node.name, node.group))
        _helper("node %s" % node.name, "ndparams",
                cluster.FillND(node, data.nodegroups[node.group]),
                constants.NDS_PARAMETER_TYPES)
      used_globals = constants.NDC_GLOBALS.intersection(node.ndparams)
        result.append("Node '%s' has some global parameters set: %s" %
                      (node.name, utils.CommaJoin(used_globals)))

    # nodegroups checks
    nodegroups_names = set()
    for nodegroup_uuid in data.nodegroups:
      nodegroup = data.nodegroups[nodegroup_uuid]
      if nodegroup.uuid != nodegroup_uuid:
        result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
                      % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
      if utils.UUID_RE.match(nodegroup.name.lower()):
        result.append("node group '%s' (uuid: '%s') has uuid-like name" %
                      (nodegroup.name, nodegroup.uuid))
      if nodegroup.name in nodegroups_names:
        result.append("duplicate node group name '%s'" % nodegroup.name)
        nodegroups_names.add(nodegroup.name)
      group_name = "group %s" % nodegroup.name
      _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy),
      if nodegroup.ndparams:
        _helper(group_name, "ndparams",
                cluster.SimpleFillND(nodegroup.ndparams),
                constants.NDS_PARAMETER_TYPES)

    # drbd minors check
    _, duplicates = self._UnlockedComputeDRBDMap()
    for node, minor, instance_a, instance_b in duplicates:
      result.append("DRBD minor %d on node %s is assigned twice to instances"
                    " %s and %s" % (minor, node, instance_a, instance_b))

    # IP checks
    default_nicparams = cluster.nicparams[constants.PP_DEFAULT]

    def _AddIpAddress(ip, name):
      ips.setdefault(ip, []).append(name)

    _AddIpAddress(cluster.master_ip, "cluster_ip")

    for node in data.nodes.values():
      _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
      if node.secondary_ip != node.primary_ip:
        _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)

    for instance in data.instances.values():
      for idx, nic in enumerate(instance.nics):
        nicparams = objects.FillDict(default_nicparams, nic.nicparams)
        nic_mode = nicparams[constants.NIC_MODE]
        nic_link = nicparams[constants.NIC_LINK]

        if nic_mode == constants.NIC_MODE_BRIDGED:
          link = "bridge:%s" % nic_link
        elif nic_mode == constants.NIC_MODE_ROUTED:
          link = "route:%s" % nic_link
          raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)

        _AddIpAddress("%s/%s/%s" % (link, nic.ip, nic.network),
                      "instance:%s/nic:%d" % (instance.name, idx))

    for ip, owners in ips.items():
        result.append("IP address %s is used by multiple owners: %s" %
                      (ip, utils.CommaJoin(owners)))
  @locking.ssynchronized(_config_lock, shared=1)
  def VerifyConfig(self):
    """Verify the configuration.

    This is just a wrapper over L{_UnlockedVerifyConfig}.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    return self._UnlockedVerifyConfig()
  def _UnlockedSetDiskID(self, disk, node_uuid):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when the only the top device is passed to the remote
    node.

    This function is for internal use, when the config lock is already held.

    """
      # first fix the children, so their physical_id is set before ours
      for child in disk.children:
        self._UnlockedSetDiskID(child, node_uuid)

    if disk.logical_id is None and disk.physical_id is not None:
    if disk.dev_type == constants.LD_DRBD8:
      pnode, snode, port, pminor, sminor, secret = disk.logical_id
      if node_uuid not in (pnode, snode):
        raise errors.ConfigurationError("DRBD device not knowing node %s" %
      pnode_info = self._UnlockedGetNodeInfo(pnode)
      snode_info = self._UnlockedGetNodeInfo(snode)
      if pnode_info is None or snode_info is None:
        raise errors.ConfigurationError("Can't find primary or secondary node"
                                        " for %s" % str(disk))
      # endpoints are (secondary_ip, port) pairs; order depends on which
      # side of the DRBD pair the target node is
      p_data = (pnode_info.secondary_ip, port)
      s_data = (snode_info.secondary_ip, port)
      if pnode == node_uuid:
        disk.physical_id = p_data + s_data + (pminor, secret)
      else: # it must be secondary, we tested above
        disk.physical_id = s_data + p_data + (sminor, secret)
      # non-DRBD devices: physical id mirrors the logical id
      disk.physical_id = disk.logical_id
  @locking.ssynchronized(_config_lock)
  def SetDiskID(self, disk, node_uuid):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when the only the top device is passed to the remote
    node.

    """
    return self._UnlockedSetDiskID(disk, node_uuid)
916 @locking.ssynchronized(_config_lock)
917 def AddTcpUdpPort(self, port):
918 """Adds a new port to the available port pool.
920 @warning: this method does not "flush" the configuration (via
921 L{_WriteConfig}); callers should do that themselves once the
922 configuration is stable
925 if not isinstance(port, int):
926 raise errors.ProgrammerError("Invalid type passed for port")
928 self._config_data.cluster.tcpudp_port_pool.add(port)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetPortList(self):
    """Returns a copy of the current port list.

    @rtype: set
    @return: a copy of the cluster's TCP/UDP port pool

    """
    return self._config_data.cluster.tcpudp_port_pool.copy()
  @locking.ssynchronized(_config_lock)
  def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    """
    # If there are TCP/IP ports configured, we use them first.
    if self._config_data.cluster.tcpudp_port_pool:
      port = self._config_data.cluster.tcpudp_port_pool.pop()
      # pool empty: take the next port above the highest used one
      port = self._config_data.cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      self._config_data.cluster.highest_used_port = port
  def _UnlockedComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    @rtype: (dict, list)
    @return: dictionary of node_uuid: dict of minor: instance_uuid;
        the returned dict will have all the nodes in it (even if with
        an empty list), and a list of duplicates; if the duplicates
        list is not empty, the configuration is corrupted and its caller
        should raise an exception

    """
    def _AppendUsedMinors(get_node_name_fn, instance, disk, used):
      # collect the minors of this DRBD disk, recursing into children;
      # returns the duplicates found
      if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
        node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
        for node_uuid, minor in ((node_a, minor_a), (node_b, minor_b)):
          assert node_uuid in used, \
            ("Node '%s' of instance '%s' not found in node list" %
             (get_node_name_fn(node_uuid), instance.name))
          if minor in used[node_uuid]:
            duplicates.append((node_uuid, minor, instance.uuid,
                               used[node_uuid][minor]))
            used[node_uuid][minor] = instance.uuid
        for child in disk.children:
          duplicates.extend(_AppendUsedMinors(get_node_name_fn, instance, child,

    # start with an (empty) entry for every known node
    my_dict = dict((node_uuid, {}) for node_uuid in self._config_data.nodes)
    for instance in self._config_data.instances.itervalues():
      for disk in instance.disks:
        duplicates.extend(_AppendUsedMinors(self._UnlockedGetNodeName,
                                            instance, disk, my_dict))
    # also merge in the temporary (not yet committed) reservations
    for (node_uuid, minor), inst_uuid in self._temporary_drbds.iteritems():
      if minor in my_dict[node_uuid] and my_dict[node_uuid][minor] != inst_uuid:
        duplicates.append((node_uuid, minor, inst_uuid,
                           my_dict[node_uuid][minor]))
        my_dict[node_uuid][minor] = inst_uuid
    return my_dict, duplicates
  @locking.ssynchronized(_config_lock)
  def ComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    This is just a wrapper over L{_UnlockedComputeDRBDMap}.

    @return: dictionary of node_uuid: dict of minor: instance_uuid;
        the returned dict will have all the nodes in it (even if with
        an empty list)

    """
    d_map, duplicates = self._UnlockedComputeDRBDMap()
      # duplicates mean the configuration is corrupted
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
  @locking.ssynchronized(_config_lock)
  def AllocateDRBDMinor(self, node_uuids, inst_uuid):
    """Allocate a drbd minor.

    The free minor will be automatically computed from the existing
    devices. A node can be given multiple times in order to allocate
    multiple minors. The result is the list of minors, in the same
    order as the passed nodes.

    @type inst_uuid: string
    @param inst_uuid: the instance for which we allocate minors

    """
    assert isinstance(inst_uuid, basestring), \
           "Invalid argument '%s' passed to AllocateDRBDMinor" % inst_uuid

    d_map, duplicates = self._UnlockedComputeDRBDMap()
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
    for nuuid in node_uuids:
      ndata = d_map[nuuid]
        # no minors used, we can start at 0
        ndata[0] = inst_uuid
        self._temporary_drbds[(nuuid, 0)] = inst_uuid
      # look for a gap in the already-used minors
      ffree = utils.FirstFree(keys)
        # return the next minor
        # TODO: implement high-limit check
        minor = keys[-1] + 1
      # double-check minor against current instances
      assert minor not in d_map[nuuid], \
        ("Attempt to reuse allocated DRBD minor %d on node %s,"
         " already allocated to instance %s" %
         (minor, nuuid, d_map[nuuid][minor]))
      ndata[minor] = inst_uuid
      # double-check minor against reservation
      r_key = (nuuid, minor)
      assert r_key not in self._temporary_drbds, \
        ("Attempt to reuse reserved DRBD minor %d on node %s,"
         " reserved for instance %s" %
         (minor, nuuid, self._temporary_drbds[r_key]))
      self._temporary_drbds[r_key] = inst_uuid
      result.append(minor)
    logging.debug("Request to allocate drbd minors, input: %s, returning %s",
1077 def _UnlockedReleaseDRBDMinors(self, inst_uuid):
1078 """Release temporary drbd minors allocated for a given instance.
1080 @type inst_uuid: string
1081 @param inst_uuid: the instance for which temporary minors should be
1085 assert isinstance(inst_uuid, basestring), \
1086 "Invalid argument passed to ReleaseDRBDMinors"
1087 for key, uuid in self._temporary_drbds.items():
1088 if uuid == inst_uuid:
1089 del self._temporary_drbds[key]
  @locking.ssynchronized(_config_lock)
  def ReleaseDRBDMinors(self, inst_uuid):
    """Release temporary drbd minors allocated for a given instance.

    This should be called on the error paths, on the success paths
    it's automatically called by the ConfigWriter add and update
    functions.

    This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.

    @type inst_uuid: string
    @param inst_uuid: the instance for which temporary minors should be
        released

    """
    self._UnlockedReleaseDRBDMinors(inst_uuid)
1108 @locking.ssynchronized(_config_lock, shared=1)
1109 def GetConfigVersion(self):
1110 """Get the configuration version.
1112 @return: Config version
1115 return self._config_data.version
1117 @locking.ssynchronized(_config_lock, shared=1)
1118 def GetClusterName(self):
1119 """Get cluster name.
1121 @return: Cluster name
1124 return self._config_data.cluster.cluster_name
1126 @locking.ssynchronized(_config_lock, shared=1)
1127 def GetMasterNode(self):
1128 """Get the UUID of the master node for this cluster.
1130 @return: Master node UUID
1133 return self._config_data.cluster.master_node
1135 @locking.ssynchronized(_config_lock, shared=1)
1136 def GetMasterNodeName(self):
1137 """Get the hostname of the master node for this cluster.
1139 @return: Master node hostname
1142 return self._UnlockedGetNodeName(self._config_data.cluster.master_node)
1144 @locking.ssynchronized(_config_lock, shared=1)
1145 def GetMasterIP(self):
1146 """Get the IP of the master node for this cluster.
1151 return self._config_data.cluster.master_ip
1153 @locking.ssynchronized(_config_lock, shared=1)
1154 def GetMasterNetdev(self):
1155 """Get the master network device for this cluster.
1158 return self._config_data.cluster.master_netdev
1160 @locking.ssynchronized(_config_lock, shared=1)
1161 def GetMasterNetmask(self):
1162 """Get the netmask of the master node for this cluster.
1165 return self._config_data.cluster.master_netmask
1167 @locking.ssynchronized(_config_lock, shared=1)
1168 def GetUseExternalMipScript(self):
1169 """Get flag representing whether to use the external master IP setup script.
1172 return self._config_data.cluster.use_external_mip_script
1174 @locking.ssynchronized(_config_lock, shared=1)
1175 def GetFileStorageDir(self):
1176 """Get the file storage dir for this cluster.
1179 return self._config_data.cluster.file_storage_dir
1181 @locking.ssynchronized(_config_lock, shared=1)
1182 def GetSharedFileStorageDir(self):
1183 """Get the shared file storage dir for this cluster.
1186 return self._config_data.cluster.shared_file_storage_dir
1188 @locking.ssynchronized(_config_lock, shared=1)
1189 def GetHypervisorType(self):
1190 """Get the hypervisor type for this cluster.
1193 return self._config_data.cluster.enabled_hypervisors[0]
1195 @locking.ssynchronized(_config_lock, shared=1)
1196 def GetHostKey(self):
1197 """Return the rsa hostkey from the config.
1200 @return: the rsa hostkey
1203 return self._config_data.cluster.rsahostkeypub
1205 @locking.ssynchronized(_config_lock, shared=1)
1206 def GetDefaultIAllocator(self):
1207 """Get the default instance allocator for this cluster.
1210 return self._config_data.cluster.default_iallocator
1212 @locking.ssynchronized(_config_lock, shared=1)
1213 def GetPrimaryIPFamily(self):
1214 """Get cluster primary ip family.
1216 @return: primary ip family
1219 return self._config_data.cluster.primary_ip_family
1221 @locking.ssynchronized(_config_lock, shared=1)
1222 def GetMasterNetworkParameters(self):
1223 """Get network parameters of the master node.
1225 @rtype: L{object.MasterNetworkParameters}
1226 @return: network parameters of the master node
1229 cluster = self._config_data.cluster
1230 result = objects.MasterNetworkParameters(
1231 uuid=cluster.master_node, ip=cluster.master_ip,
1232 netmask=cluster.master_netmask, netdev=cluster.master_netdev,
1233 ip_family=cluster.primary_ip_family)
1237 @locking.ssynchronized(_config_lock)
1238 def AddNodeGroup(self, group, ec_id, check_uuid=True):
1239 """Add a node group to the configuration.
1241 This method calls group.UpgradeConfig() to fill any missing attributes
1242 according to their default values.
1244 @type group: L{objects.NodeGroup}
1245 @param group: the NodeGroup object to add
1247 @param ec_id: unique id for the job to use when creating a missing UUID
1248 @type check_uuid: bool
1249 @param check_uuid: add an UUID to the group if it doesn't have one or, if
1250 it does, ensure that it does not exist in the
1251 configuration already
1254 self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
  def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
    """Add a node group to the configuration.

    Unlocked version of L{AddNodeGroup}; the caller must hold the config lock.

    @type group: L{objects.NodeGroup}
    @param group: the NodeGroup object to add
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: bool
    @param check_uuid: whether to validate/generate the group's UUID and check
      it for uniqueness

    """
    logging.info("Adding node group %s to configuration", group.name)

    # Some code might need to add a node group with a pre-populated UUID
    # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
    # the "does this UUID" exist already check.
    if check_uuid:
      self._EnsureUUID(group, ec_id)

    try:
      existing_uuid = self._UnlockedLookupNodeGroup(group.name)
    except errors.OpPrereqError:
      # name not found: it is free to use
      pass
    else:
      # the lookup succeeded, so the name already belongs to another group
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (group.name, existing_uuid),
                                 errors.ECODE_EXISTS)

    group.serial_no = 1
    group.ctime = group.mtime = time.time()
    group.UpgradeConfig()

    self._config_data.nodegroups[group.uuid] = group
    self._config_data.cluster.serial_no += 1
1286 @locking.ssynchronized(_config_lock)
1287 def RemoveNodeGroup(self, group_uuid):
1288 """Remove a node group from the configuration.
1290 @type group_uuid: string
1291 @param group_uuid: the UUID of the node group to remove
1294 logging.info("Removing node group %s from configuration", group_uuid)
1296 if group_uuid not in self._config_data.nodegroups:
1297 raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)
1299 assert len(self._config_data.nodegroups) != 1, \
1300 "Group '%s' is the only group, cannot be removed" % group_uuid
1302 del self._config_data.nodegroups[group_uuid]
1303 self._config_data.cluster.serial_no += 1
1306 def _UnlockedLookupNodeGroup(self, target):
1307 """Lookup a node group's UUID.
1309 @type target: string or None
1310 @param target: group name or UUID or None to look for the default
1312 @return: nodegroup UUID
1313 @raises errors.OpPrereqError: when the target group cannot be found
1317 if len(self._config_data.nodegroups) != 1:
1318 raise errors.OpPrereqError("More than one node group exists. Target"
1319 " group must be specified explicitly.")
1321 return self._config_data.nodegroups.keys()[0]
1322 if target in self._config_data.nodegroups:
1324 for nodegroup in self._config_data.nodegroups.values():
1325 if nodegroup.name == target:
1326 return nodegroup.uuid
1327 raise errors.OpPrereqError("Node group '%s' not found" % target,
1330 @locking.ssynchronized(_config_lock, shared=1)
1331 def LookupNodeGroup(self, target):
1332 """Lookup a node group's UUID.
1334 This function is just a wrapper over L{_UnlockedLookupNodeGroup}.
1336 @type target: string or None
1337 @param target: group name or UUID or None to look for the default
1339 @return: nodegroup UUID
1342 return self._UnlockedLookupNodeGroup(target)
1344 def _UnlockedGetNodeGroup(self, uuid):
1345 """Lookup a node group.
1348 @param uuid: group UUID
1349 @rtype: L{objects.NodeGroup} or None
1350 @return: nodegroup object, or None if not found
1353 if uuid not in self._config_data.nodegroups:
1356 return self._config_data.nodegroups[uuid]
1358 @locking.ssynchronized(_config_lock, shared=1)
1359 def GetNodeGroup(self, uuid):
1360 """Lookup a node group.
1363 @param uuid: group UUID
1364 @rtype: L{objects.NodeGroup} or None
1365 @return: nodegroup object, or None if not found
1368 return self._UnlockedGetNodeGroup(uuid)
1370 @locking.ssynchronized(_config_lock, shared=1)
1371 def GetAllNodeGroupsInfo(self):
1372 """Get the configuration of all node groups.
1375 return dict(self._config_data.nodegroups)
1377 @locking.ssynchronized(_config_lock, shared=1)
1378 def GetNodeGroupList(self):
1379 """Get a list of node groups.
1382 return self._config_data.nodegroups.keys()
1384 @locking.ssynchronized(_config_lock, shared=1)
1385 def GetNodeGroupMembersByNodes(self, nodes):
1386 """Get nodes which are member in the same nodegroups as the given nodes.
1389 ngfn = lambda node_uuid: self._UnlockedGetNodeInfo(node_uuid).group
1390 return frozenset(member_uuid
1391 for node_uuid in nodes
1393 self._UnlockedGetNodeGroup(ngfn(node_uuid)).members)
1395 @locking.ssynchronized(_config_lock, shared=1)
1396 def GetMultiNodeGroupInfo(self, group_uuids):
1397 """Get the configuration of multiple node groups.
1399 @param group_uuids: List of node group UUIDs
1401 @return: List of tuples of (group_uuid, group_info)
1404 return [(uuid, self._UnlockedGetNodeGroup(uuid)) for uuid in group_uuids]
  @locking.ssynchronized(_config_lock)
  def AddInstance(self, instance, ec_id):
    """Add an instance to the config.

    This should be used after creating a new instance.

    @type instance: L{objects.Instance}
    @param instance: the instance object
    @param ec_id: execution context id, used to commit reserved IPs

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
      all_lvs = instance.MapLVsByNode()
      logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)

    # refuse duplicate MAC addresses across the whole cluster
    all_macs = self._AllMACs()
    for nic in instance.nics:
      if nic.mac in all_macs:
        raise errors.ConfigurationError("Cannot add instance %s:"
                                        " MAC address '%s' already in use." %
                                        (instance.name, nic.mac))

    self._CheckUniqueUUID(instance, include_temporary=False)

    instance.serial_no = 1
    instance.ctime = instance.mtime = time.time()
    self._config_data.instances[instance.uuid] = instance
    self._config_data.cluster.serial_no += 1
    # the instance is stored now; drop its temporary DRBD minor reservations
    # and commit any IP addresses reserved under this execution context
    self._UnlockedReleaseDRBDMinors(instance.uuid)
    self._UnlockedCommitTemporaryIps(ec_id)
    self._WriteConfig()
1440 def _EnsureUUID(self, item, ec_id):
1441 """Ensures a given object has a valid UUID.
1443 @param item: the instance or node to be checked
1444 @param ec_id: the execution context id for the uuid reservation
1448 item.uuid = self._GenerateUniqueID(ec_id)
1450 self._CheckUniqueUUID(item, include_temporary=True)
1452 def _CheckUniqueUUID(self, item, include_temporary):
1453 """Checks that the UUID of the given object is unique.
1455 @param item: the instance or node to be checked
1456 @param include_temporary: whether temporarily generated UUID's should be
1457 included in the check. If the UUID of the item to be checked is
1458 a temporarily generated one, this has to be C{False}.
1462 raise errors.ConfigurationError("'%s' must have an UUID" % (item.name,))
1463 if item.uuid in self._AllIDs(include_temporary=include_temporary):
1464 raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
1465 " in use" % (item.name, item.uuid))
  def _SetInstanceStatus(self, inst_uuid, status, disks_active):
    """Set the instance's status to a given value.

    @type inst_uuid: string
    @param inst_uuid: UUID of the instance to modify
    @param status: new admin state, or None to keep the current one
    @param disks_active: new disks_active flag, or None to keep the current one

    """
    if inst_uuid not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      inst_uuid)
    instance = self._config_data.instances[inst_uuid]

    # None means "leave this field unchanged"
    if status is None:
      status = instance.admin_state
    if disks_active is None:
      disks_active = instance.disks_active

    assert status in constants.ADMINST_ALL, \
           "Invalid status '%s' passed to SetInstanceStatus" % (status,)

    # only touch serial number/mtime and rewrite the config when something
    # actually changed
    if instance.admin_state != status or \
       instance.disks_active != disks_active:
      instance.admin_state = status
      instance.disks_active = disks_active
      instance.serial_no += 1
      instance.mtime = time.time()
      self._WriteConfig()
1492 @locking.ssynchronized(_config_lock)
1493 def MarkInstanceUp(self, inst_uuid):
1494 """Mark the instance status to up in the config.
1496 This also sets the instance disks active flag.
1499 self._SetInstanceStatus(inst_uuid, constants.ADMINST_UP, True)
1501 @locking.ssynchronized(_config_lock)
1502 def MarkInstanceOffline(self, inst_uuid):
1503 """Mark the instance status to down in the config.
1505 This also clears the instance disks active flag.
1508 self._SetInstanceStatus(inst_uuid, constants.ADMINST_OFFLINE, False)
  @locking.ssynchronized(_config_lock)
  def RemoveInstance(self, inst_uuid):
    """Remove the instance from the configuration.

    @type inst_uuid: string
    @param inst_uuid: UUID of the instance to remove

    """
    if inst_uuid not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    # If a network port has been allocated to the instance,
    # return it to the pool of free ports.
    inst = self._config_data.instances[inst_uuid]
    network_port = getattr(inst, "network_port", None)
    if network_port is not None:
      self._config_data.cluster.tcpudp_port_pool.add(network_port)

    instance = self._UnlockedGetInstanceInfo(inst_uuid)

    for nic in instance.nics:
      if nic.network and nic.ip:
        # Return all IP addresses to the respective address pools
        self._UnlockedCommitIp(constants.RELEASE_ACTION, nic.network, nic.ip)

    del self._config_data.instances[inst_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
  @locking.ssynchronized(_config_lock)
  def RenameInstance(self, inst_uuid, new_name):
    """Rename an instance.

    This needs to be done in ConfigWriter and not by RemoveInstance
    combined with AddInstance as only we can guarantee an atomic
    rename.

    @type inst_uuid: string
    @param inst_uuid: UUID of the instance to rename
    @type new_name: string
    @param new_name: the new instance name

    """
    if inst_uuid not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

    inst = self._config_data.instances[inst_uuid]
    inst.name = new_name

    for (idx, disk) in enumerate(inst.disks):
      if disk.dev_type == constants.LD_FILE:
        # rename the file paths in logical and physical id
        file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
        disk.logical_id = (disk.logical_id[0],
                           utils.PathJoin(file_storage_dir, inst.name,
                                          "disk%s" % idx))
        disk.physical_id = disk.logical_id

    # Force update of ssconf files
    self._config_data.cluster.serial_no += 1

    self._WriteConfig()
1565 @locking.ssynchronized(_config_lock)
1566 def MarkInstanceDown(self, inst_uuid):
1567 """Mark the status of an instance to down in the configuration.
1569 This does not touch the instance disks active flag, as shut down instances
1570 can still have active disks.
1573 self._SetInstanceStatus(inst_uuid, constants.ADMINST_DOWN, None)
1575 @locking.ssynchronized(_config_lock)
1576 def MarkInstanceDisksActive(self, inst_uuid):
1577 """Mark the status of instance disks active.
1580 self._SetInstanceStatus(inst_uuid, None, True)
1582 @locking.ssynchronized(_config_lock)
1583 def MarkInstanceDisksInactive(self, inst_uuid):
1584 """Mark the status of instance disks inactive.
1587 self._SetInstanceStatus(inst_uuid, None, False)
1589 def _UnlockedGetInstanceList(self):
1590 """Get the list of instances.
1592 This function is for internal use, when the config lock is already held.
1595 return self._config_data.instances.keys()
1597 @locking.ssynchronized(_config_lock, shared=1)
1598 def GetInstanceList(self):
1599 """Get the list of instances.
1601 @return: array of instances, ex. ['instance2-uuid', 'instance1-uuid']
1604 return self._UnlockedGetInstanceList()
1606 def ExpandInstanceName(self, short_name):
1607 """Attempt to expand an incomplete instance name.
1610 # Locking is done in L{ConfigWriter.GetAllInstancesInfo}
1611 all_insts = self.GetAllInstancesInfo().values()
1612 expanded_name = _MatchNameComponentIgnoreCase(
1613 short_name, [inst.name for inst in all_insts])
1615 if expanded_name is not None:
1616 # there has to be exactly one instance with that name
1617 inst = (filter(lambda n: n.name == expanded_name, all_insts)[0])
1618 return (inst.uuid, inst.name)
1622 def _UnlockedGetInstanceInfo(self, inst_uuid):
1623 """Returns information about an instance.
1625 This function is for internal use, when the config lock is already held.
1628 if inst_uuid not in self._config_data.instances:
1631 return self._config_data.instances[inst_uuid]
1633 @locking.ssynchronized(_config_lock, shared=1)
1634 def GetInstanceInfo(self, inst_uuid):
1635 """Returns information about an instance.
1637 It takes the information from the configuration file. Other information of
1638 an instance are taken from the live systems.
1640 @param inst_uuid: UUID of the instance
1642 @rtype: L{objects.Instance}
1643 @return: the instance object
1646 return self._UnlockedGetInstanceInfo(inst_uuid)
1648 @locking.ssynchronized(_config_lock, shared=1)
1649 def GetInstanceNodeGroups(self, inst_uuid, primary_only=False):
1650 """Returns set of node group UUIDs for instance's nodes.
1655 instance = self._UnlockedGetInstanceInfo(inst_uuid)
1657 raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)
1660 nodes = [instance.primary_node]
1662 nodes = instance.all_nodes
1664 return frozenset(self._UnlockedGetNodeInfo(node_uuid).group
1665 for node_uuid in nodes)
1667 @locking.ssynchronized(_config_lock, shared=1)
1668 def GetInstanceNetworks(self, inst_uuid):
1669 """Returns set of network UUIDs for instance's nics.
1674 instance = self._UnlockedGetInstanceInfo(inst_uuid)
1676 raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)
1679 for nic in instance.nics:
1681 networks.add(nic.network)
1683 return frozenset(networks)
1685 @locking.ssynchronized(_config_lock, shared=1)
1686 def GetMultiInstanceInfo(self, inst_uuids):
1687 """Get the configuration of multiple instances.
1689 @param inst_uuids: list of instance UUIDs
1691 @return: list of tuples (instance UUID, instance_info), where
1692 instance_info is what would GetInstanceInfo return for the
1693 node, while keeping the original order
1696 return [(uuid, self._UnlockedGetInstanceInfo(uuid)) for uuid in inst_uuids]
1698 @locking.ssynchronized(_config_lock, shared=1)
1699 def GetMultiInstanceInfoByName(self, inst_names):
1700 """Get the configuration of multiple instances.
1702 @param inst_names: list of instance names
1704 @return: list of tuples (instance, instance_info), where
1705 instance_info is what would GetInstanceInfo return for the
1706 node, while keeping the original order
1710 for name in inst_names:
1711 instance = self._UnlockedGetInstanceInfoByName(name)
1712 result.append((instance.uuid, instance))
1715 @locking.ssynchronized(_config_lock, shared=1)
1716 def GetAllInstancesInfo(self):
1717 """Get the configuration of all instances.
1720 @return: dict of (instance, instance_info), where instance_info is what
1721 would GetInstanceInfo return for the node
1724 return self._UnlockedGetAllInstancesInfo()
1726 def _UnlockedGetAllInstancesInfo(self):
1727 my_dict = dict([(inst_uuid, self._UnlockedGetInstanceInfo(inst_uuid))
1728 for inst_uuid in self._UnlockedGetInstanceList()])
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstancesInfoByFilter(self, filter_fn):
    """Get instance configuration with a filter.

    @type filter_fn: callable
    @param filter_fn: Filter function receiving instance object as parameter,
      returning boolean. Important: this function is called while the
      configuration locks is held. It must not do any complex work or call
      functions potentially leading to a deadlock. Ideally it doesn't call any
      other functions and just compares instance attributes.
    @rtype: dict
    @return: mapping of instance UUID to instance object, restricted to the
      instances accepted by C{filter_fn}

    """
    return dict((uuid, inst)
                for (uuid, inst) in self._config_data.instances.items()
                if filter_fn(inst))
1747 @locking.ssynchronized(_config_lock, shared=1)
1748 def GetInstanceInfoByName(self, inst_name):
1749 """Get the L{objects.Instance} object for a named instance.
1751 @param inst_name: name of the instance to get information for
1752 @type inst_name: string
1753 @return: the corresponding L{objects.Instance} instance or None if no
1754 information is available
1757 return self._UnlockedGetInstanceInfoByName(inst_name)
1759 def _UnlockedGetInstanceInfoByName(self, inst_name):
1760 for inst in self._UnlockedGetAllInstancesInfo().values():
1761 if inst.name == inst_name:
1765 def _UnlockedGetInstanceName(self, inst_uuid):
1766 inst_info = self._UnlockedGetInstanceInfo(inst_uuid)
1767 if inst_info is None:
1768 raise errors.OpExecError("Unknown instance: %s" % inst_uuid)
1769 return inst_info.name
1771 @locking.ssynchronized(_config_lock, shared=1)
1772 def GetInstanceName(self, inst_uuid):
1773 """Gets the instance name for the passed instance.
1775 @param inst_uuid: instance UUID to get name for
1776 @type inst_uuid: string
1778 @return: instance name
1781 return self._UnlockedGetInstanceName(inst_uuid)
1783 @locking.ssynchronized(_config_lock, shared=1)
1784 def GetInstanceNames(self, inst_uuids):
1785 """Gets the instance names for the passed list of nodes.
1787 @param inst_uuids: list of instance UUIDs to get names for
1788 @type inst_uuids: list of strings
1789 @rtype: list of strings
1790 @return: list of instance names
1793 return self._UnlockedGetInstanceNames(inst_uuids)
1795 def _UnlockedGetInstanceNames(self, inst_uuids):
1796 return [self._UnlockedGetInstanceName(uuid) for uuid in inst_uuids]
  @locking.ssynchronized(_config_lock)
  def AddNode(self, node, ec_id):
    """Add a node to the configuration.

    @type node: L{objects.Node}
    @param node: a Node instance
    @param ec_id: execution context id, used when a UUID must be generated

    """
    logging.info("Adding node %s to configuration", node.name)

    self._EnsureUUID(node, ec_id)

    node.serial_no = 1
    node.ctime = node.mtime = time.time()
    # register the node with its group before storing it
    self._UnlockedAddNodeToGroup(node.uuid, node.group)
    self._config_data.nodes[node.uuid] = node
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
1817 @locking.ssynchronized(_config_lock)
1818 def RemoveNode(self, node_uuid):
1819 """Remove a node from the configuration.
1822 logging.info("Removing node %s from configuration", node_uuid)
1824 if node_uuid not in self._config_data.nodes:
1825 raise errors.ConfigurationError("Unknown node '%s'" % node_uuid)
1827 self._UnlockedRemoveNodeFromGroup(self._config_data.nodes[node_uuid])
1828 del self._config_data.nodes[node_uuid]
1829 self._config_data.cluster.serial_no += 1
1832 def ExpandNodeName(self, short_name):
1833 """Attempt to expand an incomplete node name into a node UUID.
1836 # Locking is done in L{ConfigWriter.GetAllNodesInfo}
1837 all_nodes = self.GetAllNodesInfo().values()
1838 expanded_name = _MatchNameComponentIgnoreCase(
1839 short_name, [node.name for node in all_nodes])
1841 if expanded_name is not None:
1842 # there has to be exactly one node with that name
1843 node = (filter(lambda n: n.name == expanded_name, all_nodes)[0])
1844 return (node.uuid, node.name)
1848 def _UnlockedGetNodeInfo(self, node_uuid):
1849 """Get the configuration of a node, as stored in the config.
1851 This function is for internal use, when the config lock is already
1854 @param node_uuid: the node UUID
1856 @rtype: L{objects.Node}
1857 @return: the node object
1860 if node_uuid not in self._config_data.nodes:
1863 return self._config_data.nodes[node_uuid]
1865 @locking.ssynchronized(_config_lock, shared=1)
1866 def GetNodeInfo(self, node_uuid):
1867 """Get the configuration of a node, as stored in the config.
1869 This is just a locked wrapper over L{_UnlockedGetNodeInfo}.
1871 @param node_uuid: the node UUID
1873 @rtype: L{objects.Node}
1874 @return: the node object
1877 return self._UnlockedGetNodeInfo(node_uuid)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInstances(self, node_uuid):
    """Get the instances of a node, as stored in the config.

    @param node_uuid: the node UUID
    @rtype: (list, list)
    @return: a tuple with two lists: the primary and the secondary instances

    """
    pri = []
    sec = []
    for inst in self._config_data.instances.values():
      if inst.primary_node == node_uuid:
        pri.append(inst.uuid)
      if node_uuid in inst.secondary_nodes:
        sec.append(inst.uuid)
    return (pri, sec)
1898 @locking.ssynchronized(_config_lock, shared=1)
1899 def GetNodeGroupInstances(self, uuid, primary_only=False):
1900 """Get the instances of a node group.
1902 @param uuid: Node group UUID
1903 @param primary_only: Whether to only consider primary nodes
1905 @return: List of instance UUIDs in node group
1909 nodes_fn = lambda inst: [inst.primary_node]
1911 nodes_fn = lambda inst: inst.all_nodes
1913 return frozenset(inst.uuid
1914 for inst in self._config_data.instances.values()
1915 for node_uuid in nodes_fn(inst)
1916 if self._UnlockedGetNodeInfo(node_uuid).group == uuid)
1918 def _UnlockedGetHvparamsString(self, hvname):
1919 """Return the string representation of the list of hyervisor parameters of
1920 the given hypervisor.
1922 @see: C{GetHvparams}
1926 hvparams = self._config_data.cluster.hvparams[hvname]
1927 for key in hvparams:
1928 result += "%s=%s\n" % (key, hvparams[key])
1931 @locking.ssynchronized(_config_lock, shared=1)
1932 def GetHvparamsString(self, hvname):
1933 """Return the hypervisor parameters of the given hypervisor.
1935 @type hvname: string
1936 @param hvname: name of a hypervisor
1938 @return: string containing key-value-pairs, one pair on each line;
1942 return self._UnlockedGetHvparamsString(hvname)
1944 def _UnlockedGetNodeList(self):
1945 """Return the list of nodes which are in the configuration.
1947 This function is for internal use, when the config lock is already
1953 return self._config_data.nodes.keys()
1955 @locking.ssynchronized(_config_lock, shared=1)
1956 def GetNodeList(self):
1957 """Return the list of nodes which are in the configuration.
1960 return self._UnlockedGetNodeList()
1962 def _UnlockedGetOnlineNodeList(self):
1963 """Return the list of nodes which are online.
1966 all_nodes = [self._UnlockedGetNodeInfo(node)
1967 for node in self._UnlockedGetNodeList()]
1968 return [node.uuid for node in all_nodes if not node.offline]
1970 @locking.ssynchronized(_config_lock, shared=1)
1971 def GetOnlineNodeList(self):
1972 """Return the list of nodes which are online.
1975 return self._UnlockedGetOnlineNodeList()
1977 @locking.ssynchronized(_config_lock, shared=1)
1978 def GetVmCapableNodeList(self):
1979 """Return the list of nodes which are not vm capable.
1982 all_nodes = [self._UnlockedGetNodeInfo(node)
1983 for node in self._UnlockedGetNodeList()]
1984 return [node.uuid for node in all_nodes if node.vm_capable]
1986 @locking.ssynchronized(_config_lock, shared=1)
1987 def GetNonVmCapableNodeList(self):
1988 """Return the list of nodes which are not vm capable.
1991 all_nodes = [self._UnlockedGetNodeInfo(node)
1992 for node in self._UnlockedGetNodeList()]
1993 return [node.uuid for node in all_nodes if not node.vm_capable]
1995 @locking.ssynchronized(_config_lock, shared=1)
1996 def GetMultiNodeInfo(self, node_uuids):
1997 """Get the configuration of multiple nodes.
1999 @param node_uuids: list of node UUIDs
2001 @return: list of tuples of (node, node_info), where node_info is
2002 what would GetNodeInfo return for the node, in the original
2006 return [(uuid, self._UnlockedGetNodeInfo(uuid)) for uuid in node_uuids]
2008 def _UnlockedGetAllNodesInfo(self):
2009 """Gets configuration of all nodes.
2011 @note: See L{GetAllNodesInfo}
2014 return dict([(node_uuid, self._UnlockedGetNodeInfo(node_uuid))
2015 for node_uuid in self._UnlockedGetNodeList()])
2017 @locking.ssynchronized(_config_lock, shared=1)
2018 def GetAllNodesInfo(self):
2019 """Get the configuration of all nodes.
2022 @return: dict of (node, node_info), where node_info is what
2023 would GetNodeInfo return for the node
2026 return self._UnlockedGetAllNodesInfo()
2028 def _UnlockedGetNodeInfoByName(self, node_name):
2029 for node in self._UnlockedGetAllNodesInfo().values():
2030 if node.name == node_name:
2034 @locking.ssynchronized(_config_lock, shared=1)
2035 def GetNodeInfoByName(self, node_name):
2036 """Get the L{objects.Node} object for a named node.
2038 @param node_name: name of the node to get information for
2039 @type node_name: string
2040 @return: the corresponding L{objects.Node} instance or None if no
2041 information is available
2044 return self._UnlockedGetNodeInfoByName(node_name)
2046 def _UnlockedGetNodeName(self, node_spec):
2047 if isinstance(node_spec, objects.Node):
2048 return node_spec.name
2049 elif isinstance(node_spec, basestring):
2050 node_info = self._UnlockedGetNodeInfo(node_spec)
2051 if node_info is None:
2052 raise errors.OpExecError("Unknown node: %s" % node_spec)
2053 return node_info.name
2055 raise errors.ProgrammerError("Can't handle node spec '%s'" % node_spec)
2057 @locking.ssynchronized(_config_lock, shared=1)
2058 def GetNodeName(self, node_spec):
2059 """Gets the node name for the passed node.
2061 @param node_spec: node to get names for
2062 @type node_spec: either node UUID or a L{objects.Node} object
2067 return self._UnlockedGetNodeName(node_spec)
2069 def _UnlockedGetNodeNames(self, node_specs):
2070 return [self._UnlockedGetNodeName(node_spec) for node_spec in node_specs]
2072 @locking.ssynchronized(_config_lock, shared=1)
2073 def GetNodeNames(self, node_specs):
2074 """Gets the node names for the passed list of nodes.
2076 @param node_specs: list of nodes to get names for
2077 @type node_specs: list of either node UUIDs or L{objects.Node} objects
2078 @rtype: list of strings
2079 @return: list of node names
2082 return self._UnlockedGetNodeNames(node_specs)
2084 @locking.ssynchronized(_config_lock, shared=1)
2085 def GetNodeGroupsFromNodes(self, node_uuids):
2086 """Returns groups for a list of nodes.
2088 @type node_uuids: list of string
2089 @param node_uuids: List of node UUIDs
2093 return frozenset(self._UnlockedGetNodeInfo(uuid).group
2094 for uuid in node_uuids)
  def _UnlockedGetMasterCandidateStats(self, exceptions=None):
    """Get the number of current and maximum desired and possible candidates.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
    @rtype: tuple
    @return: tuple of (current, desired and possible, possible)

    """
    mc_now = mc_should = mc_max = 0
    for node in self._config_data.nodes.values():
      if exceptions and node.uuid in exceptions:
        continue
      # a node can hold a candidate role only if it is usable and capable
      if not (node.offline or node.drained) and node.master_capable:
        mc_max += 1
      if node.master_candidate:
        mc_now += 1
    # desired size is capped by the configured pool size
    mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
    return (mc_now, mc_should, mc_max)
2116 @locking.ssynchronized(_config_lock, shared=1)
2117 def GetMasterCandidateStats(self, exceptions=None):
2118 """Get the number of current and maximum possible candidates.
2120 This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.
2122 @type exceptions: list
2123 @param exceptions: if passed, list of nodes that should be ignored
2125 @return: tuple of (current, max)
2128 return self._UnlockedGetMasterCandidateStats(exceptions)
  @locking.ssynchronized(_config_lock)
  def MaintainCandidatePool(self, exception_node_uuids):
    """Try to grow the candidate pool to the desired size.

    @type exception_node_uuids: list
    @param exception_node_uuids: if passed, list of nodes that should be ignored
    @rtype: list
    @return: list with the adjusted nodes (L{objects.Node} instances)

    """
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(
                          exception_node_uuids)
    mod_list = []
    if mc_now < mc_max:
      # shuffle so that promotions are spread randomly across eligible nodes
      node_list = self._config_data.nodes.keys()
      random.shuffle(node_list)
      for uuid in node_list:
        if mc_now >= mc_max:
          break
        node = self._config_data.nodes[uuid]
        # skip nodes that are already candidates or not eligible at all
        if (node.master_candidate or node.offline or node.drained or
            node.uuid in exception_node_uuids or not node.master_capable):
          continue
        mod_list.append(node)
        node.master_candidate = True
        node.serial_no += 1
        mc_now += 1
      if mc_now != mc_max:
        # this should not happen
        logging.warning("Warning: MaintainCandidatePool didn't manage to"
                        " fill the candidate pool (%d/%d)", mc_now, mc_max)
      if mod_list:
        self._config_data.cluster.serial_no += 1
        self._WriteConfig()

    return mod_list
2167 def _UnlockedAddNodeToGroup(self, node_uuid, nodegroup_uuid):
2168 """Add a given node to the specified group.
2171 if nodegroup_uuid not in self._config_data.nodegroups:
2172 # This can happen if a node group gets deleted between its lookup and
2173 # when we're adding the first node to it, since we don't keep a lock in
2174 # the meantime. It's ok though, as we'll fail cleanly if the node group
2175 # is not found anymore.
2176 raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)
2177 if node_uuid not in self._config_data.nodegroups[nodegroup_uuid].members:
2178 self._config_data.nodegroups[nodegroup_uuid].members.append(node_uuid)
2180 def _UnlockedRemoveNodeFromGroup(self, node):
2181 """Remove a given node from its group.
2184 nodegroup = node.group
2185 if nodegroup not in self._config_data.nodegroups:
2186 logging.warning("Warning: node '%s' has unknown node group '%s'"
2187 " (while being removed from it)", node.uuid, nodegroup)
2188 nodegroup_obj = self._config_data.nodegroups[nodegroup]
2189 if node.uuid not in nodegroup_obj.members:
2190 logging.warning("Warning: node '%s' not a member of its node group '%s'"
2191 " (while being removed from it)", node.uuid, nodegroup)
2193 nodegroup_obj.members.remove(node.uuid)
2195 @locking.ssynchronized(_config_lock)
2196 def AssignGroupNodes(self, mods):
2197 """Changes the group of a number of nodes.
2199 @type mods: list of tuples; (node name, new group UUID)
2200 @param mods: Node membership modifications
2203 groups = self._config_data.nodegroups
2204 nodes = self._config_data.nodes
2208 # Try to resolve UUIDs first
2209 for (node_uuid, new_group_uuid) in mods:
2211 node = nodes[node_uuid]
2213 raise errors.ConfigurationError("Unable to find node '%s'" % node_uuid)
2215 if node.group == new_group_uuid:
2216 # Node is being assigned to its current group
2217 logging.debug("Node '%s' was assigned to its current group (%s)",
2218 node_uuid, node.group)
2221 # Try to find current group of node
2223 old_group = groups[node.group]
2225 raise errors.ConfigurationError("Unable to find old group '%s'" %
2228 # Try to find new group for node
2230 new_group = groups[new_group_uuid]
2232 raise errors.ConfigurationError("Unable to find new group '%s'" %
2235 assert node.uuid in old_group.members, \
2236 ("Inconsistent configuration: node '%s' not listed in members for its"
2237 " old group '%s'" % (node.uuid, old_group.uuid))
2238 assert node.uuid not in new_group.members, \
2239 ("Inconsistent configuration: node '%s' already listed in members for"
2240 " its new group '%s'" % (node.uuid, new_group.uuid))
2242 resmod.append((node, old_group, new_group))
2245 for (node, old_group, new_group) in resmod:
2246 assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
2247 "Assigning to current group is not possible"
2249 node.group = new_group.uuid
2251 # Update members of involved groups
2252 if node.uuid in old_group.members:
2253 old_group.members.remove(node.uuid)
2254 if node.uuid not in new_group.members:
2255 new_group.members.append(node.uuid)
2257 # Update timestamps and serials (only once per node/group object)
2259 for obj in frozenset(itertools.chain(*resmod)): # pylint: disable=W0142
2263 # Force ssconf update
2264 self._config_data.cluster.serial_no += 1
2268 def _BumpSerialNo(self):
2269 """Bump up the serial number of the config.
2272 self._config_data.serial_no += 1
2273 self._config_data.mtime = time.time()
2275 def _AllUUIDObjects(self):
2276 """Returns all objects with uuid attributes.
2279 return (self._config_data.instances.values() +
2280 self._config_data.nodes.values() +
2281 self._config_data.nodegroups.values() +
2282 self._config_data.networks.values() +
2285 [self._config_data.cluster])
2287 def _OpenConfig(self, accept_foreign):
2288 """Read the config data from disk.
2291 raw_data = utils.ReadFile(self._cfg_file)
2294 data = objects.ConfigData.FromDict(serializer.Load(raw_data))
2295 except Exception, err:
2296 raise errors.ConfigurationError(err)
2298 # Make sure the configuration has the right version
2299 _ValidateConfig(data)
2301 if (not hasattr(data, "cluster") or
2302 not hasattr(data.cluster, "rsahostkeypub")):
2303 raise errors.ConfigurationError("Incomplete configuration"
2304 " (missing cluster.rsahostkeypub)")
2306 if not data.cluster.master_node in data.nodes:
2307 msg = ("The configuration denotes node %s as master, but does not"
2308 " contain information about this node" %
2309 data.cluster.master_node)
2310 raise errors.ConfigurationError(msg)
2312 master_info = data.nodes[data.cluster.master_node]
2313 if master_info.name != self._my_hostname and not accept_foreign:
2314 msg = ("The configuration denotes node %s as master, while my"
2315 " hostname is %s; opening a foreign configuration is only"
2316 " possible in accept_foreign mode" %
2317 (master_info.name, self._my_hostname))
2318 raise errors.ConfigurationError(msg)
2320 self._config_data = data
2321 # reset the last serial as -1 so that the next write will cause
2323 self._last_cluster_serial = -1
2325 # Upgrade configuration if needed
2326 self._UpgradeConfig()
2328 self._cfg_id = utils.GetFileID(path=self._cfg_file)
2330 def _UpgradeConfig(self):
2331 """Run any upgrade steps.
2333 This method performs both in-object upgrades and also update some data
2334 elements that need uniqueness across the whole configuration or interact
2337 @warning: this function will call L{_WriteConfig()}, but also
2338 L{DropECReservations} so it needs to be called only from a
2339 "safe" place (the constructor). If one wanted to call it with
2340 the lock held, a DropECReservationUnlocked would need to be
2341 created first, to avoid causing deadlock.
2344 # Keep a copy of the persistent part of _config_data to check for changes
2345 # Serialization doesn't guarantee order in dictionaries
2346 oldconf = copy.deepcopy(self._config_data.ToDict())
2348 # In-object upgrades
2349 self._config_data.UpgradeConfig()
2351 for item in self._AllUUIDObjects():
2352 if item.uuid is None:
2353 item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
2354 if not self._config_data.nodegroups:
2355 default_nodegroup_name = constants.INITIAL_NODE_GROUP_NAME
2356 default_nodegroup = objects.NodeGroup(name=default_nodegroup_name,
2358 self._UnlockedAddNodeGroup(default_nodegroup, _UPGRADE_CONFIG_JID, True)
2359 for node in self._config_data.nodes.values():
2361 node.group = self.LookupNodeGroup(None)
2362 # This is technically *not* an upgrade, but needs to be done both when
2363 # nodegroups are being added, and upon normally loading the config,
2364 # because the members list of a node group is discarded upon
2365 # serializing/deserializing the object.
2366 self._UnlockedAddNodeToGroup(node.uuid, node.group)
2368 modified = (oldconf != self._config_data.ToDict())
2371 # This is ok even if it acquires the internal lock, as _UpgradeConfig is
2372 # only called at config init time, without the lock held
2373 self.DropECReservations(_UPGRADE_CONFIG_JID)
2375 config_errors = self._UnlockedVerifyConfig()
2377 errmsg = ("Loaded configuration data is not consistent: %s" %
2378 (utils.CommaJoin(config_errors)))
2379 logging.critical(errmsg)
2381 def _DistributeConfig(self, feedback_fn):
2382 """Distribute the configuration to the other nodes.
2384 Currently, this only copies the configuration file. In the future,
2385 it could be used to encapsulate the 2/3-phase update mechanism.
2395 myhostname = self._my_hostname
2396 # we can skip checking whether _UnlockedGetNodeInfo returns None
2397 # since the node list comes from _UnlocketGetNodeList, and we are
2398 # called with the lock held, so no modifications should take place
2400 for node_uuid in self._UnlockedGetNodeList():
2401 node_info = self._UnlockedGetNodeInfo(node_uuid)
2402 if node_info.name == myhostname or not node_info.master_candidate:
2404 node_list.append(node_info.name)
2405 addr_list.append(node_info.primary_ip)
2407 # TODO: Use dedicated resolver talking to config writer for name resolution
2409 self._GetRpc(addr_list).call_upload_file(node_list, self._cfg_file)
2410 for to_node, to_result in result.items():
2411 msg = to_result.fail_msg
2413 msg = ("Copy of file %s to node %s failed: %s" %
2414 (self._cfg_file, to_node, msg))
2424 def _WriteConfig(self, destination=None, feedback_fn=None):
2425 """Write the configuration data to persistent storage.
2428 assert feedback_fn is None or callable(feedback_fn)
2430 # Warn on config errors, but don't abort the save - the
2431 # configuration has already been modified, and we can't revert;
2432 # the best we can do is to warn the user and save as is, leaving
2433 # recovery to the user
2434 config_errors = self._UnlockedVerifyConfig()
2436 errmsg = ("Configuration data is not consistent: %s" %
2437 (utils.CommaJoin(config_errors)))
2438 logging.critical(errmsg)
2442 if destination is None:
2443 destination = self._cfg_file
2444 self._BumpSerialNo()
2445 txt = serializer.Dump(self._config_data.ToDict())
2447 getents = self._getents()
2449 fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
2450 close=False, gid=getents.confd_gid, mode=0640)
2451 except errors.LockError:
2452 raise errors.ConfigurationError("The configuration file has been"
2453 " modified since the last write, cannot"
2456 self._cfg_id = utils.GetFileID(fd=fd)
2460 self.write_count += 1
2462 # and redistribute the config file to master candidates
2463 self._DistributeConfig(feedback_fn)
2465 # Write ssconf files on all nodes (including locally)
2466 if self._last_cluster_serial < self._config_data.cluster.serial_no:
2467 if not self._offline:
2468 result = self._GetRpc(None).call_write_ssconf_files(
2469 self._UnlockedGetNodeNames(self._UnlockedGetOnlineNodeList()),
2470 self._UnlockedGetSsconfValues())
2472 for nname, nresu in result.items():
2473 msg = nresu.fail_msg
2475 errmsg = ("Error while uploading ssconf files to"
2476 " node %s: %s" % (nname, msg))
2477 logging.warning(errmsg)
2482 self._last_cluster_serial = self._config_data.cluster.serial_no
2484 def _GetAllHvparamsStrings(self, hypervisors):
2485 """Get the hvparams of all given hypervisors from the config.
2487 @type hypervisors: list of string
2488 @param hypervisors: list of hypervisor names
2489 @rtype: dict of strings
2490 @returns: dictionary mapping the hypervisor name to a string representation
2491 of the hypervisor's hvparams
2495 for hv in hypervisors:
2496 hvparams[hv] = self._UnlockedGetHvparamsString(hv)
2500 def _ExtendByAllHvparamsStrings(ssconf_values, all_hvparams):
2501 """Extends the ssconf_values dictionary by hvparams.
2503 @type ssconf_values: dict of strings
2504 @param ssconf_values: dictionary mapping ssconf_keys to strings
2505 representing the content of ssconf files
2506 @type all_hvparams: dict of strings
2507 @param all_hvparams: dictionary mapping hypervisor names to a string
2508 representation of their hvparams
2509 @rtype: same as ssconf_values
2510 @returns: the ssconf_values dictionary extended by hvparams
2513 for hv in all_hvparams:
2514 ssconf_key = constants.SS_HVPARAMS_PREF + hv
2515 ssconf_values[ssconf_key] = all_hvparams[hv]
2516 return ssconf_values
2518 def _UnlockedGetSsconfValues(self):
2519 """Return the values needed by ssconf.
2522 @return: a dictionary with keys the ssconf names and values their
2527 instance_names = utils.NiceSort(
2528 [inst.name for inst in
2529 self._UnlockedGetAllInstancesInfo().values()])
2530 node_infos = self._UnlockedGetAllNodesInfo().values()
2531 node_names = [node.name for node in node_infos]
2532 node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
2533 for ninfo in node_infos]
2534 node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
2535 for ninfo in node_infos]
2537 instance_data = fn(instance_names)
2538 off_data = fn(node.name for node in node_infos if node.offline)
2539 on_data = fn(node.name for node in node_infos if not node.offline)
2540 mc_data = fn(node.name for node in node_infos if node.master_candidate)
2541 mc_ips_data = fn(node.primary_ip for node in node_infos
2542 if node.master_candidate)
2543 node_data = fn(node_names)
2544 node_pri_ips_data = fn(node_pri_ips)
2545 node_snd_ips_data = fn(node_snd_ips)
2547 cluster = self._config_data.cluster
2548 cluster_tags = fn(cluster.GetTags())
2550 hypervisor_list = fn(cluster.enabled_hypervisors)
2551 all_hvparams = self._GetAllHvparamsStrings(constants.HYPER_TYPES)
2553 uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")
2555 nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in
2556 self._config_data.nodegroups.values()]
2557 nodegroups_data = fn(utils.NiceSort(nodegroups))
2558 networks = ["%s %s" % (net.uuid, net.name) for net in
2559 self._config_data.networks.values()]
2560 networks_data = fn(utils.NiceSort(networks))
2563 constants.SS_CLUSTER_NAME: cluster.cluster_name,
2564 constants.SS_CLUSTER_TAGS: cluster_tags,
2565 constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
2566 constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
2567 constants.SS_MASTER_CANDIDATES: mc_data,
2568 constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
2569 constants.SS_MASTER_IP: cluster.master_ip,
2570 constants.SS_MASTER_NETDEV: cluster.master_netdev,
2571 constants.SS_MASTER_NETMASK: str(cluster.master_netmask),
2572 constants.SS_MASTER_NODE: self._UnlockedGetNodeName(cluster.master_node),
2573 constants.SS_NODE_LIST: node_data,
2574 constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
2575 constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
2576 constants.SS_OFFLINE_NODES: off_data,
2577 constants.SS_ONLINE_NODES: on_data,
2578 constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
2579 constants.SS_INSTANCE_LIST: instance_data,
2580 constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
2581 constants.SS_HYPERVISOR_LIST: hypervisor_list,
2582 constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
2583 constants.SS_UID_POOL: uid_pool,
2584 constants.SS_NODEGROUPS: nodegroups_data,
2585 constants.SS_NETWORKS: networks_data,
2587 ssconf_values = self._ExtendByAllHvparamsStrings(ssconf_values,
2589 bad_values = [(k, v) for k, v in ssconf_values.items()
2590 if not isinstance(v, (str, basestring))]
2592 err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
2593 raise errors.ConfigurationError("Some ssconf key(s) have non-string"
2594 " values: %s" % err)
2595 return ssconf_values
2597 @locking.ssynchronized(_config_lock, shared=1)
2598 def GetSsconfValues(self):
2599 """Wrapper using lock around _UnlockedGetSsconf().
2602 return self._UnlockedGetSsconfValues()
2604 @locking.ssynchronized(_config_lock, shared=1)
2605 def GetVGName(self):
2606 """Return the volume group name.
2609 return self._config_data.cluster.volume_group_name
2611 @locking.ssynchronized(_config_lock)
2612 def SetVGName(self, vg_name):
2613 """Set the volume group name.
2616 self._config_data.cluster.volume_group_name = vg_name
2617 self._config_data.cluster.serial_no += 1
2620 @locking.ssynchronized(_config_lock, shared=1)
2621 def GetDRBDHelper(self):
2622 """Return DRBD usermode helper.
2625 return self._config_data.cluster.drbd_usermode_helper
2627 @locking.ssynchronized(_config_lock)
2628 def SetDRBDHelper(self, drbd_helper):
2629 """Set DRBD usermode helper.
2632 self._config_data.cluster.drbd_usermode_helper = drbd_helper
2633 self._config_data.cluster.serial_no += 1
2636 @locking.ssynchronized(_config_lock, shared=1)
2637 def GetMACPrefix(self):
2638 """Return the mac prefix.
2641 return self._config_data.cluster.mac_prefix
2643 @locking.ssynchronized(_config_lock, shared=1)
2644 def GetClusterInfo(self):
2645 """Returns information about the cluster
2647 @rtype: L{objects.Cluster}
2648 @return: the cluster object
2651 return self._config_data.cluster
2653 @locking.ssynchronized(_config_lock, shared=1)
2654 def HasAnyDiskOfType(self, dev_type):
2655 """Check if in there is at disk of the given type in the configuration.
2658 return self._config_data.HasAnyDiskOfType(dev_type)
2660 @locking.ssynchronized(_config_lock)
2661 def Update(self, target, feedback_fn, ec_id=None):
2662 """Notify function to be called after updates.
2664 This function must be called when an object (as returned by
2665 GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
2666 caller wants the modifications saved to the backing store. Note
2667 that all modified objects will be saved, but the target argument
2668 is the one the caller wants to ensure that it's saved.
2670 @param target: an instance of either L{objects.Cluster},
2671 L{objects.Node} or L{objects.Instance} which is existing in
2673 @param feedback_fn: Callable feedback function
2676 if self._config_data is None:
2677 raise errors.ProgrammerError("Configuration file not read,"
2679 update_serial = False
2680 if isinstance(target, objects.Cluster):
2681 test = target == self._config_data.cluster
2682 elif isinstance(target, objects.Node):
2683 test = target in self._config_data.nodes.values()
2684 update_serial = True
2685 elif isinstance(target, objects.Instance):
2686 test = target in self._config_data.instances.values()
2687 elif isinstance(target, objects.NodeGroup):
2688 test = target in self._config_data.nodegroups.values()
2689 elif isinstance(target, objects.Network):
2690 test = target in self._config_data.networks.values()
2692 raise errors.ProgrammerError("Invalid object type (%s) passed to"
2693 " ConfigWriter.Update" % type(target))
2695 raise errors.ConfigurationError("Configuration updated since object"
2696 " has been read or unknown object")
2697 target.serial_no += 1
2698 target.mtime = now = time.time()
2701 # for node updates, we need to increase the cluster serial too
2702 self._config_data.cluster.serial_no += 1
2703 self._config_data.cluster.mtime = now
2705 if isinstance(target, objects.Instance):
2706 self._UnlockedReleaseDRBDMinors(target.uuid)
2708 if ec_id is not None:
2709 # Commit all ips reserved by OpInstanceSetParams and OpGroupSetParams
2710 self._UnlockedCommitTemporaryIps(ec_id)
2712 self._WriteConfig(feedback_fn=feedback_fn)
2714 @locking.ssynchronized(_config_lock)
2715 def DropECReservations(self, ec_id):
2716 """Drop per-execution-context reservations
2719 for rm in self._all_rms:
2720 rm.DropECReservations(ec_id)
2722 @locking.ssynchronized(_config_lock, shared=1)
2723 def GetAllNetworksInfo(self):
2724 """Get configuration info of all the networks.
2727 return dict(self._config_data.networks)
2729 def _UnlockedGetNetworkList(self):
2730 """Get the list of networks.
2732 This function is for internal use, when the config lock is already held.
2735 return self._config_data.networks.keys()
2737 @locking.ssynchronized(_config_lock, shared=1)
2738 def GetNetworkList(self):
2739 """Get the list of networks.
2741 @return: array of networks, ex. ["main", "vlan100", "200]
2744 return self._UnlockedGetNetworkList()
2746 @locking.ssynchronized(_config_lock, shared=1)
2747 def GetNetworkNames(self):
2748 """Get a list of network names
2752 for net in self._config_data.networks.values()]
2755 def _UnlockedGetNetwork(self, uuid):
2756 """Returns information about a network.
2758 This function is for internal use, when the config lock is already held.
2761 if uuid not in self._config_data.networks:
2764 return self._config_data.networks[uuid]
2766 @locking.ssynchronized(_config_lock, shared=1)
2767 def GetNetwork(self, uuid):
2768 """Returns information about a network.
2770 It takes the information from the configuration file.
2772 @param uuid: UUID of the network
2774 @rtype: L{objects.Network}
2775 @return: the network object
2778 return self._UnlockedGetNetwork(uuid)
2780 @locking.ssynchronized(_config_lock)
2781 def AddNetwork(self, net, ec_id, check_uuid=True):
2782 """Add a network to the configuration.
2784 @type net: L{objects.Network}
2785 @param net: the Network object to add
2787 @param ec_id: unique id for the job to use when creating a missing UUID
2790 self._UnlockedAddNetwork(net, ec_id, check_uuid)
2793 def _UnlockedAddNetwork(self, net, ec_id, check_uuid):
2794 """Add a network to the configuration.
2797 logging.info("Adding network %s to configuration", net.name)
2800 self._EnsureUUID(net, ec_id)
2803 self._config_data.networks[net.uuid] = net
2804 self._config_data.cluster.serial_no += 1
2806 def _UnlockedLookupNetwork(self, target):
2807 """Lookup a network's UUID.
2809 @type target: string
2810 @param target: network name or UUID
2812 @return: network UUID
2813 @raises errors.OpPrereqError: when the target network cannot be found
2818 if target in self._config_data.networks:
2820 for net in self._config_data.networks.values():
2821 if net.name == target:
2823 raise errors.OpPrereqError("Network '%s' not found" % target,
2826 @locking.ssynchronized(_config_lock, shared=1)
2827 def LookupNetwork(self, target):
2828 """Lookup a network's UUID.
2830 This function is just a wrapper over L{_UnlockedLookupNetwork}.
2832 @type target: string
2833 @param target: network name or UUID
2835 @return: network UUID
2838 return self._UnlockedLookupNetwork(target)
2840 @locking.ssynchronized(_config_lock)
2841 def RemoveNetwork(self, network_uuid):
2842 """Remove a network from the configuration.
2844 @type network_uuid: string
2845 @param network_uuid: the UUID of the network to remove
2848 logging.info("Removing network %s from configuration", network_uuid)
2850 if network_uuid not in self._config_data.networks:
2851 raise errors.ConfigurationError("Unknown network '%s'" % network_uuid)
2853 del self._config_data.networks[network_uuid]
2854 self._config_data.cluster.serial_no += 1
2857 def _UnlockedGetGroupNetParams(self, net_uuid, node_uuid):
2858 """Get the netparams (mode, link) of a network.
2860 Get a network's netparams for a given node.
2862 @type net_uuid: string
2863 @param net_uuid: network uuid
2864 @type node_uuid: string
2865 @param node_uuid: node UUID
2866 @rtype: dict or None
2870 node_info = self._UnlockedGetNodeInfo(node_uuid)
2871 nodegroup_info = self._UnlockedGetNodeGroup(node_info.group)
2872 netparams = nodegroup_info.networks.get(net_uuid, None)
2876 @locking.ssynchronized(_config_lock, shared=1)
2877 def GetGroupNetParams(self, net_uuid, node_uuid):
2878 """Locking wrapper of _UnlockedGetGroupNetParams()
2881 return self._UnlockedGetGroupNetParams(net_uuid, node_uuid)
2883 @locking.ssynchronized(_config_lock, shared=1)
2884 def CheckIPInNodeGroup(self, ip, node_uuid):
2885 """Check IP uniqueness in nodegroup.
2887 Check networks that are connected in the node's node group
2888 if ip is contained in any of them. Used when creating/adding
2889 a NIC to ensure uniqueness among nodegroups.
2892 @param ip: ip address
2893 @type node_uuid: string
2894 @param node_uuid: node UUID
2895 @rtype: (string, dict) or (None, None)
2896 @return: (network name, netparams)
2901 node_info = self._UnlockedGetNodeInfo(node_uuid)
2902 nodegroup_info = self._UnlockedGetNodeGroup(node_info.group)
2903 for net_uuid in nodegroup_info.networks.keys():
2904 net_info = self._UnlockedGetNetwork(net_uuid)
2905 pool = network.AddressPool(net_info)
2906 if pool.Contains(ip):
2907 return (net_info.name, nodegroup_info.networks[net_uuid])