4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
Currently, the data storage format is JSON. YAML was slow and consuming too
much memory.
34 # pylint: disable=R0904
35 # R0904: Too many public methods
44 from ganeti import errors
45 from ganeti import locking
46 from ganeti import utils
47 from ganeti import constants
48 from ganeti import rpc
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import uidpool
52 from ganeti import netutils
53 from ganeti import runtime
54 from ganeti import pathutils
55 from ganeti import network
# Module-wide shared lock; the ssynchronized decorators in this file take it
# (shared for readers, exclusive otherwise) around configuration accesses.
58 _config_lock = locking.SharedLock("ConfigWriter")
60 # job id used for resource management at config upgrade time
61 _UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
def _ValidateConfig(data):
  """Check that a configuration object carries the expected version.

  Only the version field is inspected; nothing else is validated here.

  @param data: configuration object exposing a C{version} attribute
  @raise errors.ConfigVersionMismatch: if C{data.version} differs from
      L{constants.CONFIG_VERSION}

  """
  found = data.version
  expected = constants.CONFIG_VERSION
  if found == expected:
    return
  raise errors.ConfigVersionMismatch(expected, found)
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  Reservations are kept per execution context id (C{ec_id}) in a dict
  mapping the id to the set of resources it holds.

  """
  # Upper bound on the number of candidates L{Generate} tries before
  # giving up; without a bound a generator that keeps colliding would
  # spin forever.
  _GENERATE_RETRIES = 64

  def __init__(self):
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Tell whether any execution context currently holds C{resource}.

    @rtype: boolean

    """
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve C{resource} on behalf of execution context C{ec_id}.

    @raise errors.ReservationError: if the resource is already reserved
        by any execution context (including C{ec_id} itself)

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    self._ec_reserved.setdefault(ec_id, set()).add(resource)

  def DropECReservations(self, ec_id):
    """Forget every reservation held by C{ec_id}; unknown ids are ignored.

    """
    self._ec_reserved.pop(ec_id, None)

  def GetReserved(self):
    """Return a new set with the resources reserved by all contexts.

    @rtype: set

    """
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def GetECReserved(self, ec_id):
    """Return a new set with the reservations of one execution context.

    Used when you want to retrieve all reservations for a specific
    execution context, e.g. when committing reserved IPs for a specific
    job.

    @rtype: set

    """
    return set(self._ec_reserved.get(ec_id, ()))

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type and reserve it for C{ec_id}.

    @param existing: iterable of resources already in use
    @param generate_one_fn: callable without arguments returning a
        candidate resource (C{None} means "no candidate this time")
    @param ec_id: execution context that will own the reservation
    @return: the newly reserved resource
    @raise errors.ConfigurationError: if no unused candidate was produced
        within L{_GENERATE_RETRIES} attempts

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    new_resource = None
    # for/else: the else branch only runs when no attempt hit "break"
    for _ in range(self._GENERATE_RETRIES):
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource
def _MatchNameComponentIgnoreCase(short_name, names):
  """Case-insensitive wrapper around L{utils.text.MatchNameComponent}.

  @param short_name: the name to look up
  @param names: the candidate names
  @return: whatever L{utils.MatchNameComponent} returns when called with
      C{case_sensitive=False}

  """
  match_fn = utils.MatchNameComponent
  return match_fn(short_name, names, case_sensitive=False)
150 def _CheckInstanceDiskIvNames(disks):
151 """Checks if instance's disks' C{iv_name} attributes are in order.
153 @type disks: list of L{objects.Disk}
154 @param disks: List of disks
155 @rtype: list of tuples; (int, string, string)
156 @return: List of wrongly named disks, each tuple contains disk index,
157 expected and actual name
162 for (idx, disk) in enumerate(disks):
163 exp_iv_name = "disk/%s" % idx
164 if disk.iv_name != exp_iv_name:
165 result.append((idx, exp_iv_name, disk.iv_name))
171 """The interface to the cluster configuration.
173 @ivar _temporary_lvs: reservation manager for temporary LVs
174 @ivar _all_rms: a list of all temporary reservation managers
def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
             accept_foreign=False):
  """Set up the in-memory state and load the configuration.

  @param cfg_file: path of the configuration file; C{None} selects
      L{pathutils.CLUSTER_CONF_FILE}
  @param offline: stored as C{self._offline}
  @param _getents: stored as C{self._getents}
  @param accept_foreign: passed through to C{_OpenConfig}

  """
  self.write_count = 0
  self._lock = _config_lock
  self._config_data = None
  self._offline = offline
  self._cfg_file = (pathutils.CLUSTER_CONF_FILE if cfg_file is None
                    else cfg_file)
  self._getents = _getents

  # one reservation manager per kind of temporarily reserved resource
  self._temporary_ids = TemporaryReservationManager()
  self._temporary_drbds = {}
  self._temporary_macs = TemporaryReservationManager()
  self._temporary_secrets = TemporaryReservationManager()
  self._temporary_lvs = TemporaryReservationManager()
  self._temporary_ips = TemporaryReservationManager()
  self._all_rms = [
    self._temporary_ids,
    self._temporary_macs,
    self._temporary_secrets,
    self._temporary_lvs,
    self._temporary_ips,
    ]

  # Note: in order to prevent errors when resolving our name in
  # _DistributeConfig, we compute it here once and reuse it; it's
  # better to raise an error before starting to modify the config
  # file than after it was modified
  self._my_hostname = netutils.Hostname.GetSysName()
  self._last_cluster_serial = -1
  self._cfg_id = None
  self._context = None
  self._OpenConfig(accept_foreign)
def _GetRpc(self, address_list):
  """Build the RPC runner used for configuration operations.

  @param address_list: passed unchanged to L{rpc.ConfigRunner}
  @return: a L{rpc.ConfigRunner} bound to the current context

  """
  context = self._context
  return rpc.ConfigRunner(context, address_list)
def SetContext(self, context):
  """Remember the Ganeti context for later use (see C{_GetRpc}).

  @param context: the context object to store

  """
  self._context = context
219 # this method needs to be static, so that we can call it on the class
222 """Check if the cluster is configured.
225 return os.path.exists(pathutils.CLUSTER_CONF_FILE)
@locking.ssynchronized(_config_lock, shared=1)
def GetNdParams(self, node):
  """Return the node parameters of a node, filled with defaults.

  @type node: L{objects.Node}
  @param node: The node we want to know the params for
  @return: A dict with the filled in node params

  """
  group_of_node = self._UnlockedGetNodeGroup(node.group)
  cluster = self._config_data.cluster
  return cluster.FillND(node, group_of_node)
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceDiskParams(self, instance):
  """Return the disk parameters that apply to an instance.

  The parameters are those of the node group of the instance's primary
  node.

  @type instance: L{objects.Instance}
  @param instance: The instance we want to know the params for
  @return: A dict with the filled in disk params

  """
  primary = self._UnlockedGetNodeInfo(instance.primary_node)
  group = self._UnlockedGetNodeGroup(primary.group)
  return self._UnlockedGetGroupDiskParams(group)
@locking.ssynchronized(_config_lock, shared=1)
def GetGroupDiskParams(self, group):
  """Return the disk parameters of a node group, filled with defaults.

  Locked wrapper over L{_UnlockedGetGroupDiskParams}.

  @type group: L{objects.NodeGroup}
  @param group: The group we want to know the params for
  @return: A dict with the filled in disk params

  """
  filled = self._UnlockedGetGroupDiskParams(group)
  return filled
263 def _UnlockedGetGroupDiskParams(self, group):
264 """Get the disk params populated with inherit chain down to node-group.
266 @type group: L{objects.NodeGroup}
267 @param group: The group we want to know the params for
268 @return: A dict with the filled in disk params
271 return self._config_data.cluster.SimpleFillDP(group.diskparams)
273 def _UnlockedGetNetworkMACPrefix(self, net_uuid):
274 """Return the network mac prefix if it exists or the cluster level default.
279 nobj = self._UnlockedGetNetwork(net_uuid)
281 prefix = nobj.mac_prefix
285 def _GenerateOneMAC(self, prefix=None):
286 """Return a function that randomly generates a MAC suffic
287 and appends it to the given prefix. If prefix is not given get
288 the cluster level default.
292 prefix = self._config_data.cluster.mac_prefix
295 byte1 = random.randrange(0, 256)
296 byte2 = random.randrange(0, 256)
297 byte3 = random.randrange(0, 256)
298 mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateMAC(self, net_uuid, ec_id):
  """Generate a MAC for an instance.

  The MAC is checked against the MACs of the current instances and
  against the temporary MAC reservations.

  @param net_uuid: network whose MAC prefix should be used, if any
  @param ec_id: execution context the new MAC is reserved for
  @return: the generated (and reserved) MAC address

  """
  existing = self._AllMACs()
  prefix = self._UnlockedGetNetworkMACPrefix(net_uuid)
  gen_mac = self._GenerateOneMAC(prefix)
  # Reserve in the MAC manager (not the generic ID manager) so that
  # generated MACs and MACs reserved through ReserveMAC exclude each other.
  return self._temporary_macs.Generate(existing, gen_mac, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveMAC(self, mac, ec_id):
  """Reserve a MAC for an instance.

  This only checks instances managed by this cluster, it does not
  check for potential collisions elsewhere.

  @param mac: the MAC address to reserve
  @param ec_id: execution context the MAC is reserved for
  @raise errors.ReservationError: if the MAC is already used by an
      instance

  """
  in_use = mac in self._AllMACs()
  if in_use:
    raise errors.ReservationError("mac already in use")
  self._temporary_macs.Reserve(ec_id, mac)
329 def _UnlockedCommitTemporaryIps(self, ec_id):
330 """Commit all reserved IP address to their respective pools
333 for action, address, net_uuid in self._temporary_ips.GetECReserved(ec_id):
334 self._UnlockedCommitIp(action, net_uuid, address)
def _UnlockedCommitIp(self, action, net_uuid, address):
  """Apply a pending reserve/release action to a network's IP pool.

  @param action: L{constants.RESERVE_ACTION} or
      L{constants.RELEASE_ACTION}; any other value is silently ignored
  @param net_uuid: UUID of the network owning the pool
  @param address: the IP address to reserve or release

  """
  ip_pool = network.AddressPool(self._UnlockedGetNetwork(net_uuid))
  if action == constants.RESERVE_ACTION:
    ip_pool.Reserve(address)
    return
  if action == constants.RELEASE_ACTION:
    ip_pool.Release(address)
def _UnlockedReleaseIp(self, net_uuid, address, ec_id):
  """Give a specific IP address back to an IP pool.

  The release is only recorded here, as a temporary reservation of a
  C{(RELEASE_ACTION, address, net_uuid)} entry for C{ec_id}.

  @param net_uuid: UUID of the network the address belongs to
  @param address: the IP address to release
  @param ec_id: execution context recording the release

  """
  release_entry = (constants.RELEASE_ACTION, address, net_uuid)
  self._temporary_ips.Reserve(ec_id, release_entry)
@locking.ssynchronized(_config_lock, shared=1)
def ReleaseIp(self, net_uuid, address, ec_id):
  """Give a specified IP address back to an IP pool.

  This is just a wrapper around L{_UnlockedReleaseIp}. Nothing is
  released when C{net_uuid} is false.

  """
  if not net_uuid:
    return
  self._UnlockedReleaseIp(net_uuid, address, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateIp(self, net_uuid, ec_id):
  """Find a free IPv4 address for an instance.

  @param net_uuid: UUID of the network to take the address from
  @param ec_id: execution context the address is reserved for
  @return: the generated IP address
  @raise errors.ReservationError: if the pool has no free address

  """
  ip_pool = network.AddressPool(self._UnlockedGetNetwork(net_uuid))

  def gen_one():
    """Produce one candidate reservation entry from the pool."""
    try:
      free_ip = ip_pool.GenerateFree()
    except errors.AddressPoolError:
      raise errors.ReservationError("Cannot generate IP. Network is full")
    return (constants.RESERVE_ACTION, free_ip, net_uuid)

  entry = self._temporary_ips.Generate([], gen_one, ec_id)
  (_, address, _) = entry
  return address
def _UnlockedReserveIp(self, net_uuid, address, ec_id):
  """Reserve a given IPv4 address for use by an instance.

  @param net_uuid: UUID of the network the address belongs to
  @param address: the IP address to reserve
  @param ec_id: execution context the address is reserved for
  @raise errors.ReservationError: if the address is outside the network
      or already reserved in its pool

  """
  ip_pool = network.AddressPool(self._UnlockedGetNetwork(net_uuid))
  try:
    already_taken = ip_pool.IsReserved(address)
  except errors.AddressPoolError:
    raise errors.ReservationError("IP address not in network")
  if already_taken:
    raise errors.ReservationError("IP address already in use")

  entry = (constants.RESERVE_ACTION, address, net_uuid)
  return self._temporary_ips.Reserve(ec_id, entry)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveIp(self, net_uuid, address, ec_id):
  """Reserve a given IPv4 address for use by an instance.

  Locked wrapper over L{_UnlockedReserveIp}; returns C{None} without
  doing anything when C{net_uuid} is false.

  """
  if not net_uuid:
    return None
  return self._UnlockedReserveIp(net_uuid, address, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveLV(self, lv_name, ec_id):
  """Reserve a VG/LV pair for an instance.

  @type lv_name: string
  @param lv_name: the logical volume name to reserve
  @param ec_id: execution context the LV is reserved for
  @raise errors.ReservationError: if the LV is already used by an
      instance

  """
  in_use = lv_name in self._AllLVs()
  if in_use:
    raise errors.ReservationError("LV already in use")
  self._temporary_lvs.Reserve(ec_id, lv_name)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateDRBDSecret(self, ec_id):
  """Generate a DRBD secret.

  This checks the current disks for duplicates.

  @param ec_id: execution context the secret is reserved for
  @return: the generated secret

  """
  known_secrets = self._AllDRBDSecrets()
  return self._temporary_secrets.Generate(known_secrets,
                                          utils.GenerateSecret,
                                          ec_id)
438 """Compute the list of all LVs.
442 for instance in self._config_data.instances.values():
443 node_data = instance.MapLVsByNode()
444 for lv_list in node_data.values():
445 lvnames.update(lv_list)
449 """Compute the list of all Disks (recursively, including children).
452 def DiskAndAllChildren(disk):
453 """Returns a list containing the given disk and all of his children.
458 for child_disk in disk.children:
459 disks.extend(DiskAndAllChildren(child_disk))
463 for instance in self._config_data.instances.values():
464 for disk in instance.disks:
465 disks.extend(DiskAndAllChildren(disk))
469 """Compute the list of all NICs.
473 for instance in self._config_data.instances.values():
474 nics.extend(instance.nics)
477 def _AllIDs(self, include_temporary):
478 """Compute the list of all UUIDs and names we have.
480 @type include_temporary: boolean
481 @param include_temporary: whether to include the _temporary_ids set
483 @return: a set of IDs
487 if include_temporary:
488 existing.update(self._temporary_ids.GetReserved())
489 existing.update(self._AllLVs())
490 existing.update(self._config_data.instances.keys())
491 existing.update(self._config_data.nodes.keys())
492 existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
def _GenerateUniqueID(self, ec_id):
  """Generate a unique UUID.

  This checks the current node, instances and disk names for
  duplicates.

  @param ec_id: execution context the new id is reserved for
  @rtype: string
  @return: the unique id

  """
  # temporary ids are not passed here; Generate() itself takes the
  # reservations of its own manager into account
  taken = self._AllIDs(include_temporary=False)
  id_manager = self._temporary_ids
  return id_manager.Generate(taken, utils.NewUUID, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateUniqueID(self, ec_id):
  """Generate a unique ID.

  This is just a wrapper over the unlocked version.

  @type ec_id: string
  @param ec_id: unique id for the job to reserve the id to

  """
  new_id = self._GenerateUniqueID(ec_id)
  return new_id
521 """Return all MACs present in the config.
524 @return: the list of all MACs
528 for instance in self._config_data.instances.values():
529 for nic in instance.nics:
530 result.append(nic.mac)
534 def _AllDRBDSecrets(self):
535 """Return all DRBD secrets present in the config.
538 @return: the list of all DRBD secrets
541 def helper(disk, result):
542 """Recursively gather secrets from this disk."""
543 if disk.dev_type == constants.DT_DRBD8:
544 result.append(disk.logical_id[5])
546 for child in disk.children:
547 helper(child, result)
550 for instance in self._config_data.instances.values():
551 for disk in instance.disks:
556 def _CheckDiskIDs(self, disk, l_ids, p_ids):
557 """Compute duplicate disk IDs
559 @type disk: L{objects.Disk}
560 @param disk: the disk at which to start searching
562 @param l_ids: list of current logical ids
564 @param p_ids: list of current physical ids
566 @return: a list of error messages
570 if disk.logical_id is not None:
571 if disk.logical_id in l_ids:
572 result.append("duplicate logical id %s" % str(disk.logical_id))
574 l_ids.append(disk.logical_id)
575 if disk.physical_id is not None:
576 if disk.physical_id in p_ids:
577 result.append("duplicate physical id %s" % str(disk.physical_id))
579 p_ids.append(disk.physical_id)
582 for child in disk.children:
583 result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
def _UnlockedVerifyConfig(self):
  """Verify the in-memory configuration.

  Runs global cluster checks, per-instance checks (nodes, NICs, disk
  template, backend parameters, ports, disks), port pool checks, node
  and node group checks, DRBD minor checks and IP uniqueness checks.
  For internal use, when the config lock is already held.

  @rtype: list
  @return: a list of error messages; a non-empty list signifies
      configuration errors
  @raise errors.ProgrammerError: if a NIC with an IP uses a mode other
      than bridged or routed

  """
  # pylint: disable=R0914
  result = []
  seen_macs = []
  ports = {}
  data = self._config_data
  cluster = data.cluster
  seen_lids = []
  seen_pids = []

  # global cluster checks
  if not cluster.enabled_hypervisors:
    result.append("enabled hypervisors list doesn't have any entries")
  invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
  if invalid_hvs:
    result.append("enabled hypervisors contains invalid entries: %s" %
                  utils.CommaJoin(invalid_hvs))
  missing_hvp = (set(cluster.enabled_hypervisors) -
                 set(cluster.hvparams.keys()))
  if missing_hvp:
    result.append("hypervisor parameters missing for the enabled"
                  " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))

  if not cluster.enabled_disk_templates:
    result.append("enabled disk templates list doesn't have any entries")
  invalid_disk_templates = (set(cluster.enabled_disk_templates) -
                            constants.DISK_TEMPLATES)
  if invalid_disk_templates:
    result.append("enabled disk templates list contains invalid entries:"
                  " %s" % utils.CommaJoin(invalid_disk_templates))

  if cluster.master_node not in data.nodes:
    result.append("cluster has invalid primary node '%s'" %
                  cluster.master_node)

  def _helper(owner, attr, value, template):
    """Record an error if C{value} does not match the type template."""
    try:
      utils.ForceDictType(value, template)
    except errors.GenericError as err:
      result.append("%s has invalid %s: %s" % (owner, attr, err))

  def _helper_nic(owner, params):
    """Record an error if the NIC parameters are syntactically invalid."""
    try:
      objects.NIC.CheckParameterSyntax(params)
    except errors.ConfigurationError as err:
      result.append("%s has invalid nicparams: %s" % (owner, err))

  def _helper_ipolicy(owner, ipolicy, iscluster):
    """Record errors for an invalid instance policy."""
    try:
      objects.InstancePolicy.CheckParameterSyntax(ipolicy, iscluster)
    except errors.ConfigurationError as err:
      result.append("%s has invalid instance policy: %s" % (owner, err))
    for key, value in ipolicy.items():
      if key == constants.ISPECS_MINMAX:
        for (k, minmax) in enumerate(value):
          _helper_ispecs(owner, "ipolicy/%s[%s]" % (key, k), minmax)
      elif key == constants.ISPECS_STD:
        _helper(owner, "ipolicy/" + key, value,
                constants.ISPECS_PARAMETER_TYPES)
      else:
        # FIXME: assuming list type
        if key in constants.IPOLICY_PARAMETERS:
          exp_type = float
        else:
          exp_type = list
        if not isinstance(value, exp_type):
          result.append("%s has invalid instance policy: for %s,"
                        " expecting %s, got %s" %
                        (owner, key, exp_type.__name__, type(value)))

  def _helper_ispecs(owner, parentkey, params):
    """Type-check every entry of an instance specs dictionary."""
    for (key, value) in params.items():
      fullkey = "/".join([parentkey, key])
      _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)

  # check cluster parameters
  _helper("cluster", "beparams", cluster.SimpleFillBE({}),
          constants.BES_PARAMETER_TYPES)
  _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
          constants.NICS_PARAMETER_TYPES)
  _helper_nic("cluster", cluster.SimpleFillNIC({}))
  _helper("cluster", "ndparams", cluster.SimpleFillND({}),
          constants.NDS_PARAMETER_TYPES)
  _helper_ipolicy("cluster", cluster.ipolicy, True)

  # per-instance checks
  for instance_uuid in data.instances:
    instance = data.instances[instance_uuid]
    if instance.uuid != instance_uuid:
      result.append("instance '%s' is indexed by wrong UUID '%s'" %
                    (instance.name, instance_uuid))
    if instance.primary_node not in data.nodes:
      result.append("instance '%s' has invalid primary node '%s'" %
                    (instance.name, instance.primary_node))
    for snode in instance.secondary_nodes:
      if snode not in data.nodes:
        result.append("instance '%s' has invalid secondary node '%s'" %
                      (instance.name, snode))
    for idx, nic in enumerate(instance.nics):
      if nic.mac in seen_macs:
        result.append("instance '%s' has NIC %d mac %s duplicate" %
                      (instance.name, idx, nic.mac))
      else:
        seen_macs.append(nic.mac)
      if nic.nicparams:
        filled = cluster.SimpleFillNIC(nic.nicparams)
        owner = "instance %s nic %d" % (instance.name, idx)
        _helper(owner, "nicparams",
                filled, constants.NICS_PARAMETER_TYPES)
        _helper_nic(owner, filled)

    # disk template checks
    if instance.disk_template not in data.cluster.enabled_disk_templates:
      result.append("instance '%s' uses the disabled disk template '%s'." %
                    (instance.name, instance.disk_template))

    # parameter checks
    if instance.beparams:
      _helper("instance %s" % instance.name, "beparams",
              cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)

    # gather the drbd ports for duplicate checks
    for (idx, dsk) in enumerate(instance.disks):
      if dsk.dev_type in constants.LDS_DRBD:
        tcp_port = dsk.logical_id[2]
        if tcp_port not in ports:
          ports[tcp_port] = []
        ports[tcp_port].append((instance.name, "drbd disk %s" % idx))
    # gather network port reservation
    net_port = getattr(instance, "network_port", None)
    if net_port is not None:
      if net_port not in ports:
        ports[net_port] = []
      ports[net_port].append((instance.name, "network port"))

    # instance disk verify
    for idx, disk in enumerate(instance.disks):
      result.extend(["instance '%s' disk %d error: %s" %
                     (instance.name, idx, msg) for msg in disk.Verify()])
      result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))

    wrong_names = _CheckInstanceDiskIvNames(instance.disks)
    if wrong_names:
      tmp = "; ".join(("name of disk %s should be '%s', but is '%s'" %
                       (idx, exp_name, actual_name))
                      for (idx, exp_name, actual_name) in wrong_names)

      result.append("Instance '%s' has wrongly named disks: %s" %
                    (instance.name, tmp))

  # cluster-wide pool of free ports
  for free_port in cluster.tcpudp_port_pool:
    if free_port not in ports:
      ports[free_port] = []
    ports[free_port].append(("cluster", "port marked as free"))

  # compute tcp/udp duplicate ports
  keys = sorted(ports)
  for pnum in keys:
    pdata = ports[pnum]
    if len(pdata) > 1:
      txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
      result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))

  # highest used tcp port check
  if keys:
    if keys[-1] > cluster.highest_used_port:
      result.append("Highest used port mismatch, saved %s, computed %s" %
                    (cluster.highest_used_port, keys[-1]))

  # An unknown master node was already reported above; only look at its
  # flags when it actually exists, instead of failing with a KeyError.
  if (cluster.master_node in data.nodes and
      not data.nodes[cluster.master_node].master_candidate):
    result.append("Master node is not a master candidate")

  # master candidate checks
  mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
  if mc_now < mc_max:
    result.append("Not enough master candidates: actual %d, target %d" %
                  (mc_now, mc_max))

  # node checks
  for node_uuid, node in data.nodes.items():
    if node.uuid != node_uuid:
      result.append("Node '%s' is indexed by wrong UUID '%s'" %
                    (node.name, node_uuid))
    # at most one of the three state flags may be set
    if [node.master_candidate, node.drained, node.offline].count(True) > 1:
      result.append("Node %s state is invalid: master_candidate=%s,"
                    " drain=%s, offline=%s" %
                    (node.name, node.master_candidate, node.drained,
                     node.offline))
    if node.group not in data.nodegroups:
      result.append("Node '%s' has invalid group '%s'" %
                    (node.name, node.group))
    else:
      _helper("node %s" % node.name, "ndparams",
              cluster.FillND(node, data.nodegroups[node.group]),
              constants.NDS_PARAMETER_TYPES)
    used_globals = constants.NDC_GLOBALS.intersection(node.ndparams)
    if used_globals:
      result.append("Node '%s' has some global parameters set: %s" %
                    (node.name, utils.CommaJoin(used_globals)))

  # nodegroups checks
  nodegroups_names = set()
  for nodegroup_uuid in data.nodegroups:
    nodegroup = data.nodegroups[nodegroup_uuid]
    if nodegroup.uuid != nodegroup_uuid:
      result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
                    % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
    if utils.UUID_RE.match(nodegroup.name.lower()):
      result.append("node group '%s' (uuid: '%s') has uuid-like name" %
                    (nodegroup.name, nodegroup.uuid))
    if nodegroup.name in nodegroups_names:
      result.append("duplicate node group name '%s'" % nodegroup.name)
    else:
      nodegroups_names.add(nodegroup.name)
    group_name = "group %s" % nodegroup.name
    _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy),
                    False)
    if nodegroup.ndparams:
      _helper(group_name, "ndparams",
              cluster.SimpleFillND(nodegroup.ndparams),
              constants.NDS_PARAMETER_TYPES)

  # drbd minors check
  _, duplicates = self._UnlockedComputeDRBDMap()
  for node, minor, instance_a, instance_b in duplicates:
    result.append("DRBD minor %d on node %s is assigned twice to instances"
                  " %s and %s" % (minor, node, instance_a, instance_b))

  # IP checks
  default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
  ips = {}

  def _AddIpAddress(ip, name):
    """Remember that C{name} uses the address key C{ip}."""
    ips.setdefault(ip, []).append(name)

  _AddIpAddress(cluster.master_ip, "cluster_ip")

  for node in data.nodes.values():
    _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
    if node.secondary_ip != node.primary_ip:
      _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)

  for instance in data.instances.values():
    for idx, nic in enumerate(instance.nics):
      if nic.ip is None:
        continue

      nicparams = objects.FillDict(default_nicparams, nic.nicparams)
      nic_mode = nicparams[constants.NIC_MODE]
      nic_link = nicparams[constants.NIC_LINK]

      if nic_mode == constants.NIC_MODE_BRIDGED:
        link = "bridge:%s" % nic_link
      elif nic_mode == constants.NIC_MODE_ROUTED:
        link = "route:%s" % nic_link
      else:
        raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)

      # instance IPs only clash within the same link and network
      _AddIpAddress("%s/%s/%s" % (link, nic.ip, nic.network),
                    "instance:%s/nic:%d" % (instance.name, idx))

  for ip, owners in ips.items():
    if len(owners) > 1:
      result.append("IP address %s is used by multiple owners: %s" %
                    (ip, utils.CommaJoin(owners)))

  return result
@locking.ssynchronized(_config_lock, shared=1)
def VerifyConfig(self):
  """Verify function.

  This is just a wrapper over L{_UnlockedVerifyConfig}.

  @rtype: list
  @return: a list of error messages; a non-empty list signifies
      configuration errors

  """
  messages = self._UnlockedVerifyConfig()
  return messages
876 def _UnlockedSetDiskID(self, disk, node_uuid):
877 """Convert the unique ID to the ID needed on the target nodes.
879 This is used only for drbd, which needs ip/port configuration.
881 The routine descends down and updates its children also, because
882 this helps when the only the top device is passed to the remote
885 This function is for internal use, when the config lock is already held.
889 for child in disk.children:
890 self._UnlockedSetDiskID(child, node_uuid)
892 if disk.logical_id is None and disk.physical_id is not None:
894 if disk.dev_type == constants.LD_DRBD8:
895 pnode, snode, port, pminor, sminor, secret = disk.logical_id
896 if node_uuid not in (pnode, snode):
897 raise errors.ConfigurationError("DRBD device not knowing node %s" %
899 pnode_info = self._UnlockedGetNodeInfo(pnode)
900 snode_info = self._UnlockedGetNodeInfo(snode)
901 if pnode_info is None or snode_info is None:
902 raise errors.ConfigurationError("Can't find primary or secondary node"
903 " for %s" % str(disk))
904 p_data = (pnode_info.secondary_ip, port)
905 s_data = (snode_info.secondary_ip, port)
906 if pnode == node_uuid:
907 disk.physical_id = p_data + s_data + (pminor, secret)
908 else: # it must be secondary, we tested above
909 disk.physical_id = s_data + p_data + (sminor, secret)
911 disk.physical_id = disk.logical_id
@locking.ssynchronized(_config_lock)
def SetDiskID(self, disk, node_uuid):
  """Convert the unique ID to the ID needed on the target nodes.

  This is used only for drbd, which needs ip/port configuration.

  The routine descends down and updates its children also, because
  this helps when only the top device is passed to the remote
  node.

  Locked wrapper over L{_UnlockedSetDiskID}.

  """
  outcome = self._UnlockedSetDiskID(disk, node_uuid)
  return outcome
@locking.ssynchronized(_config_lock)
def AddTcpUdpPort(self, port):
  """Adds a new port to the available port pool.

  @warning: this method does not "flush" the configuration (via
      L{_WriteConfig}); callers should do that themselves once the
      configuration is stable

  @param port: the port number to add
  @raise errors.ProgrammerError: if C{port} is not an integer

  """
  if isinstance(port, int):
    self._config_data.cluster.tcpudp_port_pool.add(port)
    return
  raise errors.ProgrammerError("Invalid type passed for port")
@locking.ssynchronized(_config_lock, shared=1)
def GetPortList(self):
  """Returns a copy of the current port list.

  @return: a copy of the cluster's C{tcpudp_port_pool}

  """
  port_pool = self._config_data.cluster.tcpudp_port_pool
  return port_pool.copy()
@locking.ssynchronized(_config_lock)
def AllocatePort(self):
  """Allocate a port.

  The port will be taken from the available port pool or from the
  default port range (and in this case we increase
  highest_used_port).

  @return: the allocated port
  @raise errors.ConfigurationError: if the default range is exhausted

  """
  cluster = self._config_data.cluster
  # If there are TCP/IP ports configured, we use them first.
  if cluster.tcpudp_port_pool:
    port = cluster.tcpudp_port_pool.pop()
  else:
    port = cluster.highest_used_port + 1
    if port >= constants.LAST_DRBD_PORT:
      raise errors.ConfigurationError("The highest used port is greater"
                                      " than %s. Aborting." %
                                      constants.LAST_DRBD_PORT)
    cluster.highest_used_port = port

  self._WriteConfig()
  return port
971 def _UnlockedComputeDRBDMap(self):
972 """Compute the used DRBD minor/nodes.
975 @return: dictionary of node_uuid: dict of minor: instance_uuid;
976 the returned dict will have all the nodes in it (even if with
977 an empty list), and a list of duplicates; if the duplicates
978 list is not empty, the configuration is corrupted and its caller
979 should raise an exception
982 def _AppendUsedMinors(get_node_name_fn, instance, disk, used):
984 if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
985 node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
986 for node_uuid, minor in ((node_a, minor_a), (node_b, minor_b)):
987 assert node_uuid in used, \
988 ("Node '%s' of instance '%s' not found in node list" %
989 (get_node_name_fn(node_uuid), instance.name))
990 if minor in used[node_uuid]:
991 duplicates.append((node_uuid, minor, instance.uuid,
992 used[node_uuid][minor]))
994 used[node_uuid][minor] = instance.uuid
996 for child in disk.children:
997 duplicates.extend(_AppendUsedMinors(get_node_name_fn, instance, child,
1002 my_dict = dict((node_uuid, {}) for node_uuid in self._config_data.nodes)
1003 for instance in self._config_data.instances.itervalues():
1004 for disk in instance.disks:
1005 duplicates.extend(_AppendUsedMinors(self._UnlockedGetNodeName,
1006 instance, disk, my_dict))
1007 for (node_uuid, minor), inst_uuid in self._temporary_drbds.iteritems():
1008 if minor in my_dict[node_uuid] and my_dict[node_uuid][minor] != inst_uuid:
1009 duplicates.append((node_uuid, minor, inst_uuid,
1010 my_dict[node_uuid][minor]))
1012 my_dict[node_uuid][minor] = inst_uuid
1013 return my_dict, duplicates
@locking.ssynchronized(_config_lock)
def ComputeDRBDMap(self):
  """Compute the used DRBD minor/nodes.

  This is just a wrapper over L{_UnlockedComputeDRBDMap}.

  @return: dictionary of node_uuid: dict of minor: instance_uuid;
      the returned dict will have all the nodes in it (even if with
      an empty list).
  @raise errors.ConfigurationError: if duplicate minors were found

  """
  (d_map, duplicates) = self._UnlockedComputeDRBDMap()
  if not duplicates:
    return d_map
  raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                  str(duplicates))
@locking.ssynchronized(_config_lock)
def AllocateDRBDMinor(self, node_uuids, inst_uuid):
  """Allocate a drbd minor.

  The free minor will be automatically computed from the existing
  devices. A node can be given multiple times in order to allocate
  multiple minors. The result is the list of minors, in the same
  order as the passed nodes.

  @param node_uuids: the nodes on which minors should be allocated
  @type inst_uuid: string
  @param inst_uuid: the instance for which we allocate minors
  @rtype: list
  @return: the allocated minors, one per entry of C{node_uuids}
  @raise errors.ConfigurationError: if the current configuration already
      contains duplicate minors

  """
  assert isinstance(inst_uuid, basestring), \
         "Invalid argument '%s' passed to AllocateDRBDMinor" % inst_uuid

  (d_map, duplicates) = self._UnlockedComputeDRBDMap()
  if duplicates:
    raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                    str(duplicates))
  result = []
  for nuuid in node_uuids:
    ndata = d_map[nuuid]
    if ndata:
      used_minors = sorted(ndata)
      minor = utils.FirstFree(used_minors)
      if minor is None:
        # return the next minor
        # TODO: implement high-limit check
        minor = used_minors[-1] + 1
      # double-check minor against current instances
      assert minor not in d_map[nuuid], \
             ("Attempt to reuse allocated DRBD minor %d on node %s,"
              " already allocated to instance %s" %
              (minor, nuuid, d_map[nuuid][minor]))
      ndata[minor] = inst_uuid
      # double-check minor against reservation
      r_key = (nuuid, minor)
      assert r_key not in self._temporary_drbds, \
             ("Attempt to reuse reserved DRBD minor %d on node %s,"
              " reserved for instance %s" %
              (minor, nuuid, self._temporary_drbds[r_key]))
      self._temporary_drbds[r_key] = inst_uuid
      result.append(minor)
    else:
      # no minors used, we can start at 0
      result.append(0)
      ndata[0] = inst_uuid
      self._temporary_drbds[(nuuid, 0)] = inst_uuid
  logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                node_uuids, result)
  return result
def _UnlockedReleaseDRBDMinors(self, inst_uuid):
  """Release temporary drbd minors allocated for a given instance.

  Removes every entry of C{self._temporary_drbds} whose value equals
  C{inst_uuid}.

  @type inst_uuid: string
  @param inst_uuid: the instance for which temporary minors should be
      released

  """
  assert isinstance(inst_uuid, basestring), \
         "Invalid argument passed to ReleaseDRBDMinors"
  # Collect the keys first and delete afterwards: deleting while iterating
  # over a live view of the dict raises RuntimeError.
  stale = [key for (key, owner) in self._temporary_drbds.items()
           if owner == inst_uuid]
  for key in stale:
    del self._temporary_drbds[key]
@locking.ssynchronized(_config_lock)
def ReleaseDRBDMinors(self, inst_uuid):
  """Release temporary drbd minors allocated for a given instance.

  Locked wrapper that delegates to L{_UnlockedReleaseDRBDMinors}.
  Intended for error paths; L{AddInstance} already releases the minors
  itself.

  @type inst_uuid: string
  @param inst_uuid: the instance for which temporary minors should be
      released

  """
  release = self._UnlockedReleaseDRBDMinors
  release(inst_uuid)
@locking.ssynchronized(_config_lock, shared=1)
def GetConfigVersion(self):
  """Return the C{version} attribute of the loaded configuration.

  @return: Config version

  """
  config = self._config_data
  return config.version
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterName(self):
  """Return the C{cluster_name} attribute of the cluster object.

  @return: Cluster name

  """
  cluster = self._config_data.cluster
  return cluster.cluster_name
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNode(self):
  """Return the C{master_node} attribute of the cluster object.

  @return: Master node UUID

  """
  cluster = self._config_data.cluster
  return cluster.master_node
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNodeName(self):
  """Return the name of the node recorded as cluster master.

  The cluster's C{master_node} value is resolved through
  L{_UnlockedGetNodeName}.

  @return: Master node hostname

  """
  master_uuid = self._config_data.cluster.master_node
  return self._UnlockedGetNodeName(master_uuid)
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterIP(self):
  """Return the C{master_ip} attribute of the cluster object.

  @return: Master IP

  """
  cluster = self._config_data.cluster
  return cluster.master_ip
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetdev(self):
  """Return the C{master_netdev} attribute of the cluster object.

  """
  cluster = self._config_data.cluster
  return cluster.master_netdev
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetmask(self):
  """Return the C{master_netmask} attribute of the cluster object.

  """
  cluster = self._config_data.cluster
  return cluster.master_netmask
@locking.ssynchronized(_config_lock, shared=1)
def GetUseExternalMipScript(self):
  """Return the cluster's C{use_external_mip_script} flag.

  The flag says whether the external master IP setup script is used.

  """
  cluster = self._config_data.cluster
  return cluster.use_external_mip_script
@locking.ssynchronized(_config_lock, shared=1)
def GetFileStorageDir(self):
  """Return the C{file_storage_dir} attribute of the cluster object.

  """
  cluster = self._config_data.cluster
  return cluster.file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetSharedFileStorageDir(self):
  """Return the C{shared_file_storage_dir} attribute of the cluster object.

  """
  cluster = self._config_data.cluster
  return cluster.shared_file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetHypervisorType(self):
  """Return the first entry of the cluster's C{enabled_hypervisors}.

  """
  enabled = self._config_data.cluster.enabled_hypervisors
  return enabled[0]
@locking.ssynchronized(_config_lock, shared=1)
def GetRsaHostKey(self):
  """Return the C{rsahostkeypub} attribute of the cluster object.

  @return: the rsa hostkey

  """
  cluster = self._config_data.cluster
  return cluster.rsahostkeypub
@locking.ssynchronized(_config_lock, shared=1)
def GetDsaHostKey(self):
  """Return the C{dsahostkeypub} attribute of the cluster object.

  @return: the dsa hostkey

  """
  cluster = self._config_data.cluster
  return cluster.dsahostkeypub
@locking.ssynchronized(_config_lock, shared=1)
def GetDefaultIAllocator(self):
  """Return the C{default_iallocator} attribute of the cluster object.

  """
  cluster = self._config_data.cluster
  return cluster.default_iallocator
@locking.ssynchronized(_config_lock, shared=1)
def GetPrimaryIPFamily(self):
  """Return the C{primary_ip_family} attribute of the cluster object.

  @return: primary ip family

  """
  cluster = self._config_data.cluster
  return cluster.primary_ip_family
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetworkParameters(self):
  """Bundle the master's network settings into one object.

  Copies C{master_node}, C{master_ip}, C{master_netmask},
  C{master_netdev} and C{primary_ip_family} from the cluster object.

  @rtype: L{object.MasterNetworkParameters}
  @return: network parameters of the master node

  """
  cluster = self._config_data.cluster
  uuid = cluster.master_node
  ip = cluster.master_ip
  netmask = cluster.master_netmask
  netdev = cluster.master_netdev
  ip_family = cluster.primary_ip_family
  return objects.MasterNetworkParameters(uuid=uuid, ip=ip, netmask=netmask,
                                         netdev=netdev, ip_family=ip_family)
@locking.ssynchronized(_config_lock)
def AddNodeGroup(self, group, ec_id, check_uuid=True):
  """Add a node group to the configuration and write it out.

  The actual insertion is done by L{_UnlockedAddNodeGroup}, which also
  calls C{group.UpgradeConfig()}.

  @type group: L{objects.NodeGroup}
  @param group: the NodeGroup object to add
  @param ec_id: unique id for the job to use when creating a missing UUID
  @type check_uuid: bool
  @param check_uuid: add an UUID to the group if it doesn't have one or, if
      it does, ensure that it does not exist in the configuration already

  """
  self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
  # persist the change
  self._WriteConfig()
def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
  """Add a node group to the configuration.

  @param group: the node group object to add
  @param ec_id: id passed on to L{_EnsureUUID}
  @param check_uuid: whether to run L{_EnsureUUID} on the group
  @raise errors.OpPrereqError: if a group of that name can already be
      looked up

  """
  logging.info("Adding node group %s to configuration", group.name)

  # Callers that pre-populated the UUID via ConfigWriter.GenerateUniqueID()
  # pass check_uuid=False to bypass the "does this UUID exist" check.
  if check_uuid:
    self._EnsureUUID(group, ec_id)

  try:
    existing_uuid = self._UnlockedLookupNodeGroup(group.name)
  except errors.OpPrereqError:
    # lookup failed, so the name is free
    pass
  else:
    raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                               " node group (UUID: %s)" %
                               (group.name, existing_uuid),
                               errors.ECODE_EXISTS)

  group.serial_no = 1
  now = time.time()
  group.ctime = group.mtime = now
  group.UpgradeConfig()

  config = self._config_data
  config.nodegroups[group.uuid] = group
  config.cluster.serial_no += 1
@locking.ssynchronized(_config_lock)
def RemoveNodeGroup(self, group_uuid):
  """Remove a node group from the configuration and write it out.

  @type group_uuid: string
  @param group_uuid: the UUID of the node group to remove
  @raise errors.ConfigurationError: if the UUID is not a known group

  """
  logging.info("Removing node group %s from configuration", group_uuid)

  groups = self._config_data.nodegroups
  if group_uuid not in groups:
    raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)

  assert len(groups) != 1, \
         "Group '%s' is the only group, cannot be removed" % group_uuid

  del groups[group_uuid]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
def _UnlockedLookupNodeGroup(self, target):
  """Lookup a node group's UUID.

  C{None} resolves to the only existing group; any other value is first
  tried as a UUID and then matched against the group names.

  @type target: string or None
  @param target: group name or UUID or None to look for the default
  @return: nodegroup UUID
  @raises errors.OpPrereqError: when the target group cannot be found

  """
  groups = self._config_data.nodegroups

  if target is None:
    if len(groups) == 1:
      return list(groups.keys())[0]
    raise errors.OpPrereqError("More than one node group exists. Target"
                               " group must be specified explicitly.")

  if target in groups:
    # already a UUID
    return target

  for candidate in groups.values():
    if candidate.name == target:
      return candidate.uuid

  raise errors.OpPrereqError("Node group '%s' not found" % target,
                             errors.ECODE_NOENT)
@locking.ssynchronized(_config_lock, shared=1)
def LookupNodeGroup(self, target):
  """Lookup a node group's UUID.

  Locked wrapper that delegates to L{_UnlockedLookupNodeGroup}.

  @type target: string or None
  @param target: group name or UUID or None to look for the default
  @return: nodegroup UUID

  """
  group_uuid = self._UnlockedLookupNodeGroup(target)
  return group_uuid
def _UnlockedGetNodeGroup(self, uuid):
  """Lookup a node group by UUID.

  @param uuid: group UUID
  @rtype: L{objects.NodeGroup} or None
  @return: nodegroup object, or None if not found

  """
  try:
    return self._config_data.nodegroups[uuid]
  except KeyError:
    return None
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroup(self, uuid):
  """Lookup a node group by UUID.

  Locked wrapper that delegates to L{_UnlockedGetNodeGroup}.

  @param uuid: group UUID
  @rtype: L{objects.NodeGroup} or None
  @return: nodegroup object, or None if not found

  """
  group = self._UnlockedGetNodeGroup(uuid)
  return group
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodeGroupsInfo(self):
  """Return a new dict built from the configured node groups.

  """
  groups = self._config_data.nodegroups
  return dict(groups)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupList(self):
  """Return the keys (UUIDs) of the configured node groups.

  """
  groups = self._config_data.nodegroups
  return groups.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupMembersByNodes(self, nodes):
  """Get nodes which are member in the same nodegroups as the given nodes.

  @param nodes: iterable of node UUIDs
  @return: frozenset with the members of every group that one of the
      given nodes belongs to

  """
  members = set()
  for node_uuid in nodes:
    group_uuid = self._UnlockedGetNodeInfo(node_uuid).group
    members.update(self._UnlockedGetNodeGroup(group_uuid).members)
  return frozenset(members)
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiNodeGroupInfo(self, group_uuids):
  """Get the configuration of multiple node groups.

  @param group_uuids: List of node group UUIDs
  @return: List of tuples of (group_uuid, group_info), in input order

  """
  result = []
  for group_uuid in group_uuids:
    result.append((group_uuid, self._UnlockedGetNodeGroup(group_uuid)))
  return result
@locking.ssynchronized(_config_lock)
def AddInstance(self, instance, ec_id):
  """Add an instance to the config and write it out.

  Besides storing the object this releases the instance's temporary
  DRBD minors and commits the temporary IPs of C{ec_id}.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @param ec_id: id passed to L{_UnlockedCommitTemporaryIps}
  @raise errors.ProgrammerError: if C{instance} has the wrong type
  @raise errors.ConfigurationError: if one of its MACs is already in use

  """
  if not isinstance(instance, objects.Instance):
    raise errors.ProgrammerError("Invalid type passed to AddInstance")

  if instance.disk_template != constants.DT_DISKLESS:
    lv_map = instance.MapLVsByNode()
    logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, lv_map)

  known_macs = self._AllMACs()
  for nic in instance.nics:
    if nic.mac in known_macs:
      raise errors.ConfigurationError("Cannot add instance %s:"
                                      " MAC address '%s' already in use." %
                                      (instance.name, nic.mac))

  self._CheckUniqueUUID(instance, include_temporary=False)

  instance.serial_no = 1
  now = time.time()
  instance.ctime = instance.mtime = now
  self._config_data.instances[instance.uuid] = instance
  self._config_data.cluster.serial_no += 1
  self._UnlockedReleaseDRBDMinors(instance.uuid)
  self._UnlockedCommitTemporaryIps(ec_id)
  self._WriteConfig()
def _EnsureUUID(self, item, ec_id):
  """Ensures a given object has a valid UUID.

  An item with a UUID is checked through L{_CheckUniqueUUID} (temporary
  ones included); an item without one gets a freshly generated UUID.

  @param item: the instance or node to be checked
  @param ec_id: the execution context id for the uuid reservation

  """
  if item.uuid:
    self._CheckUniqueUUID(item, include_temporary=True)
  else:
    item.uuid = self._GenerateUniqueID(ec_id)
def _CheckUniqueUUID(self, item, include_temporary):
  """Checks that the UUID of the given object is unique.

  @param item: the instance or node to be checked
  @param include_temporary: whether temporarily generated UUID's should be
      included in the check. If the UUID of the item to be checked is
      a temporarily generated one, this has to be C{False}.
  @raise errors.ConfigurationError: if the item has no UUID or the UUID
      is among those returned by C{_AllIDs}

  """
  item_uuid = item.uuid
  if not item_uuid:
    raise errors.ConfigurationError("'%s' must have an UUID" % (item.name,))

  taken = self._AllIDs(include_temporary=include_temporary)
  if item_uuid in taken:
    raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
                                    " in use" % (item.name, item.uuid))
def _SetInstanceStatus(self, inst_uuid, status, disks_active):
  """Set the instance's status to a given value.

  A C{None} value for C{status} or C{disks_active} keeps the value the
  instance currently has. The configuration is only touched (serial
  number, mtime, write-out) when one of the two values changes.

  @param inst_uuid: UUID of the instance to modify
  @param status: new admin state, or None
  @param disks_active: new disks-active flag, or None
  @raise errors.ConfigurationError: if the instance is unknown

  """
  instances = self._config_data.instances
  if inst_uuid not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" %
                                    inst_uuid)
  instance = instances[inst_uuid]

  if status is None:
    status = instance.admin_state
  if disks_active is None:
    disks_active = instance.disks_active

  assert status in constants.ADMINST_ALL, \
         "Invalid status '%s' passed to SetInstanceStatus" % (status,)

  if not (instance.admin_state != status or
          instance.disks_active != disks_active):
    # nothing to change
    return

  instance.admin_state = status
  instance.disks_active = disks_active
  instance.serial_no += 1
  instance.mtime = time.time()
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceUp(self, inst_uuid):
  """Mark the instance status to up in the config.

  Calls L{_SetInstanceStatus} with C{constants.ADMINST_UP} and the disks
  active flag set to C{True}.

  """
  new_state = constants.ADMINST_UP
  self._SetInstanceStatus(inst_uuid, new_state, True)
@locking.ssynchronized(_config_lock)
def MarkInstanceOffline(self, inst_uuid):
  """Mark the instance status to offline in the config.

  Calls L{_SetInstanceStatus} with C{constants.ADMINST_OFFLINE} and the
  disks active flag set to C{False}.

  """
  new_state = constants.ADMINST_OFFLINE
  self._SetInstanceStatus(inst_uuid, new_state, False)
@locking.ssynchronized(_config_lock)
def RemoveInstance(self, inst_uuid):
  """Remove the instance from the configuration and write it out.

  @param inst_uuid: UUID of the instance to remove
  @raise errors.ConfigurationError: if the instance is unknown

  """
  instances = self._config_data.instances
  if inst_uuid not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

  # If a network port has been allocated to the instance,
  # return it to the pool of free ports.
  network_port = getattr(instances[inst_uuid], "network_port", None)
  if network_port is not None:
    self._config_data.cluster.tcpudp_port_pool.add(network_port)

  instance = self._UnlockedGetInstanceInfo(inst_uuid)

  # Return all IP addresses to the respective address pools
  for nic in instance.nics:
    if not (nic.network and nic.ip):
      continue
    self._UnlockedCommitIp(constants.RELEASE_ACTION, nic.network, nic.ip)

  del instances[inst_uuid]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RenameInstance(self, inst_uuid, new_name):
  """Rename an instance and write the configuration out.

  This needs to be done in ConfigWriter and not by RemoveInstance
  combined with AddInstance as only we can guarantee an atomic
  rename. Disks of type C{constants.LD_FILE} get their path rebuilt
  with the new instance name.

  @param inst_uuid: UUID of the instance to rename
  @param new_name: the new instance name
  @raise errors.ConfigurationError: if the instance is unknown

  """
  instances = self._config_data.instances
  if inst_uuid not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

  inst = instances[inst_uuid]
  inst.name = new_name

  for (idx, disk) in enumerate(inst.disks):
    if disk.dev_type != constants.LD_FILE:
      continue
    # rename the file paths in logical and physical id
    (driver, old_path) = (disk.logical_id[0], disk.logical_id[1])
    file_storage_dir = os.path.dirname(os.path.dirname(old_path))
    new_path = utils.PathJoin(file_storage_dir, inst.name, "disk%s" % idx)
    disk.logical_id = (driver, new_path)
    disk.physical_id = disk.logical_id

  # Force update of ssconf files
  self._config_data.cluster.serial_no += 1

  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceDown(self, inst_uuid):
  """Mark the status of an instance to down in the configuration.

  C{None} is passed for the disks active flag so that it is left as it
  is: shut down instances can still have active disks.

  """
  new_state = constants.ADMINST_DOWN
  self._SetInstanceStatus(inst_uuid, new_state, None)
@locking.ssynchronized(_config_lock)
def MarkInstanceDisksActive(self, inst_uuid):
  """Mark the status of instance disks active.

  The admin state is passed as C{None}, i.e. left unchanged.

  """
  self._SetInstanceStatus(inst_uuid, status=None, disks_active=True)
@locking.ssynchronized(_config_lock)
def MarkInstanceDisksInactive(self, inst_uuid):
  """Mark the status of instance disks inactive.

  The admin state is passed as C{None}, i.e. left unchanged.

  """
  self._SetInstanceStatus(inst_uuid, status=None, disks_active=False)
def _UnlockedGetInstanceList(self):
  """Return the keys (UUIDs) of the configured instances.

  This function is for internal use, when the config lock is already held.

  """
  instances = self._config_data.instances
  return instances.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceList(self):
  """Get the list of instances.

  Locked wrapper that delegates to L{_UnlockedGetInstanceList}.

  @return: array of instances, ex. ['instance2-uuid', 'instance1-uuid']

  """
  inst_uuids = self._UnlockedGetInstanceList()
  return inst_uuids
def ExpandInstanceName(self, short_name):
  """Attempt to expand an incomplete instance name.

  @param short_name: the (possibly abbreviated) instance name
  @return: tuple of (UUID, full name) of the matching instance, or
      C{(None, None)} if the name could not be expanded

  """
  # Locking is done in L{ConfigWriter.GetAllInstancesInfo}
  all_insts = self.GetAllInstancesInfo().values()
  known_names = [inst.name for inst in all_insts]
  expanded_name = _MatchNameComponentIgnoreCase(short_name, known_names)

  if expanded_name is None:
    return (None, None)

  # there has to be exactly one instance with that name
  matches = [inst for inst in all_insts if inst.name == expanded_name]
  found = matches[0]
  return (found.uuid, found.name)
def _UnlockedGetInstanceInfo(self, inst_uuid):
  """Returns information about an instance.

  This function is for internal use, when the config lock is already held.

  @param inst_uuid: UUID of the instance
  @return: the stored instance object, or None if the UUID is unknown

  """
  try:
    return self._config_data.instances[inst_uuid]
  except KeyError:
    return None
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceInfo(self, inst_uuid):
  """Returns information about an instance.

  The data comes from the configuration only; this is a locked wrapper
  that delegates to L{_UnlockedGetInstanceInfo}.

  @param inst_uuid: UUID of the instance
  @rtype: L{objects.Instance}
  @return: the instance object

  """
  instance = self._UnlockedGetInstanceInfo(inst_uuid)
  return instance
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceNodeGroups(self, inst_uuid, primary_only=False):
  """Returns set of node group UUIDs for instance's nodes.

  @param inst_uuid: UUID of the instance
  @param primary_only: consider only the primary node instead of
      C{all_nodes}
  @rtype: frozenset
  @raise errors.ConfigurationError: if the instance is unknown

  """
  instance = self._UnlockedGetInstanceInfo(inst_uuid)
  if not instance:
    raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

  node_uuids = [instance.primary_node] if primary_only else instance.all_nodes

  groups = set()
  for node_uuid in node_uuids:
    groups.add(self._UnlockedGetNodeInfo(node_uuid).group)
  return frozenset(groups)
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceNetworks(self, inst_uuid):
  """Returns set of network UUIDs for instance's nics.

  NICs whose C{network} attribute is false are left out.

  @param inst_uuid: UUID of the instance
  @rtype: frozenset
  @raise errors.ConfigurationError: if the instance is unknown

  """
  instance = self._UnlockedGetInstanceInfo(inst_uuid)
  if not instance:
    raise errors.ConfigurationError("Unknown instance '%s'" % inst_uuid)

  return frozenset(nic.network for nic in instance.nics if nic.network)
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiInstanceInfo(self, inst_uuids):
  """Get the configuration of multiple instances.

  @param inst_uuids: list of instance UUIDs
  @rtype: list
  @return: list of tuples (instance UUID, instance_info), where
      instance_info is what L{_UnlockedGetInstanceInfo} returns for that
      UUID, in the original order

  """
  result = []
  for inst_uuid in inst_uuids:
    result.append((inst_uuid, self._UnlockedGetInstanceInfo(inst_uuid)))
  return result
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiInstanceInfoByName(self, inst_names):
  """Get the configuration of multiple instances, looked up by name.

  @param inst_names: list of instance names
  @rtype: list
  @return: list of tuples (instance UUID, instance_info), in the original
      order; each instance_info comes from
      L{_UnlockedGetInstanceInfoByName}

  """
  lookups = (self._UnlockedGetInstanceInfoByName(name)
             for name in inst_names)
  return [(instance.uuid, instance) for instance in lookups]
@locking.ssynchronized(_config_lock, shared=1)
def GetAllInstancesInfo(self):
  """Get the configuration of all instances.

  Locked wrapper that delegates to L{_UnlockedGetAllInstancesInfo}.

  @rtype: dict
  @return: dict mapping instance UUID to the instance information

  """
  all_info = self._UnlockedGetAllInstancesInfo()
  return all_info
def _UnlockedGetAllInstancesInfo(self):
  """Build a dict of all instances, keyed by UUID.

  Keys come from L{_UnlockedGetInstanceList}, values from
  L{_UnlockedGetInstanceInfo}.

  """
  all_info = {}
  for inst_uuid in self._UnlockedGetInstanceList():
    all_info[inst_uuid] = self._UnlockedGetInstanceInfo(inst_uuid)
  return all_info
@locking.ssynchronized(_config_lock, shared=1)
def GetInstancesInfoByFilter(self, filter_fn):
  """Get instance configuration with a filter.

  @type filter_fn: callable
  @param filter_fn: Filter function receiving instance object as parameter,
      returning boolean. Important: this function is called while the
      configuration lock is held. It must not do any complex work or call
      functions potentially leading to a deadlock. Ideally it doesn't call
      any other functions and just compares instance attributes.
  @return: dict mapping UUID to instance object for every instance that
      C{filter_fn} accepts

  """
  selected = {}
  for (inst_uuid, inst) in self._config_data.instances.items():
    if filter_fn(inst):
      selected[inst_uuid] = inst
  return selected
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceInfoByName(self, inst_name):
  """Get the L{objects.Instance} object for a named instance.

  Locked wrapper that delegates to L{_UnlockedGetInstanceInfoByName}.

  @param inst_name: name of the instance to get information for
  @type inst_name: string
  @return: the corresponding L{objects.Instance} instance or None if no
      information is available

  """
  instance = self._UnlockedGetInstanceInfoByName(inst_name)
  return instance
def _UnlockedGetInstanceInfoByName(self, inst_name):
  """Return the first instance whose name equals C{inst_name}.

  @param inst_name: the instance name to search for
  @return: the instance object, or None if no instance has that name

  """
  candidates = self._UnlockedGetAllInstancesInfo().values()
  return next((inst for inst in candidates if inst.name == inst_name), None)
def _UnlockedGetInstanceName(self, inst_uuid):
  """Return the name of the instance with the given UUID.

  @param inst_uuid: instance UUID to get the name for
  @return: the instance name
  @raise errors.OpExecError: if the instance is unknown

  """
  info = self._UnlockedGetInstanceInfo(inst_uuid)
  if info is not None:
    return info.name
  raise errors.OpExecError("Unknown instance: %s" % inst_uuid)
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceName(self, inst_uuid):
  """Gets the instance name for the passed instance.

  Locked wrapper that delegates to L{_UnlockedGetInstanceName}.

  @param inst_uuid: instance UUID to get name for
  @type inst_uuid: string
  @rtype: string
  @return: instance name

  """
  name = self._UnlockedGetInstanceName(inst_uuid)
  return name
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceNames(self, inst_uuids):
  """Gets the instance names for the passed list of instance UUIDs.

  Locked wrapper that delegates to L{_UnlockedGetInstanceNames}.

  @param inst_uuids: list of instance UUIDs to get names for
  @type inst_uuids: list of strings
  @rtype: list of strings
  @return: list of instance names

  """
  names = self._UnlockedGetInstanceNames(inst_uuids)
  return names
def _UnlockedGetInstanceNames(self, inst_uuids):
  """Map each UUID in C{inst_uuids} through L{_UnlockedGetInstanceName}.

  @return: list of instance names, in input order

  """
  names = []
  for inst_uuid in inst_uuids:
    names.append(self._UnlockedGetInstanceName(inst_uuid))
  return names
@locking.ssynchronized(_config_lock)
def AddNode(self, node, ec_id):
  """Add a node to the configuration and write it out.

  @type node: L{objects.Node}
  @param node: a Node instance
  @param ec_id: id passed on to L{_EnsureUUID}

  """
  logging.info("Adding node %s to configuration", node.name)

  self._EnsureUUID(node, ec_id)

  node.serial_no = 1
  now = time.time()
  node.ctime = node.mtime = now
  self._UnlockedAddNodeToGroup(node.uuid, node.group)

  config = self._config_data
  config.nodes[node.uuid] = node
  config.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RemoveNode(self, node_uuid):
  """Remove a node from the configuration and write it out.

  The node is also taken out of its node group through
  L{_UnlockedRemoveNodeFromGroup}.

  @param node_uuid: UUID of the node to remove
  @raise errors.ConfigurationError: if the node is unknown

  """
  logging.info("Removing node %s from configuration", node_uuid)

  nodes = self._config_data.nodes
  if node_uuid not in nodes:
    raise errors.ConfigurationError("Unknown node '%s'" % node_uuid)

  self._UnlockedRemoveNodeFromGroup(nodes[node_uuid])
  del nodes[node_uuid]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
def ExpandNodeName(self, short_name):
  """Attempt to expand an incomplete node name into a node UUID.

  @param short_name: the (possibly abbreviated) node name
  @return: tuple of (UUID, full name) of the matching node, or
      C{(None, None)} if the name could not be expanded

  """
  # Locking is done in L{ConfigWriter.GetAllNodesInfo}
  all_nodes = self.GetAllNodesInfo().values()
  known_names = [node.name for node in all_nodes]
  expanded_name = _MatchNameComponentIgnoreCase(short_name, known_names)

  if expanded_name is None:
    return (None, None)

  # there has to be exactly one node with that name
  matches = [node for node in all_nodes if node.name == expanded_name]
  found = matches[0]
  return (found.uuid, found.name)
def _UnlockedGetNodeInfo(self, node_uuid):
  """Get the configuration of a node, as stored in the config.

  This function is for internal use, when the config lock is already
  held.

  @param node_uuid: the node UUID
  @rtype: L{objects.Node}
  @return: the node object, or None if the UUID is unknown

  """
  nodes = self._config_data.nodes
  return nodes.get(node_uuid, None)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInfo(self, node_uuid):
  """Get the configuration of a node, as stored in the config.

  This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

  @param node_uuid: the node UUID
  @rtype: L{objects.Node}
  @return: the node object

  """
  node = self._UnlockedGetNodeInfo(node_uuid)
  return node
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInstances(self, node_uuid):
  """Get the instances of a node, as stored in the config.

  @param node_uuid: the node UUID
  @rtype: (list, list)
  @return: a tuple with two lists of instance UUIDs: those having the
      node as C{primary_node} and those listing it in C{secondary_nodes}

  """
  instances = list(self._config_data.instances.values())
  primary = [inst.uuid for inst in instances
             if inst.primary_node == node_uuid]
  secondary = [inst.uuid for inst in instances
               if node_uuid in inst.secondary_nodes]
  return (primary, secondary)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupInstances(self, uuid, primary_only=False):
  """Get the instances of a node group.

  @param uuid: Node group UUID
  @param primary_only: Whether to only consider primary nodes
  @rtype: frozenset
  @return: instance UUIDs having at least one considered node in the group

  """
  found = set()
  for inst in self._config_data.instances.values():
    if primary_only:
      considered = [inst.primary_node]
    else:
      considered = inst.all_nodes
    for node_uuid in considered:
      if self._UnlockedGetNodeInfo(node_uuid).group == uuid:
        found.add(inst.uuid)
  return frozenset(found)
def _UnlockedGetHvparamsString(self, hvname):
  """Return the string representation of the list of hypervisor parameters
  of the given hypervisor.

  Each parameter of C{cluster.hvparams[hvname]} becomes one
  C{key=value} line terminated by a newline.

  @see: C{GetHvparams}

  """
  hvparams = self._config_data.cluster.hvparams[hvname]
  lines = ["%s=%s\n" % (key, hvparams[key]) for key in hvparams]
  return "".join(lines)
@locking.ssynchronized(_config_lock, shared=1)
def GetHvparamsString(self, hvname):
  """Return the hypervisor parameters of the given hypervisor.

  Locked wrapper that delegates to L{_UnlockedGetHvparamsString}.

  @type hvname: string
  @param hvname: name of a hypervisor
  @rtype: string
  @return: string containing key-value-pairs, one pair on each line

  """
  text = self._UnlockedGetHvparamsString(hvname)
  return text
def _UnlockedGetNodeList(self):
  """Return the keys (UUIDs) of the nodes which are in the configuration.

  This function is for internal use, when the config lock is already
  held.

  """
  nodes = self._config_data.nodes
  return nodes.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeList(self):
  """Return the list of nodes which are in the configuration.

  Locked wrapper that delegates to L{_UnlockedGetNodeList}.

  """
  node_uuids = self._UnlockedGetNodeList()
  return node_uuids
def _UnlockedGetOnlineNodeList(self):
  """Return the list of nodes which are online.

  @return: UUIDs of all nodes whose C{offline} attribute is false

  """
  online = []
  infos = [self._UnlockedGetNodeInfo(node_uuid)
           for node_uuid in self._UnlockedGetNodeList()]
  for info in infos:
    if not info.offline:
      online.append(info.uuid)
  return online
@locking.ssynchronized(_config_lock, shared=1)
def GetOnlineNodeList(self):
  """Return the list of nodes which are online.

  Locked wrapper that delegates to L{_UnlockedGetOnlineNodeList}.

  """
  online = self._UnlockedGetOnlineNodeList()
  return online
@locking.ssynchronized(_config_lock, shared=1)
def GetVmCapableNodeList(self):
  """Return the list of nodes which are vm capable.

  @return: UUIDs of all nodes whose C{vm_capable} attribute is true

  """
  infos = [self._UnlockedGetNodeInfo(node_uuid)
           for node_uuid in self._UnlockedGetNodeList()]
  return [info.uuid for info in infos if info.vm_capable]
@locking.ssynchronized(_config_lock, shared=1)
def GetNonVmCapableNodeList(self):
  """Return the list of nodes which are not vm capable.

  @return: UUIDs of all nodes whose C{vm_capable} attribute is false

  """
  infos = [self._UnlockedGetNodeInfo(node_uuid)
           for node_uuid in self._UnlockedGetNodeList()]
  excluded = []
  for info in infos:
    if not info.vm_capable:
      excluded.append(info.uuid)
  return excluded
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiNodeInfo(self, node_uuids):
  """Get the configuration of multiple nodes.

  @param node_uuids: list of node UUIDs
  @rtype: list
  @return: list of tuples of (node UUID, node_info), where node_info is
      what L{_UnlockedGetNodeInfo} returns for that UUID, in the original
      order

  """
  result = []
  for node_uuid in node_uuids:
    result.append((node_uuid, self._UnlockedGetNodeInfo(node_uuid)))
  return result
def _UnlockedGetAllNodesInfo(self):
  """Gets configuration of all nodes.

  Keys come from L{_UnlockedGetNodeList}, values from
  L{_UnlockedGetNodeInfo}.

  @note: See L{GetAllNodesInfo}

  """
  all_info = {}
  for node_uuid in self._UnlockedGetNodeList():
    all_info[node_uuid] = self._UnlockedGetNodeInfo(node_uuid)
  return all_info
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodesInfo(self):
  """Get the configuration of all nodes.

  Locked wrapper that delegates to L{_UnlockedGetAllNodesInfo}.

  @rtype: dict
  @return: dict mapping node UUID to the node information

  """
  all_info = self._UnlockedGetAllNodesInfo()
  return all_info
def _UnlockedGetNodeInfoByName(self, node_name):
  """Return the first node whose name equals C{node_name}.

  @param node_name: the node name to search for
  @return: the node object, or None if no node has that name

  """
  candidates = self._UnlockedGetAllNodesInfo().values()
  return next((node for node in candidates if node.name == node_name), None)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInfoByName(self, node_name):
  """Get the L{objects.Node} object for a named node.

  Locked wrapper that delegates to L{_UnlockedGetNodeInfoByName}.

  @param node_name: name of the node to get information for
  @type node_name: string
  @return: the corresponding L{objects.Node} instance or None if no
      information is available

  """
  node = self._UnlockedGetNodeInfoByName(node_name)
  return node
def _UnlockedGetNodeName(self, node_spec):
  """Return the node name for a node object or a node UUID.

  @param node_spec: either a node UUID or a L{objects.Node} object
  @return: the node name
  @raise errors.OpExecError: if a UUID is given that is unknown
  @raise errors.ProgrammerError: if C{node_spec} is of any other type

  """
  if isinstance(node_spec, objects.Node):
    return node_spec.name

  if not isinstance(node_spec, basestring):
    raise errors.ProgrammerError("Can't handle node spec '%s'" % node_spec)

  info = self._UnlockedGetNodeInfo(node_spec)
  if info is None:
    raise errors.OpExecError("Unknown node: %s" % node_spec)
  return info.name
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeName(self, node_spec):
  """Gets the node name for the passed node.

  Locked wrapper that delegates to L{_UnlockedGetNodeName}.

  @param node_spec: node to get names for
  @type node_spec: either node UUID or a L{objects.Node} object
  @return: node name

  """
  name = self._UnlockedGetNodeName(node_spec)
  return name
def _UnlockedGetNodeNames(self, node_specs):
  """Map each entry of C{node_specs} through L{_UnlockedGetNodeName}.

  @return: list of node names, in input order

  """
  names = []
  for spec in node_specs:
    names.append(self._UnlockedGetNodeName(spec))
  return names
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeNames(self, node_specs):
  """Gets the node names for the passed list of nodes.

  Locked wrapper that delegates to L{_UnlockedGetNodeNames}.

  @param node_specs: list of nodes to get names for
  @type node_specs: list of either node UUIDs or L{objects.Node} objects
  @rtype: list of strings
  @return: list of node names

  """
  names = self._UnlockedGetNodeNames(node_specs)
  return names
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupsFromNodes(self, node_uuids):
  """Returns groups for a list of nodes.

  @type node_uuids: list of string
  @param node_uuids: List of node UUIDs
  @rtype: frozenset
  @return: the C{group} attribute of every given node

  """
  groups = set()
  for node_uuid in node_uuids:
    groups.add(self._UnlockedGetNodeInfo(node_uuid).group)
  return frozenset(groups)
def _UnlockedGetMasterCandidateStats(self, exceptions=None):
  """Get the number of current and maximum desired and possible candidates.

  A node counts as possible when it is master capable and neither
  offline nor drained; it counts as current when its
  C{master_candidate} flag is set. The desired number is the possible
  number capped by the cluster's C{candidate_pool_size}.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: tuple of (current, desired and possible, possible)

  """
  considered = [node for node in self._config_data.nodes.values()
                if not (exceptions and node.uuid in exceptions)]

  mc_max = sum(1 for node in considered
               if not (node.offline or node.drained) and node.master_capable)
  mc_now = sum(1 for node in considered if node.master_candidate)
  mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
  return (mc_now, mc_should, mc_max)
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterCandidateStats(self, exceptions=None):
  """Get the number of current, desired and possible master candidates.

  This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: tuple of (current, desired and possible, possible), exactly as
      returned by the wrapped method

  """
  return self._UnlockedGetMasterCandidateStats(exceptions)
@locking.ssynchronized(_config_lock)
def MaintainCandidatePool(self, exception_node_uuids):
  """Try to grow the candidate pool to the desired size.

  Nodes are visited in random order; every eligible node (master
  capable, not offline, not drained, not yet a candidate and not
  excepted) is promoted until the desired number is reached. The
  configuration is written only if at least one node was changed.

  @type exception_node_uuids: list
  @param exception_node_uuids: list of nodes that should be ignored;
      C{None} is treated like an empty list
  @rtype: list
  @return: list with the adjusted nodes (L{objects.Node} instances)

  """
  # Normalise so the membership test below also works when None is passed.
  ignored = exception_node_uuids or []

  # The second element is the desired pool size ("should"), which is the
  # target this method fills up to.
  mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(
                        exception_node_uuids)
  mod_list = []
  if mc_now < mc_max:
    # random.shuffle needs a mutable sequence, so copy the keys into a list
    node_list = list(self._config_data.nodes.keys())
    random.shuffle(node_list)
    for uuid in node_list:
      if mc_now >= mc_max:
        break
      node = self._config_data.nodes[uuid]
      if (node.master_candidate or node.offline or node.drained or
          node.uuid in ignored or not node.master_capable):
        continue
      mod_list.append(node)
      node.master_candidate = True
      node.serial_no += 1
      mc_now += 1
    if mc_now != mc_max:
      # this should not happen
      logging.warning("Warning: MaintainCandidatePool didn't manage to"
                      " fill the candidate pool (%d/%d)", mc_now, mc_max)
    if mod_list:
      self._config_data.cluster.serial_no += 1
      self._WriteConfig()

  return mod_list
def _UnlockedAddNodeToGroup(self, node_uuid, nodegroup_uuid):
  """Add a given node to the specified group.

  Nothing happens if the node is already listed as a member.

  @param node_uuid: UUID of the node to add
  @param nodegroup_uuid: UUID of the target node group
  @raise errors.OpExecError: if the node group is unknown

  """
  groups = self._config_data.nodegroups
  if nodegroup_uuid not in groups:
    # This can happen if a node group gets deleted between its lookup and
    # when we're adding the first node to it, since we don't keep a lock in
    # the meantime. It's ok though, as we'll fail cleanly if the node group
    # is not found anymore.
    raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)

  members = groups[nodegroup_uuid].members
  if node_uuid not in members:
    members.append(node_uuid)
2201 def _UnlockedRemoveNodeFromGroup(self, node):
2202 """Remove a given node from its group.
2205 nodegroup = node.group
2206 if nodegroup not in self._config_data.nodegroups:
2207 logging.warning("Warning: node '%s' has unknown node group '%s'"
2208 " (while being removed from it)", node.uuid, nodegroup)
2209 nodegroup_obj = self._config_data.nodegroups[nodegroup]
2210 if node.uuid not in nodegroup_obj.members:
2211 logging.warning("Warning: node '%s' not a member of its node group '%s'"
2212 " (while being removed from it)", node.uuid, nodegroup)
2214 nodegroup_obj.members.remove(node.uuid)
@locking.ssynchronized(_config_lock)
def AssignGroupNodes(self, mods):
  """Changes the group of a number of nodes.

  All requested changes are resolved and checked first; the configuration
  is only touched once every entry has been validated. Entries assigning
  a node to the group it already belongs to are skipped.

  @type mods: list of tuples; (node UUID, new group UUID)
  @param mods: Node membership modifications
  @raise errors.ConfigurationError: if a node, its current group or the
      new group cannot be found

  """
  all_groups = self._config_data.nodegroups
  all_nodes = self._config_data.nodes

  resmod = []

  # First pass: resolve every UUID, nothing is modified yet
  for (node_uuid, new_group_uuid) in mods:
    try:
      node = all_nodes[node_uuid]
    except KeyError:
      raise errors.ConfigurationError("Unable to find node '%s'" % node_uuid)

    if node.group == new_group_uuid:
      # Nothing to do for a node that already is in the requested group
      logging.debug("Node '%s' was assigned to its current group (%s)",
                    node_uuid, node.group)
      continue

    # Group the node is leaving
    try:
      old_group = all_groups[node.group]
    except KeyError:
      raise errors.ConfigurationError("Unable to find old group '%s'" %
                                      node.group)

    # Group the node is joining
    try:
      new_group = all_groups[new_group_uuid]
    except KeyError:
      raise errors.ConfigurationError("Unable to find new group '%s'" %
                                      new_group_uuid)

    assert node.uuid in old_group.members, \
      ("Inconsistent configuration: node '%s' not listed in members for its"
       " old group '%s'" % (node.uuid, old_group.uuid))
    assert node.uuid not in new_group.members, \
      ("Inconsistent configuration: node '%s' already listed in members for"
       " its new group '%s'" % (node.uuid, new_group.uuid))

    resmod.append((node, old_group, new_group))

  # Second pass: apply the validated changes
  for (node, old_group, new_group) in resmod:
    # NOTE(review): the first comparison checks the node's own UUID against
    # a group UUID; presumably node.group was meant - confirm before changing
    assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
      "Assigning to current group is not possible"

    node.group = new_group.uuid

    # Keep the member lists of both groups in sync
    if node.uuid in old_group.members:
      old_group.members.remove(node.uuid)
    if node.uuid not in new_group.members:
      new_group.members.append(node.uuid)

  # Bump serial and mtime exactly once per touched node/group object
  stamp = time.time()
  touched = frozenset(itertools.chain(*resmod)) # pylint: disable=W0142
  for obj in touched:
    obj.serial_no += 1
    obj.mtime = stamp

  # Force ssconf update
  self._config_data.cluster.serial_no += 1

  self._WriteConfig()
2289 def _BumpSerialNo(self):
2290 """Bump up the serial number of the config.
2293 self._config_data.serial_no += 1
2294 self._config_data.mtime = time.time()
2296 def _AllUUIDObjects(self):
2297 """Returns all objects with uuid attributes.
2300 return (self._config_data.instances.values() +
2301 self._config_data.nodes.values() +
2302 self._config_data.nodegroups.values() +
2303 self._config_data.networks.values() +
2306 [self._config_data.cluster])
2308 def _OpenConfig(self, accept_foreign):
2309 """Read the config data from disk.
2312 raw_data = utils.ReadFile(self._cfg_file)
2315 data = objects.ConfigData.FromDict(serializer.Load(raw_data))
2316 except Exception, err:
2317 raise errors.ConfigurationError(err)
2319 # Make sure the configuration has the right version
2320 _ValidateConfig(data)
2322 if (not hasattr(data, "cluster") or
2323 not hasattr(data.cluster, "rsahostkeypub")):
2324 raise errors.ConfigurationError("Incomplete configuration"
2325 " (missing cluster.rsahostkeypub)")
2327 if not data.cluster.master_node in data.nodes:
2328 msg = ("The configuration denotes node %s as master, but does not"
2329 " contain information about this node" %
2330 data.cluster.master_node)
2331 raise errors.ConfigurationError(msg)
2333 master_info = data.nodes[data.cluster.master_node]
2334 if master_info.name != self._my_hostname and not accept_foreign:
2335 msg = ("The configuration denotes node %s as master, while my"
2336 " hostname is %s; opening a foreign configuration is only"
2337 " possible in accept_foreign mode" %
2338 (master_info.name, self._my_hostname))
2339 raise errors.ConfigurationError(msg)
2341 self._config_data = data
2342 # reset the last serial as -1 so that the next write will cause
2344 self._last_cluster_serial = -1
2346 # Upgrade configuration if needed
2347 self._UpgradeConfig()
2349 self._cfg_id = utils.GetFileID(path=self._cfg_file)
def _UpgradeConfig(self):
  """Run any upgrade steps.

  Besides the in-object upgrades this fills in missing UUIDs, creates the
  initial node group if there is none and makes sure every node is
  registered in the member list of its group. The configuration is
  written only if these steps changed its serialized form; otherwise it
  is merely verified and inconsistencies are logged.

  @warning: this function will call L{_WriteConfig()}, but also
      L{DropECReservations} so it needs to be called only from a
      "safe" place (the constructor). If one wanted to call it with
      the lock held, a DropECReservationUnlocked would need to be
      created first, to avoid causing deadlock.

  """
  # Snapshot of the persistent part of the configuration; comparing the
  # dictionaries avoids depending on serialization order
  snapshot = copy.deepcopy(self._config_data.ToDict())

  # In-object upgrades
  self._config_data.UpgradeConfig()

  for obj in self._AllUUIDObjects():
    if obj.uuid is None:
      obj.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)

  if not self._config_data.nodegroups:
    initial_name = constants.INITIAL_NODE_GROUP_NAME
    initial_group = objects.NodeGroup(name=initial_name,
                                      members=[])
    self._UnlockedAddNodeGroup(initial_group, _UPGRADE_CONFIG_JID, True)

  for node in self._config_data.nodes.values():
    if not node.group:
      node.group = self.LookupNodeGroup(None)
    # Not an upgrade in the strict sense: the members list of a node group
    # is discarded when (de)serializing, so it has to be rebuilt both after
    # adding node groups and on every normal load.
    self._UnlockedAddNodeToGroup(node.uuid, node.group)

  if snapshot != self._config_data.ToDict():
    self._WriteConfig()
    # Acquiring the internal lock here is fine, as _UpgradeConfig is only
    # called at config init time, without the lock held
    self.DropECReservations(_UPGRADE_CONFIG_JID)
    return

  config_errors = self._UnlockedVerifyConfig()
  if config_errors:
    errmsg = ("Loaded configuration data is not consistent: %s" %
              (utils.CommaJoin(config_errors)))
    logging.critical(errmsg)
2402 def _DistributeConfig(self, feedback_fn):
2403 """Distribute the configuration to the other nodes.
2405 Currently, this only copies the configuration file. In the future,
2406 it could be used to encapsulate the 2/3-phase update mechanism.
2416 myhostname = self._my_hostname
2417 # we can skip checking whether _UnlockedGetNodeInfo returns None
2418 # since the node list comes from _UnlocketGetNodeList, and we are
2419 # called with the lock held, so no modifications should take place
2421 for node_uuid in self._UnlockedGetNodeList():
2422 node_info = self._UnlockedGetNodeInfo(node_uuid)
2423 if node_info.name == myhostname or not node_info.master_candidate:
2425 node_list.append(node_info.name)
2426 addr_list.append(node_info.primary_ip)
2428 # TODO: Use dedicated resolver talking to config writer for name resolution
2430 self._GetRpc(addr_list).call_upload_file(node_list, self._cfg_file)
2431 for to_node, to_result in result.items():
2432 msg = to_result.fail_msg
2434 msg = ("Copy of file %s to node %s failed: %s" %
2435 (self._cfg_file, to_node, msg))
2445 def _WriteConfig(self, destination=None, feedback_fn=None):
2446 """Write the configuration data to persistent storage.
2449 assert feedback_fn is None or callable(feedback_fn)
2451 # Warn on config errors, but don't abort the save - the
2452 # configuration has already been modified, and we can't revert;
2453 # the best we can do is to warn the user and save as is, leaving
2454 # recovery to the user
2455 config_errors = self._UnlockedVerifyConfig()
2457 errmsg = ("Configuration data is not consistent: %s" %
2458 (utils.CommaJoin(config_errors)))
2459 logging.critical(errmsg)
2463 if destination is None:
2464 destination = self._cfg_file
2465 self._BumpSerialNo()
2466 txt = serializer.Dump(self._config_data.ToDict())
2468 getents = self._getents()
2470 fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
2471 close=False, gid=getents.confd_gid, mode=0640)
2472 except errors.LockError:
2473 raise errors.ConfigurationError("The configuration file has been"
2474 " modified since the last write, cannot"
2477 self._cfg_id = utils.GetFileID(fd=fd)
2481 self.write_count += 1
2483 # and redistribute the config file to master candidates
2484 self._DistributeConfig(feedback_fn)
2486 # Write ssconf files on all nodes (including locally)
2487 if self._last_cluster_serial < self._config_data.cluster.serial_no:
2488 if not self._offline:
2489 result = self._GetRpc(None).call_write_ssconf_files(
2490 self._UnlockedGetNodeNames(self._UnlockedGetOnlineNodeList()),
2491 self._UnlockedGetSsconfValues())
2493 for nname, nresu in result.items():
2494 msg = nresu.fail_msg
2496 errmsg = ("Error while uploading ssconf files to"
2497 " node %s: %s" % (nname, msg))
2498 logging.warning(errmsg)
2503 self._last_cluster_serial = self._config_data.cluster.serial_no
2505 def _GetAllHvparamsStrings(self, hypervisors):
2506 """Get the hvparams of all given hypervisors from the config.
2508 @type hypervisors: list of string
2509 @param hypervisors: list of hypervisor names
2510 @rtype: dict of strings
2511 @returns: dictionary mapping the hypervisor name to a string representation
2512 of the hypervisor's hvparams
2516 for hv in hypervisors:
2517 hvparams[hv] = self._UnlockedGetHvparamsString(hv)
@staticmethod
def _ExtendByAllHvparamsStrings(ssconf_values, all_hvparams):
  """Add one ssconf entry per hypervisor to the given dictionary.

  The key of each entry is L{constants.SS_HVPARAMS_PREF} followed by the
  hypervisor name. The dictionary passed in is modified in place and
  also returned.

  @type ssconf_values: dict of strings
  @param ssconf_values: dictionary mapping ssconf_keys to strings
      representing the content of ssconf files
  @type all_hvparams: dict of strings
  @param all_hvparams: dictionary mapping hypervisor names to a string
      representation of their hvparams
  @rtype: same as ssconf_values
  @returns: the ssconf_values dictionary extended by hvparams

  """
  for (hv_name, hv_string) in all_hvparams.items():
    ssconf_values[constants.SS_HVPARAMS_PREF + hv_name] = hv_string
  return ssconf_values
def _UnlockedGetSsconfValues(self):
  """Return the values needed by ssconf.

  List-like values are rendered as newline-separated strings.

  @rtype: dict
  @return: a dictionary with keys the ssconf names and values their
      associated value
  @raise errors.ConfigurationError: if any resulting value is not a string

  """
  join_lines = "\n".join

  node_infos = self._UnlockedGetAllNodesInfo().values()
  instance_names = utils.NiceSort(
    [inst.name for inst in self._UnlockedGetAllInstancesInfo().values()])

  # per-node lists
  instance_data = join_lines(instance_names)
  off_data = join_lines(node.name for node in node_infos if node.offline)
  on_data = join_lines(node.name for node in node_infos if not node.offline)
  mc_data = join_lines(node.name for node in node_infos
                       if node.master_candidate)
  mc_ips_data = join_lines(node.primary_ip for node in node_infos
                           if node.master_candidate)
  node_data = join_lines([node.name for node in node_infos])
  node_pri_ips_data = join_lines(["%s %s" % (ninfo.name, ninfo.primary_ip)
                                  for ninfo in node_infos])
  node_snd_ips_data = join_lines(["%s %s" % (ninfo.name, ninfo.secondary_ip)
                                  for ninfo in node_infos])

  # cluster-wide settings
  cluster = self._config_data.cluster
  cluster_tags = join_lines(cluster.GetTags())

  hypervisor_list = join_lines(cluster.enabled_hypervisors)
  all_hvparams = self._GetAllHvparamsStrings(constants.HYPER_TYPES)

  uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")

  # "uuid name" pairs for node groups and networks
  nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name)
                for nodegroup in self._config_data.nodegroups.values()]
  nodegroups_data = join_lines(utils.NiceSort(nodegroups))
  networks = ["%s %s" % (net.uuid, net.name)
              for net in self._config_data.networks.values()]
  networks_data = join_lines(utils.NiceSort(networks))

  ssconf_values = {
    constants.SS_CLUSTER_NAME: cluster.cluster_name,
    constants.SS_CLUSTER_TAGS: cluster_tags,
    constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
    constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
    constants.SS_MASTER_CANDIDATES: mc_data,
    constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
    constants.SS_MASTER_IP: cluster.master_ip,
    constants.SS_MASTER_NETDEV: cluster.master_netdev,
    constants.SS_MASTER_NETMASK: str(cluster.master_netmask),
    constants.SS_MASTER_NODE: self._UnlockedGetNodeName(cluster.master_node),
    constants.SS_NODE_LIST: node_data,
    constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
    constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
    constants.SS_OFFLINE_NODES: off_data,
    constants.SS_ONLINE_NODES: on_data,
    constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
    constants.SS_INSTANCE_LIST: instance_data,
    constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
    constants.SS_HYPERVISOR_LIST: hypervisor_list,
    constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
    constants.SS_UID_POOL: uid_pool,
    constants.SS_NODEGROUPS: nodegroups_data,
    constants.SS_NETWORKS: networks_data,
    }
  ssconf_values = self._ExtendByAllHvparamsStrings(ssconf_values,
                                                   all_hvparams)

  # every ssconf value must be a string
  bad_values = [(k, v) for k, v in ssconf_values.items()
                if not isinstance(v, (str, basestring))]
  if bad_values:
    err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
    raise errors.ConfigurationError("Some ssconf key(s) have non-string"
                                    " values: %s" % err)
  return ssconf_values
@locking.ssynchronized(_config_lock, shared=1)
def GetSsconfValues(self):
  """Locked (shared) wrapper around L{_UnlockedGetSsconfValues}.

  @rtype: dict
  @return: whatever L{_UnlockedGetSsconfValues} returns

  """
  ssconf_values = self._UnlockedGetSsconfValues()
  return ssconf_values
@locking.ssynchronized(_config_lock, shared=1)
def GetVGName(self):
  """Return the cluster's volume group name.

  @return: the C{volume_group_name} attribute of the cluster object

  """
  cluster = self._config_data.cluster
  return cluster.volume_group_name
@locking.ssynchronized(_config_lock)
def SetVGName(self, vg_name):
  """Set the cluster's volume group name and save the configuration.

  @param vg_name: the new value for the cluster's C{volume_group_name}

  """
  cluster = self._config_data.cluster
  cluster.volume_group_name = vg_name
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetDRBDHelper(self):
  """Return the cluster's DRBD usermode helper.

  @return: the C{drbd_usermode_helper} attribute of the cluster object

  """
  cluster = self._config_data.cluster
  return cluster.drbd_usermode_helper
@locking.ssynchronized(_config_lock)
def SetDRBDHelper(self, drbd_helper):
  """Set the cluster's DRBD usermode helper and save the configuration.

  @param drbd_helper: the new value for the cluster's
      C{drbd_usermode_helper}

  """
  cluster = self._config_data.cluster
  cluster.drbd_usermode_helper = drbd_helper
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetMACPrefix(self):
  """Return the cluster's MAC prefix.

  @return: the C{mac_prefix} attribute of the cluster object

  """
  cluster = self._config_data.cluster
  return cluster.mac_prefix
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterInfo(self):
  """Return the cluster object of the configuration.

  The object stored in the configuration is returned itself, not a copy.

  @rtype: L{objects.Cluster}
  @return: the cluster object

  """
  cluster = self._config_data.cluster
  return cluster
@locking.ssynchronized(_config_lock, shared=1)
def HasAnyDiskOfType(self, dev_type):
  """Check whether the configuration has at least one disk of a type.

  The check itself is delegated to the C{HasAnyDiskOfType} method of the
  configuration data object.

  @param dev_type: the disk type to look for
  @return: the result of the delegated check

  """
  found = self._config_data.HasAnyDiskOfType(dev_type)
  return found
@locking.ssynchronized(_config_lock)
def Update(self, target, feedback_fn, ec_id=None):
  """Notify function to be called after updates.

  This function must be called when an object obtained from the
  configuration has been updated and the caller wants the modifications
  saved to the backing store. Note that all modified objects will be
  saved, but the target argument is the one the caller wants to ensure
  that it's saved.

  @param target: an instance of L{objects.Cluster}, L{objects.Node},
      L{objects.Instance}, L{objects.NodeGroup} or L{objects.Network}
      which is existing in the configuration
  @param feedback_fn: Callable feedback function
  @param ec_id: if not None, the temporary IPs reserved under this
      execution context ID are committed before saving
  @raise errors.ProgrammerError: if the configuration was not read or
      the target is of an unsupported type
  @raise errors.ConfigurationError: if the target is not found in the
      current configuration

  """
  if self._config_data is None:
    raise errors.ProgrammerError("Configuration file not read,"
                                 " cannot save.")

  cfg = self._config_data
  # only node updates need the cluster serial to be bumped as well
  bump_cluster = False
  if isinstance(target, objects.Cluster):
    is_known = target == cfg.cluster
  elif isinstance(target, objects.Node):
    is_known = target in cfg.nodes.values()
    bump_cluster = True
  elif isinstance(target, objects.Instance):
    is_known = target in cfg.instances.values()
  elif isinstance(target, objects.NodeGroup):
    is_known = target in cfg.nodegroups.values()
  elif isinstance(target, objects.Network):
    is_known = target in cfg.networks.values()
  else:
    raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                 " ConfigWriter.Update" % type(target))
  if not is_known:
    raise errors.ConfigurationError("Configuration updated since object"
                                    " has been read or unknown object")

  target.serial_no += 1
  now = time.time()
  target.mtime = now

  if bump_cluster:
    # for node updates, we need to increase the cluster serial too
    self._config_data.cluster.serial_no += 1
    self._config_data.cluster.mtime = now

  if isinstance(target, objects.Instance):
    self._UnlockedReleaseDRBDMinors(target.uuid)

  if ec_id is not None:
    # Commit all ips reserved by OpInstanceSetParams and OpGroupSetParams
    self._UnlockedCommitTemporaryIps(ec_id)

  self._WriteConfig(feedback_fn=feedback_fn)
@locking.ssynchronized(_config_lock)
def DropECReservations(self, ec_id):
  """Drop per-execution-context reservations

  Every reservation manager in C{self._all_rms} is asked to drop the
  reservations of the given execution context.

  @param ec_id: the execution context ID whose reservations are dropped

  """
  for res_manager in self._all_rms:
    res_manager.DropECReservations(ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNetworksInfo(self):
  """Get configuration info of all the networks.

  @rtype: dict
  @return: a new dictionary with the same keys and values as the
      configuration's networks mapping

  """
  networks_copy = dict(self._config_data.networks)
  return networks_copy
2750 def _UnlockedGetNetworkList(self):
2751 """Get the list of networks.
2753 This function is for internal use, when the config lock is already held.
2756 return self._config_data.networks.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNetworkList(self):
  """Get the list of networks.

  Locked (shared) wrapper around L{_UnlockedGetNetworkList}.

  @return: the keys of the configuration's networks mapping, e.g.
      C{["main", "vlan100", "200"]}

  """
  network_list = self._UnlockedGetNetworkList()
  return network_list
@locking.ssynchronized(_config_lock, shared=1)
def GetNetworkNames(self):
  """Get a list of network names

  @rtype: list
  @return: the C{name} attribute of every network in the configuration

  """
  names = []
  for net in self._config_data.networks.values():
    names.append(net.name)
  return names
2776 def _UnlockedGetNetwork(self, uuid):
2777 """Returns information about a network.
2779 This function is for internal use, when the config lock is already held.
2782 if uuid not in self._config_data.networks:
2785 return self._config_data.networks[uuid]
@locking.ssynchronized(_config_lock, shared=1)
def GetNetwork(self, uuid):
  """Returns information about a network.

  Locked (shared) wrapper around L{_UnlockedGetNetwork}, which takes the
  information from the configuration.

  @param uuid: UUID of the network

  @rtype: L{objects.Network}
  @return: the network object

  """
  net_info = self._UnlockedGetNetwork(uuid)
  return net_info
@locking.ssynchronized(_config_lock)
def AddNetwork(self, net, ec_id, check_uuid=True):
  """Add a network to the configuration and save it.

  The actual work is done by L{_UnlockedAddNetwork}.

  @type net: L{objects.Network}
  @param net: the Network object to add
  @type ec_id: string
  @param ec_id: unique id for the job to use when creating a missing UUID
  @type check_uuid: boolean
  @param check_uuid: passed through to L{_UnlockedAddNetwork}

  """
  self._UnlockedAddNetwork(net, ec_id, check_uuid)
  # persist right away, the helper only changes the in-memory data
  self._WriteConfig()
2814 def _UnlockedAddNetwork(self, net, ec_id, check_uuid):
2815 """Add a network to the configuration.
2818 logging.info("Adding network %s to configuration", net.name)
2821 self._EnsureUUID(net, ec_id)
2824 net.ctime = net.mtime = time.time()
2825 self._config_data.networks[net.uuid] = net
2826 self._config_data.cluster.serial_no += 1
2828 def _UnlockedLookupNetwork(self, target):
2829 """Lookup a network's UUID.
2831 @type target: string
2832 @param target: network name or UUID
2834 @return: network UUID
2835 @raises errors.OpPrereqError: when the target network cannot be found
2840 if target in self._config_data.networks:
2842 for net in self._config_data.networks.values():
2843 if net.name == target:
2845 raise errors.OpPrereqError("Network '%s' not found" % target,
@locking.ssynchronized(_config_lock, shared=1)
def LookupNetwork(self, target):
  """Lookup a network's UUID.

  Locked (shared) wrapper around L{_UnlockedLookupNetwork}.

  @type target: string
  @param target: network name or UUID
  @rtype: string
  @return: network UUID

  """
  net_uuid = self._UnlockedLookupNetwork(target)
  return net_uuid
@locking.ssynchronized(_config_lock)
def RemoveNetwork(self, network_uuid):
  """Remove a network from the configuration and save it.

  @type network_uuid: string
  @param network_uuid: the UUID of the network to remove
  @raise errors.ConfigurationError: if no network with this UUID exists

  """
  logging.info("Removing network %s from configuration", network_uuid)

  cfg = self._config_data
  if network_uuid not in cfg.networks:
    raise errors.ConfigurationError("Unknown network '%s'" % network_uuid)

  del cfg.networks[network_uuid]
  cfg.cluster.serial_no += 1
  self._WriteConfig()
2879 def _UnlockedGetGroupNetParams(self, net_uuid, node_uuid):
2880 """Get the netparams (mode, link) of a network.
2882 Get a network's netparams for a given node.
2884 @type net_uuid: string
2885 @param net_uuid: network uuid
2886 @type node_uuid: string
2887 @param node_uuid: node UUID
2888 @rtype: dict or None
2892 node_info = self._UnlockedGetNodeInfo(node_uuid)
2893 nodegroup_info = self._UnlockedGetNodeGroup(node_info.group)
2894 netparams = nodegroup_info.networks.get(net_uuid, None)
@locking.ssynchronized(_config_lock, shared=1)
def GetGroupNetParams(self, net_uuid, node_uuid):
  """Locking wrapper of _UnlockedGetGroupNetParams()

  @type net_uuid: string
  @param net_uuid: network uuid
  @type node_uuid: string
  @param node_uuid: node UUID
  @return: whatever L{_UnlockedGetGroupNetParams} returns

  """
  netparams = self._UnlockedGetGroupNetParams(net_uuid, node_uuid)
  return netparams
@locking.ssynchronized(_config_lock, shared=1)
def CheckIPInNodeGroup(self, ip, node_uuid):
  """Check IP uniqueness in nodegroup.

  Goes through the networks connected to the node's node group and
  reports the first one whose address pool contains the IP. Used when
  creating/adding a NIC to ensure uniqueness among nodegroups.

  @type ip: string
  @param ip: ip address
  @type node_uuid: string
  @param node_uuid: node UUID
  @rtype: (string, dict) or (None, None)
  @return: (network name, netparams)

  """
  if ip is None:
    return (None, None)

  node_info = self._UnlockedGetNodeInfo(node_uuid)
  nodegroup_info = self._UnlockedGetNodeGroup(node_info.group)
  for (net_uuid, netparams) in nodegroup_info.networks.items():
    net_info = self._UnlockedGetNetwork(net_uuid)
    if network.AddressPool(net_info).Contains(ip):
      return (net_info.name, netparams)

  # no connected network contains the address
  return (None, None)