4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
29 Currently, the data storage format is JSON. YAML was slow and consuming too
34 # pylint: disable=R0904
35 # R0904: Too many public methods
42 from ganeti import errors
43 from ganeti import locking
44 from ganeti import utils
45 from ganeti import constants
46 from ganeti import rpc
47 from ganeti import objects
48 from ganeti import serializer
49 from ganeti import uidpool
50 from ganeti import netutils
51 from ganeti import runtime
54 _config_lock = locking.SharedLock("ConfigWriter")
56 # job id used for resource management at config upgrade time
57 _UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
def _ValidateConfig(data):
  """Check that a configuration object carries the supported version.

  Only the version field is inspected; nothing else in the configuration
  is validated here.

  @param data: configuration object exposing a C{version} attribute
  @raise errors.ConfigVersionMismatch: if C{data.version} differs from
      C{constants.CONFIG_VERSION}

  """
  # Guard clause: a matching version means there is nothing to report.
  if data.version == constants.CONFIG_VERSION:
    return

  raise errors.ConfigVersionMismatch(constants.CONFIG_VERSION, data.version)
73 class TemporaryReservationManager:
74 """A temporary resource reservation manager.
76 This is used to reserve resources in a job, before using them, making sure
77 other jobs cannot get them in the meantime.
81 self._ec_reserved = {}
83 def Reserved(self, resource):
84 for holder_reserved in self._ec_reserved.values():
85 if resource in holder_reserved:
89 def Reserve(self, ec_id, resource):
90 if self.Reserved(resource):
91 raise errors.ReservationError("Duplicate reservation for resource '%s'"
93 if ec_id not in self._ec_reserved:
94 self._ec_reserved[ec_id] = set([resource])
96 self._ec_reserved[ec_id].add(resource)
98 def DropECReservations(self, ec_id):
99 if ec_id in self._ec_reserved:
100 del self._ec_reserved[ec_id]
102 def GetReserved(self):
104 for holder_reserved in self._ec_reserved.values():
105 all_reserved.update(holder_reserved)
108 def Generate(self, existing, generate_one_fn, ec_id):
109 """Generate a new resource of this type
112 assert callable(generate_one_fn)
114 all_elems = self.GetReserved()
115 all_elems.update(existing)
118 new_resource = generate_one_fn()
119 if new_resource is not None and new_resource not in all_elems:
122 raise errors.ConfigurationError("Not able generate new resource"
123 " (last tried: %s)" % new_resource)
124 self.Reserve(ec_id, new_resource)
def _MatchNameComponentIgnoreCase(short_name, names):
  """Case-insensitive front-end for L{utils.text.MatchNameComponent}.

  Both arguments are forwarded unchanged with C{case_sensitive=False}
  forced; whatever the wrapped function returns is handed back as-is.

  @param short_name: the name to look up, passed through unchanged
  @param names: the candidate names, passed through unchanged

  """
  match_options = {"case_sensitive": False}
  return utils.MatchNameComponent(short_name, names, **match_options)
136 """The interface to the cluster configuration.
138 @ivar _temporary_lvs: reservation manager for temporary LVs
139 @ivar _all_rms: a list of all temporary reservation managers
142 def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
143 accept_foreign=False):
145 self._lock = _config_lock
146 self._config_data = None
147 self._offline = offline
149 self._cfg_file = constants.CLUSTER_CONF_FILE
151 self._cfg_file = cfg_file
152 self._getents = _getents
153 self._temporary_ids = TemporaryReservationManager()
154 self._temporary_drbds = {}
155 self._temporary_macs = TemporaryReservationManager()
156 self._temporary_secrets = TemporaryReservationManager()
157 self._temporary_lvs = TemporaryReservationManager()
158 self._all_rms = [self._temporary_ids, self._temporary_macs,
159 self._temporary_secrets, self._temporary_lvs]
160 # Note: in order to prevent errors when resolving our name in
161 # _DistributeConfig, we compute it here once and reuse it; it's
162 # better to raise an error before starting to modify the config
163 # file than after it was modified
164 self._my_hostname = netutils.Hostname.GetSysName()
165 self._last_cluster_serial = -1
167 self._OpenConfig(accept_foreign)
169 # this method needs to be static, so that we can call it on the class
172 """Check if the cluster is configured.
175 return os.path.exists(constants.CLUSTER_CONF_FILE)
177 def _GenerateOneMAC(self):
178 """Generate one mac address
181 prefix = self._config_data.cluster.mac_prefix
182 byte1 = random.randrange(0, 256)
183 byte2 = random.randrange(0, 256)
184 byte3 = random.randrange(0, 256)
185 mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
@locking.ssynchronized(_config_lock, shared=1)
def GetNdParams(self, node):
  """Return a node's parameters with the defaults filled in.

  The node's group is looked up through L{_UnlockedGetNodeGroup} and
  handed, together with the node, to the cluster object's C{FillND}.

  @type node: L{objects.Node}
  @param node: the node whose parameters are wanted
  @return: a dict with the filled in node params

  """
  group_of_node = self._UnlockedGetNodeGroup(node.group)
  cluster = self._config_data.cluster
  return cluster.FillND(node, group_of_node)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateMAC(self, ec_id):
  """Generate a MAC for an instance and reserve it for a job.

  Candidates come from L{_GenerateOneMAC}. A candidate is rejected if it
  is already returned by L{_AllMACs} or is already held by the MAC
  reservation manager; the accepted address is reserved under C{ec_id}.

  @param ec_id: id of the execution context the reservation belongs to
  @return: the newly reserved MAC address
  @raise errors.ConfigurationError: propagated from the reservation
      manager when it cannot produce an unused address

  """
  existing = self._AllMACs()
  # Reserve through the same manager ReserveMAC uses.  A reservation
  # manager only consults its own reservations, so keeping generated MACs
  # in the generic ID manager would let a generated address and an
  # explicitly reserved one collide without being noticed.
  return self._temporary_macs.Generate(existing, self._GenerateOneMAC, ec_id)
210 @locking.ssynchronized(_config_lock, shared=1)
211 def ReserveMAC(self, mac, ec_id):
212 """Reserve a MAC for an instance.
214 This only checks instances managed by this cluster, it does not
215 check for potential collisions elsewhere.
218 all_macs = self._AllMACs()
220 raise errors.ReservationError("mac already in use")
222 self._temporary_macs.Reserve(ec_id, mac)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveLV(self, lv_name, ec_id):
  """Reserve a VG/LV pair for an instance.

  The name is first checked against the LVs returned by L{_AllLVs}; only
  if it is not among them is it handed to the temporary LV reservation
  manager.

  @type lv_name: string
  @param lv_name: the logical volume name to reserve
  @param ec_id: id of the execution context the reservation belongs to
  @raise errors.ReservationError: if the name is already among the LVs
      returned by L{_AllLVs}

  """
  if lv_name not in self._AllLVs():
    self._temporary_lvs.Reserve(ec_id, lv_name)
    return

  raise errors.ReservationError("LV already in use")
238 @locking.ssynchronized(_config_lock, shared=1)
239 def GenerateDRBDSecret(self, ec_id):
240 """Generate a DRBD secret.
242 This checks the current disks for duplicates.
245 return self._temporary_secrets.Generate(self._AllDRBDSecrets(),
246 utils.GenerateSecret,
250 """Compute the list of all LVs.
254 for instance in self._config_data.instances.values():
255 node_data = instance.MapLVsByNode()
256 for lv_list in node_data.values():
257 lvnames.update(lv_list)
260 def _AllIDs(self, include_temporary):
261 """Compute the list of all UUIDs and names we have.
263 @type include_temporary: boolean
264 @param include_temporary: whether to include the _temporary_ids set
266 @return: a set of IDs
270 if include_temporary:
271 existing.update(self._temporary_ids.GetReserved())
272 existing.update(self._AllLVs())
273 existing.update(self._config_data.instances.keys())
274 existing.update(self._config_data.nodes.keys())
275 existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
def _GenerateUniqueID(self, ec_id):
  """Generate a unique UUID and reserve it for an execution context.

  Candidates are produced by C{utils.NewUUID} and checked against the
  IDs returned by L{_AllIDs}.

  @param ec_id: id of the execution context the reservation belongs to
  @return: the unique id

  """
  # Temporary IDs are left out of this set on purpose: the reservation
  # manager merges its own reservations in before picking a candidate.
  known_ids = self._AllIDs(include_temporary=False)
  id_reservations = self._temporary_ids
  return id_reservations.Generate(known_ids, utils.NewUUID, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateUniqueID(self, ec_id):
  """Generate a unique ID.

  Public entry point that delegates to L{_GenerateUniqueID}; the
  decorator wraps the call with the configuration lock.

  @param ec_id: unique id for the job to reserve the id to
  @return: the value returned by L{_GenerateUniqueID}

  """
  new_id = self._GenerateUniqueID(ec_id)
  return new_id
304 """Return all MACs present in the config.
307 @return: the list of all MACs
311 for instance in self._config_data.instances.values():
312 for nic in instance.nics:
313 result.append(nic.mac)
317 def _AllDRBDSecrets(self):
318 """Return all DRBD secrets present in the config.
321 @return: the list of all DRBD secrets
324 def helper(disk, result):
325 """Recursively gather secrets from this disk."""
326 if disk.dev_type == constants.DT_DRBD8:
327 result.append(disk.logical_id[5])
329 for child in disk.children:
330 helper(child, result)
333 for instance in self._config_data.instances.values():
334 for disk in instance.disks:
339 def _CheckDiskIDs(self, disk, l_ids, p_ids):
340 """Compute duplicate disk IDs
342 @type disk: L{objects.Disk}
343 @param disk: the disk at which to start searching
345 @param l_ids: list of current logical ids
347 @param p_ids: list of current physical ids
349 @return: a list of error messages
353 if disk.logical_id is not None:
354 if disk.logical_id in l_ids:
355 result.append("duplicate logical id %s" % str(disk.logical_id))
357 l_ids.append(disk.logical_id)
358 if disk.physical_id is not None:
359 if disk.physical_id in p_ids:
360 result.append("duplicate physical id %s" % str(disk.physical_id))
362 p_ids.append(disk.physical_id)
365 for child in disk.children:
366 result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
369 def _UnlockedVerifyConfig(self):
373 @return: a list of error messages; a non-empty list signifies
377 # pylint: disable=R0914
381 data = self._config_data
382 cluster = data.cluster
386 # global cluster checks
387 if not cluster.enabled_hypervisors:
388 result.append("enabled hypervisors list doesn't have any entries")
389 invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
391 result.append("enabled hypervisors contains invalid entries: %s" %
393 missing_hvp = (set(cluster.enabled_hypervisors) -
394 set(cluster.hvparams.keys()))
396 result.append("hypervisor parameters missing for the enabled"
397 " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
399 if cluster.master_node not in data.nodes:
400 result.append("cluster has invalid primary node '%s'" %
403 def _helper(owner, attr, value, template):
405 utils.ForceDictType(value, template)
406 except errors.GenericError, err:
407 result.append("%s has invalid %s: %s" % (owner, attr, err))
409 def _helper_nic(owner, params):
411 objects.NIC.CheckParameterSyntax(params)
412 except errors.ConfigurationError, err:
413 result.append("%s has invalid nicparams: %s" % (owner, err))
415 # check cluster parameters
416 _helper("cluster", "beparams", cluster.SimpleFillBE({}),
417 constants.BES_PARAMETER_TYPES)
418 _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
419 constants.NICS_PARAMETER_TYPES)
420 _helper_nic("cluster", cluster.SimpleFillNIC({}))
421 _helper("cluster", "ndparams", cluster.SimpleFillND({}),
422 constants.NDS_PARAMETER_TYPES)
424 # per-instance checks
425 for instance_name in data.instances:
426 instance = data.instances[instance_name]
427 if instance.name != instance_name:
428 result.append("instance '%s' is indexed by wrong name '%s'" %
429 (instance.name, instance_name))
430 if instance.primary_node not in data.nodes:
431 result.append("instance '%s' has invalid primary node '%s'" %
432 (instance_name, instance.primary_node))
433 for snode in instance.secondary_nodes:
434 if snode not in data.nodes:
435 result.append("instance '%s' has invalid secondary node '%s'" %
436 (instance_name, snode))
437 for idx, nic in enumerate(instance.nics):
438 if nic.mac in seen_macs:
439 result.append("instance '%s' has NIC %d mac %s duplicate" %
440 (instance_name, idx, nic.mac))
442 seen_macs.append(nic.mac)
444 filled = cluster.SimpleFillNIC(nic.nicparams)
445 owner = "instance %s nic %d" % (instance.name, idx)
446 _helper(owner, "nicparams",
447 filled, constants.NICS_PARAMETER_TYPES)
448 _helper_nic(owner, filled)
451 if instance.beparams:
452 _helper("instance %s" % instance.name, "beparams",
453 cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)
455 # gather the drbd ports for duplicate checks
456 for dsk in instance.disks:
457 if dsk.dev_type in constants.LDS_DRBD:
458 tcp_port = dsk.logical_id[2]
459 if tcp_port not in ports:
461 ports[tcp_port].append((instance.name, "drbd disk %s" % dsk.iv_name))
462 # gather network port reservation
463 net_port = getattr(instance, "network_port", None)
464 if net_port is not None:
465 if net_port not in ports:
467 ports[net_port].append((instance.name, "network port"))
469 # instance disk verify
470 for idx, disk in enumerate(instance.disks):
471 result.extend(["instance '%s' disk %d error: %s" %
472 (instance.name, idx, msg) for msg in disk.Verify()])
473 result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))
475 # cluster-wide pool of free ports
476 for free_port in cluster.tcpudp_port_pool:
477 if free_port not in ports:
478 ports[free_port] = []
479 ports[free_port].append(("cluster", "port marked as free"))
481 # compute tcp/udp duplicate ports
487 txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
488 result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
490 # highest used tcp port check
492 if keys[-1] > cluster.highest_used_port:
493 result.append("Highest used port mismatch, saved %s, computed %s" %
494 (cluster.highest_used_port, keys[-1]))
496 if not data.nodes[cluster.master_node].master_candidate:
497 result.append("Master node is not a master candidate")
499 # master candidate checks
500 mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
502 result.append("Not enough master candidates: actual %d, target %d" %
506 for node_name, node in data.nodes.items():
507 if node.name != node_name:
508 result.append("Node '%s' is indexed by wrong name '%s'" %
509 (node.name, node_name))
510 if [node.master_candidate, node.drained, node.offline].count(True) > 1:
511 result.append("Node %s state is invalid: master_candidate=%s,"
512 " drain=%s, offline=%s" %
513 (node.name, node.master_candidate, node.drained,
515 if node.group not in data.nodegroups:
516 result.append("Node '%s' has invalid group '%s'" %
517 (node.name, node.group))
519 _helper("node %s" % node.name, "ndparams",
520 cluster.FillND(node, data.nodegroups[node.group]),
521 constants.NDS_PARAMETER_TYPES)
524 nodegroups_names = set()
525 for nodegroup_uuid in data.nodegroups:
526 nodegroup = data.nodegroups[nodegroup_uuid]
527 if nodegroup.uuid != nodegroup_uuid:
528 result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
529 % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
530 if utils.UUID_RE.match(nodegroup.name.lower()):
531 result.append("node group '%s' (uuid: '%s') has uuid-like name" %
532 (nodegroup.name, nodegroup.uuid))
533 if nodegroup.name in nodegroups_names:
534 result.append("duplicate node group name '%s'" % nodegroup.name)
536 nodegroups_names.add(nodegroup.name)
537 if nodegroup.ndparams:
538 _helper("group %s" % nodegroup.name, "ndparams",
539 cluster.SimpleFillND(nodegroup.ndparams),
540 constants.NDS_PARAMETER_TYPES)
543 _, duplicates = self._UnlockedComputeDRBDMap()
544 for node, minor, instance_a, instance_b in duplicates:
545 result.append("DRBD minor %d on node %s is assigned twice to instances"
546 " %s and %s" % (minor, node, instance_a, instance_b))
549 default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
552 def _AddIpAddress(ip, name):
553 ips.setdefault(ip, []).append(name)
555 _AddIpAddress(cluster.master_ip, "cluster_ip")
557 for node in data.nodes.values():
558 _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
559 if node.secondary_ip != node.primary_ip:
560 _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
562 for instance in data.instances.values():
563 for idx, nic in enumerate(instance.nics):
567 nicparams = objects.FillDict(default_nicparams, nic.nicparams)
568 nic_mode = nicparams[constants.NIC_MODE]
569 nic_link = nicparams[constants.NIC_LINK]
571 if nic_mode == constants.NIC_MODE_BRIDGED:
572 link = "bridge:%s" % nic_link
573 elif nic_mode == constants.NIC_MODE_ROUTED:
574 link = "route:%s" % nic_link
576 raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
578 _AddIpAddress("%s/%s" % (link, nic.ip),
579 "instance:%s/nic:%d" % (instance.name, idx))
581 for ip, owners in ips.items():
583 result.append("IP address %s is used by multiple owners: %s" %
584 (ip, utils.CommaJoin(owners)))
@locking.ssynchronized(_config_lock, shared=1)
def VerifyConfig(self):
  """Verify the configuration.

  Public entry point that delegates to L{_UnlockedVerifyConfig}; the
  decorator wraps the call with the configuration lock.

  @return: a list of error messages; a non-empty list signifies
      configuration errors

  """
  problems = self._UnlockedVerifyConfig()
  return problems
601 def _UnlockedSetDiskID(self, disk, node_name):
602 """Convert the unique ID to the ID needed on the target nodes.
604 This is used only for drbd, which needs ip/port configuration.
606 The routine descends down and updates its children also, because
607 this helps when the only the top device is passed to the remote
610 This function is for internal use, when the config lock is already held.
614 for child in disk.children:
615 self._UnlockedSetDiskID(child, node_name)
617 if disk.logical_id is None and disk.physical_id is not None:
619 if disk.dev_type == constants.LD_DRBD8:
620 pnode, snode, port, pminor, sminor, secret = disk.logical_id
621 if node_name not in (pnode, snode):
622 raise errors.ConfigurationError("DRBD device not knowing node %s" %
624 pnode_info = self._UnlockedGetNodeInfo(pnode)
625 snode_info = self._UnlockedGetNodeInfo(snode)
626 if pnode_info is None or snode_info is None:
627 raise errors.ConfigurationError("Can't find primary or secondary node"
628 " for %s" % str(disk))
629 p_data = (pnode_info.secondary_ip, port)
630 s_data = (snode_info.secondary_ip, port)
631 if pnode == node_name:
632 disk.physical_id = p_data + s_data + (pminor, secret)
633 else: # it must be secondary, we tested above
634 disk.physical_id = s_data + p_data + (sminor, secret)
636 disk.physical_id = disk.logical_id
@locking.ssynchronized(_config_lock)
def SetDiskID(self, disk, node_name):
  """Convert the unique ID to the ID needed on the target nodes.

  This is used only for drbd, which needs ip/port configuration.

  The routine descends down and updates the children as well, which
  helps when only the top device is passed to the remote node.

  Public entry point that delegates to L{_UnlockedSetDiskID}; the
  decorator wraps the call with the configuration lock.

  @param disk: the disk whose ID is to be converted
  @param node_name: the node the converted ID is meant for
  @return: the value returned by L{_UnlockedSetDiskID}

  """
  outcome = self._UnlockedSetDiskID(disk, node_name)
  return outcome
652 @locking.ssynchronized(_config_lock)
653 def AddTcpUdpPort(self, port):
654 """Adds a new port to the available port pool.
657 if not isinstance(port, int):
658 raise errors.ProgrammerError("Invalid type passed for port")
660 self._config_data.cluster.tcpudp_port_pool.add(port)
@locking.ssynchronized(_config_lock, shared=1)
def GetPortList(self):
  """Return a copy of the cluster's pool of available TCP/UDP ports.

  The pool's own C{copy()} is used, so callers get a separate object
  rather than the one stored in the configuration.

  @return: copy of the cluster's C{tcpudp_port_pool}

  """
  port_pool = self._config_data.cluster.tcpudp_port_pool
  return port_pool.copy()
670 @locking.ssynchronized(_config_lock)
671 def AllocatePort(self):
674 The port will be taken from the available port pool or from the
675 default port range (and in this case we increase
679 # If there are TCP/IP ports configured, we use them first.
680 if self._config_data.cluster.tcpudp_port_pool:
681 port = self._config_data.cluster.tcpudp_port_pool.pop()
683 port = self._config_data.cluster.highest_used_port + 1
684 if port >= constants.LAST_DRBD_PORT:
685 raise errors.ConfigurationError("The highest used port is greater"
686 " than %s. Aborting." %
687 constants.LAST_DRBD_PORT)
688 self._config_data.cluster.highest_used_port = port
693 def _UnlockedComputeDRBDMap(self):
694 """Compute the used DRBD minor/nodes.
697 @return: dictionary of node_name: dict of minor: instance_name;
698 the returned dict will have all the nodes in it (even if with
699 an empty list), and a list of duplicates; if the duplicates
700 list is not empty, the configuration is corrupted and its caller
701 should raise an exception
704 def _AppendUsedPorts(instance_name, disk, used):
706 if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
707 node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
708 for node, port in ((node_a, minor_a), (node_b, minor_b)):
709 assert node in used, ("Node '%s' of instance '%s' not found"
710 " in node list" % (node, instance_name))
711 if port in used[node]:
712 duplicates.append((node, port, instance_name, used[node][port]))
714 used[node][port] = instance_name
716 for child in disk.children:
717 duplicates.extend(_AppendUsedPorts(instance_name, child, used))
721 my_dict = dict((node, {}) for node in self._config_data.nodes)
722 for instance in self._config_data.instances.itervalues():
723 for disk in instance.disks:
724 duplicates.extend(_AppendUsedPorts(instance.name, disk, my_dict))
725 for (node, minor), instance in self._temporary_drbds.iteritems():
726 if minor in my_dict[node] and my_dict[node][minor] != instance:
727 duplicates.append((node, minor, instance, my_dict[node][minor]))
729 my_dict[node][minor] = instance
730 return my_dict, duplicates
732 @locking.ssynchronized(_config_lock)
733 def ComputeDRBDMap(self):
734 """Compute the used DRBD minor/nodes.
736 This is just a wrapper over L{_UnlockedComputeDRBDMap}.
738 @return: dictionary of node_name: dict of minor: instance_name;
739 the returned dict will have all the nodes in it (even if with
743 d_map, duplicates = self._UnlockedComputeDRBDMap()
745 raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
749 @locking.ssynchronized(_config_lock)
750 def AllocateDRBDMinor(self, nodes, instance):
751 """Allocate a drbd minor.
753 The free minor will be automatically computed from the existing
754 devices. A node can be given multiple times in order to allocate
755 multiple minors. The result is the list of minors, in the same
756 order as the passed nodes.
758 @type instance: string
759 @param instance: the instance for which we allocate minors
762 assert isinstance(instance, basestring), \
763 "Invalid argument '%s' passed to AllocateDRBDMinor" % instance
765 d_map, duplicates = self._UnlockedComputeDRBDMap()
767 raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
773 # no minors used, we can start at 0
776 self._temporary_drbds[(nname, 0)] = instance
780 ffree = utils.FirstFree(keys)
782 # return the next minor
783 # TODO: implement high-limit check
787 # double-check minor against current instances
788 assert minor not in d_map[nname], \
789 ("Attempt to reuse allocated DRBD minor %d on node %s,"
790 " already allocated to instance %s" %
791 (minor, nname, d_map[nname][minor]))
792 ndata[minor] = instance
793 # double-check minor against reservation
794 r_key = (nname, minor)
795 assert r_key not in self._temporary_drbds, \
796 ("Attempt to reuse reserved DRBD minor %d on node %s,"
797 " reserved for instance %s" %
798 (minor, nname, self._temporary_drbds[r_key]))
799 self._temporary_drbds[r_key] = instance
801 logging.debug("Request to allocate drbd minors, input: %s, returning %s",
805 def _UnlockedReleaseDRBDMinors(self, instance):
806 """Release temporary drbd minors allocated for a given instance.
808 @type instance: string
809 @param instance: the instance for which temporary minors should be
813 assert isinstance(instance, basestring), \
814 "Invalid argument passed to ReleaseDRBDMinors"
815 for key, name in self._temporary_drbds.items():
817 del self._temporary_drbds[key]
@locking.ssynchronized(_config_lock)
def ReleaseDRBDMinors(self, instance):
  """Release temporary drbd minors allocated for a given instance.

  This should be called on the error paths; on the success paths it is
  called automatically by the ConfigWriter add and update functions.

  Public entry point that delegates to L{_UnlockedReleaseDRBDMinors};
  the decorator wraps the call with the configuration lock.

  @type instance: string
  @param instance: the instance for which temporary minors should be
      released

  """
  release_minors = self._UnlockedReleaseDRBDMinors
  release_minors(instance)
@locking.ssynchronized(_config_lock, shared=1)
def GetConfigVersion(self):
  """Return the version stored in the configuration data.

  @return: Config version

  """
  config = self._config_data
  return config.version
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterName(self):
  """Return the name of the cluster.

  @return: Cluster name

  """
  cluster = self._config_data.cluster
  return cluster.cluster_name
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNode(self):
  """Return the hostname of the master node for this cluster.

  @return: Master hostname

  """
  cluster = self._config_data.cluster
  return cluster.master_node
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterIP(self):
  """Return the IP of the master node for this cluster.

  @return: the cluster's C{master_ip} value

  """
  cluster = self._config_data.cluster
  return cluster.master_ip
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetdev(self):
  """Return the master network device for this cluster.

  @return: the cluster's C{master_netdev} value

  """
  cluster = self._config_data.cluster
  return cluster.master_netdev
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetmask(self):
  """Return the netmask of the master node for this cluster.

  @return: the cluster's C{master_netmask} value

  """
  cluster = self._config_data.cluster
  return cluster.master_netmask
@locking.ssynchronized(_config_lock, shared=1)
def GetFileStorageDir(self):
  """Return the file storage dir for this cluster.

  @return: the cluster's C{file_storage_dir} value

  """
  cluster = self._config_data.cluster
  return cluster.file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetSharedFileStorageDir(self):
  """Return the shared file storage dir for this cluster.

  @return: the cluster's C{shared_file_storage_dir} value

  """
  cluster = self._config_data.cluster
  return cluster.shared_file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetHypervisorType(self):
  """Return the hypervisor type for this cluster.

  The answer is the first entry of the cluster's list of enabled
  hypervisors.

  @return: first element of C{enabled_hypervisors}

  """
  enabled = self._config_data.cluster.enabled_hypervisors
  return enabled[0]
@locking.ssynchronized(_config_lock, shared=1)
def GetHostKey(self):
  """Return the rsa hostkey stored in the configuration.

  @return: the rsa hostkey (the cluster's C{rsahostkeypub} value)

  """
  cluster = self._config_data.cluster
  return cluster.rsahostkeypub
@locking.ssynchronized(_config_lock, shared=1)
def GetDefaultIAllocator(self):
  """Return the default instance allocator for this cluster.

  @return: the cluster's C{default_iallocator} value

  """
  cluster = self._config_data.cluster
  return cluster.default_iallocator
@locking.ssynchronized(_config_lock, shared=1)
def GetPrimaryIPFamily(self):
  """Return the cluster's primary ip family.

  @return: primary ip family

  """
  cluster = self._config_data.cluster
  return cluster.primary_ip_family
933 @locking.ssynchronized(_config_lock)
934 def AddNodeGroup(self, group, ec_id, check_uuid=True):
935 """Add a node group to the configuration.
937 This method calls group.UpgradeConfig() to fill any missing attributes
938 according to their default values.
940 @type group: L{objects.NodeGroup}
941 @param group: the NodeGroup object to add
943 @param ec_id: unique id for the job to use when creating a missing UUID
944 @type check_uuid: bool
945 @param check_uuid: add an UUID to the group if it doesn't have one or, if
946 it does, ensure that it does not exist in the
947 configuration already
950 self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
953 def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
954 """Add a node group to the configuration.
957 logging.info("Adding node group %s to configuration", group.name)
959 # Some code might need to add a node group with a pre-populated UUID
960 # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
961 # the "does this UUID" exist already check.
963 self._EnsureUUID(group, ec_id)
966 existing_uuid = self._UnlockedLookupNodeGroup(group.name)
967 except errors.OpPrereqError:
970 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
971 " node group (UUID: %s)" %
972 (group.name, existing_uuid),
976 group.ctime = group.mtime = time.time()
977 group.UpgradeConfig()
979 self._config_data.nodegroups[group.uuid] = group
980 self._config_data.cluster.serial_no += 1
982 @locking.ssynchronized(_config_lock)
983 def RemoveNodeGroup(self, group_uuid):
984 """Remove a node group from the configuration.
986 @type group_uuid: string
987 @param group_uuid: the UUID of the node group to remove
990 logging.info("Removing node group %s from configuration", group_uuid)
992 if group_uuid not in self._config_data.nodegroups:
993 raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)
995 assert len(self._config_data.nodegroups) != 1, \
996 "Group '%s' is the only group, cannot be removed" % group_uuid
998 del self._config_data.nodegroups[group_uuid]
999 self._config_data.cluster.serial_no += 1
1002 def _UnlockedLookupNodeGroup(self, target):
1003 """Lookup a node group's UUID.
1005 @type target: string or None
1006 @param target: group name or UUID or None to look for the default
1008 @return: nodegroup UUID
1009 @raises errors.OpPrereqError: when the target group cannot be found
1013 if len(self._config_data.nodegroups) != 1:
1014 raise errors.OpPrereqError("More than one node group exists. Target"
1015 " group must be specified explicitely.")
1017 return self._config_data.nodegroups.keys()[0]
1018 if target in self._config_data.nodegroups:
1020 for nodegroup in self._config_data.nodegroups.values():
1021 if nodegroup.name == target:
1022 return nodegroup.uuid
1023 raise errors.OpPrereqError("Node group '%s' not found" % target,
@locking.ssynchronized(_config_lock, shared=1)
def LookupNodeGroup(self, target):
  """Look up a node group's UUID.

  Public entry point that delegates to L{_UnlockedLookupNodeGroup}; the
  decorator wraps the call with the configuration lock.

  @type target: string or None
  @param target: group name or UUID or None to look for the default
  @return: nodegroup UUID

  """
  group_uuid = self._UnlockedLookupNodeGroup(target)
  return group_uuid
1040 def _UnlockedGetNodeGroup(self, uuid):
1041 """Lookup a node group.
1044 @param uuid: group UUID
1045 @rtype: L{objects.NodeGroup} or None
1046 @return: nodegroup object, or None if not found
1049 if uuid not in self._config_data.nodegroups:
1052 return self._config_data.nodegroups[uuid]
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroup(self, uuid):
  """Look up a node group by UUID.

  Public entry point that delegates to L{_UnlockedGetNodeGroup}; the
  decorator wraps the call with the configuration lock.

  @param uuid: group UUID
  @rtype: L{objects.NodeGroup} or None
  @return: nodegroup object, or None if not found

  """
  group = self._UnlockedGetNodeGroup(uuid)
  return group
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodeGroupsInfo(self):
  """Return the configuration of all node groups.

  A new dict is built from the stored node group mapping, so adding or
  removing keys on the result does not touch the configuration's own
  mapping; the values are the same objects, not copies.

  @return: new dict built from the configuration's C{nodegroups}

  """
  stored_groups = self._config_data.nodegroups
  return dict(stored_groups)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupList(self):
  """Return the keys of the node group mapping.

  @return: result of C{keys()} on the configuration's C{nodegroups}

  """
  stored_groups = self._config_data.nodegroups
  return stored_groups.keys()
1080 @locking.ssynchronized(_config_lock, shared=1)
1081 def GetNodeGroupMembersByNodes(self, nodes):
1082 """Get nodes which are member in the same nodegroups as the given nodes.
1085 ngfn = lambda node_name: self._UnlockedGetNodeInfo(node_name).group
1086 return frozenset(member_name
1087 for node_name in nodes
1089 self._UnlockedGetNodeGroup(ngfn(node_name)).members)
1091 @locking.ssynchronized(_config_lock)
1092 def AddInstance(self, instance, ec_id):
1093 """Add an instance to the config.
1095 This should be used after creating a new instance.
1097 @type instance: L{objects.Instance}
1098 @param instance: the instance object
1101 if not isinstance(instance, objects.Instance):
1102 raise errors.ProgrammerError("Invalid type passed to AddInstance")
1104 if instance.disk_template != constants.DT_DISKLESS:
1105 all_lvs = instance.MapLVsByNode()
1106 logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)
1108 all_macs = self._AllMACs()
1109 for nic in instance.nics:
1110 if nic.mac in all_macs:
1111 raise errors.ConfigurationError("Cannot add instance %s:"
1112 " MAC address '%s' already in use." %
1113 (instance.name, nic.mac))
1115 self._EnsureUUID(instance, ec_id)
1117 instance.serial_no = 1
1118 instance.ctime = instance.mtime = time.time()
1119 self._config_data.instances[instance.name] = instance
1120 self._config_data.cluster.serial_no += 1
1121 self._UnlockedReleaseDRBDMinors(instance.name)
def _EnsureUUID(self, item, ec_id):
  """Make sure an object carries a usable UUID.

  An object without a UUID gets a freshly generated one; an object
  that already has one is rejected if that UUID is already known.

  @param item: the instance or node to be checked
  @param ec_id: the execution context id for the uuid reservation
  @raise errors.ConfigurationError: if the object's UUID is already
      in use

  """
  if not item.uuid:
    item.uuid = self._GenerateUniqueID(ec_id)
    return

  if item.uuid in self._AllIDs(include_temporary=True):
    raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
                                    " in use" % (item.name, item.uuid))
def _SetInstanceStatus(self, instance_name, status):
  """Set the administrative status of an instance.

  The configuration is only modified and written out when the stored
  status actually differs from the requested one.

  @param instance_name: name of the instance to modify
  @type status: boolean
  @param status: the new value for the instance's C{admin_up} flag
  @raise errors.ConfigurationError: if the instance is unknown

  """
  assert isinstance(status, bool), \
    "Invalid status '%s' passed to SetInstanceStatus" % (status,)

  instances = self._config_data.instances
  if instance_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" %
                                    instance_name)

  inst = instances[instance_name]
  if inst.admin_up != status:
    inst.admin_up = status
    inst.serial_no += 1
    inst.mtime = time.time()
    self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceUp(self, instance_name):
  """Record in the configuration that the instance should be up.

  @param instance_name: name of the instance to modify

  """
  self._SetInstanceStatus(instance_name, status=True)
@locking.ssynchronized(_config_lock)
def RemoveInstance(self, instance_name):
  """Delete an instance from the configuration.

  @param instance_name: name of the instance to remove
  @raise errors.ConfigurationError: if the instance is unknown

  """
  instances = self._config_data.instances
  if instance_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  del instances[instance_name]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RenameInstance(self, old_name, new_name):
  """Rename an instance.

  The rename is performed here, under a single acquisition of the
  config lock, rather than through RemoveInstance followed by
  AddInstance, so that it happens as one atomic operation.

  @param old_name: current name of the instance
  @param new_name: name the instance should get
  @raise errors.ConfigurationError: if C{old_name} is unknown

  """
  instances = self._config_data.instances
  if old_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % old_name)

  inst = instances.pop(old_name)
  inst.name = new_name

  for disk in inst.disks:
    if disk.dev_type != constants.LD_FILE:
      continue
    # rename the file paths in logical and physical id: the directory
    # two levels above the old path is joined with the new instance name
    storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
    fname = "disk%s" % disk.iv_name.split("/")[1]
    new_id = (disk.logical_id[0],
              utils.PathJoin(storage_dir, inst.name, fname))
    disk.physical_id = disk.logical_id = new_id

  # Force update of ssconf files
  self._config_data.cluster.serial_no += 1

  instances[inst.name] = inst
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceDown(self, instance_name):
  """Record in the configuration that the instance should be down.

  @param instance_name: name of the instance to modify

  """
  self._SetInstanceStatus(instance_name, status=False)
def _UnlockedGetInstanceList(self):
  """Return the names of all instances in the configuration.

  This function is for internal use, when the config lock is already held.

  """
  instances = self._config_data.instances
  return instances.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceList(self):
  """Return the names of all instances in the configuration.

  @return: array of instances, ex. ['instance2.example.com',
      'instance1.example.com']

  """
  names = self._UnlockedGetInstanceList()
  return names
def ExpandInstanceName(self, short_name):
  """Try to expand an incomplete instance name to a full one.

  @param short_name: the (possibly partial) name to match against the
      known instance names
  @return: whatever L{_MatchNameComponentIgnoreCase} returns for the
      given name and the current instance list

  """
  # Locking is done in L{ConfigWriter.GetInstanceList}
  candidates = self.GetInstanceList()
  return _MatchNameComponentIgnoreCase(short_name, candidates)
def _UnlockedGetInstanceInfo(self, instance_name):
  """Return the configuration object of an instance.

  This function is for internal use, when the config lock is already held.

  @param instance_name: name of the instance to look up
  @return: the stored instance object, or None if the name is unknown

  """
  try:
    return self._config_data.instances[instance_name]
  except KeyError:
    return None
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceInfo(self, instance_name):
  """Return the configuration object of an instance.

  Only the data stored in the configuration is returned; this is a
  locked wrapper over L{_UnlockedGetInstanceInfo}.

  @param instance_name: name of the instance, e.g.
      I{instance1.example.com}

  @rtype: L{objects.Instance}
  @return: the instance object

  """
  instance = self._UnlockedGetInstanceInfo(instance_name)
  return instance
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceNodeGroups(self, instance_name, primary_only=False):
  """Return the node group UUIDs of an instance's nodes.

  @param instance_name: name of the instance
  @param primary_only: whether to consider only the primary node
      instead of all of the instance's nodes
  @rtype: frozenset
  @return: the C{group} attribute of every considered node
  @raise errors.ConfigurationError: if the instance is unknown

  """
  instance = self._UnlockedGetInstanceInfo(instance_name)
  if not instance:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  if primary_only:
    nodes = [instance.primary_node]
  else:
    nodes = instance.all_nodes

  groups = set()
  for node_name in nodes:
    groups.add(self._UnlockedGetNodeInfo(node_name).group)
  return frozenset(groups)
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiInstanceInfo(self, instances):
  """Return the configuration of several instances at once.

  @param instances: list of instance names
  @rtype: list
  @return: list of tuples (instance, instance_info), where
      instance_info is what would GetInstanceInfo return for the
      instance, while keeping the original order

  """
  result = []
  for name in instances:
    result.append((name, self._UnlockedGetInstanceInfo(name)))
  return result
@locking.ssynchronized(_config_lock, shared=1)
def GetAllInstancesInfo(self):
  """Return the configuration of all instances.

  @rtype: dict
  @return: dict of (instance, instance_info), where instance_info is what
      would GetInstanceInfo return for the instance

  """
  info_by_name = {}
  for name in self._UnlockedGetInstanceList():
    info_by_name[name] = self._UnlockedGetInstanceInfo(name)
  return info_by_name
@locking.ssynchronized(_config_lock)
def AddNode(self, node, ec_id):
  """Register a node in the configuration.

  @type node: L{objects.Node}
  @param node: a Node instance
  @param ec_id: the execution context id, passed on to the UUID check

  """
  logging.info("Adding node %s to configuration", node.name)

  self._EnsureUUID(node, ec_id)

  node.serial_no = 1
  now = time.time()
  node.ctime = now
  node.mtime = now
  # the group membership is updated before the node itself is stored
  self._UnlockedAddNodeToGroup(node.name, node.group)
  self._config_data.nodes[node.name] = node
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RemoveNode(self, node_name):
  """Delete a node from the configuration.

  @param node_name: name of the node to remove
  @raise errors.ConfigurationError: if the node is unknown

  """
  logging.info("Removing node %s from configuration", node_name)

  nodes = self._config_data.nodes
  if node_name not in nodes:
    raise errors.ConfigurationError("Unknown node '%s'" % node_name)

  # drop the group membership first, while the node object still exists
  self._UnlockedRemoveNodeFromGroup(nodes[node_name])
  del nodes[node_name]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
def ExpandNodeName(self, short_name):
  """Try to expand an incomplete node name to a full one.

  @param short_name: the (possibly partial) name to match against the
      known node names
  @return: whatever L{_MatchNameComponentIgnoreCase} returns for the
      given name and the current node list

  """
  # Locking is done in L{ConfigWriter.GetNodeList}
  candidates = self.GetNodeList()
  return _MatchNameComponentIgnoreCase(short_name, candidates)
def _UnlockedGetNodeInfo(self, node_name):
  """Return the configuration object of a node.

  This function is for internal use, when the config lock is already
  held.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: L{objects.Node}
  @return: the node object, or None if the name is unknown

  """
  try:
    return self._config_data.nodes[node_name]
  except KeyError:
    return None
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInfo(self, node_name):
  """Return the configuration object of a node.

  This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: L{objects.Node}
  @return: the node object

  """
  node = self._UnlockedGetNodeInfo(node_name)
  return node
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInstances(self, node_name):
  """Return the instances that use a node, as stored in the config.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: (list, list)
  @return: a tuple with two lists of instance names: the instances
      having the node as primary, and those having it among their
      secondary nodes

  """
  primary = []
  secondary = []
  for instance in self._config_data.instances.values():
    name = instance.name
    if instance.primary_node == node_name:
      primary.append(name)
    if node_name in instance.secondary_nodes:
      secondary.append(name)
  return (primary, secondary)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupInstances(self, uuid, primary_only=False):
  """Return the instances that have nodes in a node group.

  @param uuid: Node group UUID
  @param primary_only: Whether to only consider primary nodes
  @rtype: frozenset
  @return: names of the instances with at least one considered node
      whose group equals C{uuid}

  """
  matching = set()
  for inst in self._config_data.instances.values():
    if primary_only:
      node_names = [inst.primary_node]
    else:
      node_names = inst.all_nodes

    # every node is looked up, even after a match was already found
    for node_name in node_names:
      if self._UnlockedGetNodeInfo(node_name).group == uuid:
        matching.add(inst.name)
  return frozenset(matching)
def _UnlockedGetNodeList(self):
  """Return the names of the nodes which are in the configuration.

  This function is for internal use, when the config lock is already
  held.

  @rtype: list

  """
  nodes = self._config_data.nodes
  return nodes.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeList(self):
  """Return the names of the nodes which are in the configuration.

  This is a locked wrapper over L{_UnlockedGetNodeList}.

  """
  names = self._UnlockedGetNodeList()
  return names
def _UnlockedGetOnlineNodeList(self):
  """Return the names of the nodes which are not marked offline.

  @rtype: list
  @return: the C{name} of every configured node whose C{offline}
      attribute is false

  """
  online = []
  for node_name in self._UnlockedGetNodeList():
    node = self._UnlockedGetNodeInfo(node_name)
    if not node.offline:
      online.append(node.name)
  return online
@locking.ssynchronized(_config_lock, shared=1)
def GetOnlineNodeList(self):
  """Return the names of the nodes which are not marked offline.

  This is a locked wrapper over L{_UnlockedGetOnlineNodeList}.

  """
  names = self._UnlockedGetOnlineNodeList()
  return names
@locking.ssynchronized(_config_lock, shared=1)
def GetVmCapableNodeList(self):
  """Return the list of nodes which are vm capable.

  @rtype: list
  @return: the C{name} of every configured node whose C{vm_capable}
      attribute is true

  """
  capable = []
  for node_name in self._UnlockedGetNodeList():
    node = self._UnlockedGetNodeInfo(node_name)
    if node.vm_capable:
      capable.append(node.name)
  return capable
@locking.ssynchronized(_config_lock, shared=1)
def GetNonVmCapableNodeList(self):
  """Return the list of nodes which are not vm capable.

  @rtype: list
  @return: the C{name} of every configured node whose C{vm_capable}
      attribute is false

  """
  not_capable = []
  for node_name in self._UnlockedGetNodeList():
    node = self._UnlockedGetNodeInfo(node_name)
    if not node.vm_capable:
      not_capable.append(node.name)
  return not_capable
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiNodeInfo(self, nodes):
  """Return the configuration of several nodes at once.

  @param nodes: list of node names
  @rtype: list
  @return: list of tuples of (node, node_info), where node_info is
      what would GetNodeInfo return for the node, in the original
      order

  """
  result = []
  for name in nodes:
    result.append((name, self._UnlockedGetNodeInfo(name)))
  return result
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodesInfo(self):
  """Return the configuration of all nodes.

  @rtype: dict
  @return: dict of (node, node_info), where node_info is what
      would GetNodeInfo return for the node

  """
  info_by_name = {}
  for name in self._UnlockedGetNodeList():
    info_by_name[name] = self._UnlockedGetNodeInfo(name)
  return info_by_name
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupsFromNodes(self, nodes):
  """Return the node groups a list of nodes belongs to.

  @type nodes: list of string
  @param nodes: List of node names
  @rtype: frozenset
  @return: the C{group} attribute of every given node

  """
  groups = set()
  for name in nodes:
    groups.add(self._UnlockedGetNodeInfo(name).group)
  return frozenset(groups)
def _UnlockedGetMasterCandidateStats(self, exceptions=None):
  """Count current, desired and possible master candidates.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: tuple of (current, desired and possible, possible), where
      the middle value is the number of possible candidates capped by
      the cluster's C{candidate_pool_size}

  """
  current = 0
  possible = 0
  for node in self._config_data.nodes.values():
    if exceptions and node.name in exceptions:
      continue
    # a node can be a candidate if it is master capable and neither
    # offline nor drained
    if not node.offline and not node.drained and node.master_capable:
      possible += 1
    # current candidates are counted regardless of the check above
    if node.master_candidate:
      current += 1

  pool_size = self._config_data.cluster.candidate_pool_size
  desired = min(possible, pool_size)
  return (current, desired, possible)
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterCandidateStats(self, exceptions=None):
  """Count current, desired and possible master candidates.

  This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: the tuple returned by L{_UnlockedGetMasterCandidateStats}

  """
  stats = self._UnlockedGetMasterCandidateStats(exceptions)
  return stats
@locking.ssynchronized(_config_lock)
def MaintainCandidatePool(self, exceptions):
  """Try to grow the candidate pool to the desired size.

  Nodes are picked in random order among those which are master
  capable and are not already candidates, offline, drained or listed
  in C{exceptions}. The configuration is written out only if at least
  one node was promoted.

  @type exceptions: list
  @param exceptions: list of nodes that should be ignored; C{None} is
      treated as an empty list
  @rtype: list
  @return: list with the adjusted nodes (L{objects.Node} instances)

  """
  if exceptions is None:
    # the stats helper accepts None, but the membership test below does not
    exceptions = []

  mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions)
  mod_list = []
  if mc_now < mc_max:
    node_list = self._config_data.nodes.keys()
    random.shuffle(node_list)
    for name in node_list:
      if mc_now >= mc_max:
        break
      node = self._config_data.nodes[name]
      if (node.master_candidate or node.offline or node.drained or
          node.name in exceptions or not node.master_capable):
        continue
      mod_list.append(node)
      node.master_candidate = True
      node.serial_no += 1
      mc_now += 1
    if mc_now != mc_max:
      # this should not happen
      logging.warning("Warning: MaintainCandidatePool didn't manage to"
                      " fill the candidate pool (%d/%d)", mc_now, mc_max)
    if mod_list:
      self._config_data.cluster.serial_no += 1
      self._WriteConfig()

  return mod_list
def _UnlockedAddNodeToGroup(self, node_name, nodegroup_uuid):
  """Add a node to the member list of a node group.

  Nothing is changed if the node is already listed as a member.

  @param node_name: name of the node to add
  @param nodegroup_uuid: UUID of the group the node should belong to
  @raise errors.OpExecError: if the node group is unknown

  """
  nodegroups = self._config_data.nodegroups
  if nodegroup_uuid not in nodegroups:
    # This can happen if a node group gets deleted between its lookup and
    # when we're adding the first node to it, since we don't keep a lock in
    # the meantime. It's ok though, as we'll fail cleanly if the node group
    # is not found anymore.
    raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)

  members = nodegroups[nodegroup_uuid].members
  if node_name not in members:
    members.append(node_name)
def _UnlockedRemoveNodeFromGroup(self, node):
  """Remove a given node from its group.

  Inconsistencies (the node's group is unknown, or the node is not
  listed among the group's members) are logged as warnings and
  otherwise ignored.

  @param node: the node object; its C{group} and C{name} attributes
      are used

  """
  nodegroup = node.group
  if nodegroup not in self._config_data.nodegroups:
    logging.warning("Warning: node '%s' has unknown node group '%s'"
                    " (while being removed from it)", node.name, nodegroup)
    # there is no group to remove the node from; looking it up below
    # would raise KeyError right after the warning
    return

  nodegroup_obj = self._config_data.nodegroups[nodegroup]
  if node.name not in nodegroup_obj.members:
    logging.warning("Warning: node '%s' not a member of its node group '%s'"
                    " (while being removed from it)", node.name, nodegroup)
  else:
    nodegroup_obj.members.remove(node.name)
def _BumpSerialNo(self):
  """Increase the serial number of the config and refresh its mtime.

  """
  config = self._config_data
  config.serial_no += 1
  config.mtime = time.time()
def _AllUUIDObjects(self):
  """Return all configuration objects carrying a uuid attribute.

  @rtype: list
  @return: all instances, then all nodes, then all node groups,
      followed by the cluster object

  """
  config = self._config_data
  result = []
  result.extend(config.instances.values())
  result.extend(config.nodes.values())
  result.extend(config.nodegroups.values())
  result.append(config.cluster)
  return result
1620 def _OpenConfig(self, accept_foreign):
1621 """Read the config data from disk.
1624 raw_data = utils.ReadFile(self._cfg_file)
1627 data = objects.ConfigData.FromDict(serializer.Load(raw_data))
1628 except Exception, err:
1629 raise errors.ConfigurationError(err)
1631 # Make sure the configuration has the right version
1632 _ValidateConfig(data)
1634 if (not hasattr(data, 'cluster') or
1635 not hasattr(data.cluster, 'rsahostkeypub')):
1636 raise errors.ConfigurationError("Incomplete configuration"
1637 " (missing cluster.rsahostkeypub)")
1639 if data.cluster.master_node != self._my_hostname and not accept_foreign:
1640 msg = ("The configuration denotes node %s as master, while my"
1641 " hostname is %s; opening a foreign configuration is only"
1642 " possible in accept_foreign mode" %
1643 (data.cluster.master_node, self._my_hostname))
1644 raise errors.ConfigurationError(msg)
1646 # Upgrade configuration if needed
1647 data.UpgradeConfig()
1649 self._config_data = data
1650 # reset the last serial as -1 so that the next write will cause
1652 self._last_cluster_serial = -1
1654 # And finally run our (custom) config upgrade sequence
1655 self._UpgradeConfig()
1657 self._cfg_id = utils.GetFileID(path=self._cfg_file)
def _UpgradeConfig(self):
  """Run upgrade steps that cannot be done purely in the objects.

  These steps touch data that has to be unique or consistent across
  the whole configuration: missing UUIDs, a missing initial node
  group, and nodes without a group.

  @warning: this function will call L{_WriteConfig()}, but also
    L{DropECReservations} so it needs to be called only from a
    "safe" place (the constructor). If one wanted to call it with
    the lock held, a DropECReservationUnlocked would need to be
    created first, to avoid causing deadlock.

  """
  changed = False

  for obj in self._AllUUIDObjects():
    if obj.uuid is None:
      obj.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
      changed = True

  if not self._config_data.nodegroups:
    initial_name = constants.INITIAL_NODE_GROUP_NAME
    initial_group = objects.NodeGroup(name=initial_name, members=[])
    self._UnlockedAddNodeGroup(initial_group, _UPGRADE_CONFIG_JID, True)
    changed = True

  for node in self._config_data.nodes.values():
    if not node.group:
      node.group = self.LookupNodeGroup(None)
      changed = True
    # This is technically *not* an upgrade, but needs to be done both when
    # nodegroups are being added, and upon normally loading the config,
    # because the members list of a node group is discarded upon
    # serializing/deserializing the object.
    self._UnlockedAddNodeToGroup(node.name, node.group)

  if changed:
    self._WriteConfig()
    # This is ok even if it acquires the internal lock, as _UpgradeConfig is
    # only called at config init time, without the lock held
    self.DropECReservations(_UPGRADE_CONFIG_JID)
def _DistributeConfig(self, feedback_fn):
  """Copy the configuration file to the other master candidates.

  Currently, this only copies the configuration file. In the future,
  it could be used to encapsulate the 2/3-phase update mechanism.

  @param feedback_fn: if true-ish, called with the message of every
      failed copy
  @rtype: boolean
  @return: True in offline mode or when no copy failed, False
      otherwise

  """
  if self._offline:
    return True

  failed = False

  target_names = []
  target_addrs = []
  myhostname = self._my_hostname
  # we can skip checking whether _UnlockedGetNodeInfo returns None
  # since the node list comes from _UnlockedGetNodeList, and we are
  # called with the lock held, so no modifications should take place
  # in between
  for node_name in self._UnlockedGetNodeList():
    if node_name == myhostname:
      continue
    node_info = self._UnlockedGetNodeInfo(node_name)
    if not node_info.master_candidate:
      continue
    target_names.append(node_info.name)
    target_addrs.append(node_info.primary_ip)

  result = rpc.RpcRunner.call_upload_file(target_names, self._cfg_file,
                                          address_list=target_addrs)
  for to_node, to_result in result.items():
    fail_msg = to_result.fail_msg
    if not fail_msg:
      continue

    msg = ("Copy of file %s to node %s failed: %s" %
           (self._cfg_file, to_node, fail_msg))
    logging.error(msg)

    if feedback_fn:
      feedback_fn(msg)

    failed = True

  return not failed
1742 def _WriteConfig(self, destination=None, feedback_fn=None):
1743 """Write the configuration data to persistent storage.
1746 assert feedback_fn is None or callable(feedback_fn)
1748 # Warn on config errors, but don't abort the save - the
1749 # configuration has already been modified, and we can't revert;
1750 # the best we can do is to warn the user and save as is, leaving
1751 # recovery to the user
1752 config_errors = self._UnlockedVerifyConfig()
1754 errmsg = ("Configuration data is not consistent: %s" %
1755 (utils.CommaJoin(config_errors)))
1756 logging.critical(errmsg)
1760 if destination is None:
1761 destination = self._cfg_file
1762 self._BumpSerialNo()
1763 txt = serializer.Dump(self._config_data.ToDict())
1765 getents = self._getents()
1767 fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
1768 close=False, gid=getents.confd_gid, mode=0640)
1769 except errors.LockError:
1770 raise errors.ConfigurationError("The configuration file has been"
1771 " modified since the last write, cannot"
1774 self._cfg_id = utils.GetFileID(fd=fd)
1778 self.write_count += 1
1780 # and redistribute the config file to master candidates
1781 self._DistributeConfig(feedback_fn)
1783 # Write ssconf files on all nodes (including locally)
1784 if self._last_cluster_serial < self._config_data.cluster.serial_no:
1785 if not self._offline:
1786 result = rpc.RpcRunner.call_write_ssconf_files(
1787 self._UnlockedGetOnlineNodeList(),
1788 self._UnlockedGetSsconfValues())
1790 for nname, nresu in result.items():
1791 msg = nresu.fail_msg
1793 errmsg = ("Error while uploading ssconf files to"
1794 " node %s: %s" % (nname, msg))
1795 logging.warning(errmsg)
1800 self._last_cluster_serial = self._config_data.cluster.serial_no
def _UnlockedGetSsconfValues(self):
  """Build the values needed by ssconf.

  List-like values are joined with newlines into a single string.

  @rtype: dict
  @return: a dictionary with keys the ssconf names and values their
      associated value
  @raise errors.ConfigurationError: if any value is not a string

  """
  join_lines = "\n".join
  cluster = self._config_data.cluster

  instance_names = utils.NiceSort(self._UnlockedGetInstanceList())
  node_names = utils.NiceSort(self._UnlockedGetNodeList())
  node_info = [self._UnlockedGetNodeInfo(name) for name in node_names]

  # "<name> <ip>" lines for primary and secondary addresses
  primary_ip_lines = ["%s %s" % (ninfo.name, ninfo.primary_ip)
                      for ninfo in node_info]
  secondary_ip_lines = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
                        for ninfo in node_info]

  instance_data = join_lines(instance_names)
  off_data = join_lines([node.name for node in node_info if node.offline])
  on_data = join_lines([node.name for node in node_info
                        if not node.offline])
  mc_data = join_lines([node.name for node in node_info
                        if node.master_candidate])
  mc_ips_data = join_lines([node.primary_ip for node in node_info
                            if node.master_candidate])
  node_data = join_lines(node_names)
  node_pri_ips_data = join_lines(primary_ip_lines)
  node_snd_ips_data = join_lines(secondary_ip_lines)

  cluster_tags = join_lines(cluster.GetTags())

  hypervisor_list = join_lines(cluster.enabled_hypervisors)

  uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")

  # "<uuid> <name>" lines for the node groups
  nodegroup_lines = ["%s %s" % (nodegroup.uuid, nodegroup.name)
                     for nodegroup in self._config_data.nodegroups.values()]
  nodegroups_data = join_lines(utils.NiceSort(nodegroup_lines))

  ssconf_values = {
    constants.SS_CLUSTER_NAME: cluster.cluster_name,
    constants.SS_CLUSTER_TAGS: cluster_tags,
    constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
    constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
    constants.SS_MASTER_CANDIDATES: mc_data,
    constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
    constants.SS_MASTER_IP: cluster.master_ip,
    constants.SS_MASTER_NETDEV: cluster.master_netdev,
    constants.SS_MASTER_NETMASK: str(cluster.master_netmask),
    constants.SS_MASTER_NODE: cluster.master_node,
    constants.SS_NODE_LIST: node_data,
    constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
    constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
    constants.SS_OFFLINE_NODES: off_data,
    constants.SS_ONLINE_NODES: on_data,
    constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
    constants.SS_INSTANCE_LIST: instance_data,
    constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
    constants.SS_HYPERVISOR_LIST: hypervisor_list,
    constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
    constants.SS_UID_POOL: uid_pool,
    constants.SS_NODEGROUPS: nodegroups_data,
    }

  # every ssconf value has to be a string; report all offenders at once
  bad_values = [(k, v) for k, v in ssconf_values.items()
                if not isinstance(v, (str, basestring))]
  if bad_values:
    err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
    raise errors.ConfigurationError("Some ssconf key(s) have non-string"
                                    " values: %s" % err)
  return ssconf_values
@locking.ssynchronized(_config_lock, shared=1)
def GetSsconfValues(self):
  """Locked wrapper around L{_UnlockedGetSsconfValues}.

  """
  values = self._UnlockedGetSsconfValues()
  return values
@locking.ssynchronized(_config_lock, shared=1)
def GetVGName(self):
  """Return the cluster's volume group name.

  """
  cluster = self._config_data.cluster
  return cluster.volume_group_name
@locking.ssynchronized(_config_lock)
def SetVGName(self, vg_name):
  """Set the cluster's volume group name and save the configuration.

  @param vg_name: the new volume group name

  """
  cluster = self._config_data.cluster
  cluster.volume_group_name = vg_name
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetDRBDHelper(self):
  """Return the cluster's DRBD usermode helper.

  """
  cluster = self._config_data.cluster
  return cluster.drbd_usermode_helper
@locking.ssynchronized(_config_lock)
def SetDRBDHelper(self, drbd_helper):
  """Set the cluster's DRBD usermode helper and save the configuration.

  @param drbd_helper: the new DRBD usermode helper

  """
  cluster = self._config_data.cluster
  cluster.drbd_usermode_helper = drbd_helper
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetMACPrefix(self):
  """Return the cluster's mac prefix.

  """
  cluster = self._config_data.cluster
  return cluster.mac_prefix
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterInfo(self):
  """Return the cluster object of the configuration.

  The stored object itself is returned, not a copy.

  @rtype: L{objects.Cluster}
  @return: the cluster object

  """
  cluster = self._config_data.cluster
  return cluster
@locking.ssynchronized(_config_lock, shared=1)
def HasAnyDiskOfType(self, dev_type):
  """Check if there is at least one disk of the given type in the config.

  @param dev_type: the disk type to look for
  @return: the result of the configuration data's own
      C{HasAnyDiskOfType} check

  """
  config = self._config_data
  return config.HasAnyDiskOfType(dev_type)
@locking.ssynchronized(_config_lock)
def Update(self, target, feedback_fn):
  """Notify function to be called after updates.

  This function must be called when an object (as returned by
  GetInstanceInfo, GetNodeInfo, GetClusterInfo) has been updated and
  the caller wants the modifications saved to the backing store. Note
  that all modified objects will be saved, but the target argument
  is the one the caller wants to ensure that it's saved.

  @param target: an instance of either L{objects.Cluster},
      L{objects.Node}, L{objects.Instance} or L{objects.NodeGroup}
      which is existing in the configuration
  @param feedback_fn: Callable feedback function
  @raise errors.ProgrammerError: if no configuration is loaded or the
      target has an unsupported type
  @raise errors.ConfigurationError: if the target is not part of the
      current configuration

  """
  config = self._config_data
  if config is None:
    raise errors.ProgrammerError("Configuration file not read,"
                                 " cannot save.")

  bump_cluster = False
  if isinstance(target, objects.Cluster):
    known = target == config.cluster
  elif isinstance(target, objects.Node):
    known = target in config.nodes.values()
    bump_cluster = True
  elif isinstance(target, objects.Instance):
    known = target in config.instances.values()
  elif isinstance(target, objects.NodeGroup):
    known = target in config.nodegroups.values()
  else:
    raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                 " ConfigWriter.Update" % type(target))
  if not known:
    raise errors.ConfigurationError("Configuration updated since object"
                                    " has been read or unknown object")

  target.serial_no += 1
  now = time.time()
  target.mtime = now

  if bump_cluster:
    # for node updates, we need to increase the cluster serial too
    config.cluster.serial_no += 1
    config.cluster.mtime = now

  if isinstance(target, objects.Instance):
    self._UnlockedReleaseDRBDMinors(target.name)

  self._WriteConfig(feedback_fn=feedback_fn)
@locking.ssynchronized(_config_lock)
def DropECReservations(self, ec_id):
  """Drop the reservations made for one execution context.

  The request is forwarded to every manager in C{self._all_rms}.

  @param ec_id: the execution context id whose reservations are dropped

  """
  for manager in self._all_rms:
    manager.DropECReservations(ec_id)