4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
"""Configuration management for Ganeti

This module provides the interface to the Ganeti cluster configuration.

The configuration data is stored on every node but is updated on the master
only. After each update, the master distributes the data to the other nodes.

Currently, the data storage format is JSON. YAML was slow and consuming too
much memory.

"""
34 # pylint: disable=R0904
35 # R0904: Too many public methods
import logging
import os
import random
import time

from ganeti import errors
from ganeti import locking
from ganeti import utils
from ganeti import constants
from ganeti import rpc
from ganeti import objects
from ganeti import serializer
from ganeti import uidpool
from ganeti import netutils
from ganeti import runtime
# module-wide lock; used by the ssynchronized decorators on the
# configuration methods below
_config_lock = locking.SharedLock("ConfigWriter")

# job id used for resource management at config upgrade time
_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
def _ValidateConfig(data):
  """Verifies that a configuration object looks valid.

  This only verifies the version of the configuration.

  @param data: configuration object exposing a C{version} attribute
  @raise errors.ConfigVersionMismatch: if the version differs from
      L{constants.CONFIG_VERSION}

  """
  if data.version != constants.CONFIG_VERSION:
    raise errors.ConfigVersionMismatch(constants.CONFIG_VERSION, data.version)
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  Reservations are kept per execution context (C{ec_id}) in a dictionary
  mapping the context id to the set of resources it holds.

  """
  def __init__(self):
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Check whether any execution context holds the given resource.

    @rtype: boolean

    """
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve a resource for the given execution context.

    @raise errors.ReservationError: if the resource is already reserved,
        by this or by any other execution context

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    self._ec_reserved.setdefault(ec_id, set()).add(resource)

  def DropECReservations(self, ec_id):
    """Forget all reservations of an execution context, if it has any.

    """
    if ec_id in self._ec_reserved:
      del self._ec_reserved[ec_id]

  def GetReserved(self):
    """Return the union of the resources reserved by all contexts.

    @rtype: set
    @return: a new set, safe for the caller to modify

    """
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type

    @param existing: iterable of resources already in use
    @param generate_one_fn: callable without arguments returning a candidate
        resource (or None if it could not produce one)
    @param ec_id: execution context to reserve the new resource for
    @return: the newly generated (and reserved) resource
    @raise errors.ConfigurationError: if no unused resource was produced
        within the retry limit

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    # NOTE(review): the retry-limit lines were lost in extraction; a limit
    # of 64 attempts is assumed - confirm. The loop is bounded explicitly so
    # that a generator which always collides cannot spin forever.
    for _ in range(64):
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource
def _MatchNameComponentIgnoreCase(short_name, names):
  """Wrapper around L{utils.text.MatchNameComponent}.

  Forwards both arguments unchanged and forces case-insensitive matching.

  """
  return utils.MatchNameComponent(short_name, names, case_sensitive=False)
137 """The interface to the cluster configuration.
139 @ivar _temporary_lvs: reservation manager for temporary LVs
140 @ivar _all_rms: a list of all temporary reservation managers
def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
             accept_foreign=False):
  """Set up the in-memory state and open the configuration.

  @param cfg_file: path of the configuration file; defaults to
      L{constants.CLUSTER_CONF_FILE} when None
  @param offline: stored as-is in C{self._offline}
  @param _getents: stored as-is in C{self._getents}
  @param accept_foreign: passed through to C{_OpenConfig}

  """
  # NOTE(review): line lost in extraction, restored by assumption - confirm
  self.write_count = 0
  self._lock = _config_lock
  self._config_data = None
  self._offline = offline
  if cfg_file is None:
    self._cfg_file = constants.CLUSTER_CONF_FILE
  else:
    self._cfg_file = cfg_file
  self._getents = _getents
  self._temporary_ids = TemporaryReservationManager()
  self._temporary_drbds = {}
  self._temporary_macs = TemporaryReservationManager()
  self._temporary_secrets = TemporaryReservationManager()
  self._temporary_lvs = TemporaryReservationManager()
  self._all_rms = [self._temporary_ids, self._temporary_macs,
                   self._temporary_secrets, self._temporary_lvs]
  # Note: in order to prevent errors when resolving our name in
  # _DistributeConfig, we compute it here once and reuse it; it's
  # better to raise an error before starting to modify the config
  # file than after it was modified
  self._my_hostname = netutils.Hostname.GetSysName()
  self._last_cluster_serial = -1
  # NOTE(review): line lost in extraction, restored by assumption - confirm
  self._cfg_id = None
  self._OpenConfig(accept_foreign)
170 # this method needs to be static, so that we can call it on the class
173 """Check if the cluster is configured.
176 return os.path.exists(constants.CLUSTER_CONF_FILE)
178 def _GenerateOneMAC(self):
179 """Generate one mac address
182 prefix = self._config_data.cluster.mac_prefix
183 byte1 = random.randrange(0, 256)
184 byte2 = random.randrange(0, 256)
185 byte3 = random.randrange(0, 256)
186 mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
@locking.ssynchronized(_config_lock, shared=1)
def GetNdParams(self, node):
  """Get the node params populated with cluster defaults.

  @type node: L{objects.Node}
  @param node: The node we want to know the params for
  @return: A dict with the filled in node params

  """
  nodegroup = self._UnlockedGetNodeGroup(node.group)
  return self._config_data.cluster.FillND(node, nodegroup)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateMAC(self, ec_id):
  """Generate a MAC for an instance.

  This should check the current instances for duplicates.

  @param ec_id: execution context the new MAC is reserved for
  @return: the generated (and temporarily reserved) MAC address

  """
  existing = self._AllMACs()
  # Use the MAC reservation manager (not the generic ID one) so that
  # generated MACs and MACs reserved through ReserveMAC are checked against
  # each other.
  return self._temporary_macs.Generate(existing, self._GenerateOneMAC, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveMAC(self, mac, ec_id):
  """Reserve a MAC for an instance.

  This only checks instances managed by this cluster, it does not
  check for potential collisions elsewhere.

  @param mac: the MAC address to reserve
  @param ec_id: execution context the MAC is reserved for
  @raise errors.ReservationError: if the MAC is already used by an instance
      or already reserved

  """
  all_macs = self._AllMACs()
  if mac in all_macs:
    raise errors.ReservationError("mac already in use")
  else:
    self._temporary_macs.Reserve(ec_id, mac)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveLV(self, lv_name, ec_id):
  """Reserve an VG/LV pair for an instance.

  @type lv_name: string
  @param lv_name: the logical volume name to reserve
  @param ec_id: execution context the LV is reserved for
  @raise errors.ReservationError: if the LV is already used by an instance
      or already reserved

  """
  all_lvs = self._AllLVs()
  if lv_name in all_lvs:
    raise errors.ReservationError("LV already in use")
  else:
    self._temporary_lvs.Reserve(ec_id, lv_name)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateDRBDSecret(self, ec_id):
  """Generate a DRBD secret.

  This checks the current disks for duplicates.

  @param ec_id: execution context the secret is reserved for
  @return: the generated (and temporarily reserved) secret

  """
  return self._temporary_secrets.Generate(self._AllDRBDSecrets(),
                                          utils.GenerateSecret,
                                          ec_id)
251 """Compute the list of all LVs.
255 for instance in self._config_data.instances.values():
256 node_data = instance.MapLVsByNode()
257 for lv_list in node_data.values():
258 lvnames.update(lv_list)
261 def _AllIDs(self, include_temporary):
262 """Compute the list of all UUIDs and names we have.
264 @type include_temporary: boolean
265 @param include_temporary: whether to include the _temporary_ids set
267 @return: a set of IDs
271 if include_temporary:
272 existing.update(self._temporary_ids.GetReserved())
273 existing.update(self._AllLVs())
274 existing.update(self._config_data.instances.keys())
275 existing.update(self._config_data.nodes.keys())
276 existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
def _GenerateUniqueID(self, ec_id):
  """Generate an unique UUID.

  This checks the current node, instances and disk names for
  duplicates.

  @param ec_id: execution context the new id is reserved for
  @rtype: string
  @return: the unique id

  """
  # temporary ids are not passed in here; the reservation manager adds its
  # own reserved set inside Generate()
  existing = self._AllIDs(include_temporary=False)
  return self._temporary_ids.Generate(existing, utils.NewUUID, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateUniqueID(self, ec_id):
  """Generate an unique ID.

  This is just a wrapper over the unlocked version.

  @type ec_id: string
  @param ec_id: unique id for the job to reserve the id to

  """
  return self._GenerateUniqueID(ec_id)
305 """Return all MACs present in the config.
308 @return: the list of all MACs
312 for instance in self._config_data.instances.values():
313 for nic in instance.nics:
314 result.append(nic.mac)
def _AllDRBDSecrets(self):
  """Return all DRBD secrets present in the config.

  @rtype: list
  @return: the list of all DRBD secrets

  """
  def helper(disk, result):
    """Recursively gather secrets from this disk."""
    if disk.dev_type == constants.DT_DRBD8:
      # the secret is the sixth element of the logical id
      result.append(disk.logical_id[5])
    if disk.children:
      for child in disk.children:
        helper(child, result)

  result = []
  for instance in self._config_data.instances.values():
    for disk in instance.disks:
      helper(disk, result)

  return result
340 def _CheckDiskIDs(self, disk, l_ids, p_ids):
341 """Compute duplicate disk IDs
343 @type disk: L{objects.Disk}
344 @param disk: the disk at which to start searching
346 @param l_ids: list of current logical ids
348 @param p_ids: list of current physical ids
350 @return: a list of error messages
354 if disk.logical_id is not None:
355 if disk.logical_id in l_ids:
356 result.append("duplicate logical id %s" % str(disk.logical_id))
358 l_ids.append(disk.logical_id)
359 if disk.physical_id is not None:
360 if disk.physical_id in p_ids:
361 result.append("duplicate physical id %s" % str(disk.physical_id))
363 p_ids.append(disk.physical_id)
366 for child in disk.children:
367 result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
370 def _UnlockedVerifyConfig(self):
374 @return: a list of error messages; a non-empty list signifies
378 # pylint: disable=R0914
382 data = self._config_data
383 cluster = data.cluster
387 # global cluster checks
388 if not cluster.enabled_hypervisors:
389 result.append("enabled hypervisors list doesn't have any entries")
390 invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
392 result.append("enabled hypervisors contains invalid entries: %s" %
394 missing_hvp = (set(cluster.enabled_hypervisors) -
395 set(cluster.hvparams.keys()))
397 result.append("hypervisor parameters missing for the enabled"
398 " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
400 if cluster.master_node not in data.nodes:
401 result.append("cluster has invalid primary node '%s'" %
404 def _helper(owner, attr, value, template):
406 utils.ForceDictType(value, template)
407 except errors.GenericError, err:
408 result.append("%s has invalid %s: %s" % (owner, attr, err))
410 def _helper_nic(owner, params):
412 objects.NIC.CheckParameterSyntax(params)
413 except errors.ConfigurationError, err:
414 result.append("%s has invalid nicparams: %s" % (owner, err))
416 # check cluster parameters
417 _helper("cluster", "beparams", cluster.SimpleFillBE({}),
418 constants.BES_PARAMETER_TYPES)
419 _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
420 constants.NICS_PARAMETER_TYPES)
421 _helper_nic("cluster", cluster.SimpleFillNIC({}))
422 _helper("cluster", "ndparams", cluster.SimpleFillND({}),
423 constants.NDS_PARAMETER_TYPES)
425 # per-instance checks
426 for instance_name in data.instances:
427 instance = data.instances[instance_name]
428 if instance.name != instance_name:
429 result.append("instance '%s' is indexed by wrong name '%s'" %
430 (instance.name, instance_name))
431 if instance.primary_node not in data.nodes:
432 result.append("instance '%s' has invalid primary node '%s'" %
433 (instance_name, instance.primary_node))
434 for snode in instance.secondary_nodes:
435 if snode not in data.nodes:
436 result.append("instance '%s' has invalid secondary node '%s'" %
437 (instance_name, snode))
438 for idx, nic in enumerate(instance.nics):
439 if nic.mac in seen_macs:
440 result.append("instance '%s' has NIC %d mac %s duplicate" %
441 (instance_name, idx, nic.mac))
443 seen_macs.append(nic.mac)
445 filled = cluster.SimpleFillNIC(nic.nicparams)
446 owner = "instance %s nic %d" % (instance.name, idx)
447 _helper(owner, "nicparams",
448 filled, constants.NICS_PARAMETER_TYPES)
449 _helper_nic(owner, filled)
452 if instance.beparams:
453 _helper("instance %s" % instance.name, "beparams",
454 cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)
456 # gather the drbd ports for duplicate checks
457 for dsk in instance.disks:
458 if dsk.dev_type in constants.LDS_DRBD:
459 tcp_port = dsk.logical_id[2]
460 if tcp_port not in ports:
462 ports[tcp_port].append((instance.name, "drbd disk %s" % dsk.iv_name))
463 # gather network port reservation
464 net_port = getattr(instance, "network_port", None)
465 if net_port is not None:
466 if net_port not in ports:
468 ports[net_port].append((instance.name, "network port"))
470 # instance disk verify
471 for idx, disk in enumerate(instance.disks):
472 result.extend(["instance '%s' disk %d error: %s" %
473 (instance.name, idx, msg) for msg in disk.Verify()])
474 result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))
476 # cluster-wide pool of free ports
477 for free_port in cluster.tcpudp_port_pool:
478 if free_port not in ports:
479 ports[free_port] = []
480 ports[free_port].append(("cluster", "port marked as free"))
482 # compute tcp/udp duplicate ports
488 txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
489 result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
491 # highest used tcp port check
493 if keys[-1] > cluster.highest_used_port:
494 result.append("Highest used port mismatch, saved %s, computed %s" %
495 (cluster.highest_used_port, keys[-1]))
497 if not data.nodes[cluster.master_node].master_candidate:
498 result.append("Master node is not a master candidate")
500 # master candidate checks
501 mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
503 result.append("Not enough master candidates: actual %d, target %d" %
507 for node_name, node in data.nodes.items():
508 if node.name != node_name:
509 result.append("Node '%s' is indexed by wrong name '%s'" %
510 (node.name, node_name))
511 if [node.master_candidate, node.drained, node.offline].count(True) > 1:
512 result.append("Node %s state is invalid: master_candidate=%s,"
513 " drain=%s, offline=%s" %
514 (node.name, node.master_candidate, node.drained,
516 if node.group not in data.nodegroups:
517 result.append("Node '%s' has invalid group '%s'" %
518 (node.name, node.group))
520 _helper("node %s" % node.name, "ndparams",
521 cluster.FillND(node, data.nodegroups[node.group]),
522 constants.NDS_PARAMETER_TYPES)
525 nodegroups_names = set()
526 for nodegroup_uuid in data.nodegroups:
527 nodegroup = data.nodegroups[nodegroup_uuid]
528 if nodegroup.uuid != nodegroup_uuid:
529 result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
530 % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
531 if utils.UUID_RE.match(nodegroup.name.lower()):
532 result.append("node group '%s' (uuid: '%s') has uuid-like name" %
533 (nodegroup.name, nodegroup.uuid))
534 if nodegroup.name in nodegroups_names:
535 result.append("duplicate node group name '%s'" % nodegroup.name)
537 nodegroups_names.add(nodegroup.name)
538 if nodegroup.ndparams:
539 _helper("group %s" % nodegroup.name, "ndparams",
540 cluster.SimpleFillND(nodegroup.ndparams),
541 constants.NDS_PARAMETER_TYPES)
544 _, duplicates = self._UnlockedComputeDRBDMap()
545 for node, minor, instance_a, instance_b in duplicates:
546 result.append("DRBD minor %d on node %s is assigned twice to instances"
547 " %s and %s" % (minor, node, instance_a, instance_b))
550 default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
553 def _AddIpAddress(ip, name):
554 ips.setdefault(ip, []).append(name)
556 _AddIpAddress(cluster.master_ip, "cluster_ip")
558 for node in data.nodes.values():
559 _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
560 if node.secondary_ip != node.primary_ip:
561 _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
563 for instance in data.instances.values():
564 for idx, nic in enumerate(instance.nics):
568 nicparams = objects.FillDict(default_nicparams, nic.nicparams)
569 nic_mode = nicparams[constants.NIC_MODE]
570 nic_link = nicparams[constants.NIC_LINK]
572 if nic_mode == constants.NIC_MODE_BRIDGED:
573 link = "bridge:%s" % nic_link
574 elif nic_mode == constants.NIC_MODE_ROUTED:
575 link = "route:%s" % nic_link
577 raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
579 _AddIpAddress("%s/%s" % (link, nic.ip),
580 "instance:%s/nic:%d" % (instance.name, idx))
582 for ip, owners in ips.items():
584 result.append("IP address %s is used by multiple owners: %s" %
585 (ip, utils.CommaJoin(owners)))
@locking.ssynchronized(_config_lock, shared=1)
def VerifyConfig(self):
  """Verify function.

  This is just a wrapper over L{_UnlockedVerifyConfig}.

  @rtype: list
  @return: a list of error messages; a non-empty list signifies
      configuration errors

  """
  return self._UnlockedVerifyConfig()
def _UnlockedSetDiskID(self, disk, node_name):
  """Convert the unique ID to the ID needed on the target nodes.

  This is used only for drbd, which needs ip/port configuration.

  The routine descends down and updates its children also, because
  this helps when only the top device is passed to the remote
  node.

  This function is for internal use, when the config lock is already held.

  @param disk: the disk whose C{physical_id} is (re)computed in place
  @param node_name: the node for which the physical id is computed
  @raise errors.ConfigurationError: if a DRBD disk does not reference
      C{node_name}, or one of its nodes cannot be found

  """
  if disk.children:
    for child in disk.children:
      self._UnlockedSetDiskID(child, node_name)

  # a disk with only a physical id has nothing to convert
  if disk.logical_id is None and disk.physical_id is not None:
    return
  if disk.dev_type == constants.LD_DRBD8:
    pnode, snode, port, pminor, sminor, secret = disk.logical_id
    if node_name not in (pnode, snode):
      raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                      node_name)
    pnode_info = self._UnlockedGetNodeInfo(pnode)
    snode_info = self._UnlockedGetNodeInfo(snode)
    if pnode_info is None or snode_info is None:
      raise errors.ConfigurationError("Can't find primary or secondary node"
                                      " for %s" % str(disk))
    p_data = (pnode_info.secondary_ip, port)
    s_data = (snode_info.secondary_ip, port)
    # physical id layout: (local ip, port, remote ip, port, minor, secret)
    if pnode == node_name:
      disk.physical_id = p_data + s_data + (pminor, secret)
    else: # it must be secondary, we tested above
      disk.physical_id = s_data + p_data + (sminor, secret)
  else:
    disk.physical_id = disk.logical_id
  return
@locking.ssynchronized(_config_lock)
def SetDiskID(self, disk, node_name):
  """Convert the unique ID to the ID needed on the target nodes.

  This is used only for drbd, which needs ip/port configuration.

  The routine descends down and updates its children also, because
  this helps when only the top device is passed to the remote
  node.

  This is just a wrapper over L{_UnlockedSetDiskID}.

  """
  return self._UnlockedSetDiskID(disk, node_name)
@locking.ssynchronized(_config_lock)
def AddTcpUdpPort(self, port):
  """Adds a new port to the available port pool.

  @type port: int
  @param port: the port to add to the cluster's pool
  @raise errors.ProgrammerError: if C{port} is not an integer

  """
  if not isinstance(port, int):
    raise errors.ProgrammerError("Invalid type passed for port")

  self._config_data.cluster.tcpudp_port_pool.add(port)
  # NOTE(review): line lost in extraction, restored by assumption - confirm
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetPortList(self):
  """Returns a copy of the current port list.

  """
  return self._config_data.cluster.tcpudp_port_pool.copy()
@locking.ssynchronized(_config_lock)
def AllocatePort(self):
  """Allocate a port.

  The port will be taken from the available port pool or from the
  default port range (and in this case we increase
  highest_used_port).

  @return: the allocated port
  @raise errors.ConfigurationError: if the next port after
      highest_used_port would reach L{constants.LAST_DRBD_PORT}

  """
  # If there are TCP/IP ports configured, we use them first.
  if self._config_data.cluster.tcpudp_port_pool:
    port = self._config_data.cluster.tcpudp_port_pool.pop()
  else:
    port = self._config_data.cluster.highest_used_port + 1
    if port >= constants.LAST_DRBD_PORT:
      raise errors.ConfigurationError("The highest used port is greater"
                                      " than %s. Aborting." %
                                      constants.LAST_DRBD_PORT)
    self._config_data.cluster.highest_used_port = port

  # NOTE(review): the two closing lines were lost in extraction and are
  # restored by assumption - confirm
  self._WriteConfig()
  return port
694 def _UnlockedComputeDRBDMap(self):
695 """Compute the used DRBD minor/nodes.
698 @return: dictionary of node_name: dict of minor: instance_name;
699 the returned dict will have all the nodes in it (even if with
700 an empty list), and a list of duplicates; if the duplicates
701 list is not empty, the configuration is corrupted and its caller
702 should raise an exception
705 def _AppendUsedPorts(instance_name, disk, used):
707 if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
708 node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
709 for node, port in ((node_a, minor_a), (node_b, minor_b)):
710 assert node in used, ("Node '%s' of instance '%s' not found"
711 " in node list" % (node, instance_name))
712 if port in used[node]:
713 duplicates.append((node, port, instance_name, used[node][port]))
715 used[node][port] = instance_name
717 for child in disk.children:
718 duplicates.extend(_AppendUsedPorts(instance_name, child, used))
722 my_dict = dict((node, {}) for node in self._config_data.nodes)
723 for instance in self._config_data.instances.itervalues():
724 for disk in instance.disks:
725 duplicates.extend(_AppendUsedPorts(instance.name, disk, my_dict))
726 for (node, minor), instance in self._temporary_drbds.iteritems():
727 if minor in my_dict[node] and my_dict[node][minor] != instance:
728 duplicates.append((node, minor, instance, my_dict[node][minor]))
730 my_dict[node][minor] = instance
731 return my_dict, duplicates
@locking.ssynchronized(_config_lock)
def ComputeDRBDMap(self):
  """Compute the used DRBD minor/nodes.

  This is just a wrapper over L{_UnlockedComputeDRBDMap}.

  @return: dictionary of node_name: dict of minor: instance_name;
      the returned dict will have all the nodes in it (even if with
      an empty list).
  @raise errors.ConfigurationError: if duplicate minors are detected

  """
  d_map, duplicates = self._UnlockedComputeDRBDMap()
  if duplicates:
    raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                    str(duplicates))
  return d_map
@locking.ssynchronized(_config_lock)
def AllocateDRBDMinor(self, nodes, instance):
  """Allocate a drbd minor.

  The free minor will be automatically computed from the existing
  devices. A node can be given multiple times in order to allocate
  multiple minors. The result is the list of minors, in the same
  order as the passed nodes.

  Every allocated minor is also recorded in C{self._temporary_drbds}.

  @param nodes: the node names to allocate a minor on
  @type instance: string
  @param instance: the instance for which we allocate minors
  @rtype: list
  @return: the allocated minors
  @raise errors.ConfigurationError: if the current configuration already
      contains duplicate minors

  """
  assert isinstance(instance, basestring), \
         "Invalid argument '%s' passed to AllocateDRBDMinor" % instance

  d_map, duplicates = self._UnlockedComputeDRBDMap()
  if duplicates:
    raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                    str(duplicates))
  result = []
  for nname in nodes:
    ndata = d_map[nname]
    if not ndata:
      # no minors used, we can start at 0
      result.append(0)
      ndata[0] = instance
      self._temporary_drbds[(nname, 0)] = instance
      continue
    keys = sorted(ndata.keys())
    ffree = utils.FirstFree(keys)
    if ffree is None:
      # return the next minor
      # TODO: implement high-limit check
      minor = keys[-1] + 1
    else:
      minor = ffree
    # double-check minor against current instances
    assert minor not in d_map[nname], \
           ("Attempt to reuse allocated DRBD minor %d on node %s,"
            " already allocated to instance %s" %
            (minor, nname, d_map[nname][minor]))
    ndata[minor] = instance
    # double-check minor against reservation
    r_key = (nname, minor)
    assert r_key not in self._temporary_drbds, \
           ("Attempt to reuse reserved DRBD minor %d on node %s,"
            " reserved for instance %s" %
            (minor, nname, self._temporary_drbds[r_key]))
    self._temporary_drbds[r_key] = instance
    result.append(minor)
  logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                nodes, result)
  return result
def _UnlockedReleaseDRBDMinors(self, instance):
  """Release temporary drbd minors allocated for a given instance.

  @type instance: string
  @param instance: the instance for which temporary minors should be
      released

  """
  assert isinstance(instance, basestring), \
         "Invalid argument passed to ReleaseDRBDMinors"
  # iterate over a snapshot, as entries are deleted while looping
  for key, name in list(self._temporary_drbds.items()):
    if name == instance:
      del self._temporary_drbds[key]
@locking.ssynchronized(_config_lock)
def ReleaseDRBDMinors(self, instance):
  """Release temporary drbd minors allocated for a given instance.

  This should be called on the error paths, on the success paths
  it's automatically called by the ConfigWriter add and update
  functions.

  This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.

  @type instance: string
  @param instance: the instance for which temporary minors should be
      released

  """
  self._UnlockedReleaseDRBDMinors(instance)
@locking.ssynchronized(_config_lock, shared=1)
def GetConfigVersion(self):
  """Get the configuration version.

  @return: Config version

  """
  return self._config_data.version
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterName(self):
  """Get cluster name.

  @return: Cluster name

  """
  return self._config_data.cluster.cluster_name
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNode(self):
  """Get the hostname of the master node for this cluster.

  @return: Master hostname

  """
  return self._config_data.cluster.master_node
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterIP(self):
  """Get the IP of the master node for this cluster.

  @return: Master IP

  """
  return self._config_data.cluster.master_ip
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetdev(self):
  """Get the master network device for this cluster.

  """
  return self._config_data.cluster.master_netdev
@locking.ssynchronized(_config_lock, shared=1)
def GetFileStorageDir(self):
  """Get the file storage dir for this cluster.

  """
  return self._config_data.cluster.file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetSharedFileStorageDir(self):
  """Get the shared file storage dir for this cluster.

  """
  return self._config_data.cluster.shared_file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetHypervisorType(self):
  """Get the hypervisor type for this cluster.

  @return: the first entry of the cluster's enabled hypervisors list

  """
  return self._config_data.cluster.enabled_hypervisors[0]
@locking.ssynchronized(_config_lock, shared=1)
def GetHostKey(self):
  """Return the rsa hostkey from the config.

  @return: the rsa hostkey

  """
  return self._config_data.cluster.rsahostkeypub
@locking.ssynchronized(_config_lock, shared=1)
def GetDefaultIAllocator(self):
  """Get the default instance allocator for this cluster.

  """
  return self._config_data.cluster.default_iallocator
@locking.ssynchronized(_config_lock, shared=1)
def GetPrimaryIPFamily(self):
  """Get cluster primary ip family.

  @return: primary ip family

  """
  return self._config_data.cluster.primary_ip_family
@locking.ssynchronized(_config_lock)
def AddNodeGroup(self, group, ec_id, check_uuid=True):
  """Add a node group to the configuration.

  This method calls group.UpgradeConfig() to fill any missing attributes
  according to their default values.

  @type group: L{objects.NodeGroup}
  @param group: the NodeGroup object to add
  @type ec_id: string
  @param ec_id: unique id for the job to use when creating a missing UUID
  @type check_uuid: bool
  @param check_uuid: add an UUID to the group if it doesn't have one or, if
                     it does, ensure that it does not exist in the
                     configuration already

  """
  self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
  # NOTE(review): line lost in extraction, restored by assumption - confirm
  self._WriteConfig()
def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
  """Add a node group to the configuration.

  @param group: the node group object to add
  @param ec_id: execution context used when a UUID has to be generated
  @param check_uuid: whether to run L{_EnsureUUID} on the group
  @raise errors.OpPrereqError: if a group with the same name already exists

  """
  logging.info("Adding node group %s to configuration", group.name)

  # Some code might need to add a node group with a pre-populated UUID
  # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
  # the "does this UUID" exist already check.
  if check_uuid:
    self._EnsureUUID(group, ec_id)

  try:
    existing_uuid = self._UnlockedLookupNodeGroup(group.name)
  except errors.OpPrereqError:
    # the lookup failing means the name is still free
    pass
  else:
    raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                               " node group (UUID: %s)" %
                               (group.name, existing_uuid),
                               errors.ECODE_EXISTS)

  # NOTE(review): line lost in extraction, restored by assumption - confirm
  group.serial_no = 1
  group.ctime = group.mtime = time.time()
  group.UpgradeConfig()

  self._config_data.nodegroups[group.uuid] = group
  self._config_data.cluster.serial_no += 1
@locking.ssynchronized(_config_lock)
def RemoveNodeGroup(self, group_uuid):
  """Remove a node group from the configuration.

  @type group_uuid: string
  @param group_uuid: the UUID of the node group to remove
  @raise errors.ConfigurationError: if the group is not known

  """
  logging.info("Removing node group %s from configuration", group_uuid)

  if group_uuid not in self._config_data.nodegroups:
    raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)

  assert len(self._config_data.nodegroups) != 1, \
         "Group '%s' is the only group, cannot be removed" % group_uuid

  del self._config_data.nodegroups[group_uuid]
  self._config_data.cluster.serial_no += 1
  # NOTE(review): line lost in extraction, restored by assumption - confirm
  self._WriteConfig()
996 def _UnlockedLookupNodeGroup(self, target):
997 """Lookup a node group's UUID.
999 @type target: string or None
1000 @param target: group name or UUID or None to look for the default
1002 @return: nodegroup UUID
1003 @raises errors.OpPrereqError: when the target group cannot be found
1007 if len(self._config_data.nodegroups) != 1:
1008 raise errors.OpPrereqError("More than one node group exists. Target"
1009 " group must be specified explicitely.")
1011 return self._config_data.nodegroups.keys()[0]
1012 if target in self._config_data.nodegroups:
1014 for nodegroup in self._config_data.nodegroups.values():
1015 if nodegroup.name == target:
1016 return nodegroup.uuid
1017 raise errors.OpPrereqError("Node group '%s' not found" % target,
@locking.ssynchronized(_config_lock, shared=1)
def LookupNodeGroup(self, target):
  """Lookup a node group's UUID.

  This function is just a wrapper over L{_UnlockedLookupNodeGroup}.

  @type target: string or None
  @param target: group name or UUID or None to look for the default
  @rtype: string
  @return: nodegroup UUID

  """
  return self._UnlockedLookupNodeGroup(target)
1034 def _UnlockedGetNodeGroup(self, uuid):
1035 """Lookup a node group.
1038 @param uuid: group UUID
1039 @rtype: L{objects.NodeGroup} or None
1040 @return: nodegroup object, or None if not found
1043 if uuid not in self._config_data.nodegroups:
1046 return self._config_data.nodegroups[uuid]
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroup(self, uuid):
  """Lookup a node group.

  This is just a wrapper over L{_UnlockedGetNodeGroup}.

  @type uuid: string
  @param uuid: group UUID
  @rtype: L{objects.NodeGroup} or None
  @return: nodegroup object, or None if not found

  """
  return self._UnlockedGetNodeGroup(uuid)
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodeGroupsInfo(self):
  """Get the configuration of all node groups.

  @rtype: dict
  @return: a new dict built from the configuration's node group mapping;
      the group objects themselves are not copied

  """
  return dict(self._config_data.nodegroups)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupList(self):
  """Get a list of node groups.

  @return: the keys (UUIDs) of the configuration's node group mapping

  """
  return self._config_data.nodegroups.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupMembersByNodes(self, nodes):
  """Get nodes which are member in the same nodegroups as the given nodes.

  @param nodes: iterable of node names
  @rtype: frozenset
  @return: the members of every group one of the given nodes belongs to

  """
  ngfn = lambda node_name: self._UnlockedGetNodeInfo(node_name).group
  return frozenset(member_name
                   for node_name in nodes
                   for member_name in
                     self._UnlockedGetNodeGroup(ngfn(node_name)).members)
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiNodeGroupInfo(self, group_uuids):
  """Get the configuration of multiple node groups.

  @param group_uuids: List of node group UUIDs
  @rtype: list
  @return: List of tuples of (group_uuid, group_info); C{group_info} is None
      for unknown UUIDs

  """
  return [(uuid, self._UnlockedGetNodeGroup(uuid)) for uuid in group_uuids]
@locking.ssynchronized(_config_lock)
def AddInstance(self, instance, ec_id):
  """Add an instance to the config.

  This should be used after creating a new instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @param ec_id: execution context used when a UUID has to be generated
  @raise errors.ProgrammerError: if C{instance} has the wrong type
  @raise errors.ConfigurationError: if one of the instance's MAC addresses
      is already in use

  """
  if not isinstance(instance, objects.Instance):
    raise errors.ProgrammerError("Invalid type passed to AddInstance")

  if instance.disk_template != constants.DT_DISKLESS:
    all_lvs = instance.MapLVsByNode()
    logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)

  all_macs = self._AllMACs()
  for nic in instance.nics:
    if nic.mac in all_macs:
      raise errors.ConfigurationError("Cannot add instance %s:"
                                      " MAC address '%s' already in use." %
                                      (instance.name, nic.mac))

  self._EnsureUUID(instance, ec_id)

  instance.serial_no = 1
  instance.ctime = instance.mtime = time.time()
  self._config_data.instances[instance.name] = instance
  self._config_data.cluster.serial_no += 1
  # the minors reserved for this instance are now part of the configuration
  self._UnlockedReleaseDRBDMinors(instance.name)
  # NOTE(review): line lost in extraction, restored by assumption - confirm
  self._WriteConfig()
1129 def _EnsureUUID(self, item, ec_id):
1130 """Ensures a given object has a valid UUID.
1132 @param item: the instance or node to be checked
1133 @param ec_id: the execution context id for the uuid reservation
1137 item.uuid = self._GenerateUniqueID(ec_id)
1138 elif item.uuid in self._AllIDs(include_temporary=True):
1139 raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
1140 " in use" % (item.name, item.uuid))
1142 def _SetInstanceStatus(self, instance_name, status):
1143 """Set the instance's status to a given value.
1146 assert isinstance(status, bool), \
1147 "Invalid status '%s' passed to SetInstanceStatus" % (status,)
1149 if instance_name not in self._config_data.instances:
1150 raise errors.ConfigurationError("Unknown instance '%s'" %
1152 instance = self._config_data.instances[instance_name]
1153 if instance.admin_up != status:
1154 instance.admin_up = status
1155 instance.serial_no += 1
1156 instance.mtime = time.time()
@locking.ssynchronized(_config_lock)
def MarkInstanceUp(self, instance_name):
  """Record in the configuration that the instance should be up.

  @param instance_name: name of the instance; forwarded to
      L{_SetInstanceStatus} with a status of C{True}

  """
  new_status = True
  self._SetInstanceStatus(instance_name, new_status)
@locking.ssynchronized(_config_lock)
def RemoveInstance(self, instance_name):
  """Delete an instance from the configuration.

  The cluster serial number is increased and the configuration written.

  @param instance_name: name of the instance to remove
  @raise errors.ConfigurationError: if the instance is not configured

  """
  instances = self._config_data.instances
  cluster = self._config_data.cluster

  if instance_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  # An allocated network port goes back to the pool of free ports
  port = getattr(instances[instance_name], "network_port", None)
  if port is not None:
    cluster.tcpudp_port_pool.add(port)

  del instances[instance_name]
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RenameInstance(self, old_name, new_name):
  """Rename an instance.

  This needs to be done in ConfigWriter and not by RemoveInstance
  combined with AddInstance as only we can guarantee an atomic
  rename.

  For file-based disks the instance name is part of the path stored in
  the logical/physical ID, so those IDs are rewritten too. All new IDs
  are computed before anything is modified, so that a failure while
  computing them leaves the instance untouched and still registered
  under its old name.

  @param old_name: current name of the instance
  @param new_name: name the instance should be stored under
  @raise errors.ConfigurationError: if C{old_name} is not configured

  """
  instances = self._config_data.instances
  if old_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
  inst = instances[old_name]

  # First pass: only compute, do not modify any object yet
  new_disk_ids = []
  for disk in inst.disks:
    if disk.dev_type == constants.LD_FILE:
      # rename the file paths in logical and physical id
      file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
      disk_fname = "disk%s" % disk.iv_name.split("/")[1]
      new_id = (disk.logical_id[0],
                utils.PathJoin(file_storage_dir, new_name, disk_fname))
      new_disk_ids.append((disk, new_id))

  # Second pass: apply the changes; nothing below is expected to fail
  for (disk, new_id) in new_disk_ids:
    disk.physical_id = disk.logical_id = new_id

  del instances[old_name]
  inst.name = new_name

  # Force update of ssconf files
  self._config_data.cluster.serial_no += 1

  instances[inst.name] = inst
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceDown(self, instance_name):
  """Record in the configuration that the instance should be down.

  @param instance_name: name of the instance; forwarded to
      L{_SetInstanceStatus} with a status of C{False}

  """
  new_status = False
  self._SetInstanceStatus(instance_name, new_status)
1223 def _UnlockedGetInstanceList(self):
1224 """Get the list of instances.
1226 This function is for internal use, when the config lock is already held.
1229 return self._config_data.instances.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceList(self):
  """Locked wrapper around L{_UnlockedGetInstanceList}.

  @return: array of instances, ex. ['instance2.example.com',
      'instance1.example.com']

  """
  names = self._UnlockedGetInstanceList()
  return names
def ExpandInstanceName(self, short_name):
  """Try to expand a partial instance name to a configured one.

  @param short_name: the (possibly incomplete) instance name
  @return: the result of matching C{short_name} against the instance
      list with C{_MatchNameComponentIgnoreCase}

  """
  # No lock is taken here; L{ConfigWriter.GetInstanceList} does the locking
  known_names = self.GetInstanceList()
  return _MatchNameComponentIgnoreCase(short_name, known_names)
1248 def _UnlockedGetInstanceInfo(self, instance_name):
1249 """Returns information about an instance.
1251 This function is for internal use, when the config lock is already held.
1254 if instance_name not in self._config_data.instances:
1257 return self._config_data.instances[instance_name]
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceInfo(self, instance_name):
  """Return the configuration object of an instance.

  Only the data stored in the configuration is returned; this is a
  locked wrapper around L{_UnlockedGetInstanceInfo}.

  @param instance_name: name of the instance, e.g.
      I{instance1.example.com}

  @rtype: L{objects.Instance}
  @return: the instance object, or None if the instance is unknown

  """
  info = self._UnlockedGetInstanceInfo(instance_name)
  return info
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceNodeGroups(self, instance_name, primary_only=False):
  """Return the node groups an instance's nodes belong to.

  @param instance_name: name of the instance
  @param primary_only: if true, only the primary node is considered,
      otherwise all nodes of the instance
  @rtype: frozenset
  @return: the C{group} attribute of every considered node
  @raise errors.ConfigurationError: if the instance is unknown

  """
  instance = self._UnlockedGetInstanceInfo(instance_name)
  if not instance:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  if primary_only:
    node_names = [instance.primary_node]
  else:
    node_names = instance.all_nodes

  group_uuids = set()
  for node_name in node_names:
    group_uuids.add(self._UnlockedGetNodeInfo(node_name).group)
  return frozenset(group_uuids)
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiInstanceInfo(self, instances):
  """Look up several instances at once.

  @param instances: list of instance names
  @rtype: list
  @return: list of tuples (instance, instance_info), where
      instance_info is what L{GetInstanceInfo} would return for that
      instance; the order of C{instances} is kept

  """
  result = []
  for name in instances:
    result.append((name, self._UnlockedGetInstanceInfo(name)))
  return result
@locking.ssynchronized(_config_lock, shared=1)
def GetAllInstancesInfo(self):
  """Return the configuration objects of all instances.

  @rtype: dict
  @return: dict of (instance, instance_info), where instance_info is what
      L{GetInstanceInfo} would return for that instance

  """
  all_info = {}
  for name in self._UnlockedGetInstanceList():
    all_info[name] = self._UnlockedGetInstanceInfo(name)
  return all_info
@locking.ssynchronized(_config_lock)
def AddNode(self, node, ec_id):
  """Register a node in the configuration.

  The node receives a UUID via L{_EnsureUUID}, gets its serial number and
  timestamps initialised, is added to the member list of its node group
  and stored under its name. The cluster serial number is increased and
  the configuration written.

  @type node: L{objects.Node}
  @param node: a Node instance
  @param ec_id: execution context id, passed on to L{_EnsureUUID}

  """
  logging.info("Adding node %s to configuration", node.name)

  self._EnsureUUID(node, ec_id)

  now = time.time()
  node.serial_no = 1
  node.ctime = now
  node.mtime = now

  self._UnlockedAddNodeToGroup(node.name, node.group)

  config = self._config_data
  config.nodes[node.name] = node
  config.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RemoveNode(self, node_name):
  """Delete a node from the configuration.

  The node is also taken out of its node group's member list; the
  cluster serial number is increased and the configuration written.

  @param node_name: name of the node to remove
  @raise errors.ConfigurationError: if the node is not configured

  """
  logging.info("Removing node %s from configuration", node_name)

  nodes = self._config_data.nodes
  if node_name not in nodes:
    raise errors.ConfigurationError("Unknown node '%s'" % node_name)

  self._UnlockedRemoveNodeFromGroup(nodes[node_name])
  del nodes[node_name]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
def ExpandNodeName(self, short_name):
  """Try to expand a partial node name to a configured one.

  @param short_name: the (possibly incomplete) node name
  @return: the result of matching C{short_name} against the node list
      with C{_MatchNameComponentIgnoreCase}

  """
  # No lock is taken here; L{ConfigWriter.GetNodeList} does the locking
  known_names = self.GetNodeList()
  return _MatchNameComponentIgnoreCase(short_name, known_names)
1361 def _UnlockedGetNodeInfo(self, node_name):
1362 """Get the configuration of a node, as stored in the config.
1364 This function is for internal use, when the config lock is already
1367 @param node_name: the node name, e.g. I{node1.example.com}
1369 @rtype: L{objects.Node}
1370 @return: the node object
1373 if node_name not in self._config_data.nodes:
1376 return self._config_data.nodes[node_name]
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInfo(self, node_name):
  """Return the configuration object of a node.

  This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: L{objects.Node}
  @return: the node object, or None if the node is unknown

  """
  info = self._UnlockedGetNodeInfo(node_name)
  return info
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInstances(self, node_name):
  """Return the instances that use a node, as stored in the config.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: (list, list)
  @return: a tuple with two lists of instance names: those having the
      node as primary node and those having it among their secondary
      nodes

  """
  all_instances = self._config_data.instances.values()
  primary = [inst.name for inst in all_instances
             if inst.primary_node == node_name]
  secondary = [inst.name for inst in all_instances
               if node_name in inst.secondary_nodes]
  return (primary, secondary)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupInstances(self, uuid, primary_only=False):
  """Return the instances that have nodes in a node group.

  @param uuid: Node group UUID
  @param primary_only: Whether to only consider primary nodes
  @rtype: frozenset
  @return: names of all instances with at least one considered node
      whose C{group} equals C{uuid}

  """
  names = set()
  for inst in self._config_data.instances.values():
    if primary_only:
      inst_nodes = [inst.primary_node]
    else:
      inst_nodes = inst.all_nodes

    for node_name in inst_nodes:
      if self._UnlockedGetNodeInfo(node_name).group == uuid:
        names.add(inst.name)

  return frozenset(names)
1431 def _UnlockedGetNodeList(self):
1432 """Return the list of nodes which are in the configuration.
1434 This function is for internal use, when the config lock is already
1440 return self._config_data.nodes.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeList(self):
  """Locked wrapper around L{_UnlockedGetNodeList}.

  @return: the names of all nodes in the configuration

  """
  names = self._UnlockedGetNodeList()
  return names
1449 def _UnlockedGetOnlineNodeList(self):
1450 """Return the list of nodes which are online.
1453 all_nodes = [self._UnlockedGetNodeInfo(node)
1454 for node in self._UnlockedGetNodeList()]
1455 return [node.name for node in all_nodes if not node.offline]
@locking.ssynchronized(_config_lock, shared=1)
def GetOnlineNodeList(self):
  """Locked wrapper around L{_UnlockedGetOnlineNodeList}.

  @return: the names of all nodes not marked offline

  """
  names = self._UnlockedGetOnlineNodeList()
  return names
@locking.ssynchronized(_config_lock, shared=1)
def GetVmCapableNodeList(self):
  """Return the list of nodes which are vm capable.

  @rtype: list
  @return: the C{name} of every configured node whose C{vm_capable}
      attribute is true

  """
  capable = []
  for node_name in self._UnlockedGetNodeList():
    node = self._UnlockedGetNodeInfo(node_name)
    if node.vm_capable:
      capable.append(node.name)
  return capable
@locking.ssynchronized(_config_lock, shared=1)
def GetNonVmCapableNodeList(self):
  """Return the list of nodes which are not vm capable.

  @rtype: list
  @return: the C{name} of every configured node whose C{vm_capable}
      attribute is false

  """
  not_capable = []
  for node_name in self._UnlockedGetNodeList():
    node = self._UnlockedGetNodeInfo(node_name)
    if not node.vm_capable:
      not_capable.append(node.name)
  return not_capable
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiNodeInfo(self, nodes):
  """Look up several nodes at once.

  @param nodes: list of node names
  @rtype: list
  @return: list of tuples of (node, node_info), where node_info is
      what L{GetNodeInfo} would return for the node, in the original
      order

  """
  result = []
  for name in nodes:
    result.append((name, self._UnlockedGetNodeInfo(name)))
  return result
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodesInfo(self):
  """Return the configuration objects of all nodes.

  @rtype: dict
  @return: dict of (node, node_info), where node_info is what
      L{GetNodeInfo} would return for the node

  """
  all_info = {}
  for name in self._UnlockedGetNodeList():
    all_info[name] = self._UnlockedGetNodeInfo(name)
  return all_info
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupsFromNodes(self, nodes):
  """Return the node groups a list of nodes belongs to.

  @type nodes: list of string
  @param nodes: List of node names
  @rtype: frozenset
  @return: the C{group} attribute of every given node

  """
  group_uuids = set()
  for name in nodes:
    group_uuids.add(self._UnlockedGetNodeInfo(name).group)
  return frozenset(group_uuids)
1519 def _UnlockedGetMasterCandidateStats(self, exceptions=None):
1520 """Get the number of current and maximum desired and possible candidates.
1522 @type exceptions: list
1523 @param exceptions: if passed, list of nodes that should be ignored
1525 @return: tuple of (current, desired and possible, possible)
1528 mc_now = mc_should = mc_max = 0
1529 for node in self._config_data.nodes.values():
1530 if exceptions and node.name in exceptions:
1532 if not (node.offline or node.drained) and node.master_capable:
1534 if node.master_candidate:
1536 mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
1537 return (mc_now, mc_should, mc_max)
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterCandidateStats(self, exceptions=None):
  """Get the number of current, desired and possible candidates.

  This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: the three-element tuple returned by
      L{_UnlockedGetMasterCandidateStats}: (current, desired, possible)

  """
  return self._UnlockedGetMasterCandidateStats(exceptions)
@locking.ssynchronized(_config_lock)
def MaintainCandidatePool(self, exceptions):
  """Try to grow the candidate pool to the desired size.

  Nodes are promoted in random order until the desired number of master
  candidates is reached. If at least one node was promoted, the cluster
  serial number is increased and the configuration written.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored;
      None is treated like an empty list
  @rtype: list
  @return: list with the adjusted nodes (L{objects.Node} instances)

  """
  mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions)

  # "exceptions" may be None; membership tests below need a container
  ignored = exceptions or ()

  mod_list = []
  if mc_now < mc_max:
    # Copy the names into a real list, as random.shuffle works in place
    node_list = list(self._config_data.nodes.keys())
    random.shuffle(node_list)
    for name in node_list:
      if mc_now >= mc_max:
        break
      node = self._config_data.nodes[name]
      if (node.master_candidate or node.offline or node.drained or
          node.name in ignored or not node.master_capable):
        continue
      mod_list.append(node)
      node.master_candidate = True
      node.serial_no += 1
      mc_now += 1
    if mc_now != mc_max:
      # this should not happen
      logging.warning("Warning: MaintainCandidatePool didn't manage to"
                      " fill the candidate pool (%d/%d)", mc_now, mc_max)
    if mod_list:
      self._config_data.cluster.serial_no += 1
      self._WriteConfig()

  return mod_list
1589 def _UnlockedAddNodeToGroup(self, node_name, nodegroup_uuid):
1590 """Add a given node to the specified group.
1593 if nodegroup_uuid not in self._config_data.nodegroups:
1594 # This can happen if a node group gets deleted between its lookup and
1595 # when we're adding the first node to it, since we don't keep a lock in
1596 # the meantime. It's ok though, as we'll fail cleanly if the node group
1597 # is not found anymore.
1598 raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)
1599 if node_name not in self._config_data.nodegroups[nodegroup_uuid].members:
1600 self._config_data.nodegroups[nodegroup_uuid].members.append(node_name)
1602 def _UnlockedRemoveNodeFromGroup(self, node):
1603 """Remove a given node from its group.
1606 nodegroup = node.group
1607 if nodegroup not in self._config_data.nodegroups:
1608 logging.warning("Warning: node '%s' has unknown node group '%s'"
1609 " (while being removed from it)", node.name, nodegroup)
1610 nodegroup_obj = self._config_data.nodegroups[nodegroup]
1611 if node.name not in nodegroup_obj.members:
1612 logging.warning("Warning: node '%s' not a member of its node group '%s'"
1613 " (while being removed from it)", node.name, nodegroup)
1615 nodegroup_obj.members.remove(node.name)
@locking.ssynchronized(_config_lock)
def AssignGroupNodes(self, mods):
  """Changes the group of a number of nodes.

  All names and UUIDs are resolved before anything is modified, so an
  unknown node or group aborts the whole operation without changes.
  Afterwards the nodes' C{group} attribute and the member lists of the
  involved groups are updated, serial numbers and modification times of
  all touched objects are bumped once, and the configuration is written.

  @type mods: list of tuples; (node name, new group UUID)
  @param mods: Node membership modifications
  @raise errors.ConfigurationError: if a node, its current group or its
      new group cannot be found

  """
  groups = self._config_data.nodegroups
  nodes = self._config_data.nodes

  resmod = []

  # Try to resolve names/UUIDs first
  for (node_name, new_group_uuid) in mods:
    try:
      node = nodes[node_name]
    except KeyError:
      raise errors.ConfigurationError("Unable to find node '%s'" % node_name)

    if node.group == new_group_uuid:
      # Node is being assigned to its current group
      logging.debug("Node '%s' was assigned to its current group (%s)",
                    node_name, node.group)
      continue

    # Try to find current group of node
    try:
      old_group = groups[node.group]
    except KeyError:
      raise errors.ConfigurationError("Unable to find old group '%s'" %
                                      node.group)

    # Try to find new group for node
    try:
      new_group = groups[new_group_uuid]
    except KeyError:
      raise errors.ConfigurationError("Unable to find new group '%s'" %
                                      new_group_uuid)

    assert node.name in old_group.members, \
      ("Inconsistent configuration: node '%s' not listed in members for its"
       " old group '%s'" % (node.name, old_group.uuid))
    assert node.name not in new_group.members, \
      ("Inconsistent configuration: node '%s' already listed in members for"
       " its new group '%s'" % (node.name, new_group.uuid))

    # Remember the group the node is in right now; the apply phase below
    # must compare against this and not against the live attribute, which
    # an earlier entry for the same node may already have changed
    resmod.append((node, old_group, new_group, node.group))

  # Apply changes
  touched = []
  for (node, old_group, new_group, orig_group_uuid) in resmod:
    # Compare the node's group as it was when the entry was resolved (not
    # its own UUID, which never equals a group UUID and made the check a
    # no-op)
    assert (orig_group_uuid != new_group.uuid and
            old_group.uuid != new_group.uuid), \
      "Assigning to current group is not possible"

    node.group = new_group.uuid

    # Update members of involved groups
    if node.name in old_group.members:
      old_group.members.remove(node.name)
    if node.name not in new_group.members:
      new_group.members.append(node.name)

    touched.extend((node, old_group, new_group))

  # Update timestamps and serials (only once per node/group object)
  now = time.time()
  for obj in frozenset(touched):
    obj.serial_no += 1
    obj.mtime = now

  # Force ssconf update
  self._config_data.cluster.serial_no += 1

  self._WriteConfig()
1690 def _BumpSerialNo(self):
1691 """Bump up the serial number of the config.
1694 self._config_data.serial_no += 1
1695 self._config_data.mtime = time.time()
1697 def _AllUUIDObjects(self):
1698 """Returns all objects with uuid attributes.
1701 return (self._config_data.instances.values() +
1702 self._config_data.nodes.values() +
1703 self._config_data.nodegroups.values() +
1704 [self._config_data.cluster])
def _OpenConfig(self, accept_foreign):
  """Read the config data from disk.

  The file named by C{self._cfg_file} is read and deserialized, its
  version and basic completeness are checked, and both the object-level
  and the ConfigWriter-level upgrade steps are run before the file ID of
  the configuration file is recorded.

  @param accept_foreign: if false, refuse a configuration whose master
      node differs from C{self._my_hostname}
  @raise errors.ConfigurationError: if the data cannot be parsed, is
      incomplete, or names a different master while C{accept_foreign}
      is false

  """
  raw_data = utils.ReadFile(self._cfg_file)

  # Any failure while parsing is reported as a configuration error
  try:
    data = objects.ConfigData.FromDict(serializer.Load(raw_data))
  except Exception, err:
    raise errors.ConfigurationError(err)

  # Make sure the configuration has the right version
  _ValidateConfig(data)

  if (not hasattr(data, 'cluster') or
      not hasattr(data.cluster, 'rsahostkeypub')):
    raise errors.ConfigurationError("Incomplete configuration"
                                    " (missing cluster.rsahostkeypub)")

  if data.cluster.master_node != self._my_hostname and not accept_foreign:
    msg = ("The configuration denotes node %s as master, while my"
           " hostname is %s; opening a foreign configuration is only"
           " possible in accept_foreign mode" %
           (data.cluster.master_node, self._my_hostname))
    raise errors.ConfigurationError(msg)

  # Upgrade configuration if needed
  data.UpgradeConfig()

  self._config_data = data
  # reset the last serial as -1 so that the next write will cause
  # ssconf update
  self._last_cluster_serial = -1

  # And finally run our (custom) config upgrade sequence
  self._UpgradeConfig()

  # Remember the identity of the file we just loaded; _WriteConfig passes
  # it to utils.SafeWriteFile
  self._cfg_id = utils.GetFileID(path=self._cfg_file)
def _UpgradeConfig(self):
  """Run upgrade steps that cannot be done purely in the objects.

  This is because some data elements need uniqueness across the
  whole configuration, etc.

  The steps are: give a UUID to every object lacking one, create the
  initial node group if there is none, assign group-less nodes to the
  group returned by C{LookupNodeGroup(None)} and rebuild the member
  lists of all node groups. The configuration is only written if one of
  the first three steps changed something.

  @warning: this function will call L{_WriteConfig()}, but also
      L{DropECReservations} so it needs to be called only from a
      "safe" place (the constructor). If one wanted to call it with
      the lock held, a DropECReservationUnlocked would need to be
      created first, to avoid causing deadlock.

  """
  # Tracks whether anything that must be persisted has been changed
  modified = False
  for item in self._AllUUIDObjects():
    if item.uuid is None:
      item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
      modified = True
  if not self._config_data.nodegroups:
    default_nodegroup_name = constants.INITIAL_NODE_GROUP_NAME
    default_nodegroup = objects.NodeGroup(name=default_nodegroup_name,
                                          members=[])
    self._UnlockedAddNodeGroup(default_nodegroup, _UPGRADE_CONFIG_JID, True)
    modified = True
  for node in self._config_data.nodes.values():
    if not node.group:
      node.group = self.LookupNodeGroup(None)
      modified = True
    # This is technically *not* an upgrade, but needs to be done both when
    # nodegroups are being added, and upon normally loading the config,
    # because the members list of a node group is discarded upon
    # serializing/deserializing the object.
    self._UnlockedAddNodeToGroup(node.name, node.group)
  if modified:
    self._WriteConfig()
    # This is ok even if it acquires the internal lock, as _UpgradeConfig is
    # only called at config init time, without the lock held
    self.DropECReservations(_UPGRADE_CONFIG_JID)
1784 def _DistributeConfig(self, feedback_fn):
1785 """Distribute the configuration to the other nodes.
1787 Currently, this only copies the configuration file. In the future,
1788 it could be used to encapsulate the 2/3-phase update mechanism.
1798 myhostname = self._my_hostname
1799 # we can skip checking whether _UnlockedGetNodeInfo returns None
1800 # since the node list comes from _UnlocketGetNodeList, and we are
1801 # called with the lock held, so no modifications should take place
1803 for node_name in self._UnlockedGetNodeList():
1804 if node_name == myhostname:
1806 node_info = self._UnlockedGetNodeInfo(node_name)
1807 if not node_info.master_candidate:
1809 node_list.append(node_info.name)
1810 addr_list.append(node_info.primary_ip)
1812 result = rpc.RpcRunner.call_upload_file(node_list, self._cfg_file,
1813 address_list=addr_list)
1814 for to_node, to_result in result.items():
1815 msg = to_result.fail_msg
1817 msg = ("Copy of file %s to node %s failed: %s" %
1818 (self._cfg_file, to_node, msg))
def _WriteConfig(self, destination=None, feedback_fn=None):
  """Write the configuration data to persistent storage.

  Besides writing the file, this verifies the configuration (problems
  are only reported, never fatal), bumps the configuration serial
  number, distributes the file via L{_DistributeConfig} and, if the
  cluster serial number has grown since the last ssconf update, uploads
  new ssconf files unless the writer is offline.

  @param destination: file to write to; defaults to C{self._cfg_file}
  @param feedback_fn: None or a callable, called with each problem
      message
  @raise errors.ConfigurationError: if writing fails with
      L{errors.LockError}, i.e. the file was changed since it was last
      read or written by us

  """
  assert feedback_fn is None or callable(feedback_fn)

  # Warn on config errors, but don't abort the save - the
  # configuration has already been modified, and we can't revert;
  # the best we can do is to warn the user and save as is, leaving
  # recovery to the user
  config_errors = self._UnlockedVerifyConfig()
  if config_errors:
    errmsg = ("Configuration data is not consistent: %s" %
              (utils.CommaJoin(config_errors)))
    logging.critical(errmsg)
    if feedback_fn:
      feedback_fn(errmsg)

  if destination is None:
    destination = self._cfg_file
  self._BumpSerialNo()
  txt = serializer.Dump(self._config_data.ToDict())

  getents = self._getents()
  # The file is written with the previously recorded file ID; the
  # descriptor stays open (close=False) so the new ID can be read from it
  try:
    fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
                             close=False, gid=getents.confd_gid, mode=0640)
  except errors.LockError:
    raise errors.ConfigurationError("The configuration file has been"
                                    " modified since the last write, cannot"
                                    " update")
  try:
    self._cfg_id = utils.GetFileID(fd=fd)
  finally:
    # Never leak the descriptor, even if reading the file ID fails
    os.close(fd)

  self.write_count += 1

  # and redistribute the config file to master candidates
  # (the success indicator returned by _DistributeConfig is not used)
  self._DistributeConfig(feedback_fn)

  # Write ssconf files on all nodes (including locally)
  if self._last_cluster_serial < self._config_data.cluster.serial_no:
    if not self._offline:
      result = rpc.RpcRunner.call_write_ssconf_files(
        self._UnlockedGetOnlineNodeList(),
        self._UnlockedGetSsconfValues())

      for nname, nresu in result.items():
        msg = nresu.fail_msg
        if msg:
          errmsg = ("Error while uploading ssconf files to"
                    " node %s: %s" % (nname, msg))
          logging.warning(errmsg)

          if feedback_fn:
            feedback_fn(errmsg)

    # Recorded even in offline mode, so the update is not retried until
    # the cluster serial number grows again
    self._last_cluster_serial = self._config_data.cluster.serial_no
def _UnlockedGetSsconfValues(self):
  """Return the values needed by ssconf.

  Multi-valued entries (instance names, node names, tags, ...) are
  turned into a single string with one item per line.

  @rtype: dict
  @return: a dictionary with keys the ssconf names and values their
      associated value
  @raise errors.ConfigurationError: if any of the computed values is not
      a string

  """
  # Joins an iterable of strings into one newline-separated string
  fn = "\n".join
  instance_names = utils.NiceSort(self._UnlockedGetInstanceList())
  node_names = utils.NiceSort(self._UnlockedGetNodeList())
  node_info = [self._UnlockedGetNodeInfo(name) for name in node_names]
  # One "<name> <ip>" entry per node, in the same order as node_names
  node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
                  for ninfo in node_info]
  node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
                  for ninfo in node_info]

  instance_data = fn(instance_names)
  off_data = fn(node.name for node in node_info if node.offline)
  on_data = fn(node.name for node in node_info if not node.offline)
  mc_data = fn(node.name for node in node_info if node.master_candidate)
  mc_ips_data = fn(node.primary_ip for node in node_info
                   if node.master_candidate)
  node_data = fn(node_names)
  node_pri_ips_data = fn(node_pri_ips)
  node_snd_ips_data = fn(node_snd_ips)

  cluster = self._config_data.cluster
  cluster_tags = fn(cluster.GetTags())

  hypervisor_list = fn(cluster.enabled_hypervisors)

  uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")

  # One "<uuid> <name>" entry per node group
  nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in
                self._config_data.nodegroups.values()]
  nodegroups_data = fn(utils.NiceSort(nodegroups))

  ssconf_values = {
    constants.SS_CLUSTER_NAME: cluster.cluster_name,
    constants.SS_CLUSTER_TAGS: cluster_tags,
    constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
    constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
    constants.SS_MASTER_CANDIDATES: mc_data,
    constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
    constants.SS_MASTER_IP: cluster.master_ip,
    constants.SS_MASTER_NETDEV: cluster.master_netdev,
    constants.SS_MASTER_NODE: cluster.master_node,
    constants.SS_NODE_LIST: node_data,
    constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
    constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
    constants.SS_OFFLINE_NODES: off_data,
    constants.SS_ONLINE_NODES: on_data,
    constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
    constants.SS_INSTANCE_LIST: instance_data,
    constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
    constants.SS_HYPERVISOR_LIST: hypervisor_list,
    constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
    constants.SS_UID_POOL: uid_pool,
    constants.SS_NODEGROUPS: nodegroups_data,
    }
  # Refuse to hand out anything that is not a string (e.g. a cluster
  # attribute that is unset)
  bad_values = [(k, v) for k, v in ssconf_values.items()
                if not isinstance(v, (str, basestring))]
  if bad_values:
    err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
    raise errors.ConfigurationError("Some ssconf key(s) have non-string"
                                    " values: %s" % err)
  return ssconf_values
@locking.ssynchronized(_config_lock, shared=1)
def GetSsconfValues(self):
  """Locked wrapper around L{_UnlockedGetSsconfValues}.

  @return: the dictionary returned by L{_UnlockedGetSsconfValues}

  """
  values = self._UnlockedGetSsconfValues()
  return values
@locking.ssynchronized(_config_lock, shared=1)
def GetVGName(self):
  """Return the volume group name stored in the cluster object.

  @return: the cluster's C{volume_group_name} attribute

  """
  cluster = self._config_data.cluster
  return cluster.volume_group_name
@locking.ssynchronized(_config_lock)
def SetVGName(self, vg_name):
  """Store a new volume group name in the cluster object.

  The cluster serial number is increased and the configuration written.

  @param vg_name: new value for the cluster's C{volume_group_name}

  """
  cluster = self._config_data.cluster
  cluster.volume_group_name = vg_name
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetDRBDHelper(self):
  """Return the DRBD usermode helper stored in the cluster object.

  @return: the cluster's C{drbd_usermode_helper} attribute

  """
  cluster = self._config_data.cluster
  return cluster.drbd_usermode_helper
@locking.ssynchronized(_config_lock)
def SetDRBDHelper(self, drbd_helper):
  """Store a new DRBD usermode helper in the cluster object.

  The cluster serial number is increased and the configuration written.

  @param drbd_helper: new value for the cluster's
      C{drbd_usermode_helper}

  """
  cluster = self._config_data.cluster
  cluster.drbd_usermode_helper = drbd_helper
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetMACPrefix(self):
  """Return the MAC prefix stored in the cluster object.

  @return: the cluster's C{mac_prefix} attribute

  """
  cluster = self._config_data.cluster
  return cluster.mac_prefix
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterInfo(self):
  """Return the cluster object of the configuration.

  @rtype: L{objects.Cluster}
  @return: the cluster object itself (not a copy)

  """
  config = self._config_data
  return config.cluster
@locking.ssynchronized(_config_lock, shared=1)
def HasAnyDiskOfType(self, dev_type):
  """Check whether the configuration has any disk of the given type.

  @param dev_type: the disk type to look for
  @return: the result of the configuration object's own
      C{HasAnyDiskOfType} method

  """
  config = self._config_data
  return config.HasAnyDiskOfType(dev_type)
@locking.ssynchronized(_config_lock)
def Update(self, target, feedback_fn):
  """Save a modified configuration object.

  This function must be called when an object (as returned by
  GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
  caller wants the modifications saved to the backing store. Note
  that all modified objects will be saved, but the target argument
  is the one the caller wants to ensure that it's saved.

  The target's serial number and modification time are updated; for
  nodes the same is done for the cluster object.

  @param target: an instance of either L{objects.Cluster},
      L{objects.Node}, L{objects.Instance} or L{objects.NodeGroup}
      which is existing in the cluster
  @param feedback_fn: Callable feedback function
  @raise errors.ProgrammerError: if no configuration is loaded or the
      target has an unsupported type
  @raise errors.ConfigurationError: if the target is not part of the
      current configuration

  """
  config = self._config_data
  if config is None:
    raise errors.ProgrammerError("Configuration file not read,"
                                 " cannot save.")

  is_node = False
  if isinstance(target, objects.Cluster):
    known = (target == config.cluster)
  elif isinstance(target, objects.Node):
    known = target in config.nodes.values()
    is_node = True
  elif isinstance(target, objects.Instance):
    known = target in config.instances.values()
  elif isinstance(target, objects.NodeGroup):
    known = target in config.nodegroups.values()
  else:
    raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                 " ConfigWriter.Update" % type(target))

  if not known:
    raise errors.ConfigurationError("Configuration updated since object"
                                    " has been read or unknown object")

  target.serial_no += 1
  now = time.time()
  target.mtime = now

  if is_node:
    # for node updates, we need to increase the cluster serial too
    config.cluster.serial_no += 1
    config.cluster.mtime = now

  if isinstance(target, objects.Instance):
    self._UnlockedReleaseDRBDMinors(target.name)

  self._WriteConfig(feedback_fn=feedback_fn)
2068 @locking.ssynchronized(_config_lock)
2069 def DropECReservations(self, ec_id):
2070 """Drop per-execution-context reservations
2073 for rm in self._all_rms:
2074 rm.DropECReservations(ec_id)