4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
29 Currently, the data storage format is JSON. YAML was slow and consuming too
34 # pylint: disable=R0904
35 # R0904: Too many public methods
43 from ganeti import errors
44 from ganeti import locking
45 from ganeti import utils
46 from ganeti import constants
47 from ganeti import rpc
48 from ganeti import objects
49 from ganeti import serializer
50 from ganeti import uidpool
51 from ganeti import netutils
52 from ganeti import runtime
55 _config_lock = locking.SharedLock("ConfigWriter")
57 # job id used for resource management at config upgrade time
58 _UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
def _ValidateConfig(data):
  """Check that a configuration object carries the expected version.

  Only the C{version} attribute is inspected; nothing else about the
  configuration is validated here.

  @param data: configuration object exposing a C{version} attribute
  @raise errors.ConfigVersionMismatch: if C{data.version} differs from
      L{constants.CONFIG_VERSION}

  """
  found = data.version
  expected = constants.CONFIG_VERSION
  if found == expected:
    return
  raise errors.ConfigVersionMismatch(expected, found)
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  Reservations are kept per holder (execution context id) in a dictionary
  mapping the holder to the set of resources it reserved.

  """
  # Upper bound on attempts made by Generate() before giving up.
  # NOTE(review): the limit used by the original code is not visible in the
  # reviewed listing; 64 is the reviewer's choice -- confirm.
  _MAX_GENERATE_RETRIES = 64

  def __init__(self):
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Tell whether any holder currently has C{resource} reserved.

    @rtype: boolean

    """
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve C{resource} on behalf of holder C{ec_id}.

    @raise errors.ReservationError: if the resource is already reserved by
        any holder (including C{ec_id} itself)

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    self._ec_reserved.setdefault(ec_id, set()).add(resource)

  def DropECReservations(self, ec_id):
    """Forget every reservation held by C{ec_id}; unknown ids are ignored.

    """
    self._ec_reserved.pop(ec_id, None)

  def GetReserved(self):
    """Return a new set with the resources reserved by all holders.

    @rtype: set

    """
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type and reserve it for C{ec_id}.

    @param existing: iterable of resources that are already in use
    @param generate_one_fn: callable without arguments returning a candidate
        resource; C{None} results are treated as failed attempts
    @param ec_id: the holder to reserve the new resource for
    @return: the newly reserved resource
    @raise errors.ConfigurationError: if no unused resource was produced
        within the retry limit

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)

    new_resource = None
    # for/else: the else branch only runs when no attempt hit "break", so
    # the number of attempts is bounded by construction
    for _ in range(self._MAX_GENERATE_RETRIES):
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource
def _MatchNameComponentIgnoreCase(short_name, names):
  """Case-insensitive front-end for L{utils.text.MatchNameComponent}.

  @param short_name: the name to look up, forwarded unchanged
  @param names: the candidate names, forwarded unchanged
  @return: whatever L{utils.MatchNameComponent} returns when called with
      C{case_sensitive=False}

  """
  match = utils.MatchNameComponent(short_name, names, case_sensitive=False)
  return match
137 """The interface to the cluster configuration.
139 @ivar _temporary_lvs: reservation manager for temporary LVs
140 @ivar _all_rms: a list of all temporary reservation managers
# Constructor: stores the shared module-level lock, the offline flag and the
# entity resolver, creates the temporary reservation managers, caches the
# local hostname and finally opens the configuration through
# _OpenConfig(accept_foreign).
# NOTE(review): this listing is incomplete -- its embedded numbering skips
# 145, 149, 151, 167 and 168. Both assignments to self._cfg_file are visible,
# so the branch choosing between the default path and the cfg_file argument
# is presumably on skipped lines; what the remaining skipped lines do cannot
# be told from here. Confirm against the complete file.
143 def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
144 accept_foreign=False):
146 self._lock = _config_lock
147 self._config_data = None
148 self._offline = offline
150 self._cfg_file = constants.CLUSTER_CONF_FILE
152 self._cfg_file = cfg_file
153 self._getents = _getents
154 self._temporary_ids = TemporaryReservationManager()
# keyed by (node name, minor) with the instance name as value, see
# AllocateDRBDMinor; a plain dict, so it is not part of _all_rms below
155 self._temporary_drbds = {}
156 self._temporary_macs = TemporaryReservationManager()
157 self._temporary_secrets = TemporaryReservationManager()
158 self._temporary_lvs = TemporaryReservationManager()
159 self._all_rms = [self._temporary_ids, self._temporary_macs,
160 self._temporary_secrets, self._temporary_lvs]
161 # Note: in order to prevent errors when resolving our name in
162 # _DistributeConfig, we compute it here once and reuse it; it's
163 # better to raise an error before starting to modify the config
164 # file than after it was modified
165 self._my_hostname = netutils.Hostname.GetSysName()
166 self._last_cluster_serial = -1
169 self._OpenConfig(accept_foreign)
def _GetRpc(self, address_list):
  """Build the RPC runner used for configuration operations.

  @param address_list: passed through to L{rpc.ConfigRunner}
  @return: a new L{rpc.ConfigRunner} created with the stored context and
      the given address list

  """
  runner = rpc.ConfigRunner(self._context, address_list)
  return runner
def SetContext(self, context):
  """Remember the Ganeti context on this object.

  @param context: the context object; it is stored as C{self._context},
      which is what L{_GetRpc} passes on to the RPC runner

  """
  self._context = context
183 # this method needs to be static, so that we can call it on the class
186 """Check if the cluster is configured.
189 return os.path.exists(constants.CLUSTER_CONF_FILE)
191 def _GenerateOneMAC(self):
192 """Generate one mac address
195 prefix = self._config_data.cluster.mac_prefix
196 byte1 = random.randrange(0, 256)
197 byte2 = random.randrange(0, 256)
198 byte3 = random.randrange(0, 256)
199 mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
@locking.ssynchronized(_config_lock, shared=1)
def GetNdParams(self, node):
  """Return the parameters of a node with defaults filled in.

  @type node: L{objects.Node}
  @param node: the node whose parameters are wanted
  @return: the result of the cluster's C{FillND} for the node and the node
      group found under C{node.group}

  """
  group = self._UnlockedGetNodeGroup(node.group)
  cluster = self._config_data.cluster
  return cluster.FillND(node, group)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateMAC(self, ec_id):
  """Generate a MAC for an instance and reserve it for C{ec_id}.

  Candidates come from L{_GenerateOneMAC}; they are rejected if they are
  already used by an instance in the configuration or are temporarily
  reserved.

  @param ec_id: the execution context to reserve the MAC for
  @return: the generated (and reserved) MAC address

  """
  existing = self._AllMACs()
  # Book into the MAC reservation pool, the same one ReserveMAC uses, so
  # that generated and explicitly reserved addresses exclude each other.
  return self._temporary_macs.Generate(existing, self._GenerateOneMAC, ec_id)
# Reserves an explicitly given MAC address for the job identified by ec_id,
# using the self._temporary_macs reservation manager.
# NOTE(review): this listing is incomplete -- its embedded numbering skips
# 233 and 235. The ReservationError is raised right after all_macs is
# computed, so the condition guarding it (and possibly the branch leading to
# the Reserve call) is presumably on the skipped lines; compare the sibling
# ReserveLV and confirm against the complete file.
224 @locking.ssynchronized(_config_lock, shared=1)
225 def ReserveMAC(self, mac, ec_id):
226 """Reserve a MAC for an instance.
228 This only checks instances managed by this cluster, it does not
229 check for potential collisions elsewhere.
232 all_macs = self._AllMACs()
234 raise errors.ReservationError("mac already in use")
236 self._temporary_macs.Reserve(ec_id, mac)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveLV(self, lv_name, ec_id):
  """Reserve a logical volume name on behalf of a job.

  @type lv_name: string
  @param lv_name: the logical volume name to reserve
  @param ec_id: the execution context the reservation is booked for
  @raise errors.ReservationError: if the name is already among the LVs
      computed by L{_AllLVs}

  """
  if lv_name in self._AllLVs():
    raise errors.ReservationError("LV already in use")
  self._temporary_lvs.Reserve(ec_id, lv_name)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateDRBDSecret(self, ec_id):
  """Generate a DRBD secret and reserve it for C{ec_id}.

  Candidates are produced by L{utils.GenerateSecret} and rejected if they
  match a secret returned by L{_AllDRBDSecrets} or one that is temporarily
  reserved.

  @param ec_id: the execution context to reserve the secret for
  @return: the generated (and reserved) secret

  """
  known_secrets = self._AllDRBDSecrets()
  return self._temporary_secrets.Generate(known_secrets,
                                          utils.GenerateSecret,
                                          ec_id)
# NOTE(review): the "def" line of this method is not part of the listing
# (embedded number 263 is skipped); callers elsewhere in the file use
# self._AllLVs(), which presumably is this body -- confirm. Numbers 265-267
# and 272 are skipped as well, so the initialisation of "lvnames" and the
# statement handing it back are not visible.
# Visible behaviour: for every instance in the configuration, the LV lists
# returned by instance.MapLVsByNode() are merged into "lvnames".
264 """Compute the list of all LVs.
268 for instance in self._config_data.instances.values():
269 node_data = instance.MapLVsByNode()
270 for lv_list in node_data.values():
271 lvnames.update(lv_list)
274 def _AllIDs(self, include_temporary):
275 """Compute the list of all UUIDs and names we have.
277 @type include_temporary: boolean
278 @param include_temporary: whether to include the _temporary_ids set
280 @return: a set of IDs
284 if include_temporary:
285 existing.update(self._temporary_ids.GetReserved())
286 existing.update(self._AllLVs())
287 existing.update(self._config_data.instances.keys())
288 existing.update(self._config_data.nodes.keys())
289 existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
def _GenerateUniqueID(self, ec_id):
  """Generate a unique UUID and reserve it for C{ec_id}.

  Candidates come from L{utils.NewUUID} and are checked against the IDs
  returned by L{_AllIDs} (temporary IDs excluded there, since the
  reservation manager checks its own reservations itself).

  @param ec_id: the execution context to reserve the ID for
  @return: the unique id

  """
  known_ids = self._AllIDs(include_temporary=False)
  reservations = self._temporary_ids
  return reservations.Generate(known_ids, utils.NewUUID, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateUniqueID(self, ec_id):
  """Generate a unique ID.

  Wrapper, decorated with the shared config lock, around
  L{_GenerateUniqueID}.

  @param ec_id: unique id for the job to reserve the id to
  @return: the value returned by L{_GenerateUniqueID}

  """
  unique_id = self._GenerateUniqueID(ec_id)
  return unique_id
# NOTE(review): the "def" line of this method is not part of the listing
# (embedded number 317 is skipped); callers elsewhere in the file use
# self._AllMACs(), which presumably is this body -- confirm. Numbers 319-324
# and 328 are skipped as well, so the initialisation of "result" and the
# statement handing it back are not visible.
# Visible behaviour: the MAC of every NIC of every instance in the
# configuration is appended to "result".
318 """Return all MACs present in the config.
321 @return: the list of all MACs
325 for instance in self._config_data.instances.values():
326 for nic in instance.nics:
327 result.append(nic.mac)
# Collects the DRBD secrets of all instance disks: the nested helper walks a
# disk and its children and, for DT_DRBD8 devices, appends element 5 of the
# disk's logical_id to the result list.
# NOTE(review): this listing is incomplete -- its embedded numbering skips
# 342, 346 and 349-351 (besides docstring lines). The initialisation of
# "result", the body of the innermost loop (presumably the call to helper)
# and the final return are therefore not visible, nor is whatever precedes
# the loop over disk.children. Confirm against the complete file.
331 def _AllDRBDSecrets(self):
332 """Return all DRBD secrets present in the config.
335 @return: the list of all DRBD secrets
338 def helper(disk, result):
339 """Recursively gather secrets from this disk."""
340 if disk.dev_type == constants.DT_DRBD8:
341 result.append(disk.logical_id[5])
343 for child in disk.children:
344 helper(child, result)
347 for instance in self._config_data.instances.values():
348 for disk in instance.disks:
# Recursively checks a disk and its children for logical/physical IDs that
# were already seen. l_ids and p_ids are modified in place (IDs are appended
# to them), so they accumulate state across calls made with the same lists.
# NOTE(review): this listing is incomplete -- its embedded numbering skips
# 366, 370, 375, 378 and 381 (besides docstring lines). The initialisation
# and return of "result" and the branch structure between each "duplicate"
# message and the following append are not visible; presumably the append
# only happens for IDs not seen before -- confirm against the complete file.
353 def _CheckDiskIDs(self, disk, l_ids, p_ids):
354 """Compute duplicate disk IDs
356 @type disk: L{objects.Disk}
357 @param disk: the disk at which to start searching
359 @param l_ids: list of current logical ids
361 @param p_ids: list of current physical ids
363 @return: a list of error messages
367 if disk.logical_id is not None:
368 if disk.logical_id in l_ids:
369 result.append("duplicate logical id %s" % str(disk.logical_id))
371 l_ids.append(disk.logical_id)
372 if disk.physical_id is not None:
373 if disk.physical_id in p_ids:
374 result.append("duplicate physical id %s" % str(disk.physical_id))
376 p_ids.append(disk.physical_id)
379 for child in disk.children:
380 result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
# Consistency check of the whole in-memory configuration. Problems are not
# raised but collected as human-readable strings in "result". The visible
# sections check, in order: cluster-wide hypervisor settings and parameter
# types, every instance (indexing, nodes, NIC MACs and parameters, beparams,
# DRBD/network ports, disks), the free port pool and duplicate ports, the
# master node and master candidates, every node, every node group, DRBD
# minors and finally IP address clashes.
# NOTE(review): this listing is incomplete -- its embedded numbering has
# many gaps. Among other things the initialisation of "result",
# "seen_macs", "seen_lids", "seen_pids", "ports", "ips" and "keys", several
# "if"/"else"/"try" lines and the final return are not visible. Do not
# infer the exact control flow from this view; confirm against the complete
# file.
383 def _UnlockedVerifyConfig(self):
387 @return: a list of error messages; a non-empty list signifies
391 # pylint: disable=R0914
395 data = self._config_data
396 cluster = data.cluster
400 # global cluster checks
401 if not cluster.enabled_hypervisors:
402 result.append("enabled hypervisors list doesn't have any entries")
403 invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
405 result.append("enabled hypervisors contains invalid entries: %s" %
407 missing_hvp = (set(cluster.enabled_hypervisors) -
408 set(cluster.hvparams.keys()))
410 result.append("hypervisor parameters missing for the enabled"
411 " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
413 if cluster.master_node not in data.nodes:
414 result.append("cluster has invalid primary node '%s'" %
# The nested helpers below turn validation exceptions into entries of
# "result", prefixed with the owner they were checking.
417 def _helper(owner, attr, value, template):
419 utils.ForceDictType(value, template)
420 except errors.GenericError, err:
421 result.append("%s has invalid %s: %s" % (owner, attr, err))
423 def _helper_nic(owner, params):
425 objects.NIC.CheckParameterSyntax(params)
426 except errors.ConfigurationError, err:
427 result.append("%s has invalid nicparams: %s" % (owner, err))
429 def _helper_ipolicy(owner, params):
431 objects.InstancePolicy.CheckParameterSyntax(params)
432 except errors.ConfigurationError, err:
433 result.append("%s has invalid instance policy: %s" % (owner, err))
435 def _helper_ispecs(owner, params):
436 for key, value in params.iteritems():
437 fullkey = "ipolicy/" + key
438 _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)
440 # check cluster parameters
441 _helper("cluster", "beparams", cluster.SimpleFillBE({}),
442 constants.BES_PARAMETER_TYPES)
443 _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
444 constants.NICS_PARAMETER_TYPES)
445 _helper_nic("cluster", cluster.SimpleFillNIC({}))
446 _helper("cluster", "ndparams", cluster.SimpleFillND({}),
447 constants.NDS_PARAMETER_TYPES)
448 _helper_ipolicy("cluster", cluster.SimpleFillIPolicy({}))
449 _helper_ispecs("cluster", cluster.SimpleFillIPolicy({}))
451 # per-instance checks
452 for instance_name in data.instances:
453 instance = data.instances[instance_name]
454 if instance.name != instance_name:
455 result.append("instance '%s' is indexed by wrong name '%s'" %
456 (instance.name, instance_name))
457 if instance.primary_node not in data.nodes:
458 result.append("instance '%s' has invalid primary node '%s'" %
459 (instance_name, instance.primary_node))
460 for snode in instance.secondary_nodes:
461 if snode not in data.nodes:
462 result.append("instance '%s' has invalid secondary node '%s'" %
463 (instance_name, snode))
464 for idx, nic in enumerate(instance.nics):
465 if nic.mac in seen_macs:
466 result.append("instance '%s' has NIC %d mac %s duplicate" %
467 (instance_name, idx, nic.mac))
469 seen_macs.append(nic.mac)
471 filled = cluster.SimpleFillNIC(nic.nicparams)
472 owner = "instance %s nic %d" % (instance.name, idx)
473 _helper(owner, "nicparams",
474 filled, constants.NICS_PARAMETER_TYPES)
475 _helper_nic(owner, filled)
478 if instance.beparams:
479 _helper("instance %s" % instance.name, "beparams",
480 cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)
482 # gather the drbd ports for duplicate checks
483 for dsk in instance.disks:
484 if dsk.dev_type in constants.LDS_DRBD:
485 tcp_port = dsk.logical_id[2]
486 if tcp_port not in ports:
488 ports[tcp_port].append((instance.name, "drbd disk %s" % dsk.iv_name))
489 # gather network port reservation
490 net_port = getattr(instance, "network_port", None)
491 if net_port is not None:
492 if net_port not in ports:
494 ports[net_port].append((instance.name, "network port"))
496 # instance disk verify
497 for idx, disk in enumerate(instance.disks):
498 result.extend(["instance '%s' disk %d error: %s" %
499 (instance.name, idx, msg) for msg in disk.Verify()])
500 result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))
502 # cluster-wide pool of free ports
503 for free_port in cluster.tcpudp_port_pool:
504 if free_port not in ports:
505 ports[free_port] = []
506 ports[free_port].append(("cluster", "port marked as free"))
508 # compute tcp/udp duplicate ports
514 txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
515 result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
517 # highest used tcp port check
519 if keys[-1] > cluster.highest_used_port:
520 result.append("Highest used port mismatch, saved %s, computed %s" %
521 (cluster.highest_used_port, keys[-1]))
523 if not data.nodes[cluster.master_node].master_candidate:
524 result.append("Master node is not a master candidate")
526 # master candidate checks
527 mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
529 result.append("Not enough master candidates: actual %d, target %d" %
533 for node_name, node in data.nodes.items():
534 if node.name != node_name:
535 result.append("Node '%s' is indexed by wrong name '%s'" %
536 (node.name, node_name))
# at most one of the three state flags may be set for a node
537 if [node.master_candidate, node.drained, node.offline].count(True) > 1:
538 result.append("Node %s state is invalid: master_candidate=%s,"
539 " drain=%s, offline=%s" %
540 (node.name, node.master_candidate, node.drained,
542 if node.group not in data.nodegroups:
543 result.append("Node '%s' has invalid group '%s'" %
544 (node.name, node.group))
546 _helper("node %s" % node.name, "ndparams",
547 cluster.FillND(node, data.nodegroups[node.group]),
548 constants.NDS_PARAMETER_TYPES)
551 nodegroups_names = set()
552 for nodegroup_uuid in data.nodegroups:
553 nodegroup = data.nodegroups[nodegroup_uuid]
554 if nodegroup.uuid != nodegroup_uuid:
555 result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
556 % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
557 if utils.UUID_RE.match(nodegroup.name.lower()):
558 result.append("node group '%s' (uuid: '%s') has uuid-like name" %
559 (nodegroup.name, nodegroup.uuid))
560 if nodegroup.name in nodegroups_names:
561 result.append("duplicate node group name '%s'" % nodegroup.name)
563 nodegroups_names.add(nodegroup.name)
564 group_name = "group %s" % nodegroup.name
565 _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy))
566 _helper_ispecs(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy))
567 if nodegroup.ndparams:
568 _helper(group_name, "ndparams",
569 cluster.SimpleFillND(nodegroup.ndparams),
570 constants.NDS_PARAMETER_TYPES)
573 _, duplicates = self._UnlockedComputeDRBDMap()
574 for node, minor, instance_a, instance_b in duplicates:
575 result.append("DRBD minor %d on node %s is assigned twice to instances"
576 " %s and %s" % (minor, node, instance_a, instance_b))
579 default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
# "ips" maps an address key to the list of its owners
582 def _AddIpAddress(ip, name):
583 ips.setdefault(ip, []).append(name)
585 _AddIpAddress(cluster.master_ip, "cluster_ip")
587 for node in data.nodes.values():
588 _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
589 if node.secondary_ip != node.primary_ip:
590 _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
592 for instance in data.instances.values():
593 for idx, nic in enumerate(instance.nics):
597 nicparams = objects.FillDict(default_nicparams, nic.nicparams)
598 nic_mode = nicparams[constants.NIC_MODE]
599 nic_link = nicparams[constants.NIC_LINK]
601 if nic_mode == constants.NIC_MODE_BRIDGED:
602 link = "bridge:%s" % nic_link
603 elif nic_mode == constants.NIC_MODE_ROUTED:
604 link = "route:%s" % nic_link
606 raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
# instance addresses are registered together with their link, so the key
# is "<link>/<ip>" rather than the bare address
608 _AddIpAddress("%s/%s" % (link, nic.ip),
609 "instance:%s/nic:%d" % (instance.name, idx))
611 for ip, owners in ips.items():
613 result.append("IP address %s is used by multiple owners: %s" %
614 (ip, utils.CommaJoin(owners)))
@locking.ssynchronized(_config_lock, shared=1)
def VerifyConfig(self):
  """Verify the configuration.

  Wrapper, decorated with the shared config lock, around
  L{_UnlockedVerifyConfig}.

  @return: the list of error messages produced by
      L{_UnlockedVerifyConfig}; a non-empty list signifies problems

  """
  problems = self._UnlockedVerifyConfig()
  return problems
# Fills in disk.physical_id for use on node_name, recursing into the disk's
# children first. For LD_DRBD8 disks the physical id is built from the
# secondary IPs of both nodes, the port, the minor belonging to node_name
# and the secret, with the local node's address pair first.
# NOTE(review): this listing is incomplete -- its embedded numbering skips
# 643, 648, 653, 665 and 667 (besides docstring lines). What happens when
# logical_id is None but physical_id is set, and the branch leading to the
# plain "physical_id = logical_id" assignment, are not visible; presumably
# the former returns early and the latter is the non-DRBD case -- confirm
# against the complete file.
631 def _UnlockedSetDiskID(self, disk, node_name):
632 """Convert the unique ID to the ID needed on the target nodes.
634 This is used only for drbd, which needs ip/port configuration.
636 The routine descends down and updates its children also, because
637 this helps when the only the top device is passed to the remote
640 This function is for internal use, when the config lock is already held.
644 for child in disk.children:
645 self._UnlockedSetDiskID(child, node_name)
647 if disk.logical_id is None and disk.physical_id is not None:
649 if disk.dev_type == constants.LD_DRBD8:
650 pnode, snode, port, pminor, sminor, secret = disk.logical_id
651 if node_name not in (pnode, snode):
652 raise errors.ConfigurationError("DRBD device not knowing node %s" %
654 pnode_info = self._UnlockedGetNodeInfo(pnode)
655 snode_info = self._UnlockedGetNodeInfo(snode)
656 if pnode_info is None or snode_info is None:
657 raise errors.ConfigurationError("Can't find primary or secondary node"
658 " for %s" % str(disk))
659 p_data = (pnode_info.secondary_ip, port)
660 s_data = (snode_info.secondary_ip, port)
661 if pnode == node_name:
662 disk.physical_id = p_data + s_data + (pminor, secret)
663 else: # it must be secondary, we tested above
664 disk.physical_id = s_data + p_data + (sminor, secret)
666 disk.physical_id = disk.logical_id
@locking.ssynchronized(_config_lock)
def SetDiskID(self, disk, node_name):
  """Convert the unique ID to the ID needed on the target nodes.

  Wrapper, decorated with the config lock, around L{_UnlockedSetDiskID},
  which updates the disk and its children.

  @param disk: the disk to update
  @param node_name: the node the physical ID is computed for
  @return: the value returned by L{_UnlockedSetDiskID}

  """
  outcome = self._UnlockedSetDiskID(disk, node_name)
  return outcome
# Adds a port number to the cluster's pool of available TCP/UDP ports;
# anything that is not an int is rejected with ProgrammerError.
# NOTE(review): this listing is incomplete -- embedded number 691, right
# after the pool update, is skipped, so a trailing statement may be missing
# from this view (e.g. whatever makes the change persistent). Confirm
# against the complete file.
682 @locking.ssynchronized(_config_lock)
683 def AddTcpUdpPort(self, port):
684 """Adds a new port to the available port pool.
687 if not isinstance(port, int):
688 raise errors.ProgrammerError("Invalid type passed for port")
690 self._config_data.cluster.tcpudp_port_pool.add(port)
@locking.ssynchronized(_config_lock, shared=1)
def GetPortList(self):
  """Return a copy of the cluster's pool of available ports.

  @return: the result of C{copy()} on the cluster's C{tcpudp_port_pool}

  """
  pool = self._config_data.cluster.tcpudp_port_pool
  return pool.copy()
# Hands out a port: either one popped from the cluster's free port pool or
# the one following cluster.highest_used_port, which is then recorded as the
# new highest used port. A computed port at or above
# constants.LAST_DRBD_PORT is refused with ConfigurationError.
# NOTE(review): this listing is incomplete -- its embedded numbering skips
# 712 and 719-721 (besides docstring lines). The branch separating the pool
# case from the "highest_used_port + 1" case and everything after the
# highest_used_port update (including the return) are not visible;
# presumably the second path is only taken when the pool is empty -- confirm
# against the complete file.
700 @locking.ssynchronized(_config_lock)
701 def AllocatePort(self):
704 The port will be taken from the available port pool or from the
705 default port range (and in this case we increase
709 # If there are TCP/IP ports configured, we use them first.
710 if self._config_data.cluster.tcpudp_port_pool:
711 port = self._config_data.cluster.tcpudp_port_pool.pop()
713 port = self._config_data.cluster.highest_used_port + 1
714 if port >= constants.LAST_DRBD_PORT:
715 raise errors.ConfigurationError("The highest used port is greater"
716 " than %s. Aborting." %
717 constants.LAST_DRBD_PORT)
718 self._config_data.cluster.highest_used_port = port
# Builds, for every node in the configuration, a dict mapping DRBD minor to
# instance name, from the instance disks and from the temporary reservations
# in self._temporary_drbds, and collects conflicting assignments as
# (node, minor, instance, other instance) tuples. Both are returned as the
# pair (my_dict, duplicates).
# NOTE(review): this listing is incomplete -- its embedded numbering skips
# 735, 743, 745, 748-750 and 758 (besides docstring lines). The
# initialisation of "duplicates" (in the nested helper and in the method),
# the helper's return and the branches separating "record duplicate" from
# "store minor" are not visible; presumably a minor is only stored when it
# is not a duplicate -- confirm against the complete file.
723 def _UnlockedComputeDRBDMap(self):
724 """Compute the used DRBD minor/nodes.
727 @return: dictionary of node_name: dict of minor: instance_name;
728 the returned dict will have all the nodes in it (even if with
729 an empty list), and a list of duplicates; if the duplicates
730 list is not empty, the configuration is corrupted and its caller
731 should raise an exception
# note: despite the names, "port" in this helper holds a DRBD minor
734 def _AppendUsedPorts(instance_name, disk, used):
736 if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
737 node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
738 for node, port in ((node_a, minor_a), (node_b, minor_b)):
739 assert node in used, ("Node '%s' of instance '%s' not found"
740 " in node list" % (node, instance_name))
741 if port in used[node]:
742 duplicates.append((node, port, instance_name, used[node][port]))
744 used[node][port] = instance_name
746 for child in disk.children:
747 duplicates.extend(_AppendUsedPorts(instance_name, child, used))
751 my_dict = dict((node, {}) for node in self._config_data.nodes)
752 for instance in self._config_data.instances.itervalues():
753 for disk in instance.disks:
754 duplicates.extend(_AppendUsedPorts(instance.name, disk, my_dict))
755 for (node, minor), instance in self._temporary_drbds.iteritems():
756 if minor in my_dict[node] and my_dict[node][minor] != instance:
757 duplicates.append((node, minor, instance, my_dict[node][minor]))
759 my_dict[node][minor] = instance
760 return my_dict, duplicates
# Locked wrapper over _UnlockedComputeDRBDMap that reports duplicate minor
# assignments as a ConfigurationError.
# NOTE(review): this listing is incomplete -- its embedded numbering skips
# 774 and 776-777 (besides docstring lines). The condition guarding the
# raise, the rest of the error message expression and the return are not
# visible; presumably the error is raised only when "duplicates" is
# non-empty and d_map is returned otherwise -- confirm against the complete
# file.
762 @locking.ssynchronized(_config_lock)
763 def ComputeDRBDMap(self):
764 """Compute the used DRBD minor/nodes.
766 This is just a wrapper over L{_UnlockedComputeDRBDMap}.
768 @return: dictionary of node_name: dict of minor: instance_name;
769 the returned dict will have all the nodes in it (even if with
773 d_map, duplicates = self._UnlockedComputeDRBDMap()
775 raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
# Allocates DRBD minors for an instance on the given nodes and records them
# in self._temporary_drbds under the key (node name, minor), so they stay
# reserved until released.
# NOTE(review): this listing is incomplete -- its embedded numbering has
# many gaps (796, 798-802, 804-805, 807-809, 811, 814-816, 830, 832-834).
# The loop over "nodes", the definitions of "ndata", "keys", "minor" and
# "nname", the handling of the FirstFree result and the return are not
# visible; do not infer the allocation algorithm from this view, confirm
# against the complete file.
# NOTE(review): the uniqueness "double-checks" below are asserts and would
# be skipped when Python runs with -O.
779 @locking.ssynchronized(_config_lock)
780 def AllocateDRBDMinor(self, nodes, instance):
781 """Allocate a drbd minor.
783 The free minor will be automatically computed from the existing
784 devices. A node can be given multiple times in order to allocate
785 multiple minors. The result is the list of minors, in the same
786 order as the passed nodes.
788 @type instance: string
789 @param instance: the instance for which we allocate minors
792 assert isinstance(instance, basestring), \
793 "Invalid argument '%s' passed to AllocateDRBDMinor" % instance
795 d_map, duplicates = self._UnlockedComputeDRBDMap()
797 raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
803 # no minors used, we can start at 0
806 self._temporary_drbds[(nname, 0)] = instance
810 ffree = utils.FirstFree(keys)
812 # return the next minor
813 # TODO: implement high-limit check
817 # double-check minor against current instances
818 assert minor not in d_map[nname], \
819 ("Attempt to reuse allocated DRBD minor %d on node %s,"
820 " already allocated to instance %s" %
821 (minor, nname, d_map[nname][minor]))
822 ndata[minor] = instance
823 # double-check minor against reservation
824 r_key = (nname, minor)
825 assert r_key not in self._temporary_drbds, \
826 ("Attempt to reuse reserved DRBD minor %d on node %s,"
827 " reserved for instance %s" %
828 (minor, nname, self._temporary_drbds[r_key]))
829 self._temporary_drbds[r_key] = instance
831 logging.debug("Request to allocate drbd minors, input: %s, returning %s",
# Removes entries from self._temporary_drbds (keyed by (node, minor), with
# the owning instance name as value). Iterating over .items() -- a list copy
# in Python 2 -- is what makes deleting inside the loop safe.
# NOTE(review): this listing is incomplete -- embedded number 846, between
# the "for" line and the "del", is skipped. "name" is otherwise unused, so
# the skipped line presumably restricts the deletion to entries owned by
# "instance" -- confirm against the complete file.
835 def _UnlockedReleaseDRBDMinors(self, instance):
836 """Release temporary drbd minors allocated for a given instance.
838 @type instance: string
839 @param instance: the instance for which temporary minors should be
843 assert isinstance(instance, basestring), \
844 "Invalid argument passed to ReleaseDRBDMinors"
845 for key, name in self._temporary_drbds.items():
847 del self._temporary_drbds[key]
@locking.ssynchronized(_config_lock)
def ReleaseDRBDMinors(self, instance):
  """Release temporary drbd minors allocated for a given instance.

  Meant for error paths; on the success paths the release is done by the
  ConfigWriter add and update code (L{AddInstance}, for one, calls
  L{_UnlockedReleaseDRBDMinors} itself).

  This is a wrapper, decorated with the config lock, over
  L{_UnlockedReleaseDRBDMinors}; nothing is returned.

  @type instance: string
  @param instance: the instance for which temporary minors should be
      released

  """
  self._UnlockedReleaseDRBDMinors(instance)
@locking.ssynchronized(_config_lock, shared=1)
def GetConfigVersion(self):
  """Return the version stored in the configuration data.

  @return: Config version

  """
  config = self._config_data
  return config.version
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterName(self):
  """Return the name of the cluster.

  @return: Cluster name

  """
  cluster = self._config_data.cluster
  return cluster.cluster_name
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNode(self):
  """Return the hostname of the master node of this cluster.

  @return: Master hostname

  """
  cluster = self._config_data.cluster
  return cluster.master_node
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterIP(self):
  """Return the IP of the master node of this cluster.

  @return: the cluster's C{master_ip} value

  """
  cluster = self._config_data.cluster
  return cluster.master_ip
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetdev(self):
  """Return the master network device of this cluster.

  @return: the cluster's C{master_netdev} value

  """
  cluster = self._config_data.cluster
  return cluster.master_netdev
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetmask(self):
  """Return the netmask of the master node of this cluster.

  @return: the cluster's C{master_netmask} value

  """
  cluster = self._config_data.cluster
  return cluster.master_netmask
@locking.ssynchronized(_config_lock, shared=1)
def GetUseExternalMipScript(self):
  """Return the flag telling whether the external master IP setup script
  is to be used.

  @return: the cluster's C{use_external_mip_script} value

  """
  cluster = self._config_data.cluster
  return cluster.use_external_mip_script
@locking.ssynchronized(_config_lock, shared=1)
def GetFileStorageDir(self):
  """Return the file storage dir of this cluster.

  @return: the cluster's C{file_storage_dir} value

  """
  cluster = self._config_data.cluster
  return cluster.file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetSharedFileStorageDir(self):
  """Return the shared file storage dir of this cluster.

  @return: the cluster's C{shared_file_storage_dir} value

  """
  cluster = self._config_data.cluster
  return cluster.shared_file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetHypervisorType(self):
  """Return the hypervisor type of this cluster.

  @return: the first entry of the cluster's C{enabled_hypervisors} list

  """
  enabled = self._config_data.cluster.enabled_hypervisors
  return enabled[0]
@locking.ssynchronized(_config_lock, shared=1)
def GetHostKey(self):
  """Return the rsa hostkey stored in the configuration.

  @return: the rsa hostkey (the cluster's C{rsahostkeypub} value)

  """
  cluster = self._config_data.cluster
  return cluster.rsahostkeypub
@locking.ssynchronized(_config_lock, shared=1)
def GetDefaultIAllocator(self):
  """Return the default instance allocator of this cluster.

  @return: the cluster's C{default_iallocator} value

  """
  cluster = self._config_data.cluster
  return cluster.default_iallocator
@locking.ssynchronized(_config_lock, shared=1)
def GetPrimaryIPFamily(self):
  """Return the primary ip family of the cluster.

  @return: primary ip family

  """
  cluster = self._config_data.cluster
  return cluster.primary_ip_family
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetworkParameters(self):
  """Get network parameters of the master node.

  The values are copied from the cluster object: master node name, master
  IP, netmask, network device and primary IP family.

  @rtype: L{objects.MasterNetworkParameters}
  @return: network parameters of the master node

  """
  cluster = self._config_data.cluster
  return objects.MasterNetworkParameters(name=cluster.master_node,
                                         ip=cluster.master_ip,
                                         netmask=cluster.master_netmask,
                                         netdev=cluster.master_netdev,
                                         ip_family=cluster.primary_ip_family)
# Locked entry point for adding a node group; the work is delegated to
# _UnlockedAddNodeGroup with the same arguments.
# NOTE(review): this listing is incomplete -- embedded number 1005, right
# after the delegation, is skipped (besides docstring lines), so a trailing
# statement may be missing from this view (e.g. whatever makes the change
# persistent). Confirm against the complete file.
987 @locking.ssynchronized(_config_lock)
988 def AddNodeGroup(self, group, ec_id, check_uuid=True):
989 """Add a node group to the configuration.
991 This method calls group.UpgradeConfig() to fill any missing attributes
992 according to their default values.
994 @type group: L{objects.NodeGroup}
995 @param group: the NodeGroup object to add
997 @param ec_id: unique id for the job to use when creating a missing UUID
998 @type check_uuid: bool
999 @param check_uuid: add an UUID to the group if it doesn't have one or, if
1000 it does, ensure that it does not exist in the
1001 configuration already
1004 self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
# Adds a node group without taking the config lock: optionally ensures the
# group has a UUID, refuses a name that already resolves to an existing
# group (OpPrereqError with ECODE_EXISTS), stamps creation/modification
# time, lets the group fill in its defaults via UpgradeConfig(), stores it
# under its UUID and bumps the cluster serial number.
# NOTE(review): this listing is incomplete -- its embedded numbering skips
# 1016, 1019, 1022, 1023 and 1029 (besides docstring lines). The condition
# around _EnsureUUID (presumably check_uuid), the try/except/else structure
# around the name lookup and one statement before the timestamp assignment
# are not visible -- confirm against the complete file.
1007 def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
1008 """Add a node group to the configuration.
1011 logging.info("Adding node group %s to configuration", group.name)
1013 # Some code might need to add a node group with a pre-populated UUID
1014 # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
1015 # the "does this UUID" exist already check.
1017 self._EnsureUUID(group, ec_id)
1020 existing_uuid = self._UnlockedLookupNodeGroup(group.name)
1021 except errors.OpPrereqError:
1024 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
1025 " node group (UUID: %s)" %
1026 (group.name, existing_uuid),
1027 errors.ECODE_EXISTS)
1030 group.ctime = group.mtime = time.time()
1031 group.UpgradeConfig()
1033 self._config_data.nodegroups[group.uuid] = group
1034 self._config_data.cluster.serial_no += 1
# Removes the node group stored under group_uuid and bumps the cluster
# serial number; an unknown UUID raises ConfigurationError.
# NOTE(review): the "only group" protection below is an assert, so it is
# skipped when Python runs with -O.
# NOTE(review): this listing is incomplete -- embedded number 1054, right
# after the serial number update, is skipped (besides docstring lines), so a
# trailing statement may be missing from this view (e.g. whatever makes the
# change persistent). Confirm against the complete file.
1036 @locking.ssynchronized(_config_lock)
1037 def RemoveNodeGroup(self, group_uuid):
1038 """Remove a node group from the configuration.
1040 @type group_uuid: string
1041 @param group_uuid: the UUID of the node group to remove
1044 logging.info("Removing node group %s from configuration", group_uuid)
1046 if group_uuid not in self._config_data.nodegroups:
1047 raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)
1049 assert len(self._config_data.nodegroups) != 1, \
1050 "Group '%s' is the only group, cannot be removed" % group_uuid
1052 del self._config_data.nodegroups[group_uuid]
1053 self._config_data.cluster.serial_no += 1
# Resolves a node group reference (name, UUID or None) to a group UUID
# without taking the config lock. Visible pieces: with more than one group
# configured a default cannot be picked (OpPrereqError), otherwise the
# single group's UUID is used; a target that is a key of the nodegroups
# dict is recognised as a UUID; otherwise groups are searched by name and
# OpPrereqError is raised if none matches.
# NOTE(review): this listing is incomplete -- its embedded numbering skips
# 1066, 1070, 1073 and 1078 (besides docstring lines). The condition
# selecting the "default group" path (presumably target being None), the
# statement executed when target is a known UUID and the rest of the final
# raise are not visible -- confirm against the complete file.
1056 def _UnlockedLookupNodeGroup(self, target):
1057 """Lookup a node group's UUID.
1059 @type target: string or None
1060 @param target: group name or UUID or None to look for the default
1062 @return: nodegroup UUID
1063 @raises errors.OpPrereqError: when the target group cannot be found
1067 if len(self._config_data.nodegroups) != 1:
1068 raise errors.OpPrereqError("More than one node group exists. Target"
1069 " group must be specified explicitely.")
1071 return self._config_data.nodegroups.keys()[0]
1072 if target in self._config_data.nodegroups:
1074 for nodegroup in self._config_data.nodegroups.values():
1075 if nodegroup.name == target:
1076 return nodegroup.uuid
1077 raise errors.OpPrereqError("Node group '%s' not found" % target,
@locking.ssynchronized(_config_lock, shared=1)
def LookupNodeGroup(self, target):
  """Lookup a node group's UUID.

  Wrapper, decorated with the shared config lock, over
  L{_UnlockedLookupNodeGroup}.

  @type target: string or None
  @param target: group name or UUID or None to look for the default
  @return: nodegroup UUID

  """
  group_uuid = self._UnlockedLookupNodeGroup(target)
  return group_uuid
1094 def _UnlockedGetNodeGroup(self, uuid):
1095 """Lookup a node group.
1098 @param uuid: group UUID
1099 @rtype: L{objects.NodeGroup} or None
1100 @return: nodegroup object, or None if not found
1103 if uuid not in self._config_data.nodegroups:
1106 return self._config_data.nodegroups[uuid]
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroup(self, uuid):
  """Lookup a node group.

  Wrapper, decorated with the shared config lock, over
  L{_UnlockedGetNodeGroup}.

  @param uuid: group UUID
  @rtype: L{objects.NodeGroup} or None
  @return: nodegroup object, or None if not found

  """
  group = self._UnlockedGetNodeGroup(uuid)
  return group
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodeGroupsInfo(self):
  """Get the configuration of all node groups.

  @rtype: dict
  @return: a new dictionary with the same keys and values as the
      configuration's C{nodegroups} mapping (the values are not copied)

  """
  groups = self._config_data.nodegroups
  return dict(groups)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupList(self):
  """Return the keys (UUIDs) of all node groups in the configuration.

  """
  groups = self._config_data.nodegroups
  return groups.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupMembersByNodes(self, nodes):
  """Get nodes which are members of the same node groups as the given nodes.

  @param nodes: iterable of node names
  @rtype: frozenset
  @return: names of all members of every group that contains one of the
      given nodes

  """
  members = set()
  for node_name in nodes:
    group_uuid = self._UnlockedGetNodeInfo(node_name).group
    members.update(self._UnlockedGetNodeGroup(group_uuid).members)
  return frozenset(members)
@locking.ssynchronized(_config_lock)
def AddInstance(self, instance, ec_id):
  """Add an instance to the config.

  This should be used after creating a new instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @param ec_id: execution context id, passed on to L{_EnsureUUID}

  """
  if not isinstance(instance, objects.Instance):
    raise errors.ProgrammerError("Invalid type passed to AddInstance")

  if instance.disk_template != constants.DT_DISKLESS:
    lv_map = instance.MapLVsByNode()
    logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, lv_map)

  # Refuse instances whose NICs collide with MAC addresses already known
  used_macs = self._AllMACs()
  for nic in instance.nics:
    if nic.mac in used_macs:
      raise errors.ConfigurationError("Cannot add instance %s:"
                                      " MAC address '%s' already in use." %
                                      (instance.name, nic.mac))

  self._EnsureUUID(instance, ec_id)

  now = time.time()
  instance.serial_no = 1
  instance.ctime = now
  instance.mtime = now

  self._config_data.instances[instance.name] = instance
  self._config_data.cluster.serial_no += 1
  self._UnlockedReleaseDRBDMinors(instance.name)
  self._WriteConfig()
1178 def _EnsureUUID(self, item, ec_id):
1179 """Ensures a given object has a valid UUID.
1181 @param item: the instance or node to be checked
1182 @param ec_id: the execution context id for the uuid reservation
1186 item.uuid = self._GenerateUniqueID(ec_id)
1187 elif item.uuid in self._AllIDs(include_temporary=True):
1188 raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
1189 " in use" % (item.name, item.uuid))
def _SetInstanceStatus(self, instance_name, status):
  """Set the instance's admin state to a given value.

  The configuration is only written if the state actually changes.

  @param instance_name: name of the instance to modify
  @param status: new state, one of L{constants.ADMINST_ALL}
  @raise errors.ConfigurationError: if the instance is unknown

  """
  assert status in constants.ADMINST_ALL, \
    "Invalid status '%s' passed to SetInstanceStatus" % (status,)

  instances = self._config_data.instances
  if instance_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" %
                                    instance_name)

  inst = instances[instance_name]
  if inst.admin_state == status:
    # Nothing to change, so avoid a needless config write
    return

  inst.admin_state = status
  inst.serial_no += 1
  inst.mtime = time.time()
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceUp(self, instance_name):
  """Record the instance's admin state as up in the configuration.

  @param instance_name: name of the instance to modify

  """
  new_state = constants.ADMINST_UP
  self._SetInstanceStatus(instance_name, new_state)
@locking.ssynchronized(_config_lock)
def MarkInstanceOffline(self, instance_name):
  """Record the instance's admin state as offline in the configuration.

  @param instance_name: name of the instance to modify

  """
  new_state = constants.ADMINST_OFFLINE
  self._SetInstanceStatus(instance_name, new_state)
@locking.ssynchronized(_config_lock)
def RemoveInstance(self, instance_name):
  """Remove the instance from the configuration.

  @param instance_name: name of the instance to remove
  @raise errors.ConfigurationError: if the instance is unknown

  """
  instances = self._config_data.instances
  if instance_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  cluster = self._config_data.cluster

  # A network port allocated to the instance goes back to the pool of
  # free ports.
  port = getattr(instances[instance_name], "network_port", None)
  if port is not None:
    cluster.tcpudp_port_pool.add(port)

  del instances[instance_name]
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RenameInstance(self, old_name, new_name):
  """Rename an instance.

  This needs to be done in ConfigWriter and not by RemoveInstance
  combined with AddInstance as only we can guarantee an atomic
  rename.

  @param old_name: current name of the instance
  @param new_name: name the instance should get
  @raise errors.ConfigurationError: if the instance is unknown

  """
  instances = self._config_data.instances
  if old_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % old_name)

  inst = instances.pop(old_name)
  inst.name = new_name

  for disk in inst.disks:
    if disk.dev_type != constants.LD_FILE:
      continue
    # File-based disks embed the instance name in their path, so both the
    # logical and the physical id have to follow the rename
    storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
    fname = "disk%s" % disk.iv_name.split("/")[1]
    new_id = (disk.logical_id[0],
              utils.PathJoin(storage_dir, inst.name, fname))
    disk.physical_id = disk.logical_id = new_id

  # Force update of ssconf files
  self._config_data.cluster.serial_no += 1

  instances[inst.name] = inst
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceDown(self, instance_name):
  """Record the instance's admin state as down in the configuration.

  @param instance_name: name of the instance to modify

  """
  new_state = constants.ADMINST_DOWN
  self._SetInstanceStatus(instance_name, new_state)
1279 def _UnlockedGetInstanceList(self):
1280 """Get the list of instances.
1282 This function is for internal use, when the config lock is already held.
1285 return self._config_data.instances.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceList(self):
  """Return the names of all instances in the configuration.

  Locked wrapper around L{_UnlockedGetInstanceList}.

  @return: array of instances, ex. ['instance2.example.com',
      'instance1.example.com']

  """
  names = self._UnlockedGetInstanceList()
  return names
def ExpandInstanceName(self, short_name):
  """Attempt to expand an incomplete instance name.

  @param short_name: the (possibly abbreviated) instance name
  @return: whatever L{_MatchNameComponentIgnoreCase} yields when matching
      the short name against the list of known instances

  """
  # Locking is done in L{ConfigWriter.GetInstanceList}
  known_names = self.GetInstanceList()
  return _MatchNameComponentIgnoreCase(short_name, known_names)
1304 def _UnlockedGetInstanceInfo(self, instance_name):
1305 """Returns information about an instance.
1307 This function is for internal use, when the config lock is already held.
1310 if instance_name not in self._config_data.instances:
1313 return self._config_data.instances[instance_name]
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceInfo(self, instance_name):
  """Return the configuration object of an instance.

  The information comes from the configuration file. Other information
  about an instance is taken from the live systems.

  @param instance_name: name of the instance, e.g.
      I{instance1.example.com}

  @rtype: L{objects.Instance}
  @return: the instance object

  """
  inst = self._UnlockedGetInstanceInfo(instance_name)
  return inst
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceNodeGroups(self, instance_name, primary_only=False):
  """Returns set of node group UUIDs for instance's nodes.

  @param instance_name: name of the instance
  @param primary_only: whether to look at the primary node only instead
      of at all nodes of the instance
  @rtype: frozenset
  @raise errors.ConfigurationError: if the instance is unknown

  """
  inst = self._UnlockedGetInstanceInfo(instance_name)
  if not inst:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  if primary_only:
    node_names = [inst.primary_node]
  else:
    node_names = inst.all_nodes

  group_uuids = set()
  for node_name in node_names:
    group_uuids.add(self._UnlockedGetNodeInfo(node_name).group)
  return frozenset(group_uuids)
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiInstanceInfo(self, instances):
  """Get the configuration of multiple instances.

  @param instances: list of instance names
  @rtype: list
  @return: list of tuples (instance, instance_info), where
      instance_info is what GetInstanceInfo would return for the
      instance, while keeping the original order

  """
  result = []
  for name in instances:
    result.append((name, self._UnlockedGetInstanceInfo(name)))
  return result
@locking.ssynchronized(_config_lock, shared=1)
def GetAllInstancesInfo(self):
  """Get the configuration of all instances.

  @rtype: dict
  @return: dict of (instance, instance_info), where instance_info is what
      GetInstanceInfo would return for the instance

  """
  info_by_name = {}
  for name in self._UnlockedGetInstanceList():
    info_by_name[name] = self._UnlockedGetInstanceInfo(name)
  return info_by_name
@locking.ssynchronized(_config_lock, shared=1)
def GetInstancesInfoByFilter(self, filter_fn):
  """Get instance configuration with a filter.

  @type filter_fn: callable
  @param filter_fn: Filter function receiving instance object as parameter,
      returning boolean. Important: this function is called while the
      configuration lock is held. It must not do any complex work or call
      functions potentially leading to a deadlock. Ideally it doesn't call
      any other functions and just compares instance attributes.
  @rtype: dict
  @return: mapping of instance name to instance object for all instances
      accepted by the filter

  """
  selected = {}
  for (name, inst) in self._config_data.instances.items():
    if filter_fn(inst):
      selected[name] = inst
  return selected
@locking.ssynchronized(_config_lock)
def AddNode(self, node, ec_id):
  """Add a node to the configuration.

  @type node: L{objects.Node}
  @param node: a Node instance
  @param ec_id: execution context id, passed on to L{_EnsureUUID}

  """
  logging.info("Adding node %s to configuration", node.name)

  self._EnsureUUID(node, ec_id)

  now = time.time()
  node.serial_no = 1
  node.ctime = now
  node.mtime = now

  # Register the node with its group before storing the node itself
  self._UnlockedAddNodeToGroup(node.name, node.group)
  self._config_data.nodes[node.name] = node
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RemoveNode(self, node_name):
  """Remove a node from the configuration.

  @param node_name: name of the node to remove
  @raise errors.ConfigurationError: if the node is unknown

  """
  logging.info("Removing node %s from configuration", node_name)

  nodes = self._config_data.nodes
  if node_name not in nodes:
    raise errors.ConfigurationError("Unknown node '%s'" % node_name)

  # Drop the group membership first, then the node itself
  self._UnlockedRemoveNodeFromGroup(nodes[node_name])
  del nodes[node_name]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
def ExpandNodeName(self, short_name):
  """Attempt to expand an incomplete node name.

  @param short_name: the (possibly abbreviated) node name
  @return: whatever L{_MatchNameComponentIgnoreCase} yields when matching
      the short name against the list of known nodes

  """
  # Locking is done in L{ConfigWriter.GetNodeList}
  known_names = self.GetNodeList()
  return _MatchNameComponentIgnoreCase(short_name, known_names)
1433 def _UnlockedGetNodeInfo(self, node_name):
1434 """Get the configuration of a node, as stored in the config.
1436 This function is for internal use, when the config lock is already
1439 @param node_name: the node name, e.g. I{node1.example.com}
1441 @rtype: L{objects.Node}
1442 @return: the node object
1445 if node_name not in self._config_data.nodes:
1448 return self._config_data.nodes[node_name]
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInfo(self, node_name):
  """Get the configuration of a node, as stored in the config.

  This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: L{objects.Node}
  @return: the node object

  """
  node = self._UnlockedGetNodeInfo(node_name)
  return node
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInstances(self, node_name):
  """Get the instances of a node, as stored in the config.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: (list, list)
  @return: a tuple with two lists: the names of the instances having the
      node as primary, and of those having it as secondary

  """
  all_insts = self._config_data.instances.values()
  primary = [inst.name for inst in all_insts
             if inst.primary_node == node_name]
  secondary = [inst.name for inst in all_insts
               if node_name in inst.secondary_nodes]
  return (primary, secondary)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupInstances(self, uuid, primary_only=False):
  """Get the instances of a node group.

  @param uuid: Node group UUID
  @param primary_only: Whether to only consider primary nodes
  @rtype: frozenset
  @return: Names of the instances having a (primary) node in the group

  """
  names = set()
  for inst in self._config_data.instances.values():
    if primary_only:
      inst_nodes = [inst.primary_node]
    else:
      inst_nodes = inst.all_nodes

    # Every node is looked up, even after a match has been found
    for node_name in inst_nodes:
      if self._UnlockedGetNodeInfo(node_name).group == uuid:
        names.add(inst.name)

  return frozenset(names)
1503 def _UnlockedGetNodeList(self):
1504 """Return the list of nodes which are in the configuration.
1506 This function is for internal use, when the config lock is already
1512 return self._config_data.nodes.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeList(self):
  """Return the names of the nodes which are in the configuration.

  Locked wrapper around L{_UnlockedGetNodeList}.

  """
  names = self._UnlockedGetNodeList()
  return names
1521 def _UnlockedGetOnlineNodeList(self):
1522 """Return the list of nodes which are online.
1525 all_nodes = [self._UnlockedGetNodeInfo(node)
1526 for node in self._UnlockedGetNodeList()]
1527 return [node.name for node in all_nodes if not node.offline]
@locking.ssynchronized(_config_lock, shared=1)
def GetOnlineNodeList(self):
  """Return the names of the nodes which are not marked offline.

  Locked wrapper around L{_UnlockedGetOnlineNodeList}.

  """
  names = self._UnlockedGetOnlineNodeList()
  return names
@locking.ssynchronized(_config_lock, shared=1)
def GetVmCapableNodeList(self):
  """Return the names of the nodes which are vm capable.

  """
  capable = []
  for node_name in self._UnlockedGetNodeList():
    node = self._UnlockedGetNodeInfo(node_name)
    if node.vm_capable:
      capable.append(node.name)
  return capable
@locking.ssynchronized(_config_lock, shared=1)
def GetNonVmCapableNodeList(self):
  """Return the names of the nodes which are not vm capable.

  """
  not_capable = []
  for node_name in self._UnlockedGetNodeList():
    node = self._UnlockedGetNodeInfo(node_name)
    if not node.vm_capable:
      not_capable.append(node.name)
  return not_capable
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiNodeInfo(self, nodes):
  """Get the configuration of multiple nodes.

  @param nodes: list of node names
  @rtype: list
  @return: list of tuples of (node, node_info), where node_info is
      what GetNodeInfo would return for the node, in the original
      order

  """
  result = []
  for name in nodes:
    result.append((name, self._UnlockedGetNodeInfo(name)))
  return result
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodesInfo(self):
  """Get the configuration of all nodes.

  Locked wrapper around L{_UnlockedGetAllNodesInfo}.

  @rtype: dict
  @return: dict of (node, node_info), where node_info is what
      GetNodeInfo would return for the node

  """
  info_by_name = self._UnlockedGetAllNodesInfo()
  return info_by_name
1578 def _UnlockedGetAllNodesInfo(self):
1579 """Gets configuration of all nodes.
1581 @note: See L{GetAllNodesInfo}
1584 return dict([(node, self._UnlockedGetNodeInfo(node))
1585 for node in self._UnlockedGetNodeList()])
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupsFromNodes(self, nodes):
  """Returns groups for a list of nodes.

  @type nodes: list of string
  @param nodes: List of node names
  @rtype: frozenset
  @return: the C{group} attribute values of the given nodes

  """
  group_uuids = set()
  for name in nodes:
    group_uuids.add(self._UnlockedGetNodeInfo(name).group)
  return frozenset(group_uuids)
1598 def _UnlockedGetMasterCandidateStats(self, exceptions=None):
1599 """Get the number of current and maximum desired and possible candidates.
1601 @type exceptions: list
1602 @param exceptions: if passed, list of nodes that should be ignored
1604 @return: tuple of (current, desired and possible, possible)
1607 mc_now = mc_should = mc_max = 0
1608 for node in self._config_data.nodes.values():
1609 if exceptions and node.name in exceptions:
1611 if not (node.offline or node.drained) and node.master_capable:
1613 if node.master_candidate:
1615 mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
1616 return (mc_now, mc_should, mc_max)
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterCandidateStats(self, exceptions=None):
  """Get the number of current, desired and possible master candidates.

  This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: tuple of (current, desired and possible, possible)

  """
  stats = self._UnlockedGetMasterCandidateStats(exceptions)
  return stats
@locking.ssynchronized(_config_lock)
def MaintainCandidatePool(self, exceptions):
  """Try to grow the candidate pool to the desired size.

  Nodes are promoted in random order until the desired number of master
  candidates is reached; the configuration is written only if at least
  one node was promoted.

  @type exceptions: list
  @param exceptions: list of nodes that should be ignored; None is
      accepted and treated as an empty list
  @rtype: list
  @return: list with the adjusted nodes (L{objects.Node} instances)

  """
  if exceptions is None:
    # The membership test below would raise TypeError on None
    exceptions = []

  mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions)
  mod_list = []
  if mc_now < mc_max:
    # shuffle() needs a mutable sequence, so make an explicit list copy
    node_list = list(self._config_data.nodes.keys())
    random.shuffle(node_list)
    for name in node_list:
      if mc_now >= mc_max:
        break
      node = self._config_data.nodes[name]
      if (node.master_candidate or node.offline or node.drained or
          node.name in exceptions or not node.master_capable):
        continue
      mod_list.append(node)
      node.master_candidate = True
      node.serial_no += 1
      mc_now += 1
    if mc_now != mc_max:
      # this should not happen
      logging.warning("Warning: MaintainCandidatePool didn't manage to"
                      " fill the candidate pool (%d/%d)", mc_now, mc_max)
    if mod_list:
      self._config_data.cluster.serial_no += 1
      self._WriteConfig()

  return mod_list
1668 def _UnlockedAddNodeToGroup(self, node_name, nodegroup_uuid):
1669 """Add a given node to the specified group.
1672 if nodegroup_uuid not in self._config_data.nodegroups:
1673 # This can happen if a node group gets deleted between its lookup and
1674 # when we're adding the first node to it, since we don't keep a lock in
1675 # the meantime. It's ok though, as we'll fail cleanly if the node group
1676 # is not found anymore.
1677 raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)
1678 if node_name not in self._config_data.nodegroups[nodegroup_uuid].members:
1679 self._config_data.nodegroups[nodegroup_uuid].members.append(node_name)
1681 def _UnlockedRemoveNodeFromGroup(self, node):
1682 """Remove a given node from its group.
1685 nodegroup = node.group
1686 if nodegroup not in self._config_data.nodegroups:
1687 logging.warning("Warning: node '%s' has unknown node group '%s'"
1688 " (while being removed from it)", node.name, nodegroup)
1689 nodegroup_obj = self._config_data.nodegroups[nodegroup]
1690 if node.name not in nodegroup_obj.members:
1691 logging.warning("Warning: node '%s' not a member of its node group '%s'"
1692 " (while being removed from it)", node.name, nodegroup)
1694 nodegroup_obj.members.remove(node.name)
@locking.ssynchronized(_config_lock)
def AssignGroupNodes(self, mods):
  """Changes the group of a number of nodes.

  All modifications are resolved and validated first; only if that
  succeeds for every entry are the changes applied and the configuration
  written.

  @type mods: list of tuples; (node name, new group UUID)
  @param mods: Node membership modifications
  @raise errors.ConfigurationError: if a node, its current group or its
      new group cannot be found

  """
  all_groups = self._config_data.nodegroups
  all_nodes = self._config_data.nodes

  pending = []

  # Try to resolve names/UUIDs first
  for (node_name, new_group_uuid) in mods:
    if node_name not in all_nodes:
      raise errors.ConfigurationError("Unable to find node '%s'" % node_name)
    node = all_nodes[node_name]

    if node.group == new_group_uuid:
      # Node is being assigned to its current group
      logging.debug("Node '%s' was assigned to its current group (%s)",
                    node_name, node.group)
      continue

    # Try to find current group of node
    if node.group not in all_groups:
      raise errors.ConfigurationError("Unable to find old group '%s'" %
                                      node.group)
    old_group = all_groups[node.group]

    # Try to find new group for node
    if new_group_uuid not in all_groups:
      raise errors.ConfigurationError("Unable to find new group '%s'" %
                                      new_group_uuid)
    new_group = all_groups[new_group_uuid]

    assert node.name in old_group.members, \
      ("Inconsistent configuration: node '%s' not listed in members for its"
       " old group '%s'" % (node.name, old_group.uuid))
    assert node.name not in new_group.members, \
      ("Inconsistent configuration: node '%s' already listed in members for"
       " its new group '%s'" % (node.name, new_group.uuid))

    pending.append((node, old_group, new_group))

  # Apply changes
  for (node, old_group, new_group) in pending:
    # NOTE(review): the first comparison is between the node's own UUID and
    # a group UUID; it possibly was meant to be node.group -- confirm
    assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
      "Assigning to current group is not possible"

    node.group = new_group.uuid

    # Update members of involved groups
    if node.name in old_group.members:
      old_group.members.remove(node.name)
    if node.name not in new_group.members:
      new_group.members.append(node.name)

  # Update timestamps and serials (only once per node/group object)
  touched = set()
  for entry in pending:
    touched.update(entry)

  now = time.time()
  for obj in touched:
    obj.serial_no += 1
    obj.mtime = now

  # Force ssconf update
  self._config_data.cluster.serial_no += 1

  self._WriteConfig()
1769 def _BumpSerialNo(self):
1770 """Bump up the serial number of the config.
1773 self._config_data.serial_no += 1
1774 self._config_data.mtime = time.time()
1776 def _AllUUIDObjects(self):
1777 """Returns all objects with uuid attributes.
1780 return (self._config_data.instances.values() +
1781 self._config_data.nodes.values() +
1782 self._config_data.nodegroups.values() +
1783 [self._config_data.cluster])
1785 def _OpenConfig(self, accept_foreign):
1786 """Read the config data from disk.
1789 raw_data = utils.ReadFile(self._cfg_file)
1792 data = objects.ConfigData.FromDict(serializer.Load(raw_data))
1793 except Exception, err:
1794 raise errors.ConfigurationError(err)
1796 # Make sure the configuration has the right version
1797 _ValidateConfig(data)
1799 if (not hasattr(data, 'cluster') or
1800 not hasattr(data.cluster, 'rsahostkeypub')):
1801 raise errors.ConfigurationError("Incomplete configuration"
1802 " (missing cluster.rsahostkeypub)")
1804 if data.cluster.master_node != self._my_hostname and not accept_foreign:
1805 msg = ("The configuration denotes node %s as master, while my"
1806 " hostname is %s; opening a foreign configuration is only"
1807 " possible in accept_foreign mode" %
1808 (data.cluster.master_node, self._my_hostname))
1809 raise errors.ConfigurationError(msg)
1811 # Upgrade configuration if needed
1812 data.UpgradeConfig()
1814 self._config_data = data
1815 # reset the last serial as -1 so that the next write will cause
1817 self._last_cluster_serial = -1
1819 # And finally run our (custom) config upgrade sequence
1820 self._UpgradeConfig()
1822 self._cfg_id = utils.GetFileID(path=self._cfg_file)
def _UpgradeConfig(self):
  """Run upgrade steps that cannot be done purely in the objects.

  This is because some data elements need uniqueness across the
  whole configuration, etc.

  @warning: this function will call L{_WriteConfig()}, but also
      L{DropECReservations} so it needs to be called only from a
      "safe" place (the constructor). If one wanted to call it with
      the lock held, a DropECReservationUnlocked would need to be
      created first, to avoid causing deadlock.

  """
  changed = False

  # Every object must carry a UUID
  for obj in self._AllUUIDObjects():
    if obj.uuid is None:
      obj.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
      changed = True

  # A configuration without node groups gets the initial default group
  if not self._config_data.nodegroups:
    group_name = constants.INITIAL_NODE_GROUP_NAME
    group = objects.NodeGroup(name=group_name, members=[])
    self._UnlockedAddNodeGroup(group, _UPGRADE_CONFIG_JID, True)
    changed = True

  for node in self._config_data.nodes.values():
    if not node.group:
      node.group = self.LookupNodeGroup(None)
      changed = True
    # This is technically *not* an upgrade, but needs to be done both when
    # nodegroups are being added, and upon normally loading the config,
    # because the members list of a node group is discarded upon
    # serializing/deserializing the object.
    self._UnlockedAddNodeToGroup(node.name, node.group)

  if changed:
    self._WriteConfig()
    # This is ok even if it acquires the internal lock, as _UpgradeConfig is
    # only called at config init time, without the lock held
    self.DropECReservations(_UPGRADE_CONFIG_JID)
def _DistributeConfig(self, feedback_fn):
  """Distribute the configuration to the other nodes.

  Currently, this only copies the configuration file. In the future,
  it could be used to encapsulate the 2/3-phase update mechanism.

  @param feedback_fn: if true, it is called with a message for every
      node the copy failed on
  @rtype: boolean
  @return: True if running offline or if all copies succeeded, False
      otherwise

  """
  if self._offline:
    return True

  own_name = self._my_hostname

  # we can skip checking whether _UnlockedGetNodeInfo returns None
  # since the node list comes from _UnlockedGetNodeList, and we are
  # called with the lock held, so no modifications should take place
  # in between
  targets = []
  for node_name in self._UnlockedGetNodeList():
    if node_name == own_name:
      continue
    node_info = self._UnlockedGetNodeInfo(node_name)
    if node_info.master_candidate:
      targets.append(node_info)

  node_list = [info.name for info in targets]
  addr_list = [info.primary_ip for info in targets]

  # TODO: Use dedicated resolver talking to config writer for name resolution
  rpc_runner = self._GetRpc(addr_list)
  result = rpc_runner.call_upload_file(node_list, self._cfg_file)

  success = True
  for to_node, to_result in result.items():
    msg = to_result.fail_msg
    if not msg:
      continue
    msg = ("Copy of file %s to node %s failed: %s" %
           (self._cfg_file, to_node, msg))
    logging.error(msg)

    if feedback_fn:
      feedback_fn(msg)

    success = False

  return success
1908 def _WriteConfig(self, destination=None, feedback_fn=None):
1909 """Write the configuration data to persistent storage.
1912 assert feedback_fn is None or callable(feedback_fn)
1914 # Warn on config errors, but don't abort the save - the
1915 # configuration has already been modified, and we can't revert;
1916 # the best we can do is to warn the user and save as is, leaving
1917 # recovery to the user
1918 config_errors = self._UnlockedVerifyConfig()
1920 errmsg = ("Configuration data is not consistent: %s" %
1921 (utils.CommaJoin(config_errors)))
1922 logging.critical(errmsg)
1926 if destination is None:
1927 destination = self._cfg_file
1928 self._BumpSerialNo()
1929 txt = serializer.Dump(self._config_data.ToDict())
1931 getents = self._getents()
1933 fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
1934 close=False, gid=getents.confd_gid, mode=0640)
1935 except errors.LockError:
1936 raise errors.ConfigurationError("The configuration file has been"
1937 " modified since the last write, cannot"
1940 self._cfg_id = utils.GetFileID(fd=fd)
1944 self.write_count += 1
1946 # and redistribute the config file to master candidates
1947 self._DistributeConfig(feedback_fn)
1949 # Write ssconf files on all nodes (including locally)
1950 if self._last_cluster_serial < self._config_data.cluster.serial_no:
1951 if not self._offline:
1952 result = self._GetRpc(None).call_write_ssconf_files(
1953 self._UnlockedGetOnlineNodeList(),
1954 self._UnlockedGetSsconfValues())
1956 for nname, nresu in result.items():
1957 msg = nresu.fail_msg
1959 errmsg = ("Error while uploading ssconf files to"
1960 " node %s: %s" % (nname, msg))
1961 logging.warning(errmsg)
1966 self._last_cluster_serial = self._config_data.cluster.serial_no
def _UnlockedGetSsconfValues(self):
  """Return the values needed by ssconf.

  List-like values are joined into a single newline-separated string.

  @rtype: dict
  @return: a dictionary with keys the ssconf names and values their
      associated value
  @raise errors.ConfigurationError: if any value is not a string

  """
  join_lines = "\n".join
  cluster = self._config_data.cluster

  inst_names = utils.NiceSort(self._UnlockedGetInstanceList())
  node_names = utils.NiceSort(self._UnlockedGetNodeList())
  node_objs = [self._UnlockedGetNodeInfo(name) for name in node_names]

  primary_ips = ["%s %s" % (node.name, node.primary_ip)
                 for node in node_objs]
  secondary_ips = ["%s %s" % (node.name, node.secondary_ip)
                   for node in node_objs]

  offline_names = [node.name for node in node_objs if node.offline]
  online_names = [node.name for node in node_objs if not node.offline]
  candidates = [node for node in node_objs if node.master_candidate]

  group_lines = ["%s %s" % (group.uuid, group.name)
                 for group in self._config_data.nodegroups.values()]

  ssconf_values = {
    constants.SS_CLUSTER_NAME: cluster.cluster_name,
    constants.SS_CLUSTER_TAGS: join_lines(cluster.GetTags()),
    constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
    constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
    constants.SS_MASTER_CANDIDATES:
      join_lines(node.name for node in candidates),
    constants.SS_MASTER_CANDIDATES_IPS:
      join_lines(node.primary_ip for node in candidates),
    constants.SS_MASTER_IP: cluster.master_ip,
    constants.SS_MASTER_NETDEV: cluster.master_netdev,
    constants.SS_MASTER_NETMASK: str(cluster.master_netmask),
    constants.SS_MASTER_NODE: cluster.master_node,
    constants.SS_NODE_LIST: join_lines(node_names),
    constants.SS_NODE_PRIMARY_IPS: join_lines(primary_ips),
    constants.SS_NODE_SECONDARY_IPS: join_lines(secondary_ips),
    constants.SS_OFFLINE_NODES: join_lines(offline_names),
    constants.SS_ONLINE_NODES: join_lines(online_names),
    constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
    constants.SS_INSTANCE_LIST: join_lines(inst_names),
    constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
    constants.SS_HYPERVISOR_LIST: join_lines(cluster.enabled_hypervisors),
    constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
    constants.SS_UID_POOL:
      uidpool.FormatUidPool(cluster.uid_pool, separator="\n"),
    constants.SS_NODEGROUPS: join_lines(utils.NiceSort(group_lines)),
    }

  # Every value must be a string; collect and report all offenders at once
  bad_values = [(key, val) for key, val in ssconf_values.items()
                if not isinstance(val, (str, basestring))]
  if bad_values:
    err = utils.CommaJoin("%s=%s" % (key, val) for key, val in bad_values)
    raise errors.ConfigurationError("Some ssconf key(s) have non-string"
                                    " values: %s" % err)
  return ssconf_values
@locking.ssynchronized(_config_lock, shared=1)
def GetSsconfValues(self):
  """Wrapper using lock around L{_UnlockedGetSsconfValues}.

  """
  values = self._UnlockedGetSsconfValues()
  return values
@locking.ssynchronized(_config_lock, shared=1)
def GetVGName(self):
  """Return the cluster's volume group name.

  """
  cluster = self._config_data.cluster
  return cluster.volume_group_name
@locking.ssynchronized(_config_lock)
def SetVGName(self, vg_name):
  """Set the cluster's volume group name and write the configuration.

  @param vg_name: the new volume group name

  """
  cluster = self._config_data.cluster
  cluster.volume_group_name = vg_name
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetDRBDHelper(self):
  """Return the cluster's DRBD usermode helper.

  """
  cluster = self._config_data.cluster
  return cluster.drbd_usermode_helper
@locking.ssynchronized(_config_lock)
def SetDRBDHelper(self, drbd_helper):
  """Set the cluster's DRBD usermode helper and write the configuration.

  @param drbd_helper: the new DRBD usermode helper

  """
  cluster = self._config_data.cluster
  cluster.drbd_usermode_helper = drbd_helper
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetMACPrefix(self):
  """Return the cluster's mac prefix.

  """
  cluster = self._config_data.cluster
  return cluster.mac_prefix
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterInfo(self):
  """Returns information about the cluster

  @rtype: L{objects.Cluster}
  @return: the cluster object held by the configuration (not a copy)

  """
  cluster = self._config_data.cluster
  return cluster
@locking.ssynchronized(_config_lock, shared=1)
def HasAnyDiskOfType(self, dev_type):
  """Check if there is at least one disk of the given type in the config.

  The check itself is delegated to the configuration data object.

  @param dev_type: the disk type to look for

  """
  cfg = self._config_data
  return cfg.HasAnyDiskOfType(dev_type)
@locking.ssynchronized(_config_lock)
def Update(self, target, feedback_fn):
  """Notify function to be called after updates.

  This function must be called when an object (as returned by
  GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
  caller wants the modifications saved to the backing store. Note
  that all modified objects will be saved, but the target argument
  is the one the caller wants to ensure that it's saved.

  @param target: an instance of either L{objects.Cluster},
      L{objects.Node}, L{objects.Instance} or L{objects.NodeGroup}
      which is existing in the cluster
  @param feedback_fn: Callable feedback function
  @raise errors.ProgrammerError: if the configuration was not read or
      the target has an unsupported type
  @raise errors.ConfigurationError: if the target is not part of the
      current configuration

  """
  cfg = self._config_data
  if cfg is None:
    raise errors.ProgrammerError("Configuration file not read,"
                                 " cannot save.")

  bump_cluster = False
  if isinstance(target, objects.Cluster):
    known = target == cfg.cluster
  elif isinstance(target, objects.Node):
    known = target in cfg.nodes.values()
    bump_cluster = True
  elif isinstance(target, objects.Instance):
    known = target in cfg.instances.values()
  elif isinstance(target, objects.NodeGroup):
    known = target in cfg.nodegroups.values()
  else:
    raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                 " ConfigWriter.Update" % type(target))

  if not known:
    raise errors.ConfigurationError("Configuration updated since object"
                                    " has been read or unknown object")

  now = time.time()
  target.serial_no += 1
  target.mtime = now

  if bump_cluster:
    # for node updates, we need to increase the cluster serial too
    cfg.cluster.serial_no += 1
    cfg.cluster.mtime = now

  if isinstance(target, objects.Instance):
    self._UnlockedReleaseDRBDMinors(target.name)

  self._WriteConfig(feedback_fn=feedback_fn)
@locking.ssynchronized(_config_lock)
def DropECReservations(self, ec_id):
  """Drop per-execution-context reservations

  The request is forwarded to every reservation manager in
  C{self._all_rms}.

  @param ec_id: the execution context id whose reservations are dropped

  """
  for manager in self._all_rms:
    manager.DropECReservations(ec_id)