4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
29 Currently, the data storage format is JSON. YAML was slow and consuming too
34 # pylint: disable=R0904
35 # R0904: Too many public methods
43 from ganeti import errors
44 from ganeti import locking
45 from ganeti import utils
46 from ganeti import constants
47 from ganeti import rpc
48 from ganeti import objects
49 from ganeti import serializer
50 from ganeti import uidpool
51 from ganeti import netutils
52 from ganeti import runtime
55 _config_lock = locking.SharedLock("ConfigWriter")
57 # job id used for resource management at config upgrade time
58 _UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
def _ValidateConfig(data):
  """Check that a configuration object carries the expected version.

  Only the C{version} attribute is inspected; nothing else about the
  configuration is validated here.

  @param data: configuration object exposing a C{version} attribute
  @raise errors.ConfigVersionMismatch: if C{data.version} differs from
      L{constants.CONFIG_VERSION}

  """
  found = data.version
  expected = constants.CONFIG_VERSION
  if found == expected:
    return
  raise errors.ConfigVersionMismatch(expected, found)
class TemporaryReservationManager(object):
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime. Reservations are grouped by
  the id of the execution context (job) holding them, so that all
  reservations of one holder can be dropped at once.

  """
  # Maximum number of candidates L{Generate} tries before giving up.
  # NOTE(review): the original bound is not visible in the reviewed excerpt;
  # 64 is assumed here -- confirm against the full file.
  _GENERATE_RETRIES = 64

  def __init__(self):
    # Maps execution context id -> set of resources reserved by it
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Tell whether any holder currently has C{resource} reserved.

    @param resource: the resource to look for
    @rtype: boolean

    """
    for held in self._ec_reserved.values():
      if resource in held:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve C{resource} on behalf of execution context C{ec_id}.

    @param ec_id: id of the holder the reservation is recorded under
    @param resource: the resource to reserve
    @raise errors.ReservationError: if any holder (including C{ec_id}
        itself) already reserved the resource

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    self._ec_reserved.setdefault(ec_id, set()).add(resource)

  def DropECReservations(self, ec_id):
    """Forget every reservation held by C{ec_id}.

    Unknown ids are silently ignored.

    """
    self._ec_reserved.pop(ec_id, None)

  def GetReserved(self):
    """Return a new set with the resources reserved by all holders.

    @rtype: set

    """
    combined = set()
    for held in self._ec_reserved.values():
      combined.update(held)
    return combined

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type and reserve it.

    Candidates returned by C{generate_one_fn} are rejected when they are
    C{None}, already reserved, or contained in C{existing}.

    @param existing: iterable of resources that must not be returned
    @param generate_one_fn: callable without arguments producing a candidate
    @param ec_id: id of the holder the new reservation is recorded under
    @return: the newly reserved resource
    @raise errors.ConfigurationError: if no acceptable candidate was
        produced within the retry budget

    """
    assert callable(generate_one_fn)

    taken = self.GetReserved()
    taken.update(existing)

    candidate = None
    # for/else: the else branch runs only when no candidate was accepted
    for _ in range(self._GENERATE_RETRIES):
      candidate = generate_one_fn()
      if candidate is not None and candidate not in taken:
        break
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % candidate)
    self.Reserve(ec_id, candidate)
    return candidate
def _MatchNameComponentIgnoreCase(short_name, names):
  """Case-insensitive front-end for L{utils.MatchNameComponent}.

  @param short_name: the name to look up
  @param names: the candidate names to match against
  @return: whatever L{utils.MatchNameComponent} returns when called with
      C{case_sensitive=False}

  """
  matcher = utils.MatchNameComponent
  return matcher(short_name, names, case_sensitive=False)
136 def _CheckInstanceDiskIvNames(disks):
137 """Checks if instance's disks' C{iv_name} attributes are in order.
139 @type disks: list of L{objects.Disk}
140 @param disks: List of disks
141 @rtype: list of tuples; (int, string, string)
142 @return: List of wrongly named disks, each tuple contains disk index,
143 expected and actual name
148 for (idx, disk) in enumerate(disks):
149 exp_iv_name = "disk/%s" % idx
150 if disk.iv_name != exp_iv_name:
151 result.append((idx, exp_iv_name, disk.iv_name))
157 """The interface to the cluster configuration.
159 @ivar _temporary_lvs: reservation manager for temporary LVs
160 @ivar _all_rms: a list of all temporary reservation managers
163 def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
164 accept_foreign=False):
166 self._lock = _config_lock
167 self._config_data = None
168 self._offline = offline
170 self._cfg_file = constants.CLUSTER_CONF_FILE
172 self._cfg_file = cfg_file
173 self._getents = _getents
174 self._temporary_ids = TemporaryReservationManager()
175 self._temporary_drbds = {}
176 self._temporary_macs = TemporaryReservationManager()
177 self._temporary_secrets = TemporaryReservationManager()
178 self._temporary_lvs = TemporaryReservationManager()
179 self._all_rms = [self._temporary_ids, self._temporary_macs,
180 self._temporary_secrets, self._temporary_lvs]
181 # Note: in order to prevent errors when resolving our name in
182 # _DistributeConfig, we compute it here once and reuse it; it's
183 # better to raise an error before starting to modify the config
184 # file than after it was modified
185 self._my_hostname = netutils.Hostname.GetSysName()
186 self._last_cluster_serial = -1
189 self._OpenConfig(accept_foreign)
def _GetRpc(self, address_list):
  """Returns RPC runner for configuration.

  @param address_list: handed unchanged to L{rpc.ConfigRunner}
  @return: a new L{rpc.ConfigRunner} built from the stored context and
      C{address_list}

  """
  context = self._context
  return rpc.ConfigRunner(context, address_list)
def SetContext(self, context):
  """Sets Ganeti context.

  @param context: the context object; stored as C{self._context}, which
      L{_GetRpc} later hands to the RPC runner

  """
  self._context = context
203 # this method needs to be static, so that we can call it on the class
206 """Check if the cluster is configured.
209 return os.path.exists(constants.CLUSTER_CONF_FILE)
211 def _GenerateOneMAC(self):
212 """Generate one mac address
215 prefix = self._config_data.cluster.mac_prefix
216 byte1 = random.randrange(0, 256)
217 byte2 = random.randrange(0, 256)
218 byte3 = random.randrange(0, 256)
219 mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
@locking.ssynchronized(_config_lock, shared=1)
def GetNdParams(self, node):
  """Get the node params populated with cluster defaults.

  The node's group is looked up first and passed, together with the node,
  to the cluster object's C{FillND}.

  @type node: L{objects.Node}
  @param node: The node we want to know the params for
  @return: A dict with the filled in node params

  """
  group = self._UnlockedGetNodeGroup(node.group)
  cluster = self._config_data.cluster
  return cluster.FillND(node, group)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateMAC(self, ec_id):
  """Generate a MAC for an instance and reserve it for C{ec_id}.

  Candidates come from L{_GenerateOneMAC}; MACs already used by configured
  instances (L{_AllMACs}) or temporarily reserved are not returned.

  @param ec_id: id of the job the new MAC is reserved for
  @return: the newly reserved MAC address

  """
  existing = self._AllMACs()
  # Use the MAC-specific reservation manager, i.e. the same one ReserveMAC
  # records into; otherwise generated and explicitly reserved MACs would
  # never be checked against each other.
  return self._temporary_macs.Generate(existing, self._GenerateOneMAC, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveMAC(self, mac, ec_id):
  """Reserve a MAC for an instance.

  This only checks instances managed by this cluster, it does not
  check for potential collisions elsewhere.

  @param mac: the MAC address to reserve
  @param ec_id: id of the job the MAC is reserved for
  @raise errors.ReservationError: if the MAC is already present in the
      configuration

  """
  if mac in self._AllMACs():
    raise errors.ReservationError("mac already in use")
  self._temporary_macs.Reserve(ec_id, mac)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveLV(self, lv_name, ec_id):
  """Reserve an VG/LV pair for an instance.

  @type lv_name: string
  @param lv_name: the logical volume name to reserve
  @param ec_id: id of the job the LV is reserved for
  @raise errors.ReservationError: if the name is already among the LVs
      returned by L{_AllLVs}

  """
  if lv_name in self._AllLVs():
    raise errors.ReservationError("LV already in use")
  self._temporary_lvs.Reserve(ec_id, lv_name)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateDRBDSecret(self, ec_id):
  """Generate a DRBD secret.

  This checks the current disks for duplicates.

  @param ec_id: id of the job the secret is reserved for
  @return: the newly reserved secret

  """
  known_secrets = self._AllDRBDSecrets()
  reservations = self._temporary_secrets
  return reservations.Generate(known_secrets, utils.GenerateSecret, ec_id)
284 """Compute the list of all LVs.
288 for instance in self._config_data.instances.values():
289 node_data = instance.MapLVsByNode()
290 for lv_list in node_data.values():
291 lvnames.update(lv_list)
294 def _AllIDs(self, include_temporary):
295 """Compute the list of all UUIDs and names we have.
297 @type include_temporary: boolean
298 @param include_temporary: whether to include the _temporary_ids set
300 @return: a set of IDs
304 if include_temporary:
305 existing.update(self._temporary_ids.GetReserved())
306 existing.update(self._AllLVs())
307 existing.update(self._config_data.instances.keys())
308 existing.update(self._config_data.nodes.keys())
309 existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
def _GenerateUniqueID(self, ec_id):
  """Generate an unique UUID.

  The new UUID (from L{utils.NewUUID}) is checked against the IDs returned
  by L{_AllIDs} and reserved in the temporary ID manager.

  @param ec_id: id of the job the UUID is reserved for
  @rtype: string
  @return: the unique id

  """
  known = self._AllIDs(include_temporary=False)
  reservations = self._temporary_ids
  return reservations.Generate(known, utils.NewUUID, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateUniqueID(self, ec_id):
  """Generate an unique ID.

  This is just a wrapper over the unlocked version.

  @type ec_id: string
  @param ec_id: unique id for the job to reserve the id to
  @return: the value returned by L{_GenerateUniqueID}

  """
  unique_id = self._GenerateUniqueID(ec_id)
  return unique_id
338 """Return all MACs present in the config.
341 @return: the list of all MACs
345 for instance in self._config_data.instances.values():
346 for nic in instance.nics:
347 result.append(nic.mac)
def _AllDRBDSecrets(self):
  """Return all DRBD secrets present in the config.

  Every disk of every configured instance is visited together with its
  children; for DRBD8 disks the sixth element of C{logical_id} (the shared
  secret) is collected.

  @rtype: list
  @return: the list of all DRBD secrets

  """
  def _Gather(disk, found):
    """Recursively gather secrets from this disk."""
    # disk.dev_type is compared against the LD_* constants everywhere else
    # in this class (see _UnlockedSetDiskID, _UnlockedComputeDRBDMap), so
    # use LD_DRBD8 here as well.
    # NOTE(review): confirm DT_DRBD8 and LD_DRBD8 differ in constants.py
    if disk.dev_type == constants.LD_DRBD8:
      found.append(disk.logical_id[5])
    # children may be empty/None for leaf devices
    for child in disk.children or ():
      _Gather(child, found)

  secrets = []
  for instance in self._config_data.instances.values():
    for disk in instance.disks:
      _Gather(disk, secrets)
  return secrets
373 def _CheckDiskIDs(self, disk, l_ids, p_ids):
374 """Compute duplicate disk IDs
376 @type disk: L{objects.Disk}
377 @param disk: the disk at which to start searching
379 @param l_ids: list of current logical ids
381 @param p_ids: list of current physical ids
383 @return: a list of error messages
387 if disk.logical_id is not None:
388 if disk.logical_id in l_ids:
389 result.append("duplicate logical id %s" % str(disk.logical_id))
391 l_ids.append(disk.logical_id)
392 if disk.physical_id is not None:
393 if disk.physical_id in p_ids:
394 result.append("duplicate physical id %s" % str(disk.physical_id))
396 p_ids.append(disk.physical_id)
399 for child in disk.children:
400 result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
403 def _UnlockedVerifyConfig(self):
407 @return: a list of error messages; a non-empty list signifies
411 # pylint: disable=R0914
415 data = self._config_data
416 cluster = data.cluster
420 # global cluster checks
421 if not cluster.enabled_hypervisors:
422 result.append("enabled hypervisors list doesn't have any entries")
423 invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
425 result.append("enabled hypervisors contains invalid entries: %s" %
427 missing_hvp = (set(cluster.enabled_hypervisors) -
428 set(cluster.hvparams.keys()))
430 result.append("hypervisor parameters missing for the enabled"
431 " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
433 if cluster.master_node not in data.nodes:
434 result.append("cluster has invalid primary node '%s'" %
437 def _helper(owner, attr, value, template):
439 utils.ForceDictType(value, template)
440 except errors.GenericError, err:
441 result.append("%s has invalid %s: %s" % (owner, attr, err))
443 def _helper_nic(owner, params):
445 objects.NIC.CheckParameterSyntax(params)
446 except errors.ConfigurationError, err:
447 result.append("%s has invalid nicparams: %s" % (owner, err))
449 def _helper_ipolicy(owner, params):
451 objects.InstancePolicy.CheckParameterSyntax(params)
452 except errors.ConfigurationError, err:
453 result.append("%s has invalid instance policy: %s" % (owner, err))
455 def _helper_ispecs(owner, params):
456 for key, value in params.items():
457 if key in constants.IPOLICY_ISPECS:
458 fullkey = "ipolicy/" + key
459 _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)
461 # FIXME: assuming list type
462 if key in constants.IPOLICY_PARAMETERS:
466 if not isinstance(value, exp_type):
467 result.append("%s has invalid instance policy: for %s,"
468 " expecting %s, got %s" %
469 (owner, key, exp_type.__name__, type(value)))
471 # check cluster parameters
472 _helper("cluster", "beparams", cluster.SimpleFillBE({}),
473 constants.BES_PARAMETER_TYPES)
474 _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
475 constants.NICS_PARAMETER_TYPES)
476 _helper_nic("cluster", cluster.SimpleFillNIC({}))
477 _helper("cluster", "ndparams", cluster.SimpleFillND({}),
478 constants.NDS_PARAMETER_TYPES)
479 _helper_ipolicy("cluster", cluster.SimpleFillIPolicy({}))
480 _helper_ispecs("cluster", cluster.SimpleFillIPolicy({}))
482 # per-instance checks
483 for instance_name in data.instances:
484 instance = data.instances[instance_name]
485 if instance.name != instance_name:
486 result.append("instance '%s' is indexed by wrong name '%s'" %
487 (instance.name, instance_name))
488 if instance.primary_node not in data.nodes:
489 result.append("instance '%s' has invalid primary node '%s'" %
490 (instance_name, instance.primary_node))
491 for snode in instance.secondary_nodes:
492 if snode not in data.nodes:
493 result.append("instance '%s' has invalid secondary node '%s'" %
494 (instance_name, snode))
495 for idx, nic in enumerate(instance.nics):
496 if nic.mac in seen_macs:
497 result.append("instance '%s' has NIC %d mac %s duplicate" %
498 (instance_name, idx, nic.mac))
500 seen_macs.append(nic.mac)
502 filled = cluster.SimpleFillNIC(nic.nicparams)
503 owner = "instance %s nic %d" % (instance.name, idx)
504 _helper(owner, "nicparams",
505 filled, constants.NICS_PARAMETER_TYPES)
506 _helper_nic(owner, filled)
509 if instance.beparams:
510 _helper("instance %s" % instance.name, "beparams",
511 cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)
513 # gather the drbd ports for duplicate checks
514 for (idx, dsk) in enumerate(instance.disks):
515 if dsk.dev_type in constants.LDS_DRBD:
516 tcp_port = dsk.logical_id[2]
517 if tcp_port not in ports:
519 ports[tcp_port].append((instance.name, "drbd disk %s" % idx))
520 # gather network port reservation
521 net_port = getattr(instance, "network_port", None)
522 if net_port is not None:
523 if net_port not in ports:
525 ports[net_port].append((instance.name, "network port"))
527 # instance disk verify
528 for idx, disk in enumerate(instance.disks):
529 result.extend(["instance '%s' disk %d error: %s" %
530 (instance.name, idx, msg) for msg in disk.Verify()])
531 result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))
533 wrong_names = _CheckInstanceDiskIvNames(instance.disks)
535 tmp = "; ".join(("name of disk %s should be '%s', but is '%s'" %
536 (idx, exp_name, actual_name))
537 for (idx, exp_name, actual_name) in wrong_names)
539 result.append("Instance '%s' has wrongly named disks: %s" %
540 (instance.name, tmp))
542 # cluster-wide pool of free ports
543 for free_port in cluster.tcpudp_port_pool:
544 if free_port not in ports:
545 ports[free_port] = []
546 ports[free_port].append(("cluster", "port marked as free"))
548 # compute tcp/udp duplicate ports
554 txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
555 result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
557 # highest used tcp port check
559 if keys[-1] > cluster.highest_used_port:
560 result.append("Highest used port mismatch, saved %s, computed %s" %
561 (cluster.highest_used_port, keys[-1]))
563 if not data.nodes[cluster.master_node].master_candidate:
564 result.append("Master node is not a master candidate")
566 # master candidate checks
567 mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
569 result.append("Not enough master candidates: actual %d, target %d" %
573 for node_name, node in data.nodes.items():
574 if node.name != node_name:
575 result.append("Node '%s' is indexed by wrong name '%s'" %
576 (node.name, node_name))
577 if [node.master_candidate, node.drained, node.offline].count(True) > 1:
578 result.append("Node %s state is invalid: master_candidate=%s,"
579 " drain=%s, offline=%s" %
580 (node.name, node.master_candidate, node.drained,
582 if node.group not in data.nodegroups:
583 result.append("Node '%s' has invalid group '%s'" %
584 (node.name, node.group))
586 _helper("node %s" % node.name, "ndparams",
587 cluster.FillND(node, data.nodegroups[node.group]),
588 constants.NDS_PARAMETER_TYPES)
591 nodegroups_names = set()
592 for nodegroup_uuid in data.nodegroups:
593 nodegroup = data.nodegroups[nodegroup_uuid]
594 if nodegroup.uuid != nodegroup_uuid:
595 result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
596 % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
597 if utils.UUID_RE.match(nodegroup.name.lower()):
598 result.append("node group '%s' (uuid: '%s') has uuid-like name" %
599 (nodegroup.name, nodegroup.uuid))
600 if nodegroup.name in nodegroups_names:
601 result.append("duplicate node group name '%s'" % nodegroup.name)
603 nodegroups_names.add(nodegroup.name)
604 group_name = "group %s" % nodegroup.name
605 _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy))
606 _helper_ispecs(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy))
607 if nodegroup.ndparams:
608 _helper(group_name, "ndparams",
609 cluster.SimpleFillND(nodegroup.ndparams),
610 constants.NDS_PARAMETER_TYPES)
613 _, duplicates = self._UnlockedComputeDRBDMap()
614 for node, minor, instance_a, instance_b in duplicates:
615 result.append("DRBD minor %d on node %s is assigned twice to instances"
616 " %s and %s" % (minor, node, instance_a, instance_b))
619 default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
622 def _AddIpAddress(ip, name):
623 ips.setdefault(ip, []).append(name)
625 _AddIpAddress(cluster.master_ip, "cluster_ip")
627 for node in data.nodes.values():
628 _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
629 if node.secondary_ip != node.primary_ip:
630 _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
632 for instance in data.instances.values():
633 for idx, nic in enumerate(instance.nics):
637 nicparams = objects.FillDict(default_nicparams, nic.nicparams)
638 nic_mode = nicparams[constants.NIC_MODE]
639 nic_link = nicparams[constants.NIC_LINK]
641 if nic_mode == constants.NIC_MODE_BRIDGED:
642 link = "bridge:%s" % nic_link
643 elif nic_mode == constants.NIC_MODE_ROUTED:
644 link = "route:%s" % nic_link
646 raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
648 _AddIpAddress("%s/%s" % (link, nic.ip),
649 "instance:%s/nic:%d" % (instance.name, idx))
651 for ip, owners in ips.items():
653 result.append("IP address %s is used by multiple owners: %s" %
654 (ip, utils.CommaJoin(owners)))
@locking.ssynchronized(_config_lock, shared=1)
def VerifyConfig(self):
  """Verify the configuration.

  This is just a wrapper over L{_UnlockedVerifyConfig}.

  @rtype: list
  @return: a list of error messages; a non-empty list signifies
      configuration errors

  """
  problems = self._UnlockedVerifyConfig()
  return problems
def _UnlockedSetDiskID(self, disk, node_name):
  """Convert the unique ID to the ID needed on the target nodes.

  This is used only for drbd, which needs ip/port configuration; for every
  other device type the physical id is a plain copy of the logical id.

  The routine descends down and updates its children also, because
  this helps when only the top device is passed to the remote node.

  This function is for internal use, when the config lock is already held.

  @param disk: the disk whose C{physical_id} is (re)computed in place
  @param node_name: the node the physical id is computed for
  @raise errors.ConfigurationError: if a DRBD disk does not involve
      C{node_name}, or one of its nodes cannot be found

  """
  for child in disk.children or ():
    self._UnlockedSetDiskID(child, node_name)

  # Nothing to derive from, and a physical id is already there
  if disk.logical_id is None and disk.physical_id is not None:
    return

  if disk.dev_type != constants.LD_DRBD8:
    disk.physical_id = disk.logical_id
    return

  (pnode, snode, port, pminor, sminor, secret) = disk.logical_id
  if node_name not in (pnode, snode):
    raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                    node_name)
  primary = self._UnlockedGetNodeInfo(pnode)
  secondary = self._UnlockedGetNodeInfo(snode)
  if primary is None or secondary is None:
    raise errors.ConfigurationError("Can't find primary or secondary node"
                                    " for %s" % str(disk))

  # The physical id lists the local endpoint first, then the remote one,
  # followed by the local minor and the shared secret
  local_end = (primary.secondary_ip, port)
  remote_end = (secondary.secondary_ip, port)
  minor = pminor
  if pnode != node_name:
    # node_name is the secondary (membership was tested above)
    (local_end, remote_end) = (remote_end, local_end)
    minor = sminor
  disk.physical_id = local_end + remote_end + (minor, secret)
@locking.ssynchronized(_config_lock)
def SetDiskID(self, disk, node_name):
  """Convert the unique ID to the ID needed on the target nodes.

  This is used only for drbd, which needs ip/port configuration.

  The routine descends down and updates its children also, because
  this helps when only the top device is passed to the remote node.

  This is just a wrapper over L{_UnlockedSetDiskID}.

  """
  outcome = self._UnlockedSetDiskID(disk, node_name)
  return outcome
@locking.ssynchronized(_config_lock)
def AddTcpUdpPort(self, port):
  """Adds a new port to the available port pool.

  @warning: this method does not "flush" the configuration (via
      L{_WriteConfig}); callers should do that themselves once the
      configuration is stable

  @type port: int
  @param port: the port to add to the cluster's C{tcpudp_port_pool}
  @raise errors.ProgrammerError: if C{port} is not an integer

  """
  if isinstance(port, int):
    pool = self._config_data.cluster.tcpudp_port_pool
    pool.add(port)
  else:
    raise errors.ProgrammerError("Invalid type passed for port")
@locking.ssynchronized(_config_lock, shared=1)
def GetPortList(self):
  """Returns a copy of the current port list.

  @return: the result of C{copy()} on the cluster's C{tcpudp_port_pool}

  """
  pool = self._config_data.cluster.tcpudp_port_pool
  return pool.copy()
743 @locking.ssynchronized(_config_lock)
744 def AllocatePort(self):
747 The port will be taken from the available port pool or from the
748 default port range (and in this case we increase
752 # If there are TCP/IP ports configured, we use them first.
753 if self._config_data.cluster.tcpudp_port_pool:
754 port = self._config_data.cluster.tcpudp_port_pool.pop()
756 port = self._config_data.cluster.highest_used_port + 1
757 if port >= constants.LAST_DRBD_PORT:
758 raise errors.ConfigurationError("The highest used port is greater"
759 " than %s. Aborting." %
760 constants.LAST_DRBD_PORT)
761 self._config_data.cluster.highest_used_port = port
def _UnlockedComputeDRBDMap(self):
  """Compute the used DRBD minor/nodes.

  Both the minors recorded in instance disks and the temporarily reserved
  ones (C{self._temporary_drbds}) are taken into account.

  @rtype: (dict, list)
  @return: dictionary of node_name: dict of minor: instance_name;
      the returned dict will have all the nodes in it (even if with
      an empty list), and a list of duplicates; if the duplicates
      list is not empty, the configuration is corrupted and its caller
      should raise an exception

  """
  def _Collect(owner, disk, minors_by_node, clashes):
    """Record the minors of C{disk} and its children, noting clashes."""
    if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
      (node_a, node_b, _, minor_a, minor_b) = disk.logical_id[:5]
      for (node, minor) in ((node_a, minor_a), (node_b, minor_b)):
        assert node in minors_by_node, ("Node '%s' of instance '%s' not found"
                                        " in node list" % (node, owner))
        node_minors = minors_by_node[node]
        if minor in node_minors:
          clashes.append((node, minor, owner, node_minors[minor]))
        else:
          node_minors[minor] = owner
    for child in disk.children or ():
      _Collect(owner, child, minors_by_node, clashes)

  cfg = self._config_data
  minors_by_node = dict((name, {}) for name in cfg.nodes)
  clashes = []
  for inst in cfg.instances.itervalues():
    for disk in inst.disks:
      _Collect(inst.name, disk, minors_by_node, clashes)

  # A temporary reservation only clashes when the minor is already
  # assigned to a different instance
  for ((node, minor), owner) in self._temporary_drbds.iteritems():
    node_minors = minors_by_node[node]
    if minor in node_minors and node_minors[minor] != owner:
      clashes.append((node, minor, owner, node_minors[minor]))
    else:
      node_minors[minor] = owner
  return (minors_by_node, clashes)
@locking.ssynchronized(_config_lock)
def ComputeDRBDMap(self):
  """Compute the used DRBD minor/nodes.

  This is just a wrapper over L{_UnlockedComputeDRBDMap}.

  @return: dictionary of node_name: dict of minor: instance_name;
      the returned dict will have all the nodes in it (even if with
      an empty list).
  @raise errors.ConfigurationError: if duplicate minors were found

  """
  (d_map, duplicates) = self._UnlockedComputeDRBDMap()
  if not duplicates:
    return d_map
  raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                  str(duplicates))
822 @locking.ssynchronized(_config_lock)
823 def AllocateDRBDMinor(self, nodes, instance):
824 """Allocate a drbd minor.
826 The free minor will be automatically computed from the existing
827 devices. A node can be given multiple times in order to allocate
828 multiple minors. The result is the list of minors, in the same
829 order as the passed nodes.
831 @type instance: string
832 @param instance: the instance for which we allocate minors
835 assert isinstance(instance, basestring), \
836 "Invalid argument '%s' passed to AllocateDRBDMinor" % instance
838 d_map, duplicates = self._UnlockedComputeDRBDMap()
840 raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
846 # no minors used, we can start at 0
849 self._temporary_drbds[(nname, 0)] = instance
853 ffree = utils.FirstFree(keys)
855 # return the next minor
856 # TODO: implement high-limit check
860 # double-check minor against current instances
861 assert minor not in d_map[nname], \
862 ("Attempt to reuse allocated DRBD minor %d on node %s,"
863 " already allocated to instance %s" %
864 (minor, nname, d_map[nname][minor]))
865 ndata[minor] = instance
866 # double-check minor against reservation
867 r_key = (nname, minor)
868 assert r_key not in self._temporary_drbds, \
869 ("Attempt to reuse reserved DRBD minor %d on node %s,"
870 " reserved for instance %s" %
871 (minor, nname, self._temporary_drbds[r_key]))
872 self._temporary_drbds[r_key] = instance
874 logging.debug("Request to allocate drbd minors, input: %s, returning %s",
def _UnlockedReleaseDRBDMinors(self, instance):
  """Release temporary drbd minors allocated for a given instance.

  Every entry of C{self._temporary_drbds} whose value equals C{instance}
  is removed.

  @type instance: string
  @param instance: the instance for which temporary minors should be
      released

  """
  assert isinstance(instance, basestring), \
         "Invalid argument passed to ReleaseDRBDMinors"
  # Collect the keys first; the dict must not change while being walked
  stale = [key for (key, owner) in self._temporary_drbds.items()
           if owner == instance]
  for key in stale:
    del self._temporary_drbds[key]
@locking.ssynchronized(_config_lock)
def ReleaseDRBDMinors(self, instance):
  """Release temporary drbd minors allocated for a given instance.

  This should be called on the error paths, on the success paths
  it's automatically called by the ConfigWriter add and update
  functions.

  This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.

  @type instance: string
  @param instance: the instance for which temporary minors should be
      released

  """
  self._UnlockedReleaseDRBDMinors(instance)
@locking.ssynchronized(_config_lock, shared=1)
def GetConfigVersion(self):
  """Get the configuration version.

  @return: Config version (the C{version} attribute of the loaded data)

  """
  config = self._config_data
  return config.version
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterName(self):
  """Get the cluster name.

  @return: Cluster name (the cluster object's C{cluster_name})

  """
  cluster = self._config_data.cluster
  return cluster.cluster_name
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNode(self):
  """Get the hostname of the master node for this cluster.

  @return: Master hostname (the cluster object's C{master_node})

  """
  cluster = self._config_data.cluster
  return cluster.master_node
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterIP(self):
  """Get the IP of the master node for this cluster.

  @return: the cluster object's C{master_ip}

  """
  cluster = self._config_data.cluster
  return cluster.master_ip
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetdev(self):
  """Get the master network device for this cluster.

  @return: the cluster object's C{master_netdev}

  """
  cluster = self._config_data.cluster
  return cluster.master_netdev
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetmask(self):
  """Get the netmask of the master node for this cluster.

  @return: the cluster object's C{master_netmask}

  """
  cluster = self._config_data.cluster
  return cluster.master_netmask
@locking.ssynchronized(_config_lock, shared=1)
def GetUseExternalMipScript(self):
  """Get flag representing whether to use the external master IP setup script.

  @return: the cluster object's C{use_external_mip_script}

  """
  cluster = self._config_data.cluster
  return cluster.use_external_mip_script
@locking.ssynchronized(_config_lock, shared=1)
def GetFileStorageDir(self):
  """Get the file storage dir for this cluster.

  @return: the cluster object's C{file_storage_dir}

  """
  cluster = self._config_data.cluster
  return cluster.file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetSharedFileStorageDir(self):
  """Get the shared file storage dir for this cluster.

  @return: the cluster object's C{shared_file_storage_dir}

  """
  cluster = self._config_data.cluster
  return cluster.shared_file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetHypervisorType(self):
  """Get the hypervisor type for this cluster.

  @return: the first entry of the cluster's C{enabled_hypervisors} list

  """
  enabled = self._config_data.cluster.enabled_hypervisors
  return enabled[0]
@locking.ssynchronized(_config_lock, shared=1)
def GetHostKey(self):
  """Return the rsa hostkey from the config.

  @return: the rsa hostkey (the cluster object's C{rsahostkeypub})

  """
  cluster = self._config_data.cluster
  return cluster.rsahostkeypub
@locking.ssynchronized(_config_lock, shared=1)
def GetDefaultIAllocator(self):
  """Get the default instance allocator for this cluster.

  @return: the cluster object's C{default_iallocator}

  """
  cluster = self._config_data.cluster
  return cluster.default_iallocator
@locking.ssynchronized(_config_lock, shared=1)
def GetPrimaryIPFamily(self):
  """Get cluster primary ip family.

  @return: primary ip family (the cluster object's C{primary_ip_family})

  """
  cluster = self._config_data.cluster
  return cluster.primary_ip_family
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetworkParameters(self):
  """Get network parameters of the master node.

  The values are copied from the cluster object: master node name, master
  IP, netmask, network device and primary IP family.

  @rtype: L{objects.MasterNetworkParameters}
  @return: network parameters of the master node

  """
  cluster = self._config_data.cluster
  return objects.MasterNetworkParameters(name=cluster.master_node,
                                         ip=cluster.master_ip,
                                         netmask=cluster.master_netmask,
                                         netdev=cluster.master_netdev,
                                         ip_family=cluster.primary_ip_family)
1030 @locking.ssynchronized(_config_lock)
1031 def AddNodeGroup(self, group, ec_id, check_uuid=True):
1032 """Add a node group to the configuration.
1034 This method calls group.UpgradeConfig() to fill any missing attributes
1035 according to their default values.
1037 @type group: L{objects.NodeGroup}
1038 @param group: the NodeGroup object to add
1040 @param ec_id: unique id for the job to use when creating a missing UUID
1041 @type check_uuid: bool
1042 @param check_uuid: add an UUID to the group if it doesn't have one or, if
1043 it does, ensure that it does not exist in the
1044 configuration already
1047 self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
1050 def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
1051 """Add a node group to the configuration.
1054 logging.info("Adding node group %s to configuration", group.name)
1056 # Some code might need to add a node group with a pre-populated UUID
1057 # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
1058 # the "does this UUID" exist already check.
1060 self._EnsureUUID(group, ec_id)
1063 existing_uuid = self._UnlockedLookupNodeGroup(group.name)
1064 except errors.OpPrereqError:
1067 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
1068 " node group (UUID: %s)" %
1069 (group.name, existing_uuid),
1070 errors.ECODE_EXISTS)
1073 group.ctime = group.mtime = time.time()
1074 group.UpgradeConfig()
1076 self._config_data.nodegroups[group.uuid] = group
1077 self._config_data.cluster.serial_no += 1
1079 @locking.ssynchronized(_config_lock)
1080 def RemoveNodeGroup(self, group_uuid):
1081 """Remove a node group from the configuration.
1083 @type group_uuid: string
1084 @param group_uuid: the UUID of the node group to remove
1087 logging.info("Removing node group %s from configuration", group_uuid)
1089 if group_uuid not in self._config_data.nodegroups:
1090 raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)
1092 assert len(self._config_data.nodegroups) != 1, \
1093 "Group '%s' is the only group, cannot be removed" % group_uuid
1095 del self._config_data.nodegroups[group_uuid]
1096 self._config_data.cluster.serial_no += 1
1099 def _UnlockedLookupNodeGroup(self, target):
1100 """Lookup a node group's UUID.
1102 @type target: string or None
1103 @param target: group name or UUID or None to look for the default
1105 @return: nodegroup UUID
1106 @raises errors.OpPrereqError: when the target group cannot be found
1110 if len(self._config_data.nodegroups) != 1:
1111 raise errors.OpPrereqError("More than one node group exists. Target"
1112 " group must be specified explicitely.")
1114 return self._config_data.nodegroups.keys()[0]
1115 if target in self._config_data.nodegroups:
1117 for nodegroup in self._config_data.nodegroups.values():
1118 if nodegroup.name == target:
1119 return nodegroup.uuid
1120 raise errors.OpPrereqError("Node group '%s' not found" % target,
@locking.ssynchronized(_config_lock, shared=1)
def LookupNodeGroup(self, target):
  """Lookup a node group's UUID.

  This function is just a wrapper over L{_UnlockedLookupNodeGroup}.

  @type target: string or None
  @param target: group name or UUID or None to look for the default
  @rtype: string
  @return: nodegroup UUID

  """
  return self._UnlockedLookupNodeGroup(target)
1137 def _UnlockedGetNodeGroup(self, uuid):
1138 """Lookup a node group.
1141 @param uuid: group UUID
1142 @rtype: L{objects.NodeGroup} or None
1143 @return: nodegroup object, or None if not found
1146 if uuid not in self._config_data.nodegroups:
1149 return self._config_data.nodegroups[uuid]
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroup(self, uuid):
  """Lookup a node group.

  This is just a wrapper over L{_UnlockedGetNodeGroup}.

  @type uuid: string
  @param uuid: group UUID
  @rtype: L{objects.NodeGroup} or None
  @return: nodegroup object, or None if not found

  """
  return self._UnlockedGetNodeGroup(uuid)
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodeGroupsInfo(self):
  """Get the configuration of all node groups.

  @rtype: dict
  @return: a new dict with the same keys and node group objects as the
      configuration's node group mapping (the objects are not copied)

  """
  return dict(self._config_data.nodegroups)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupList(self):
  """Get a list of node groups.

  @return: the keys of the configuration's node group mapping

  """
  return self._config_data.nodegroups.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupMembersByNodes(self, nodes):
  """Get nodes which are member in the same nodegroups as the given nodes.

  @param nodes: iterable of node names
  @rtype: frozenset
  @return: members of every group that one of the given nodes belongs to

  """
  # Maps a node name to the group recorded on its node object
  ngfn = lambda node_name: self._UnlockedGetNodeInfo(node_name).group
  return frozenset(member_name
                   for node_name in nodes
                   for member_name in
                     self._UnlockedGetNodeGroup(ngfn(node_name)).members)
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiNodeGroupInfo(self, group_uuids):
  """Get the configuration of multiple node groups.

  @param group_uuids: List of node group UUIDs
  @rtype: list
  @return: List of tuples of (group_uuid, group_info), in the order given;
      group_info is None for a UUID that is not found

  """
  return [(uuid, self._UnlockedGetNodeGroup(uuid)) for uuid in group_uuids]
@locking.ssynchronized(_config_lock)
def AddInstance(self, instance, ec_id):
  """Add an instance to the config.

  This should be used after creating a new instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @param ec_id: execution context id, passed on to L{_EnsureUUID}
  @raise errors.ProgrammerError: if C{instance} is not an
      L{objects.Instance}
  @raise errors.ConfigurationError: if one of the instance's MAC
      addresses is already in use

  """
  if not isinstance(instance, objects.Instance):
    raise errors.ProgrammerError("Invalid type passed to AddInstance")

  if instance.disk_template != constants.DT_DISKLESS:
    all_lvs = instance.MapLVsByNode()
    logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)

  all_macs = self._AllMACs()
  for nic in instance.nics:
    if nic.mac in all_macs:
      raise errors.ConfigurationError("Cannot add instance %s:"
                                      " MAC address '%s' already in use." %
                                      (instance.name, nic.mac))

  self._EnsureUUID(instance, ec_id)

  instance.serial_no = 1
  instance.ctime = instance.mtime = time.time()
  self._config_data.instances[instance.name] = instance
  self._config_data.cluster.serial_no += 1
  self._UnlockedReleaseDRBDMinors(instance.name)
  self._WriteConfig()
1232 def _EnsureUUID(self, item, ec_id):
1233 """Ensures a given object has a valid UUID.
1235 @param item: the instance or node to be checked
1236 @param ec_id: the execution context id for the uuid reservation
1240 item.uuid = self._GenerateUniqueID(ec_id)
1241 elif item.uuid in self._AllIDs(include_temporary=True):
1242 raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
1243 " in use" % (item.name, item.uuid))
def _SetInstanceStatus(self, instance_name, status):
  """Set the instance's status to a given value.

  The configuration is only modified and written if the status changes.

  @param instance_name: name of the instance to modify
  @param status: new admin state, one of C{constants.ADMINST_ALL}
  @raise errors.ConfigurationError: if the instance is not known

  """
  assert status in constants.ADMINST_ALL, \
         "Invalid status '%s' passed to SetInstanceStatus" % (status,)

  if instance_name not in self._config_data.instances:
    raise errors.ConfigurationError("Unknown instance '%s'" %
                                    instance_name)
  instance = self._config_data.instances[instance_name]
  if instance.admin_state != status:
    instance.admin_state = status
    instance.serial_no += 1
    instance.mtime = time.time()
    self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceUp(self, instance_name):
  """Mark the instance status to up in the config.

  @param instance_name: name of the instance to modify

  """
  self._SetInstanceStatus(instance_name, constants.ADMINST_UP)
@locking.ssynchronized(_config_lock)
def MarkInstanceOffline(self, instance_name):
  """Mark the instance status to offline in the config.

  @param instance_name: name of the instance to modify

  """
  self._SetInstanceStatus(instance_name, constants.ADMINST_OFFLINE)
@locking.ssynchronized(_config_lock)
def RemoveInstance(self, instance_name):
  """Remove the instance from the configuration.

  @param instance_name: name of the instance to remove
  @raise errors.ConfigurationError: if the instance is not known

  """
  if instance_name not in self._config_data.instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  # If a network port has been allocated to the instance,
  # return it to the pool of free ports.
  inst = self._config_data.instances[instance_name]
  network_port = getattr(inst, "network_port", None)
  if network_port is not None:
    self._config_data.cluster.tcpudp_port_pool.add(network_port)

  del self._config_data.instances[instance_name]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RenameInstance(self, old_name, new_name):
  """Rename an instance.

  This needs to be done in ConfigWriter and not by RemoveInstance
  combined with AddInstance as only we can guarantee an atomic
  rename.

  @param old_name: current name of the instance
  @param new_name: name the instance should have afterwards
  @raise errors.ConfigurationError: if C{old_name} is not a known instance

  """
  if old_name not in self._config_data.instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % old_name)

  # Operate on a copy to not lose the instance object in case of a failure
  inst = self._config_data.instances[old_name].Copy()
  inst.name = new_name

  for (idx, disk) in enumerate(inst.disks):
    if disk.dev_type == constants.LD_FILE:
      # rename the file paths in logical and physical id
      file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
      disk.logical_id = (disk.logical_id[0],
                         utils.PathJoin(file_storage_dir, inst.name,
                                        "disk%s" % idx))
      disk.physical_id = disk.logical_id

  # Actually replace instance object
  del self._config_data.instances[old_name]
  self._config_data.instances[inst.name] = inst

  # Force update of ssconf files
  self._config_data.cluster.serial_no += 1

  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceDown(self, instance_name):
  """Mark the status of an instance to down in the configuration.

  @param instance_name: name of the instance to modify

  """
  self._SetInstanceStatus(instance_name, constants.ADMINST_DOWN)
1336 def _UnlockedGetInstanceList(self):
1337 """Get the list of instances.
1339 This function is for internal use, when the config lock is already held.
1342 return self._config_data.instances.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceList(self):
  """Get the list of instances.

  This is just a wrapper over L{_UnlockedGetInstanceList}.

  @return: array of instances, ex. ['instance2.example.com',
      'instance1.example.com']

  """
  return self._UnlockedGetInstanceList()
def ExpandInstanceName(self, short_name):
  """Attempt to expand an incomplete instance name.

  @param short_name: the (possibly partial) instance name
  @return: whatever C{_MatchNameComponentIgnoreCase} yields when matching
      C{short_name} against the list of configured instances

  """
  # Locking is done in L{ConfigWriter.GetInstanceList}
  return _MatchNameComponentIgnoreCase(short_name, self.GetInstanceList())
1361 def _UnlockedGetInstanceInfo(self, instance_name):
1362 """Returns information about an instance.
1364 This function is for internal use, when the config lock is already held.
1367 if instance_name not in self._config_data.instances:
1370 return self._config_data.instances[instance_name]
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceInfo(self, instance_name):
  """Returns information about an instance.

  It takes the information from the configuration file. Other information of
  an instance are taken from the live systems.

  @param instance_name: name of the instance, e.g.
      I{instance1.example.com}

  @rtype: L{objects.Instance}
  @return: the instance object, or None if the name is not known

  """
  return self._UnlockedGetInstanceInfo(instance_name)
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceNodeGroups(self, instance_name, primary_only=False):
  """Returns set of node group UUIDs for instance's nodes.

  @param instance_name: name of the instance
  @param primary_only: whether to look at the primary node only instead of
      all of the instance's nodes
  @rtype: frozenset
  @raise errors.ConfigurationError: if the instance is not known

  """
  instance = self._UnlockedGetInstanceInfo(instance_name)
  if not instance:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  if primary_only:
    nodes = [instance.primary_node]
  else:
    nodes = instance.all_nodes

  return frozenset(self._UnlockedGetNodeInfo(node_name).group
                   for node_name in nodes)
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiInstanceInfo(self, instances):
  """Get the configuration of multiple instances.

  @param instances: list of instance names
  @rtype: list
  @return: list of tuples (instance, instance_info), where
      instance_info is what would GetInstanceInfo return for the
      instance, while keeping the original order

  """
  return [(name, self._UnlockedGetInstanceInfo(name)) for name in instances]
@locking.ssynchronized(_config_lock, shared=1)
def GetAllInstancesInfo(self):
  """Get the configuration of all instances.

  @rtype: dict
  @return: dict of (instance, instance_info), where instance_info is what
            would GetInstanceInfo return for the instance

  """
  my_dict = dict([(instance, self._UnlockedGetInstanceInfo(instance))
                  for instance in self._UnlockedGetInstanceList()])
  return my_dict
@locking.ssynchronized(_config_lock, shared=1)
def GetInstancesInfoByFilter(self, filter_fn):
  """Get instance configuration with a filter.

  @type filter_fn: callable
  @param filter_fn: Filter function receiving instance object as parameter,
    returning boolean. Important: this function is called while the
    configuration locks is held. It must not do any complex work or call
    functions potentially leading to a deadlock. Ideally it doesn't call any
    other functions and just compares instance attributes.
  @rtype: dict
  @return: mapping of instance name to instance object for every instance
    accepted by C{filter_fn}

  """
  return dict((name, inst)
              for (name, inst) in self._config_data.instances.items()
              if filter_fn(inst))
@locking.ssynchronized(_config_lock)
def AddNode(self, node, ec_id):
  """Add a node to the configuration.

  @type node: L{objects.Node}
  @param node: a Node instance
  @param ec_id: execution context id, passed on to L{_EnsureUUID}

  """
  logging.info("Adding node %s to configuration", node.name)

  self._EnsureUUID(node, ec_id)

  node.serial_no = 1
  node.ctime = node.mtime = time.time()
  # Registering with the group comes first: it raises for an unknown group,
  # before the node itself is stored
  self._UnlockedAddNodeToGroup(node.name, node.group)
  self._config_data.nodes[node.name] = node
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RemoveNode(self, node_name):
  """Remove a node from the configuration.

  @param node_name: name of the node to remove
  @raise errors.ConfigurationError: if the node is not known

  """
  logging.info("Removing node %s from configuration", node_name)

  if node_name not in self._config_data.nodes:
    raise errors.ConfigurationError("Unknown node '%s'" % node_name)

  self._UnlockedRemoveNodeFromGroup(self._config_data.nodes[node_name])
  del self._config_data.nodes[node_name]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
def ExpandNodeName(self, short_name):
  """Attempt to expand an incomplete node name.

  @param short_name: the (possibly partial) node name
  @return: whatever C{_MatchNameComponentIgnoreCase} yields when matching
      C{short_name} against the list of configured nodes

  """
  # Locking is done in L{ConfigWriter.GetNodeList}
  return _MatchNameComponentIgnoreCase(short_name, self.GetNodeList())
1490 def _UnlockedGetNodeInfo(self, node_name):
1491 """Get the configuration of a node, as stored in the config.
1493 This function is for internal use, when the config lock is already
1496 @param node_name: the node name, e.g. I{node1.example.com}
1498 @rtype: L{objects.Node}
1499 @return: the node object
1502 if node_name not in self._config_data.nodes:
1505 return self._config_data.nodes[node_name]
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInfo(self, node_name):
  """Get the configuration of a node, as stored in the config.

  This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: L{objects.Node}
  @return: the node object, or None if the name is not known

  """
  return self._UnlockedGetNodeInfo(node_name)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInstances(self, node_name):
  """Get the instances of a node, as stored in the config.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: (list, list)
  @return: a tuple with two lists: the primary and the secondary instances

  """
  pri = []
  sec = []
  for inst in self._config_data.instances.values():
    if inst.primary_node == node_name:
      pri.append(inst.name)
    if node_name in inst.secondary_nodes:
      sec.append(inst.name)
  return (pri, sec)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupInstances(self, uuid, primary_only=False):
  """Get the instances of a node group.

  @param uuid: Node group UUID
  @param primary_only: Whether to only consider primary nodes
  @rtype: frozenset
  @return: Set of instance names in node group

  """
  if primary_only:
    nodes_fn = lambda inst: [inst.primary_node]
  else:
    nodes_fn = lambda inst: inst.all_nodes

  return frozenset(inst.name
                   for inst in self._config_data.instances.values()
                   for node_name in nodes_fn(inst)
                   if self._UnlockedGetNodeInfo(node_name).group == uuid)
1560 def _UnlockedGetNodeList(self):
1561 """Return the list of nodes which are in the configuration.
1563 This function is for internal use, when the config lock is already
1569 return self._config_data.nodes.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeList(self):
  """Return the list of nodes which are in the configuration.

  This is just a wrapper over L{_UnlockedGetNodeList}.

  """
  return self._UnlockedGetNodeList()
1578 def _UnlockedGetOnlineNodeList(self):
1579 """Return the list of nodes which are online.
1582 all_nodes = [self._UnlockedGetNodeInfo(node)
1583 for node in self._UnlockedGetNodeList()]
1584 return [node.name for node in all_nodes if not node.offline]
@locking.ssynchronized(_config_lock, shared=1)
def GetOnlineNodeList(self):
  """Return the list of nodes which are online.

  This is just a wrapper over L{_UnlockedGetOnlineNodeList}.

  """
  return self._UnlockedGetOnlineNodeList()
@locking.ssynchronized(_config_lock, shared=1)
def GetVmCapableNodeList(self):
  """Return the list of nodes which are vm capable.

  @rtype: list
  @return: names of all nodes whose C{vm_capable} flag is set

  """
  all_nodes = [self._UnlockedGetNodeInfo(node)
               for node in self._UnlockedGetNodeList()]
  return [node.name for node in all_nodes if node.vm_capable]
@locking.ssynchronized(_config_lock, shared=1)
def GetNonVmCapableNodeList(self):
  """Return the list of nodes which are not vm capable.

  @rtype: list
  @return: names of all nodes whose C{vm_capable} flag is not set

  """
  all_nodes = [self._UnlockedGetNodeInfo(node)
               for node in self._UnlockedGetNodeList()]
  return [node.name for node in all_nodes if not node.vm_capable]
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiNodeInfo(self, nodes):
  """Get the configuration of multiple nodes.

  @param nodes: list of node names
  @rtype: list
  @return: list of tuples of (node, node_info), where node_info is
      what would GetNodeInfo return for the node, in the original
      order

  """
  return [(name, self._UnlockedGetNodeInfo(name)) for name in nodes]
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodesInfo(self):
  """Get the configuration of all nodes.

  @rtype: dict
  @return: dict of (node, node_info), where node_info is what
            would GetNodeInfo return for the node

  """
  return self._UnlockedGetAllNodesInfo()
1635 def _UnlockedGetAllNodesInfo(self):
1636 """Gets configuration of all nodes.
1638 @note: See L{GetAllNodesInfo}
1641 return dict([(node, self._UnlockedGetNodeInfo(node))
1642 for node in self._UnlockedGetNodeList()])
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupsFromNodes(self, nodes):
  """Returns groups for a list of nodes.

  @type nodes: list of string
  @param nodes: List of node names
  @rtype: frozenset
  @return: the C{group} attribute of each of the given nodes

  """
  return frozenset(self._UnlockedGetNodeInfo(name).group for name in nodes)
1655 def _UnlockedGetMasterCandidateStats(self, exceptions=None):
1656 """Get the number of current and maximum desired and possible candidates.
1658 @type exceptions: list
1659 @param exceptions: if passed, list of nodes that should be ignored
1661 @return: tuple of (current, desired and possible, possible)
1664 mc_now = mc_should = mc_max = 0
1665 for node in self._config_data.nodes.values():
1666 if exceptions and node.name in exceptions:
1668 if not (node.offline or node.drained) and node.master_capable:
1670 if node.master_candidate:
1672 mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
1673 return (mc_now, mc_should, mc_max)
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterCandidateStats(self, exceptions=None):
  """Get the number of current, desired and possible candidates.

  This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: tuple of (current, desired and possible, possible)

  """
  return self._UnlockedGetMasterCandidateStats(exceptions)
@locking.ssynchronized(_config_lock)
def MaintainCandidatePool(self, exceptions):
  """Try to grow the candidate pool to the desired size.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: list
  @return: list with the adjusted nodes (L{objects.Node} instances)

  """
  # The second value is the desired pool size, already capped by the
  # number of possible candidates
  mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions)
  mod_list = []
  if mc_now < mc_max:
    node_list = self._config_data.nodes.keys()
    # Promote nodes in random order
    random.shuffle(node_list)
    for name in node_list:
      if mc_now >= mc_max:
        break
      node = self._config_data.nodes[name]
      if (node.master_candidate or node.offline or node.drained or
          node.name in exceptions or not node.master_capable):
        continue
      mod_list.append(node)
      node.master_candidate = True
      node.serial_no += 1
      mc_now += 1
    if mc_now != mc_max:
      # this should not happen
      logging.warning("Warning: MaintainCandidatePool didn't manage to"
                      " fill the candidate pool (%d/%d)", mc_now, mc_max)
    if mod_list:
      self._config_data.cluster.serial_no += 1
      self._WriteConfig()

  return mod_list
1725 def _UnlockedAddNodeToGroup(self, node_name, nodegroup_uuid):
1726 """Add a given node to the specified group.
1729 if nodegroup_uuid not in self._config_data.nodegroups:
1730 # This can happen if a node group gets deleted between its lookup and
1731 # when we're adding the first node to it, since we don't keep a lock in
1732 # the meantime. It's ok though, as we'll fail cleanly if the node group
1733 # is not found anymore.
1734 raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)
1735 if node_name not in self._config_data.nodegroups[nodegroup_uuid].members:
1736 self._config_data.nodegroups[nodegroup_uuid].members.append(node_name)
1738 def _UnlockedRemoveNodeFromGroup(self, node):
1739 """Remove a given node from its group.
1742 nodegroup = node.group
1743 if nodegroup not in self._config_data.nodegroups:
1744 logging.warning("Warning: node '%s' has unknown node group '%s'"
1745 " (while being removed from it)", node.name, nodegroup)
1746 nodegroup_obj = self._config_data.nodegroups[nodegroup]
1747 if node.name not in nodegroup_obj.members:
1748 logging.warning("Warning: node '%s' not a member of its node group '%s'"
1749 " (while being removed from it)", node.name, nodegroup)
1751 nodegroup_obj.members.remove(node.name)
@locking.ssynchronized(_config_lock)
def AssignGroupNodes(self, mods):
  """Changes the group of a number of nodes.

  All modifications are resolved and checked first; changes are only
  applied once every entry has been resolved successfully.

  @type mods: list of tuples; (node name, new group UUID)
  @param mods: Node membership modifications
  @raise errors.ConfigurationError: if a node, its current group or its new
      group cannot be found

  """
  groups = self._config_data.nodegroups
  nodes = self._config_data.nodes

  resmod = []

  # Try to resolve names/UUIDs first
  for (node_name, new_group_uuid) in mods:
    try:
      node = nodes[node_name]
    except KeyError:
      raise errors.ConfigurationError("Unable to find node '%s'" % node_name)

    if node.group == new_group_uuid:
      # Node is being assigned to its current group
      logging.debug("Node '%s' was assigned to its current group (%s)",
                    node_name, node.group)
      continue

    # Try to find current group of node
    try:
      old_group = groups[node.group]
    except KeyError:
      raise errors.ConfigurationError("Unable to find old group '%s'" %
                                      node.group)

    # Try to find new group for node
    try:
      new_group = groups[new_group_uuid]
    except KeyError:
      raise errors.ConfigurationError("Unable to find new group '%s'" %
                                      new_group_uuid)

    assert node.name in old_group.members, \
      ("Inconsistent configuration: node '%s' not listed in members for its"
       " old group '%s'" % (node.name, old_group.uuid))
    assert node.name not in new_group.members, \
      ("Inconsistent configuration: node '%s' already listed in members for"
       " its new group '%s'" % (node.name, new_group.uuid))

    resmod.append((node, old_group, new_group))

  # Apply changes
  for (node, old_group, new_group) in resmod:
    assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
      "Assigning to current group is not possible"

    node.group = new_group.uuid

    # Update members of involved groups
    if node.name in old_group.members:
      old_group.members.remove(node.name)
    if node.name not in new_group.members:
      new_group.members.append(node.name)

  # Update timestamps and serials (only once per node/group object)
  now = time.time()
  for obj in frozenset(itertools.chain(*resmod)): # pylint: disable=W0142
    obj.serial_no += 1
    obj.mtime = now

  # Force ssconf update
  self._config_data.cluster.serial_no += 1

  self._WriteConfig()
1826 def _BumpSerialNo(self):
1827 """Bump up the serial number of the config.
1830 self._config_data.serial_no += 1
1831 self._config_data.mtime = time.time()
1833 def _AllUUIDObjects(self):
1834 """Returns all objects with uuid attributes.
1837 return (self._config_data.instances.values() +
1838 self._config_data.nodes.values() +
1839 self._config_data.nodegroups.values() +
1840 [self._config_data.cluster])
1842 def _OpenConfig(self, accept_foreign):
1843 """Read the config data from disk.
1846 raw_data = utils.ReadFile(self._cfg_file)
1849 data = objects.ConfigData.FromDict(serializer.Load(raw_data))
1850 except Exception, err:
1851 raise errors.ConfigurationError(err)
1853 # Make sure the configuration has the right version
1854 _ValidateConfig(data)
1856 if (not hasattr(data, "cluster") or
1857 not hasattr(data.cluster, "rsahostkeypub")):
1858 raise errors.ConfigurationError("Incomplete configuration"
1859 " (missing cluster.rsahostkeypub)")
1861 if data.cluster.master_node != self._my_hostname and not accept_foreign:
1862 msg = ("The configuration denotes node %s as master, while my"
1863 " hostname is %s; opening a foreign configuration is only"
1864 " possible in accept_foreign mode" %
1865 (data.cluster.master_node, self._my_hostname))
1866 raise errors.ConfigurationError(msg)
1868 # Upgrade configuration if needed
1869 data.UpgradeConfig()
1871 self._config_data = data
1872 # reset the last serial as -1 so that the next write will cause
1874 self._last_cluster_serial = -1
1876 # And finally run our (custom) config upgrade sequence
1877 self._UpgradeConfig()
1879 self._cfg_id = utils.GetFileID(path=self._cfg_file)
def _UpgradeConfig(self):
  """Run upgrade steps that cannot be done purely in the objects.

  This is because some data elements need uniqueness across the
  whole configuration, etc.

  @warning: this function will call L{_WriteConfig()}, but also
      L{DropECReservations} so it needs to be called only from a
      "safe" place (the constructor). If one wanted to call it with
      the lock held, a DropECReservationUnlocked would need to be
      created first, to avoid causing deadlock.

  """
  # Tracks whether anything changed, so the config is only rewritten then
  modified = False
  for item in self._AllUUIDObjects():
    if item.uuid is None:
      item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
      modified = True
  if not self._config_data.nodegroups:
    default_nodegroup_name = constants.INITIAL_NODE_GROUP_NAME
    default_nodegroup = objects.NodeGroup(name=default_nodegroup_name,
                                          members=[])
    self._UnlockedAddNodeGroup(default_nodegroup, _UPGRADE_CONFIG_JID, True)
    modified = True
  for node in self._config_data.nodes.values():
    if not node.group:
      node.group = self.LookupNodeGroup(None)
      modified = True
    # This is technically *not* an upgrade, but needs to be done both when
    # nodegroups are being added, and upon normally loading the config,
    # because the members list of a node group is discarded upon
    # serializing/deserializing the object.
    self._UnlockedAddNodeToGroup(node.name, node.group)
  if modified:
    self._WriteConfig()
    # This is ok even if it acquires the internal lock, as _UpgradeConfig is
    # only called at config init time, without the lock held
    self.DropECReservations(_UPGRADE_CONFIG_JID)
1920 def _DistributeConfig(self, feedback_fn):
1921 """Distribute the configuration to the other nodes.
1923 Currently, this only copies the configuration file. In the future,
1924 it could be used to encapsulate the 2/3-phase update mechanism.
1934 myhostname = self._my_hostname
1935 # we can skip checking whether _UnlockedGetNodeInfo returns None
1936 # since the node list comes from _UnlocketGetNodeList, and we are
1937 # called with the lock held, so no modifications should take place
1939 for node_name in self._UnlockedGetNodeList():
1940 if node_name == myhostname:
1942 node_info = self._UnlockedGetNodeInfo(node_name)
1943 if not node_info.master_candidate:
1945 node_list.append(node_info.name)
1946 addr_list.append(node_info.primary_ip)
1948 # TODO: Use dedicated resolver talking to config writer for name resolution
1950 self._GetRpc(addr_list).call_upload_file(node_list, self._cfg_file)
1951 for to_node, to_result in result.items():
1952 msg = to_result.fail_msg
1954 msg = ("Copy of file %s to node %s failed: %s" %
1955 (self._cfg_file, to_node, msg))
1965 def _WriteConfig(self, destination=None, feedback_fn=None):
1966 """Write the configuration data to persistent storage.
1969 assert feedback_fn is None or callable(feedback_fn)
1971 # Warn on config errors, but don't abort the save - the
1972 # configuration has already been modified, and we can't revert;
1973 # the best we can do is to warn the user and save as is, leaving
1974 # recovery to the user
1975 config_errors = self._UnlockedVerifyConfig()
1977 errmsg = ("Configuration data is not consistent: %s" %
1978 (utils.CommaJoin(config_errors)))
1979 logging.critical(errmsg)
1983 if destination is None:
1984 destination = self._cfg_file
1985 self._BumpSerialNo()
1986 txt = serializer.Dump(self._config_data.ToDict())
1988 getents = self._getents()
1990 fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
1991 close=False, gid=getents.confd_gid, mode=0640)
1992 except errors.LockError:
1993 raise errors.ConfigurationError("The configuration file has been"
1994 " modified since the last write, cannot"
1997 self._cfg_id = utils.GetFileID(fd=fd)
2001 self.write_count += 1
2003 # and redistribute the config file to master candidates
2004 self._DistributeConfig(feedback_fn)
2006 # Write ssconf files on all nodes (including locally)
2007 if self._last_cluster_serial < self._config_data.cluster.serial_no:
2008 if not self._offline:
2009 result = self._GetRpc(None).call_write_ssconf_files(
2010 self._UnlockedGetOnlineNodeList(),
2011 self._UnlockedGetSsconfValues())
2013 for nname, nresu in result.items():
2014 msg = nresu.fail_msg
2016 errmsg = ("Error while uploading ssconf files to"
2017 " node %s: %s" % (nname, msg))
2018 logging.warning(errmsg)
2023 self._last_cluster_serial = self._config_data.cluster.serial_no
def _UnlockedGetSsconfValues(self):
  """Return the values needed by ssconf.

  @rtype: dict
  @return: a dictionary with keys the ssconf names and values their
      associated value
  @raise errors.ConfigurationError: if any value is not a string

  """
  # List-like values are stored as newline-separated strings
  fn = "\n".join
  instance_names = utils.NiceSort(self._UnlockedGetInstanceList())
  node_names = utils.NiceSort(self._UnlockedGetNodeList())
  node_info = [self._UnlockedGetNodeInfo(name) for name in node_names]
  node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
                  for ninfo in node_info]
  node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
                  for ninfo in node_info]

  instance_data = fn(instance_names)
  off_data = fn(node.name for node in node_info if node.offline)
  on_data = fn(node.name for node in node_info if not node.offline)
  mc_data = fn(node.name for node in node_info if node.master_candidate)
  mc_ips_data = fn(node.primary_ip for node in node_info
                   if node.master_candidate)
  node_data = fn(node_names)
  node_pri_ips_data = fn(node_pri_ips)
  node_snd_ips_data = fn(node_snd_ips)

  cluster = self._config_data.cluster
  cluster_tags = fn(cluster.GetTags())

  hypervisor_list = fn(cluster.enabled_hypervisors)

  uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")

  nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in
                self._config_data.nodegroups.values()]
  nodegroups_data = fn(utils.NiceSort(nodegroups))

  ssconf_values = {
    constants.SS_CLUSTER_NAME: cluster.cluster_name,
    constants.SS_CLUSTER_TAGS: cluster_tags,
    constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
    constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
    constants.SS_MASTER_CANDIDATES: mc_data,
    constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
    constants.SS_MASTER_IP: cluster.master_ip,
    constants.SS_MASTER_NETDEV: cluster.master_netdev,
    constants.SS_MASTER_NETMASK: str(cluster.master_netmask),
    constants.SS_MASTER_NODE: cluster.master_node,
    constants.SS_NODE_LIST: node_data,
    constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
    constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
    constants.SS_OFFLINE_NODES: off_data,
    constants.SS_ONLINE_NODES: on_data,
    constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
    constants.SS_INSTANCE_LIST: instance_data,
    constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
    constants.SS_HYPERVISOR_LIST: hypervisor_list,
    constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
    constants.SS_UID_POOL: uid_pool,
    constants.SS_NODEGROUPS: nodegroups_data,
    }
  bad_values = [(k, v) for k, v in ssconf_values.items()
                if not isinstance(v, (str, basestring))]
  if bad_values:
    err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
    raise errors.ConfigurationError("Some ssconf key(s) have non-string"
                                    " values: %s" % err)
  return ssconf_values
@locking.ssynchronized(_config_lock, shared=1)
def GetSsconfValues(self):
  """Wrapper using lock around L{_UnlockedGetSsconfValues}.

  @rtype: dict
  @return: see L{_UnlockedGetSsconfValues}

  """
  return self._UnlockedGetSsconfValues()
@locking.ssynchronized(_config_lock, shared=1)
def GetVGName(self):
  """Return the volume group name.

  @return: the cluster's C{volume_group_name} attribute

  """
  return self._config_data.cluster.volume_group_name
@locking.ssynchronized(_config_lock)
def SetVGName(self, vg_name):
  """Set the volume group name.

  The cluster serial number is increased and the configuration written.

  @param vg_name: the new volume group name

  """
  self._config_data.cluster.volume_group_name = vg_name
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetDRBDHelper(self):
  """Return DRBD usermode helper.

  @return: the cluster's C{drbd_usermode_helper} attribute

  """
  return self._config_data.cluster.drbd_usermode_helper
@locking.ssynchronized(_config_lock)
def SetDRBDHelper(self, drbd_helper):
  """Set DRBD usermode helper.

  The cluster serial number is increased and the configuration written.

  @param drbd_helper: the new DRBD usermode helper

  """
  self._config_data.cluster.drbd_usermode_helper = drbd_helper
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetMACPrefix(self):
  """Return the mac prefix.

  @return: the cluster's C{mac_prefix} attribute

  """
  return self._config_data.cluster.mac_prefix
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterInfo(self):
  """Returns information about the cluster

  @rtype: L{objects.Cluster}
  @return: the cluster object

  """
  return self._config_data.cluster
@locking.ssynchronized(_config_lock, shared=1)
def HasAnyDiskOfType(self, dev_type):
  """Check if there is at least one disk of the given type in the config.

  @param dev_type: the disk type to look for
  @return: the result of the configuration data's own C{HasAnyDiskOfType}

  """
  return self._config_data.HasAnyDiskOfType(dev_type)
@locking.ssynchronized(_config_lock)
def Update(self, target, feedback_fn):
  """Notify function to be called after updates.

  This function must be called when an object (as returned by
  GetInstanceInfo, GetNodeInfo, GetClusterInfo) has been updated and the
  caller wants the modifications saved to the backing store. Note
  that all modified objects will be saved, but the target argument
  is the one the caller wants to ensure that it's saved.

  @param target: an instance of either L{objects.Cluster},
      L{objects.Node}, L{objects.Instance} or L{objects.NodeGroup} which
      is existing in the cluster
  @param feedback_fn: Callable feedback function
  @raise errors.ProgrammerError: if the configuration was not read or the
      target has an unsupported type
  @raise errors.ConfigurationError: if the target is not part of the
      current configuration

  """
  if self._config_data is None:
    raise errors.ProgrammerError("Configuration file not read,"
                                 " cannot save.")
  update_serial = False
  if isinstance(target, objects.Cluster):
    test = target == self._config_data.cluster
  elif isinstance(target, objects.Node):
    test = target in self._config_data.nodes.values()
    update_serial = True
  elif isinstance(target, objects.Instance):
    test = target in self._config_data.instances.values()
  elif isinstance(target, objects.NodeGroup):
    test = target in self._config_data.nodegroups.values()
  else:
    raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                 " ConfigWriter.Update" % type(target))
  if not test:
    raise errors.ConfigurationError("Configuration updated since object"
                                    " has been read or unknown object")
  target.serial_no += 1
  target.mtime = now = time.time()

  if update_serial:
    # for node updates, we need to increase the cluster serial too
    self._config_data.cluster.serial_no += 1
    self._config_data.cluster.mtime = now

  if isinstance(target, objects.Instance):
    self._UnlockedReleaseDRBDMinors(target.name)

  self._WriteConfig(feedback_fn=feedback_fn)
@locking.ssynchronized(_config_lock)
def DropECReservations(self, ec_id):
  """Drop per-execution-context reservations

  @param ec_id: the execution context id whose reservations are dropped in
      every manager listed in C{self._all_rms}

  """
  for rm in self._all_rms:
    rm.DropECReservations(ec_id)