4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
29 Currently, the data storage format is JSON. YAML was slow and consuming too
34 # pylint: disable-msg=R0904
35 # R0904: Too many public methods
43 from ganeti import errors
44 from ganeti import locking
45 from ganeti import utils
46 from ganeti import constants
47 from ganeti import rpc
48 from ganeti import objects
49 from ganeti import serializer
50 from ganeti import uidpool
51 from ganeti import netutils
52 from ganeti import runtime
55 _config_lock = locking.SharedLock("ConfigWriter")
57 # job id used for resource management at config upgrade time
58 _UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
def _ValidateConfig(data):
    """Check that a configuration object carries the supported version.

    Only the C{version} attribute is inspected; nothing else in the
    configuration is validated here.

    @param data: configuration object exposing a C{version} attribute
    @raise errors.ConfigVersionMismatch: if C{data.version} differs from
        C{constants.CONFIG_VERSION}

    """
    found = data.version
    expected = constants.CONFIG_VERSION
    if found == expected:
        return
    raise errors.ConfigVersionMismatch(expected, found)
class TemporaryReservationManager:
    """A temporary resource reservation manager.

    This is used to reserve resources in a job, before using them, making
    sure other jobs cannot get them in the meantime.

    @ivar _ec_reserved: dict mapping an execution context id to the set of
        resources currently reserved by that context

    """
    # Upper bound on the number of candidates L{Generate} tries before
    # giving up; without a bound a generator that keeps returning taken
    # values would spin forever.
    _MAX_GENERATE_RETRIES = 64

    def __init__(self):
        self._ec_reserved = {}

    def Reserved(self, resource):
        """Tell whether any execution context holds the given resource.

        @rtype: boolean

        """
        for holder_reserved in self._ec_reserved.values():
            if resource in holder_reserved:
                return True
        return False

    def Reserve(self, ec_id, resource):
        """Reserve a resource on behalf of an execution context.

        @param ec_id: execution context id that will hold the reservation
        @param resource: the resource to reserve
        @raise errors.ReservationError: if the resource is already reserved
            by any context (including C{ec_id} itself)

        """
        if self.Reserved(resource):
            raise errors.ReservationError("Duplicate reservation for resource"
                                          " '%s'" % str(resource))
        if ec_id not in self._ec_reserved:
            self._ec_reserved[ec_id] = set([resource])
        else:
            self._ec_reserved[ec_id].add(resource)

    def DropECReservations(self, ec_id):
        """Forget every reservation held by the given execution context.

        Unknown context ids are silently ignored.

        """
        if ec_id in self._ec_reserved:
            del self._ec_reserved[ec_id]

    def GetReserved(self):
        """Return a new set with the resources reserved by all contexts.

        @rtype: set

        """
        all_reserved = set()
        for holder_reserved in self._ec_reserved.values():
            all_reserved.update(holder_reserved)
        return all_reserved

    def Generate(self, existing, generate_one_fn, ec_id):
        """Generate a new resource of this type and reserve it.

        Candidates are requested from C{generate_one_fn} until one is
        found that is neither C{None}, nor reserved, nor in C{existing}.

        @param existing: iterable of resources already in use
        @param generate_one_fn: callable without arguments returning a
            candidate resource
        @param ec_id: execution context id that will hold the reservation
        @return: the newly reserved resource
        @raise errors.ConfigurationError: if no usable candidate was
            produced within L{_MAX_GENERATE_RETRIES} attempts

        """
        assert callable(generate_one_fn)

        all_elems = self.GetReserved()
        all_elems.update(existing)
        new_resource = None
        for _ in range(self._MAX_GENERATE_RETRIES):
            new_resource = generate_one_fn()
            if new_resource is not None and new_resource not in all_elems:
                break
        else:
            # loop finished without "break": every attempt collided
            raise errors.ConfigurationError("Not able generate new resource"
                                            " (last tried: %s)" % new_resource)
        self.Reserve(ec_id, new_resource)
        return new_resource
130 """The interface to the cluster configuration.
132 @ivar _temporary_lvs: reservation manager for temporary LVs
133 @ivar _all_rms: a list of all temporary reservation managers
136 def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
137 accept_foreign=False):
139 self._lock = _config_lock
140 self._config_data = None
141 self._offline = offline
143 self._cfg_file = constants.CLUSTER_CONF_FILE
145 self._cfg_file = cfg_file
146 self._getents = _getents
147 self._temporary_ids = TemporaryReservationManager()
148 self._temporary_drbds = {}
149 self._temporary_macs = TemporaryReservationManager()
150 self._temporary_secrets = TemporaryReservationManager()
151 self._temporary_lvs = TemporaryReservationManager()
152 self._all_rms = [self._temporary_ids, self._temporary_macs,
153 self._temporary_secrets, self._temporary_lvs]
154 # Note: in order to prevent errors when resolving our name in
155 # _DistributeConfig, we compute it here once and reuse it; it's
156 # better to raise an error before starting to modify the config
157 # file than after it was modified
158 self._my_hostname = netutils.Hostname.GetSysName()
159 self._last_cluster_serial = -1
161 self._OpenConfig(accept_foreign)
163 # this method needs to be static, so that we can call it on the class
166 """Check if the cluster is configured.
169 return os.path.exists(constants.CLUSTER_CONF_FILE)
171 def _GenerateOneMAC(self):
172 """Generate one mac address
175 prefix = self._config_data.cluster.mac_prefix
176 byte1 = random.randrange(0, 256)
177 byte2 = random.randrange(0, 256)
178 byte3 = random.randrange(0, 256)
179 mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
@locking.ssynchronized(_config_lock, shared=1)
def GetNdParams(self, node):
    """Return the node parameters of a node, filled with defaults.

    The node's group is looked up and passed, together with the node, to
    the cluster object's C{FillND}.

    @type node: L{objects.Node}
    @param node: the node whose parameters are wanted
    @return: the filled-in node parameters, as returned by C{FillND}

    """
    group = self._UnlockedGetNodeGroup(node.group)
    cluster = self._config_data.cluster
    return cluster.FillND(node, group)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateMAC(self, ec_id):
    """Generate a MAC for an instance and reserve it.

    Candidates are checked against the MACs already present in the
    configuration and against the temporary MAC reservations.

    @param ec_id: execution context id that will hold the reservation
    @return: the generated MAC address

    """
    existing = self._AllMACs()
    # Reserve in the MAC manager (not the generic ID manager) so that
    # ReserveMAC, which checks _temporary_macs, sees generated addresses.
    return self._temporary_macs.Generate(existing, self._GenerateOneMAC, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveMAC(self, mac, ec_id):
    """Reserve a MAC for an instance.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    @param mac: the MAC address to reserve
    @param ec_id: execution context id that will hold the reservation
    @raise errors.ReservationError: if the MAC is already used by an
        instance in the configuration

    """
    if mac in self._AllMACs():
        raise errors.ReservationError("mac already in use")
    self._temporary_macs.Reserve(ec_id, mac)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveLV(self, lv_name, ec_id):
    """Reserve a VG/LV pair for an instance.

    The name is refused if L{_AllLVs} already reports it; otherwise it
    is recorded in the temporary LV reservation manager.

    @type lv_name: string
    @param lv_name: the logical volume name to reserve
    @param ec_id: execution context id that will hold the reservation
    @raise errors.ReservationError: if the name is already in use

    """
    if lv_name in self._AllLVs():
        raise errors.ReservationError("LV already in use")
    self._temporary_lvs.Reserve(ec_id, lv_name)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateDRBDSecret(self, ec_id):
    """Generate a DRBD secret and reserve it.

    Candidates come from C{utils.GenerateSecret} and are checked against
    the secrets reported by L{_AllDRBDSecrets}.

    @param ec_id: execution context id that will hold the reservation
    @return: the generated secret

    """
    existing = self._AllDRBDSecrets()
    return self._temporary_secrets.Generate(existing, utils.GenerateSecret,
                                            ec_id)
244 """Compute the list of all LVs.
248 for instance in self._config_data.instances.values():
249 node_data = instance.MapLVsByNode()
250 for lv_list in node_data.values():
251 lvnames.update(lv_list)
254 def _AllIDs(self, include_temporary):
255 """Compute the list of all UUIDs and names we have.
257 @type include_temporary: boolean
258 @param include_temporary: whether to include the _temporary_ids set
260 @return: a set of IDs
264 if include_temporary:
265 existing.update(self._temporary_ids.GetReserved())
266 existing.update(self._AllLVs())
267 existing.update(self._config_data.instances.keys())
268 existing.update(self._config_data.nodes.keys())
269 existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
def _GenerateUniqueID(self, ec_id):
    """Generate a unique UUID and reserve it.

    Candidates from C{utils.NewUUID} are checked against the IDs
    returned by L{_AllIDs} (temporary IDs excluded there; the
    reservation manager adds its own reservations itself).

    @param ec_id: execution context id that will hold the reservation
    @rtype: string
    @return: the unique id

    """
    in_use = self._AllIDs(include_temporary=False)
    manager = self._temporary_ids
    return manager.Generate(in_use, utils.NewUUID, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateUniqueID(self, ec_id):
    """Generate a unique ID.

    Locked wrapper around L{_GenerateUniqueID}.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the id to
    @return: the generated id

    """
    unique_id = self._GenerateUniqueID(ec_id)
    return unique_id
298 """Return all MACs present in the config.
301 @return: the list of all MACs
305 for instance in self._config_data.instances.values():
306 for nic in instance.nics:
307 result.append(nic.mac)
311 def _AllDRBDSecrets(self):
312 """Return all DRBD secrets present in the config.
315 @return: the list of all DRBD secrets
318 def helper(disk, result):
319 """Recursively gather secrets from this disk."""
320 if disk.dev_type == constants.DT_DRBD8:
321 result.append(disk.logical_id[5])
323 for child in disk.children:
324 helper(child, result)
327 for instance in self._config_data.instances.values():
328 for disk in instance.disks:
333 def _CheckDiskIDs(self, disk, l_ids, p_ids):
334 """Compute duplicate disk IDs
336 @type disk: L{objects.Disk}
337 @param disk: the disk at which to start searching
339 @param l_ids: list of current logical ids
341 @param p_ids: list of current physical ids
343 @return: a list of error messages
347 if disk.logical_id is not None:
348 if disk.logical_id in l_ids:
349 result.append("duplicate logical id %s" % str(disk.logical_id))
351 l_ids.append(disk.logical_id)
352 if disk.physical_id is not None:
353 if disk.physical_id in p_ids:
354 result.append("duplicate physical id %s" % str(disk.physical_id))
356 p_ids.append(disk.physical_id)
359 for child in disk.children:
360 result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
363 def _UnlockedVerifyConfig(self):
367 @return: a list of error messages; a non-empty list signifies
371 # pylint: disable-msg=R0914
375 data = self._config_data
376 cluster = data.cluster
380 # global cluster checks
381 if not cluster.enabled_hypervisors:
382 result.append("enabled hypervisors list doesn't have any entries")
383 invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
385 result.append("enabled hypervisors contains invalid entries: %s" %
387 missing_hvp = (set(cluster.enabled_hypervisors) -
388 set(cluster.hvparams.keys()))
390 result.append("hypervisor parameters missing for the enabled"
391 " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
393 if cluster.master_node not in data.nodes:
394 result.append("cluster has invalid primary node '%s'" %
397 def _helper(owner, attr, value, template):
399 utils.ForceDictType(value, template)
400 except errors.GenericError, err:
401 result.append("%s has invalid %s: %s" % (owner, attr, err))
403 def _helper_nic(owner, params):
405 objects.NIC.CheckParameterSyntax(params)
406 except errors.ConfigurationError, err:
407 result.append("%s has invalid nicparams: %s" % (owner, err))
409 # check cluster parameters
410 _helper("cluster", "beparams", cluster.SimpleFillBE({}),
411 constants.BES_PARAMETER_TYPES)
412 _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
413 constants.NICS_PARAMETER_TYPES)
414 _helper_nic("cluster", cluster.SimpleFillNIC({}))
415 _helper("cluster", "ndparams", cluster.SimpleFillND({}),
416 constants.NDS_PARAMETER_TYPES)
418 # per-instance checks
419 for instance_name in data.instances:
420 instance = data.instances[instance_name]
421 if instance.name != instance_name:
422 result.append("instance '%s' is indexed by wrong name '%s'" %
423 (instance.name, instance_name))
424 if instance.primary_node not in data.nodes:
425 result.append("instance '%s' has invalid primary node '%s'" %
426 (instance_name, instance.primary_node))
427 for snode in instance.secondary_nodes:
428 if snode not in data.nodes:
429 result.append("instance '%s' has invalid secondary node '%s'" %
430 (instance_name, snode))
431 for idx, nic in enumerate(instance.nics):
432 if nic.mac in seen_macs:
433 result.append("instance '%s' has NIC %d mac %s duplicate" %
434 (instance_name, idx, nic.mac))
436 seen_macs.append(nic.mac)
438 filled = cluster.SimpleFillNIC(nic.nicparams)
439 owner = "instance %s nic %d" % (instance.name, idx)
440 _helper(owner, "nicparams",
441 filled, constants.NICS_PARAMETER_TYPES)
442 _helper_nic(owner, filled)
445 if instance.beparams:
446 _helper("instance %s" % instance.name, "beparams",
447 cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)
449 # gather the drbd ports for duplicate checks
450 for dsk in instance.disks:
451 if dsk.dev_type in constants.LDS_DRBD:
452 tcp_port = dsk.logical_id[2]
453 if tcp_port not in ports:
455 ports[tcp_port].append((instance.name, "drbd disk %s" % dsk.iv_name))
456 # gather network port reservation
457 net_port = getattr(instance, "network_port", None)
458 if net_port is not None:
459 if net_port not in ports:
461 ports[net_port].append((instance.name, "network port"))
463 # instance disk verify
464 for idx, disk in enumerate(instance.disks):
465 result.extend(["instance '%s' disk %d error: %s" %
466 (instance.name, idx, msg) for msg in disk.Verify()])
467 result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))
469 # cluster-wide pool of free ports
470 for free_port in cluster.tcpudp_port_pool:
471 if free_port not in ports:
472 ports[free_port] = []
473 ports[free_port].append(("cluster", "port marked as free"))
475 # compute tcp/udp duplicate ports
481 txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
482 result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
484 # highest used tcp port check
486 if keys[-1] > cluster.highest_used_port:
487 result.append("Highest used port mismatch, saved %s, computed %s" %
488 (cluster.highest_used_port, keys[-1]))
490 if not data.nodes[cluster.master_node].master_candidate:
491 result.append("Master node is not a master candidate")
493 # master candidate checks
494 mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
496 result.append("Not enough master candidates: actual %d, target %d" %
500 for node_name, node in data.nodes.items():
501 if node.name != node_name:
502 result.append("Node '%s' is indexed by wrong name '%s'" %
503 (node.name, node_name))
504 if [node.master_candidate, node.drained, node.offline].count(True) > 1:
505 result.append("Node %s state is invalid: master_candidate=%s,"
506 " drain=%s, offline=%s" %
507 (node.name, node.master_candidate, node.drained,
509 if node.group not in data.nodegroups:
510 result.append("Node '%s' has invalid group '%s'" %
511 (node.name, node.group))
513 _helper("node %s" % node.name, "ndparams",
514 cluster.FillND(node, data.nodegroups[node.group]),
515 constants.NDS_PARAMETER_TYPES)
518 nodegroups_names = set()
519 for nodegroup_uuid in data.nodegroups:
520 nodegroup = data.nodegroups[nodegroup_uuid]
521 if nodegroup.uuid != nodegroup_uuid:
522 result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
523 % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
524 if utils.UUID_RE.match(nodegroup.name.lower()):
525 result.append("node group '%s' (uuid: '%s') has uuid-like name" %
526 (nodegroup.name, nodegroup.uuid))
527 if nodegroup.name in nodegroups_names:
528 result.append("duplicate node group name '%s'" % nodegroup.name)
530 nodegroups_names.add(nodegroup.name)
531 if nodegroup.ndparams:
532 _helper("group %s" % nodegroup.name, "ndparams",
533 cluster.SimpleFillND(nodegroup.ndparams),
534 constants.NDS_PARAMETER_TYPES)
538 _, duplicates = self._UnlockedComputeDRBDMap()
539 for node, minor, instance_a, instance_b in duplicates:
540 result.append("DRBD minor %d on node %s is assigned twice to instances"
541 " %s and %s" % (minor, node, instance_a, instance_b))
544 default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
547 def _AddIpAddress(ip, name):
548 ips.setdefault(ip, []).append(name)
550 _AddIpAddress(cluster.master_ip, "cluster_ip")
552 for node in data.nodes.values():
553 _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
554 if node.secondary_ip != node.primary_ip:
555 _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
557 for instance in data.instances.values():
558 for idx, nic in enumerate(instance.nics):
562 nicparams = objects.FillDict(default_nicparams, nic.nicparams)
563 nic_mode = nicparams[constants.NIC_MODE]
564 nic_link = nicparams[constants.NIC_LINK]
566 if nic_mode == constants.NIC_MODE_BRIDGED:
567 link = "bridge:%s" % nic_link
568 elif nic_mode == constants.NIC_MODE_ROUTED:
569 link = "route:%s" % nic_link
571 raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
573 _AddIpAddress("%s/%s" % (link, nic.ip),
574 "instance:%s/nic:%d" % (instance.name, idx))
576 for ip, owners in ips.items():
578 result.append("IP address %s is used by multiple owners: %s" %
579 (ip, utils.CommaJoin(owners)))
@locking.ssynchronized(_config_lock, shared=1)
def VerifyConfig(self):
    """Verify the configuration.

    This is just a wrapper over L{_UnlockedVerifyConfig}.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    problems = self._UnlockedVerifyConfig()
    return problems
def _UnlockedSetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote node.

    This function is for internal use, when the config lock is already
    held.

    @param disk: the disk whose C{physical_id} is to be (re)computed
    @param node_name: the node for which the physical id is computed
    @raise errors.ConfigurationError: if a DRBD disk does not involve
        C{node_name}, or one of its nodes is not in the configuration

    """
    if disk.children:
        for child in disk.children:
            self._UnlockedSetDiskID(child, node_name)

    # nothing to derive from; keep whatever physical id is already set
    if disk.logical_id is None and disk.physical_id is not None:
        return
    if disk.dev_type == constants.LD_DRBD8:
        pnode, snode, port, pminor, sminor, secret = disk.logical_id
        if node_name not in (pnode, snode):
            raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                            node_name)
        pnode_info = self._UnlockedGetNodeInfo(pnode)
        snode_info = self._UnlockedGetNodeInfo(snode)
        if pnode_info is None or snode_info is None:
            raise errors.ConfigurationError("Can't find primary or secondary"
                                            " node for %s" % str(disk))
        p_data = (pnode_info.secondary_ip, port)
        s_data = (snode_info.secondary_ip, port)
        # physical id layout: (local ip, port, remote ip, port, local
        # minor, secret), from the point of view of node_name
        if pnode == node_name:
            disk.physical_id = p_data + s_data + (pminor, secret)
        else:  # it must be secondary, we tested above
            disk.physical_id = s_data + p_data + (sminor, secret)
    else:
        disk.physical_id = disk.logical_id
    return
@locking.ssynchronized(_config_lock)
def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    Locked wrapper around L{_UnlockedSetDiskID}, which also descends
    into the disk's children.

    @param disk: the disk to update
    @param node_name: the node for which the physical id is computed

    """
    outcome = self._UnlockedSetDiskID(disk, node_name)
    return outcome
@locking.ssynchronized(_config_lock)
def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    @warning: this method does not "flush" the configuration (via
        L{_WriteConfig}); callers should do that themselves once the
        configuration is stable

    @type port: int
    @param port: the port to return to the pool
    @raise errors.ProgrammerError: if C{port} is not an integer

    """
    if isinstance(port, int):
        pool = self._config_data.cluster.tcpudp_port_pool
        pool.add(port)
        return
    raise errors.ProgrammerError("Invalid type passed for port")
@locking.ssynchronized(_config_lock, shared=1)
def GetPortList(self):
    """Returns a copy of the current port list.

    @return: a copy of the cluster's pool of free TCP/UDP ports

    """
    pool = self._config_data.cluster.tcpudp_port_pool
    return pool.copy()
@locking.ssynchronized(_config_lock)
def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port). The configuration is written out afterwards.

    @return: the allocated port
    @raise errors.ConfigurationError: if the next port of the default
        range would reach C{constants.LAST_DRBD_PORT}

    """
    cluster = self._config_data.cluster
    # If there are TCP/IP ports configured, we use them first.
    if cluster.tcpudp_port_pool:
        port = cluster.tcpudp_port_pool.pop()
    else:
        port = cluster.highest_used_port + 1
        if port >= constants.LAST_DRBD_PORT:
            raise errors.ConfigurationError("The highest used port is greater"
                                            " than %s. Aborting." %
                                            constants.LAST_DRBD_PORT)
        cluster.highest_used_port = port

    self._WriteConfig()
    return port
691 def _UnlockedComputeDRBDMap(self):
692 """Compute the used DRBD minor/nodes.
695 @return: dictionary of node_name: dict of minor: instance_name;
696 the returned dict will have all the nodes in it (even if with
697 an empty list), and a list of duplicates; if the duplicates
698 list is not empty, the configuration is corrupted and its caller
699 should raise an exception
702 def _AppendUsedPorts(instance_name, disk, used):
704 if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
705 node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
706 for node, port in ((node_a, minor_a), (node_b, minor_b)):
707 assert node in used, ("Node '%s' of instance '%s' not found"
708 " in node list" % (node, instance_name))
709 if port in used[node]:
710 duplicates.append((node, port, instance_name, used[node][port]))
712 used[node][port] = instance_name
714 for child in disk.children:
715 duplicates.extend(_AppendUsedPorts(instance_name, child, used))
719 my_dict = dict((node, {}) for node in self._config_data.nodes)
720 for instance in self._config_data.instances.itervalues():
721 for disk in instance.disks:
722 duplicates.extend(_AppendUsedPorts(instance.name, disk, my_dict))
723 for (node, minor), instance in self._temporary_drbds.iteritems():
724 if minor in my_dict[node] and my_dict[node][minor] != instance:
725 duplicates.append((node, minor, instance, my_dict[node][minor]))
727 my_dict[node][minor] = instance
728 return my_dict, duplicates
@locking.ssynchronized(_config_lock)
def ComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    This is just a wrapper over L{_UnlockedComputeDRBDMap}.

    @return: dictionary of node_name: dict of minor: instance_name;
        the returned dict will have all the nodes in it (even if with
        an empty list).
    @raise errors.ConfigurationError: if a minor is assigned more than
        once on a node

    """
    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
        raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                        str(duplicates))
    return d_map
@locking.ssynchronized(_config_lock)
def AllocateDRBDMinor(self, nodes, instance):
    """Allocate a drbd minor.

    The free minor will be automatically computed from the existing
    devices. A node can be given multiple times in order to allocate
    multiple minors. The result is the list of minors, in the same
    order as the passed nodes.

    Each allocated minor is also recorded in C{_temporary_drbds} until
    it is released for the instance.

    @type nodes: list
    @param nodes: names of the nodes on which to allocate a minor
    @type instance: string
    @param instance: the instance for which we allocate minors
    @rtype: list
    @return: the allocated minors
    @raise errors.ConfigurationError: if the current minor assignment
        already contains duplicates

    """
    assert isinstance(instance, basestring), \
        "Invalid argument '%s' passed to AllocateDRBDMinor" % instance

    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
        raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                        str(duplicates))
    result = []
    for nname in nodes:
        ndata = d_map[nname]
        if not ndata:
            # no minors used, we can start at 0
            result.append(0)
            ndata[0] = instance
            self._temporary_drbds[(nname, 0)] = instance
            continue
        keys = sorted(ndata)
        ffree = utils.FirstFree(keys)
        if ffree is None:
            # return the next minor
            # TODO: implement high-limit check
            minor = keys[-1] + 1
        else:
            minor = ffree
        # double-check minor against current instances
        assert minor not in d_map[nname], \
            ("Attempt to reuse allocated DRBD minor %d on node %s,"
             " already allocated to instance %s" %
             (minor, nname, d_map[nname][minor]))
        # record it in the map as well, so that a node listed several
        # times gets a different minor each time
        ndata[minor] = instance
        # double-check minor against reservation
        r_key = (nname, minor)
        assert r_key not in self._temporary_drbds, \
            ("Attempt to reuse reserved DRBD minor %d on node %s,"
             " reserved for instance %s" %
             (minor, nname, self._temporary_drbds[r_key]))
        self._temporary_drbds[r_key] = instance
        result.append(minor)
    logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                  nodes, result)
    return result
def _UnlockedReleaseDRBDMinors(self, instance):
    """Release temporary drbd minors allocated for a given instance.

    This function is for internal use, when the config lock is already
    held.

    @type instance: string
    @param instance: the instance for which temporary minors should be
        released

    """
    assert isinstance(instance, basestring), \
        "Invalid argument passed to ReleaseDRBDMinors"
    # iterate over a snapshot: entries are deleted while looping
    for key, name in list(self._temporary_drbds.items()):
        if name == instance:
            del self._temporary_drbds[key]
@locking.ssynchronized(_config_lock)
def ReleaseDRBDMinors(self, instance):
    """Release temporary drbd minors allocated for a given instance.

    This should be called on the error paths, on the success paths
    it's automatically called by the ConfigWriter add and update
    functions.

    This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.

    @type instance: string
    @param instance: the instance for which temporary minors should be
        released

    """
    release = self._UnlockedReleaseDRBDMinors
    release(instance)
@locking.ssynchronized(_config_lock, shared=1)
def GetConfigVersion(self):
    """Get the configuration version.

    @return: Config version

    """
    config = self._config_data
    return config.version
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterName(self):
    """Get the cluster name.

    @return: Cluster name

    """
    cluster = self._config_data.cluster
    return cluster.cluster_name
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNode(self):
    """Get the hostname of the master node for this cluster.

    @return: Master hostname

    """
    cluster = self._config_data.cluster
    return cluster.master_node
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterIP(self):
    """Get the IP of the master node for this cluster.

    @return: the cluster's C{master_ip}

    """
    cluster = self._config_data.cluster
    return cluster.master_ip
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetdev(self):
    """Get the master network device for this cluster.

    @return: the cluster's C{master_netdev}

    """
    cluster = self._config_data.cluster
    return cluster.master_netdev
@locking.ssynchronized(_config_lock, shared=1)
def GetFileStorageDir(self):
    """Get the file storage dir for this cluster.

    @return: the cluster's C{file_storage_dir}

    """
    cluster = self._config_data.cluster
    return cluster.file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetHypervisorType(self):
    """Get the hypervisor type for this cluster.

    @return: the first entry of the cluster's enabled hypervisors

    """
    enabled = self._config_data.cluster.enabled_hypervisors
    return enabled[0]
@locking.ssynchronized(_config_lock, shared=1)
def GetHostKey(self):
    """Return the rsa hostkey from the config.

    @return: the rsa hostkey

    """
    cluster = self._config_data.cluster
    return cluster.rsahostkeypub
@locking.ssynchronized(_config_lock, shared=1)
def GetDefaultIAllocator(self):
    """Get the default instance allocator for this cluster.

    @return: the cluster's C{default_iallocator}

    """
    cluster = self._config_data.cluster
    return cluster.default_iallocator
@locking.ssynchronized(_config_lock, shared=1)
def GetPrimaryIPFamily(self):
    """Get cluster primary ip family.

    @return: primary ip family

    """
    cluster = self._config_data.cluster
    return cluster.primary_ip_family
@locking.ssynchronized(_config_lock)
def AddNodeGroup(self, group, ec_id, check_uuid=True):
    """Add a node group to the configuration.

    This method calls group.UpgradeConfig() to fill any missing attributes
    according to their default values. The configuration is written out
    afterwards.

    @type group: L{objects.NodeGroup}
    @param group: the NodeGroup object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: bool
    @param check_uuid: add an UUID to the group if it doesn't have one or, if
        it does, ensure that it does not exist in the configuration already

    """
    self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
    self._WriteConfig()
def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
    """Add a node group to the configuration.

    The group gets fresh serial number and timestamps, is upgraded via
    C{group.UpgradeConfig()} and stored under its UUID; the cluster
    serial number is bumped. The configuration is not written out here.

    @param group: the node group object to add
    @param ec_id: execution context id used if a UUID must be generated
    @param check_uuid: whether to generate/verify the group's UUID
    @raise errors.OpPrereqError: if a group with the same name exists

    """
    logging.info("Adding node group %s to configuration", group.name)

    # Some code might need to add a node group with a pre-populated UUID
    # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
    # the "does this UUID" exist already check.
    if check_uuid:
        self._EnsureUUID(group, ec_id)

    try:
        existing_uuid = self._UnlockedLookupNodeGroup(group.name)
    except errors.OpPrereqError:
        # lookup failure means the name is still free
        pass
    else:
        # NOTE(review): error code argument reconstructed from a lossy
        # listing - confirm against the pristine file
        raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                   " node group (UUID: %s)" %
                                   (group.name, existing_uuid),
                                   errors.ECODE_EXISTS)

    group.serial_no = 1
    group.ctime = group.mtime = time.time()
    group.UpgradeConfig()

    self._config_data.nodegroups[group.uuid] = group
    self._config_data.cluster.serial_no += 1
@locking.ssynchronized(_config_lock)
def RemoveNodeGroup(self, group_uuid):
    """Remove a node group from the configuration.

    The cluster serial number is bumped and the configuration is written
    out afterwards.

    @type group_uuid: string
    @param group_uuid: the UUID of the node group to remove
    @raise errors.ConfigurationError: if no group has this UUID

    """
    logging.info("Removing node group %s from configuration", group_uuid)

    if group_uuid not in self._config_data.nodegroups:
        raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)

    # callers are expected never to remove the last remaining group
    assert len(self._config_data.nodegroups) != 1, \
        "Group '%s' is the only group, cannot be removed" % group_uuid

    del self._config_data.nodegroups[group_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()
986 def _UnlockedLookupNodeGroup(self, target):
987 """Lookup a node group's UUID.
989 @type target: string or None
990 @param target: group name or UUID or None to look for the default
992 @return: nodegroup UUID
993 @raises errors.OpPrereqError: when the target group cannot be found
997 if len(self._config_data.nodegroups) != 1:
998 raise errors.OpPrereqError("More than one node group exists. Target"
999 " group must be specified explicitely.")
1001 return self._config_data.nodegroups.keys()[0]
1002 if target in self._config_data.nodegroups:
1004 for nodegroup in self._config_data.nodegroups.values():
1005 if nodegroup.name == target:
1006 return nodegroup.uuid
1007 raise errors.OpPrereqError("Node group '%s' not found" % target,
@locking.ssynchronized(_config_lock, shared=1)
def LookupNodeGroup(self, target):
    """Lookup a node group's UUID.

    This function is just a wrapper over L{_UnlockedLookupNodeGroup}.

    @type target: string or None
    @param target: group name or UUID or None to look for the default
    @rtype: string
    @return: nodegroup UUID

    """
    group_uuid = self._UnlockedLookupNodeGroup(target)
    return group_uuid
1024 def _UnlockedGetNodeGroup(self, uuid):
1025 """Lookup a node group.
1028 @param uuid: group UUID
1029 @rtype: L{objects.NodeGroup} or None
1030 @return: nodegroup object, or None if not found
1033 if uuid not in self._config_data.nodegroups:
1036 return self._config_data.nodegroups[uuid]
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroup(self, uuid):
    """Lookup a node group.

    Locked wrapper around L{_UnlockedGetNodeGroup}.

    @type uuid: string
    @param uuid: group UUID
    @rtype: L{objects.NodeGroup} or None
    @return: nodegroup object, or None if not found

    """
    nodegroup = self._UnlockedGetNodeGroup(uuid)
    return nodegroup
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodeGroupsInfo(self):
    """Get the configuration of all node groups.

    @rtype: dict
    @return: a new dict built from the configuration's node groups
        mapping (the group objects themselves are not copied)

    """
    nodegroups = self._config_data.nodegroups
    return dict(nodegroups)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupList(self):
    """Get a list of node groups.

    @return: the keys of the configuration's node groups mapping

    """
    nodegroups = self._config_data.nodegroups
    return nodegroups.keys()
@locking.ssynchronized(_config_lock)
def AddInstance(self, instance, ec_id):
    """Add an instance to the config.

    This should be used after creating a new instance. The instance gets
    a UUID if it has none, fresh serial number and timestamps; temporary
    DRBD minors reserved for it are released and the configuration is
    written out.

    @type instance: L{objects.Instance}
    @param instance: the instance object
    @param ec_id: execution context id used if a UUID must be generated
    @raise errors.ProgrammerError: if C{instance} has the wrong type
    @raise errors.ConfigurationError: if one of the instance's MAC
        addresses is already in use

    """
    if not isinstance(instance, objects.Instance):
        raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
        all_lvs = instance.MapLVsByNode()
        logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, all_lvs)

    all_macs = self._AllMACs()
    for nic in instance.nics:
        if nic.mac in all_macs:
            raise errors.ConfigurationError("Cannot add instance %s:"
                                            " MAC address '%s' already in use." %
                                            (instance.name, nic.mac))

    self._EnsureUUID(instance, ec_id)

    instance.serial_no = 1
    instance.ctime = instance.mtime = time.time()
    self._config_data.instances[instance.name] = instance
    self._config_data.cluster.serial_no += 1
    # the minors are now part of the stored instance, so the temporary
    # reservations are no longer needed
    self._UnlockedReleaseDRBDMinors(instance.name)
    self._WriteConfig()
1097 def _EnsureUUID(self, item, ec_id):
1098 """Ensures a given object has a valid UUID.
1100 @param item: the instance or node to be checked
1101 @param ec_id: the execution context id for the uuid reservation
1105 item.uuid = self._GenerateUniqueID(ec_id)
1106 elif item.uuid in self._AllIDs(include_temporary=True):
1107 raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
1108 " in use" % (item.name, item.uuid))
def _SetInstanceStatus(self, instance_name, status):
  """Store a new administrative up/down state for an instance.

  The instance's serial number and modification time are only touched,
  and the configuration only written, when the state actually changes.

  @param instance_name: name of the instance to update
  @type status: boolean
  @param status: the new value of the instance's C{admin_up} flag
  @raise errors.ConfigurationError: if the instance is not known

  """
  assert isinstance(status, bool), \
         "Invalid status '%s' passed to SetInstanceStatus" % (status,)

  instances = self._config_data.instances
  if instance_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" %
                                    instance_name)

  instance = instances[instance_name]
  if instance.admin_up == status:
    # nothing changed, so there is nothing to save
    return

  instance.admin_up = status
  instance.serial_no += 1
  instance.mtime = time.time()
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceUp(self, instance_name):
  """Record in the configuration that the instance is administratively up.

  @param instance_name: name of the instance to mark as up

  """
  admin_up = True
  self._SetInstanceStatus(instance_name, admin_up)
@locking.ssynchronized(_config_lock)
def RemoveInstance(self, instance_name):
  """Delete an instance from the configuration.

  @param instance_name: name of the instance to remove
  @raise errors.ConfigurationError: if the instance is not known

  """
  instances = self._config_data.instances
  cluster = self._config_data.cluster
  if instance_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  # If a network port has been allocated to the instance,
  # return it to the pool of free ports.
  port = getattr(instances[instance_name], "network_port", None)
  if port is not None:
    cluster.tcpudp_port_pool.add(port)

  del instances[instance_name]
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RenameInstance(self, old_name, new_name):
  """Rename an instance.

  This is done here rather than through RemoveInstance followed by
  AddInstance, because only the ConfigWriter can perform the renaming
  as a single step. File-based disks get their paths rewritten so that
  they contain the new instance name.

  @param old_name: the current name of the instance
  @param new_name: the name the instance should get
  @raise errors.ConfigurationError: if C{old_name} is not known

  """
  instances = self._config_data.instances
  if old_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
  inst = instances.pop(old_name)
  inst.name = new_name

  for disk in inst.disks:
    if disk.dev_type != constants.LD_FILE:
      continue
    # rename the file paths in logical and physical id
    old_path = disk.logical_id[1]
    file_storage_dir = os.path.dirname(os.path.dirname(old_path))
    disk_fname = "disk%s" % disk.iv_name.split("/")[1]
    new_id = (disk.logical_id[0],
              utils.PathJoin(file_storage_dir, inst.name, disk_fname))
    disk.physical_id = disk.logical_id = new_id

  # Force update of ssconf files
  self._config_data.cluster.serial_no += 1

  instances[inst.name] = inst
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceDown(self, instance_name):
  """Record in the configuration that the instance is administratively down.

  @param instance_name: name of the instance to mark as down

  """
  admin_up = False
  self._SetInstanceStatus(instance_name, admin_up)
def _UnlockedGetInstanceList(self):
  """Return the names of all instances in the configuration.

  This function is for internal use, when the config lock is already held.

  """
  instances = self._config_data.instances
  return instances.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceList(self):
  """Return the names of all instances in the configuration.

  This is a wrapper over L{_UnlockedGetInstanceList}.

  @return: array of instances, ex. ['instance2.example.com',
      'instance1.example.com']

  """
  instance_names = self._UnlockedGetInstanceList()
  return instance_names
@locking.ssynchronized(_config_lock, shared=1)
def ExpandInstanceName(self, short_name):
  """Try to expand an incomplete instance name.

  The name is matched, ignoring case, against the names of all
  instances in the configuration.

  @param short_name: the (possibly abbreviated) instance name
  @return: whatever L{utils.MatchNameComponent} returns for the lookup

  """
  known_names = self._config_data.instances.keys()
  return utils.MatchNameComponent(short_name, known_names,
                                  case_sensitive=False)
def _UnlockedGetInstanceInfo(self, instance_name):
  """Look up the configuration object of an instance.

  This function is for internal use, when the config lock is already held.

  @param instance_name: name of the instance to look up
  @return: the stored instance object, or None if the name is unknown

  """
  instances = self._config_data.instances
  return instances.get(instance_name, None)
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceInfo(self, instance_name):
  """Return the configuration object of an instance.

  Only data stored in the configuration is returned; this is a wrapper
  over L{_UnlockedGetInstanceInfo}.

  @param instance_name: name of the instance, e.g.
      I{instance1.example.com}

  @rtype: L{objects.Instance}
  @return: the instance object

  """
  instance = self._UnlockedGetInstanceInfo(instance_name)
  return instance
@locking.ssynchronized(_config_lock, shared=1)
def GetAllInstancesInfo(self):
  """Return the configuration of every instance.

  @rtype: dict
  @return: dict of (instance, instance_info), where instance_info is what
      GetInstanceInfo would return for that instance

  """
  info_by_name = {}
  for name in self._UnlockedGetInstanceList():
    info_by_name[name] = self._UnlockedGetInstanceInfo(name)
  return info_by_name
@locking.ssynchronized(_config_lock)
def AddNode(self, node, ec_id):
  """Register a node in the configuration.

  The node gets a UUID (if it has none), an initial serial number and
  fresh timestamps; it is added to the members of its node group and
  stored under its name. The cluster serial number is bumped too.

  @type node: L{objects.Node}
  @param node: a Node instance
  @param ec_id: the execution context id used for the UUID reservation

  """
  logging.info("Adding node %s to configuration", node.name)

  self._EnsureUUID(node, ec_id)

  now = time.time()
  node.serial_no = 1
  node.ctime = now
  node.mtime = now

  self._UnlockedAddNodeToGroup(node.name, node.group)
  config = self._config_data
  config.nodes[node.name] = node
  config.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RemoveNode(self, node_name):
  """Delete a node from the configuration.

  The node is also taken out of the member list of its node group.

  @param node_name: name of the node to remove
  @raise errors.ConfigurationError: if the node is not known

  """
  logging.info("Removing node %s from configuration", node_name)

  nodes = self._config_data.nodes
  if node_name not in nodes:
    raise errors.ConfigurationError("Unknown node '%s'" % node_name)

  self._UnlockedRemoveNodeFromGroup(nodes[node_name])
  del nodes[node_name]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def ExpandNodeName(self, short_name):
  """Try to expand an incomplete node name.

  The name is matched, ignoring case, against the names of all nodes
  in the configuration.

  @param short_name: the (possibly abbreviated) node name
  @return: whatever L{utils.MatchNameComponent} returns for the lookup

  """
  known_names = self._config_data.nodes.keys()
  return utils.MatchNameComponent(short_name, known_names,
                                  case_sensitive=False)
def _UnlockedGetNodeInfo(self, node_name):
  """Look up the configuration object of a node.

  This function is for internal use, when the config lock is already
  held.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: L{objects.Node}
  @return: the node object, or None if the name is unknown

  """
  nodes = self._config_data.nodes
  return nodes.get(node_name, None)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInfo(self, node_name):
  """Return the configuration object of a node.

  This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: L{objects.Node}
  @return: the node object

  """
  node = self._UnlockedGetNodeInfo(node_name)
  return node
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInstances(self, node_name):
  """Return the names of the instances that use a node.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: (list, list)
  @return: a tuple with two lists: the instances having the node as
      primary node and the ones having it among their secondary nodes

  """
  all_instances = self._config_data.instances.values()
  pri = [inst.name for inst in all_instances
         if inst.primary_node == node_name]
  sec = [inst.name for inst in all_instances
         if node_name in inst.secondary_nodes]
  return (pri, sec)
def _UnlockedGetNodeList(self):
  """Return the names of all nodes in the configuration.

  This function is for internal use, when the config lock is already
  held.

  """
  nodes = self._config_data.nodes
  return nodes.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeList(self):
  """Return the names of all nodes in the configuration.

  This is a wrapper over L{_UnlockedGetNodeList}.

  """
  node_names = self._UnlockedGetNodeList()
  return node_names
def _UnlockedGetOnlineNodeList(self):
  """Return the names of all nodes that are not marked offline.

  @rtype: list
  @return: the names of the nodes whose C{offline} flag is not set

  """
  online = []
  for node_name in self._UnlockedGetNodeList():
    node = self._UnlockedGetNodeInfo(node_name)
    if not node.offline:
      online.append(node.name)
  return online
@locking.ssynchronized(_config_lock, shared=1)
def GetOnlineNodeList(self):
  """Return the names of all nodes that are not marked offline.

  This is a wrapper over L{_UnlockedGetOnlineNodeList}.

  """
  online_names = self._UnlockedGetOnlineNodeList()
  return online_names
@locking.ssynchronized(_config_lock, shared=1)
def GetVmCapableNodeList(self):
  """Return the list of nodes which are vm capable.

  @rtype: list
  @return: the names of the nodes whose C{vm_capable} flag is set

  """
  capable = []
  for node_name in self._UnlockedGetNodeList():
    node = self._UnlockedGetNodeInfo(node_name)
    if node.vm_capable:
      capable.append(node.name)
  return capable
@locking.ssynchronized(_config_lock, shared=1)
def GetNonVmCapableNodeList(self):
  """Return the list of nodes which are not vm capable.

  @rtype: list
  @return: the names of the nodes whose C{vm_capable} flag is not set

  """
  not_capable = []
  for node_name in self._UnlockedGetNodeList():
    node = self._UnlockedGetNodeInfo(node_name)
    if not node.vm_capable:
      not_capable.append(node.name)
  return not_capable
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodesInfo(self):
  """Return the configuration of every node.

  @rtype: dict
  @return: dict of (node, node_info), where node_info is what
      GetNodeInfo would return for the node

  """
  info_by_name = {}
  for name in self._UnlockedGetNodeList():
    info_by_name[name] = self._UnlockedGetNodeInfo(name)
  return info_by_name
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupsFromNodes(self, nodes):
  """Return the groups a list of nodes belongs to.

  @type nodes: list of string
  @param nodes: List of node names
  @rtype: frozenset
  @return: the C{group} attribute of each of the given nodes

  """
  group_ids = [self._UnlockedGetNodeInfo(name).group for name in nodes]
  return frozenset(group_ids)
def _UnlockedGetMasterCandidateStats(self, exceptions=None):
  """Count the current, desired and possible master candidates.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: tuple of (current, desired and possible, possible), where
      "desired and possible" is capped by the cluster's candidate pool
      size

  """
  considered = [node for node in self._config_data.nodes.values()
                if not (exceptions and node.name in exceptions)]

  # nodes that could be candidates: master capable, not offline or drained
  eligible = [node for node in considered
              if not (node.offline or node.drained) and node.master_capable]
  current = [node for node in considered if node.master_candidate]

  mc_max = len(eligible)
  mc_now = len(current)
  mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
  return (mc_now, mc_should, mc_max)
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterCandidateStats(self, exceptions=None):
  """Count the current, desired and possible master candidates.

  This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: the unchanged result of L{_UnlockedGetMasterCandidateStats}

  """
  stats = self._UnlockedGetMasterCandidateStats(exceptions)
  return stats
@locking.ssynchronized(_config_lock)
def MaintainCandidatePool(self, exceptions):
  """Try to grow the candidate pool to the desired size.

  Nodes are visited in random order and promoted to master candidate
  until the desired number is reached; nodes that already are
  candidates, are offline, drained, not master capable or listed in
  C{exceptions} are skipped. The configuration is written only if at
  least one node was promoted.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored;
      C{None} is treated like an empty list
  @rtype: list
  @return: list with the adjusted nodes (L{objects.Node} instances)

  """
  mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions)
  if exceptions is None:
    # the membership test below would raise TypeError on None
    exceptions = []

  mod_list = []
  if mc_now < mc_max:
    # shuffle a private copy of the names, never the mapping's own keys
    node_list = list(self._config_data.nodes.keys())
    random.shuffle(node_list)
    for name in node_list:
      if mc_now >= mc_max:
        break
      node = self._config_data.nodes[name]
      if (node.master_candidate or node.offline or node.drained or
          node.name in exceptions or not node.master_capable):
        continue
      mod_list.append(node)
      node.master_candidate = True
      node.serial_no += 1
      mc_now += 1
    if mc_now != mc_max:
      # this should not happen
      logging.warning("Warning: MaintainCandidatePool didn't manage to"
                      " fill the candidate pool (%d/%d)", mc_now, mc_max)
    if mod_list:
      self._config_data.cluster.serial_no += 1
      self._WriteConfig()

  return mod_list
def _UnlockedAddNodeToGroup(self, node_name, nodegroup_uuid):
  """Add a given node to the member list of the specified group.

  A node that is already listed is not added a second time.

  @param node_name: name of the node to add
  @param nodegroup_uuid: UUID of the group the node should be listed in
  @raise errors.OpExecError: if the node group is not known

  """
  nodegroups = self._config_data.nodegroups
  if nodegroup_uuid not in nodegroups:
    # This can happen if a node group gets deleted between its lookup and
    # when we're adding the first node to it, since we don't keep a lock in
    # the meantime. It's ok though, as we'll fail cleanly if the node group
    # is not found anymore.
    raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)

  if node_name in nodegroups[nodegroup_uuid].members:
    return
  nodegroups[nodegroup_uuid].members.append(node_name)
def _UnlockedRemoveNodeFromGroup(self, node):
  """Remove a given node from the member list of its group.

  Inconsistencies (the node's group is unknown, or the node is not
  listed as member of its group) are only logged as warnings; in both
  cases there is nothing to remove.

  @type node: L{objects.Node}
  @param node: the node to take out of its group's member list

  """
  nodegroup = node.group
  if nodegroup not in self._config_data.nodegroups:
    logging.warning("Warning: node '%s' has unknown node group '%s'"
                    " (while being removed from it)", node.name, nodegroup)
    # Without this early exit the lookup below raises KeyError right
    # after the warning, turning a tolerated inconsistency into a failure
    return

  nodegroup_obj = self._config_data.nodegroups[nodegroup]
  if node.name not in nodegroup_obj.members:
    logging.warning("Warning: node '%s' not a member of its node group '%s'"
                    " (while being removed from it)", node.name, nodegroup)
  else:
    nodegroup_obj.members.remove(node.name)
@locking.ssynchronized(_config_lock)
def AssignGroupNodes(self, mods):
  """Move a number of nodes to other node groups.

  All names and UUIDs are resolved before anything is modified, so an
  unknown node or group aborts the call without partial changes.
  Requests assigning a node to the group it already is in are skipped.

  @type mods: list of tuples; (node name, new group UUID)
  @param mods: Node membership modifications
  @raise errors.ConfigurationError: if a node, its current group or the
      requested group cannot be found

  """
  groups = self._config_data.nodegroups
  nodes = self._config_data.nodes

  pending = []

  # Try to resolve names/UUIDs first
  for (node_name, new_group_uuid) in mods:
    try:
      node = nodes[node_name]
    except KeyError:
      raise errors.ConfigurationError("Unable to find node '%s'" % node_name)

    if node.group == new_group_uuid:
      # Node is being assigned to its current group
      logging.debug("Node '%s' was assigned to its current group (%s)",
                    node_name, node.group)
      continue

    # Try to find current group of node
    try:
      old_group = groups[node.group]
    except KeyError:
      raise errors.ConfigurationError("Unable to find old group '%s'" %
                                      node.group)

    # Try to find new group for node
    try:
      new_group = groups[new_group_uuid]
    except KeyError:
      raise errors.ConfigurationError("Unable to find new group '%s'" %
                                      new_group_uuid)

    assert node.name in old_group.members, \
      ("Inconsistent configuration: node '%s' not listed in members for its"
       " old group '%s'" % (node.name, old_group.uuid))
    assert node.name not in new_group.members, \
      ("Inconsistent configuration: node '%s' already listed in members for"
       " its new group '%s'" % (node.name, new_group.uuid))

    pending.append((node, old_group, new_group))

  # Apply changes
  for (node, old_group, new_group) in pending:
    # NOTE(review): the first comparison is between the node's own UUID
    # and a group UUID; node.group may have been intended - confirm
    assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
      "Assigning to current group is not possible"

    node.group = new_group.uuid

    # Update members of involved groups
    if node.name in old_group.members:
      old_group.members.remove(node.name)
    if node.name not in new_group.members:
      new_group.members.append(node.name)

  # Update timestamps and serials (only once per node/group object)
  now = time.time()
  touched = frozenset(itertools.chain(*pending)) # pylint: disable-msg=W0142
  for obj in touched:
    obj.serial_no += 1
    obj.mtime = now

  # Force ssconf update
  self._config_data.cluster.serial_no += 1

  self._WriteConfig()
def _BumpSerialNo(self):
  """Increase the configuration's serial number and refresh its mtime.

  """
  config = self._config_data
  config.serial_no += 1
  config.mtime = time.time()
def _AllUUIDObjects(self):
  """Return all configuration objects that carry a uuid attribute.

  @rtype: list
  @return: all instances, nodes and node groups, followed by the
      cluster object

  """
  config = self._config_data
  uuid_objects = []
  uuid_objects.extend(config.instances.values())
  uuid_objects.extend(config.nodes.values())
  uuid_objects.extend(config.nodegroups.values())
  uuid_objects.append(config.cluster)
  return uuid_objects
1613 def _OpenConfig(self, accept_foreign):
1614 """Read the config data from disk.
1617 raw_data = utils.ReadFile(self._cfg_file)
1620 data = objects.ConfigData.FromDict(serializer.Load(raw_data))
1621 except Exception, err:
1622 raise errors.ConfigurationError(err)
1624 # Make sure the configuration has the right version
1625 _ValidateConfig(data)
1627 if (not hasattr(data, 'cluster') or
1628 not hasattr(data.cluster, 'rsahostkeypub')):
1629 raise errors.ConfigurationError("Incomplete configuration"
1630 " (missing cluster.rsahostkeypub)")
1632 if data.cluster.master_node != self._my_hostname and not accept_foreign:
1633 msg = ("The configuration denotes node %s as master, while my"
1634 " hostname is %s; opening a foreign configuration is only"
1635 " possible in accept_foreign mode" %
1636 (data.cluster.master_node, self._my_hostname))
1637 raise errors.ConfigurationError(msg)
1639 # Upgrade configuration if needed
1640 data.UpgradeConfig()
1642 self._config_data = data
1643 # reset the last serial as -1 so that the next write will cause
1645 self._last_cluster_serial = -1
1647 # And finally run our (custom) config upgrade sequence
1648 self._UpgradeConfig()
1650 self._cfg_id = utils.GetFileID(path=self._cfg_file)
def _UpgradeConfig(self):
  """Run upgrade steps that cannot be done purely in the objects.

  This is because some data elements need uniqueness across the
  whole configuration, etc.

  @warning: this function will call L{_WriteConfig()}, but also
      L{DropECReservations} so it needs to be called only from a
      "safe" place (the constructor). If one wanted to call it with
      the lock held, a DropECReservationUnlocked would need to be
      created first, to avoid causing deadlock.

  """
  modified = False

  # every object with a uuid attribute must actually have a UUID
  for item in self._AllUUIDObjects():
    if item.uuid is not None:
      continue
    item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
    modified = True

  # a configuration without node groups gets the initial one
  if not self._config_data.nodegroups:
    group_name = constants.INITIAL_NODE_GROUP_NAME
    default_nodegroup = objects.NodeGroup(name=group_name, members=[])
    self._UnlockedAddNodeGroup(default_nodegroup, _UPGRADE_CONFIG_JID, True)
    modified = True

  for node in self._config_data.nodes.values():
    if not node.group:
      node.group = self.LookupNodeGroup(None)
      modified = True
    # This is technically *not* an upgrade, but needs to be done both when
    # nodegroups are being added, and upon normally loading the config,
    # because the members list of a node group is discarded upon
    # serializing/deserializing the object.
    self._UnlockedAddNodeToGroup(node.name, node.group)

  if not modified:
    return

  self._WriteConfig()
  # This is ok even if it acquires the internal lock, as _UpgradeConfig is
  # only called at config init time, without the lock held
  self.DropECReservations(_UPGRADE_CONFIG_JID)
def _DistributeConfig(self, feedback_fn):
  """Distribute the configuration to the other nodes.

  Currently, this only copies the configuration file to all master
  candidates except ourselves. In the future, it could be used to
  encapsulate the 2/3-phase update mechanism.

  @param feedback_fn: if not None, called with the message of each
      failed copy
  @rtype: boolean
  @return: True if we are offline or every copy succeeded, False
      otherwise

  """
  if self._offline:
    return True

  myhostname = self._my_hostname
  node_list = []
  addr_list = []
  # we can skip checking whether _UnlockedGetNodeInfo returns None
  # since the node list comes from _UnlocketGetNodeList, and we are
  # called with the lock held, so no modifications should take place
  # in between
  for node_name in self._UnlockedGetNodeList():
    if node_name == myhostname:
      continue
    node_info = self._UnlockedGetNodeInfo(node_name)
    if node_info.master_candidate:
      node_list.append(node_info.name)
      addr_list.append(node_info.primary_ip)

  result = rpc.RpcRunner.call_upload_file(node_list, self._cfg_file,
                                          address_list=addr_list)
  all_ok = True
  for to_node, to_result in result.items():
    fail_msg = to_result.fail_msg
    if not fail_msg:
      continue

    msg = ("Copy of file %s to node %s failed: %s" %
           (self._cfg_file, to_node, fail_msg))
    logging.error(msg)

    if feedback_fn:
      feedback_fn(msg)

    all_ok = False

  return all_ok
1735 def _WriteConfig(self, destination=None, feedback_fn=None):
1736 """Write the configuration data to persistent storage.
1739 assert feedback_fn is None or callable(feedback_fn)
1741 # Warn on config errors, but don't abort the save - the
1742 # configuration has already been modified, and we can't revert;
1743 # the best we can do is to warn the user and save as is, leaving
1744 # recovery to the user
1745 config_errors = self._UnlockedVerifyConfig()
1747 errmsg = ("Configuration data is not consistent: %s" %
1748 (utils.CommaJoin(config_errors)))
1749 logging.critical(errmsg)
1753 if destination is None:
1754 destination = self._cfg_file
1755 self._BumpSerialNo()
1756 txt = serializer.Dump(self._config_data.ToDict())
1758 getents = self._getents()
1760 fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
1761 close=False, gid=getents.confd_gid, mode=0640)
1762 except errors.LockError:
1763 raise errors.ConfigurationError("The configuration file has been"
1764 " modified since the last write, cannot"
1767 self._cfg_id = utils.GetFileID(fd=fd)
1771 self.write_count += 1
1773 # and redistribute the config file to master candidates
1774 self._DistributeConfig(feedback_fn)
1776 # Write ssconf files on all nodes (including locally)
1777 if self._last_cluster_serial < self._config_data.cluster.serial_no:
1778 if not self._offline:
1779 result = rpc.RpcRunner.call_write_ssconf_files(
1780 self._UnlockedGetOnlineNodeList(),
1781 self._UnlockedGetSsconfValues())
1783 for nname, nresu in result.items():
1784 msg = nresu.fail_msg
1786 errmsg = ("Error while uploading ssconf files to"
1787 " node %s: %s" % (nname, msg))
1788 logging.warning(errmsg)
1793 self._last_cluster_serial = self._config_data.cluster.serial_no
def _UnlockedGetSsconfValues(self):
  """Return the values needed by ssconf.

  List-like values are rendered as newline-separated strings.

  @rtype: dict
  @return: a dictionary with keys the ssconf names and values their
      associated value

  """
  join_lines = "\n".join
  cluster = self._config_data.cluster

  instance_names = utils.NiceSort(self._UnlockedGetInstanceList())
  node_names = utils.NiceSort(self._UnlockedGetNodeList())
  node_info = [self._UnlockedGetNodeInfo(name) for name in node_names]
  candidates = [node for node in node_info if node.master_candidate]

  node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
                  for ninfo in node_info]
  node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
                  for ninfo in node_info]
  nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name)
                for nodegroup in self._config_data.nodegroups.values()]

  ssconf_values = {
    constants.SS_CLUSTER_NAME: cluster.cluster_name,
    constants.SS_CLUSTER_TAGS: join_lines(cluster.GetTags()),
    constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
    constants.SS_MASTER_CANDIDATES:
      join_lines([node.name for node in candidates]),
    constants.SS_MASTER_CANDIDATES_IPS:
      join_lines([node.primary_ip for node in candidates]),
    constants.SS_MASTER_IP: cluster.master_ip,
    constants.SS_MASTER_NETDEV: cluster.master_netdev,
    constants.SS_MASTER_NODE: cluster.master_node,
    constants.SS_NODE_LIST: join_lines(node_names),
    constants.SS_NODE_PRIMARY_IPS: join_lines(node_pri_ips),
    constants.SS_NODE_SECONDARY_IPS: join_lines(node_snd_ips),
    constants.SS_OFFLINE_NODES:
      join_lines([node.name for node in node_info if node.offline]),
    constants.SS_ONLINE_NODES:
      join_lines([node.name for node in node_info if not node.offline]),
    constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
    constants.SS_INSTANCE_LIST: join_lines(instance_names),
    constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
    constants.SS_HYPERVISOR_LIST: join_lines(cluster.enabled_hypervisors),
    constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
    constants.SS_UID_POOL:
      uidpool.FormatUidPool(cluster.uid_pool, separator="\n"),
    constants.SS_NODEGROUPS: join_lines(utils.NiceSort(nodegroups)),
    }
  return ssconf_values
@locking.ssynchronized(_config_lock, shared=1)
def GetSsconfValues(self):
  """Return the values needed by ssconf.

  This is a wrapper using the lock around
  L{_UnlockedGetSsconfValues}.

  """
  ssconf_values = self._UnlockedGetSsconfValues()
  return ssconf_values
@locking.ssynchronized(_config_lock, shared=1)
def GetVGName(self):
  """Return the volume group name stored in the cluster configuration.

  """
  cluster = self._config_data.cluster
  return cluster.volume_group_name
@locking.ssynchronized(_config_lock)
def SetVGName(self, vg_name):
  """Store a new volume group name in the cluster configuration.

  The cluster serial number is bumped and the configuration written.

  @param vg_name: the new volume group name

  """
  cluster = self._config_data.cluster
  cluster.volume_group_name = vg_name
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetDRBDHelper(self):
  """Return the DRBD usermode helper stored in the cluster configuration.

  """
  cluster = self._config_data.cluster
  return cluster.drbd_usermode_helper
@locking.ssynchronized(_config_lock)
def SetDRBDHelper(self, drbd_helper):
  """Store a new DRBD usermode helper in the cluster configuration.

  The cluster serial number is bumped and the configuration written.

  @param drbd_helper: the new DRBD usermode helper

  """
  cluster = self._config_data.cluster
  cluster.drbd_usermode_helper = drbd_helper
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetMACPrefix(self):
  """Return the MAC prefix stored in the cluster configuration.

  """
  cluster = self._config_data.cluster
  return cluster.mac_prefix
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterInfo(self):
  """Return the cluster object of the configuration.

  @rtype: L{objects.Cluster}
  @return: the cluster object

  """
  config = self._config_data
  return config.cluster
@locking.ssynchronized(_config_lock, shared=1)
def HasAnyDiskOfType(self, dev_type):
  """Check whether the configuration has a disk of the given type.

  @param dev_type: the disk type to look for
  @return: the result of the configuration data's own
      C{HasAnyDiskOfType} check

  """
  config = self._config_data
  return config.HasAnyDiskOfType(dev_type)
@locking.ssynchronized(_config_lock)
def Update(self, target, feedback_fn):
  """Notify function to be called after updates.

  This function must be called when an object (as returned by
  GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
  caller wants the modifications saved to the backing store. Note
  that all modified objects will be saved, but the target argument
  is the one the caller wants to ensure that it's saved.

  @param target: an instance of either L{objects.Cluster},
      L{objects.Node}, L{objects.Instance} or L{objects.NodeGroup}
      which is existing in the configuration
  @param feedback_fn: Callable feedback function
  @raise errors.ProgrammerError: if no configuration has been read or
      C{target} is of an unsupported type
  @raise errors.ConfigurationError: if C{target} is not part of the
      current configuration

  """
  if self._config_data is None:
    raise errors.ProgrammerError("Configuration file not read,"
                                 " cannot save.")

  config = self._config_data
  bump_cluster = False
  if isinstance(target, objects.Cluster):
    known = target == config.cluster
  elif isinstance(target, objects.Node):
    known = target in config.nodes.values()
    bump_cluster = True
  elif isinstance(target, objects.Instance):
    known = target in config.instances.values()
  elif isinstance(target, objects.NodeGroup):
    known = target in config.nodegroups.values()
  else:
    raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                 " ConfigWriter.Update" % type(target))
  if not known:
    raise errors.ConfigurationError("Configuration updated since object"
                                    " has been read or unknown object")

  now = time.time()
  target.serial_no += 1
  target.mtime = now

  if bump_cluster:
    # for node updates, we need to increase the cluster serial too
    config.cluster.serial_no += 1
    config.cluster.mtime = now

  if isinstance(target, objects.Instance):
    self._UnlockedReleaseDRBDMinors(target.name)

  self._WriteConfig(feedback_fn=feedback_fn)
@locking.ssynchronized(_config_lock)
def DropECReservations(self, ec_id):
  """Drop per-execution-context reservations.

  The request is forwarded to every entry of C{self._all_rms}.

  @param ec_id: the execution context id whose reservations are dropped

  """
  for manager in self._all_rms:
    manager.DropECReservations(ec_id)