4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
"""Configuration management for Ganeti

This module provides the interface to the Ganeti cluster configuration.

The configuration data is stored on every node but is updated on the master
only. After each update, the master distributes the data to the other nodes.

Currently, the data storage format is JSON. YAML was slow and consuming too
much memory.

"""
34 # pylint: disable=R0904
35 # R0904: Too many public methods
43 from ganeti import errors
44 from ganeti import locking
45 from ganeti import utils
46 from ganeti import constants
47 from ganeti import rpc
48 from ganeti import objects
49 from ganeti import serializer
50 from ganeti import uidpool
51 from ganeti import netutils
52 from ganeti import runtime
# Module-wide lock; it is the lock named by the locking.ssynchronized
# decorators in this file and is also stored on each ConfigWriter as _lock
_config_lock = locking.SharedLock("ConfigWriter")

# job id used for resource management at config upgrade time
_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
def _ValidateConfig(data):
  """Verify that a configuration object looks valid.

  Only the version of the configuration is checked.

  @param data: configuration object exposing a C{version} attribute
  @raise errors.ConfigVersionMismatch: if the version differs from
      C{constants.CONFIG_VERSION}

  """
  if data.version != constants.CONFIG_VERSION:
    raise errors.ConfigVersionMismatch(constants.CONFIG_VERSION, data.version)
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  Reservations are kept in a dict mapping each holder id (C{ec_id}) to the
  set of resources that holder has reserved.

  """
  # Maximum number of candidates L{Generate} tries before giving up.
  # NOTE(review): the retry loop was not visible in the reviewed listing; the
  # bound of 64 is an assumption -- confirm against the original file.
  _GENERATE_RETRIES = 64

  def __init__(self):
    """Start with no reservations."""
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Tell whether any holder has reserved the given resource.

    @rtype: boolean

    """
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve a resource on behalf of a holder.

    @param ec_id: id of the holder the reservation is recorded under
    @param resource: the (hashable) resource to reserve
    @raise errors.ReservationError: if any holder already reserved it

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    self._ec_reserved.setdefault(ec_id, set()).add(resource)

  def DropECReservations(self, ec_id):
    """Forget every reservation of the given holder; unknown ids are ignored.

    """
    self._ec_reserved.pop(ec_id, None)

  def GetReserved(self):
    """Return a new set with the resources reserved by all holders.

    @rtype: set

    """
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type and reserve it.

    @param existing: iterable of resources already in use
    @param generate_one_fn: callable without arguments returning a candidate
        (or C{None} for "no candidate")
    @param ec_id: id of the holder the new resource is reserved for
    @return: the newly reserved resource
    @raise errors.ConfigurationError: if no unused candidate was produced
        within L{_GENERATE_RETRIES} attempts

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)

    new_resource = None
    # bounded loop: a generator that keeps colliding must not hang the caller
    for _ in range(self._GENERATE_RETRIES):
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource
def _MatchNameComponentIgnoreCase(short_name, names):
  """Wrapper around L{utils.text.MatchNameComponent}.

  Forwards both arguments unchanged and always passes
  C{case_sensitive=False}.

  @param short_name: the name to look up
  @param names: the candidate names to match against
  @return: whatever C{utils.MatchNameComponent} returns

  """
  return utils.MatchNameComponent(short_name, names, case_sensitive=False)
def _CheckInstanceDiskIvNames(disks):
  """Checks if instance's disks' C{iv_name} attributes are in order.

  The disk at position C{idx} is expected to be named C{"disk/<idx>"}.

  @type disks: list of L{objects.Disk}
  @param disks: List of disks
  @rtype: list of tuples; (int, string, string)
  @return: List of wrongly named disks, each tuple contains disk index,
      expected and actual name

  """
  result = []

  for (idx, disk) in enumerate(disks):
    exp_iv_name = "disk/%s" % idx
    if disk.iv_name != exp_iv_name:
      result.append((idx, exp_iv_name, disk.iv_name))

  return result
157 """The interface to the cluster configuration.
159 @ivar _temporary_lvs: reservation manager for temporary LVs
160 @ivar _all_rms: a list of all temporary reservation managers
def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
             accept_foreign=False):
  """Set up the in-memory state and open the configuration.

  @type cfg_file: string or None
  @param cfg_file: path of the configuration file; C{None} selects
      C{constants.CLUSTER_CONF_FILE}
  @param offline: stored as C{self._offline}
  @param _getents: stored as C{self._getents}
  @param accept_foreign: passed through to L{_OpenConfig}

  """
  # NOTE(review): this initialisation was not visible in the reviewed listing
  # and is reconstructed -- confirm against the original file
  self.write_count = 0
  self._lock = _config_lock
  self._config_data = None
  self._offline = offline
  if cfg_file is None:
    self._cfg_file = constants.CLUSTER_CONF_FILE
  else:
    self._cfg_file = cfg_file
  self._getents = _getents
  self._temporary_ids = TemporaryReservationManager()
  # maps (node, minor) to the instance the minor is reserved for
  self._temporary_drbds = {}
  self._temporary_macs = TemporaryReservationManager()
  self._temporary_secrets = TemporaryReservationManager()
  self._temporary_lvs = TemporaryReservationManager()
  self._all_rms = [self._temporary_ids, self._temporary_macs,
                   self._temporary_secrets, self._temporary_lvs]
  # Note: in order to prevent errors when resolving our name in
  # _DistributeConfig, we compute it here once and reuse it; it's
  # better to raise an error before starting to modify the config
  # file than after it was modified
  self._my_hostname = netutils.Hostname.GetSysName()
  self._last_cluster_serial = -1
  # NOTE(review): the next two initialisations were not visible in the
  # reviewed listing and are reconstructed (_context is read by _GetRpc and
  # set by SetContext) -- confirm against the original file
  self._cfg_id = None
  self._context = None
  self._OpenConfig(accept_foreign)
def _GetRpc(self, address_list):
  """Returns RPC runner for configuration.

  @param address_list: passed unchanged to C{rpc.ConfigRunner}
  @return: a C{rpc.ConfigRunner} built from the stored context and
      C{address_list}

  """
  return rpc.ConfigRunner(self._context, address_list)
def SetContext(self, context):
  """Sets Ganeti context.

  @param context: stored as C{self._context}, which L{_GetRpc} hands to the
      RPC runner

  """
  self._context = context
# this method needs to be static, so that we can call it on the class
# NOTE(review): the decorator and "def" line were missing from the reviewed
# listing; the name IsCluster is reconstructed -- confirm against the
# original file
@staticmethod
def IsCluster():
  """Check if the cluster is configured.

  @rtype: boolean
  @return: whether C{constants.CLUSTER_CONF_FILE} exists

  """
  return os.path.exists(constants.CLUSTER_CONF_FILE)
def _GenerateOneMAC(self):
  """Generate one mac address

  The address is the cluster's C{mac_prefix} followed by three random
  bytes, each formatted as two lowercase hex digits.

  @rtype: string

  """
  prefix = self._config_data.cluster.mac_prefix
  byte1 = random.randrange(0, 256)
  byte2 = random.randrange(0, 256)
  byte3 = random.randrange(0, 256)
  mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
  return mac
@locking.ssynchronized(_config_lock, shared=1)
def GetNdParams(self, node):
  """Get the node params populated with cluster defaults.

  @type node: L{objects.Node}
  @param node: The node we want to know the params for
  @return: A dict with the filled in node params

  """
  nodegroup = self._UnlockedGetNodeGroup(node.group)
  return self._config_data.cluster.FillND(node, nodegroup)
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceDiskParams(self, instance):
  """Get the disk params populated with inherit chain.

  The parameters are those of the node group of the instance's primary
  node.

  @type instance: L{objects.Instance}
  @param instance: The instance we want to know the params for
  @return: A dict with the filled in disk params

  """
  node = self._UnlockedGetNodeInfo(instance.primary_node)
  nodegroup = self._UnlockedGetNodeGroup(node.group)
  return self._UnlockedGetGroupDiskParams(nodegroup)
@locking.ssynchronized(_config_lock, shared=1)
def GetGroupDiskParams(self, group):
  """Get the disk params populated with inherit chain.

  This is just a wrapper over L{_UnlockedGetGroupDiskParams}.

  @type group: L{objects.NodeGroup}
  @param group: The group we want to know the params for
  @return: A dict with the filled in disk params

  """
  return self._UnlockedGetGroupDiskParams(group)
def _UnlockedGetGroupDiskParams(self, group):
  """Get the disk params populated with inherit chain down to node-group.

  The group's C{diskparams} are handed to the cluster's C{SimpleFillDP}.

  @type group: L{objects.NodeGroup}
  @param group: The group we want to know the params for
  @return: A dict with the filled in disk params

  """
  return self._config_data.cluster.SimpleFillDP(group.diskparams)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateMAC(self, ec_id):
  """Generate a MAC for an instance.

  The candidate is checked against the MACs of the current instances and
  against the MACs that are temporarily reserved.

  @param ec_id: unique id for the job to reserve the MAC to
  @return: the generated (and now reserved) MAC address

  """
  existing = self._AllMACs()
  # Reserve in the MAC manager, i.e. the one ReserveMAC uses, so generated
  # and explicitly reserved MACs are checked against each other
  return self._temporary_macs.Generate(existing, self._GenerateOneMAC, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveMAC(self, mac, ec_id):
  """Reserve a MAC for an instance.

  This only checks instances managed by this cluster, it does not
  check for potential collisions elsewhere.

  @param mac: the MAC address to reserve
  @param ec_id: unique id for the job to reserve the MAC to
  @raise errors.ReservationError: if an instance NIC already uses the MAC
      or it is already temporarily reserved

  """
  all_macs = self._AllMACs()
  if mac in all_macs:
    raise errors.ReservationError("mac already in use")

  self._temporary_macs.Reserve(ec_id, mac)
@locking.ssynchronized(_config_lock, shared=1)
def ReserveLV(self, lv_name, ec_id):
  """Reserve an VG/LV pair for an instance.

  @type lv_name: string
  @param lv_name: the logical volume name to reserve
  @param ec_id: unique id for the job to reserve the LV to
  @raise errors.ReservationError: if an instance already uses the LV or it
      is already temporarily reserved

  """
  all_lvs = self._AllLVs()
  if lv_name in all_lvs:
    raise errors.ReservationError("LV already in use")

  self._temporary_lvs.Reserve(ec_id, lv_name)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateDRBDSecret(self, ec_id):
  """Generate a DRBD secret.

  This checks the current disks for duplicates.

  @param ec_id: unique id for the job to reserve the secret to
  @return: the generated (and now reserved) secret

  """
  return self._temporary_secrets.Generate(self._AllDRBDSecrets(),
                                          utils.GenerateSecret,
                                          ec_id)
def _AllLVs(self):
  """Compute the list of all LVs.

  @rtype: set
  @return: every LV name reported by C{MapLVsByNode} of any instance

  """
  lvnames = set()
  for instance in self._config_data.instances.values():
    node_data = instance.MapLVsByNode()
    for lv_list in node_data.values():
      lvnames.update(lv_list)
  return lvnames
def _AllIDs(self, include_temporary):
  """Compute the list of all UUIDs and names we have.

  Collected are: all LV names, the keys of the instance and node
  dictionaries and the non-empty C{uuid} of every object returned by
  L{_AllUUIDObjects}.

  @type include_temporary: boolean
  @param include_temporary: whether to include the _temporary_ids set
  @rtype: set
  @return: a set of IDs

  """
  existing = set()
  if include_temporary:
    existing.update(self._temporary_ids.GetReserved())
  existing.update(self._AllLVs())
  existing.update(self._config_data.instances.keys())
  existing.update(self._config_data.nodes.keys())
  existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
  return existing
def _GenerateUniqueID(self, ec_id):
  """Generate an unique UUID.

  This checks the current node, instances and disk names for
  duplicates.

  @param ec_id: unique id for the job to reserve the id to
  @rtype: string
  @return: the unique id

  """
  # temporary ids are left out here because Generate itself adds its own
  # reservations to the set of ids to avoid
  existing = self._AllIDs(include_temporary=False)
  return self._temporary_ids.Generate(existing, utils.NewUUID, ec_id)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateUniqueID(self, ec_id):
  """Generate an unique ID.

  This is just a wrapper over the unlocked version.

  @type ec_id: string
  @param ec_id: unique id for the job to reserve the id to
  @return: the unique id

  """
  return self._GenerateUniqueID(ec_id)
def _AllMACs(self):
  """Return all MACs present in the config.

  @rtype: list
  @return: the list of all MACs, one entry per instance NIC (duplicates
      are not removed)

  """
  result = []
  for instance in self._config_data.instances.values():
    for nic in instance.nics:
      result.append(nic.mac)

  return result
def _AllDRBDSecrets(self):
  """Return all DRBD secrets present in the config.

  @rtype: list
  @return: the list of all DRBD secrets

  """
  def helper(disk, result):
    """Recursively gather secrets from this disk."""
    # dev_type is compared with the LD_* constant, as the other disk checks
    # in this file do (LD_DRBD8 / LDS_DRBD); the secret is the sixth element
    # of a DRBD8 logical_id
    if disk.dev_type == constants.LD_DRBD8:
      result.append(disk.logical_id[5])
    if disk.children:
      for child in disk.children:
        helper(child, result)

  result = []
  for instance in self._config_data.instances.values():
    for disk in instance.disks:
      helper(disk, result)

  return result
def _CheckDiskIDs(self, disk, l_ids, p_ids):
  """Compute duplicate disk IDs

  Ids that were not seen before are appended to C{l_ids} / C{p_ids}, so
  the same lists can be passed for a whole series of disks.

  @type disk: L{objects.Disk}
  @param disk: the disk at which to start searching
  @type l_ids: list
  @param l_ids: list of current logical ids
  @type p_ids: list
  @param p_ids: list of current physical ids
  @rtype: list
  @return: a list of error messages

  """
  result = []
  if disk.logical_id is not None:
    if disk.logical_id in l_ids:
      result.append("duplicate logical id %s" % str(disk.logical_id))
    else:
      l_ids.append(disk.logical_id)
  if disk.physical_id is not None:
    if disk.physical_id in p_ids:
      result.append("duplicate physical id %s" % str(disk.physical_id))
    else:
      p_ids.append(disk.physical_id)

  if disk.children:
    for child in disk.children:
      result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
  return result
def _UnlockedVerifyConfig(self):
  """Verify the consistency of the configuration.

  The checks cover the cluster-level parameters, every instance (nodes,
  NICs, backend parameters, disks), TCP/UDP port usage, master candidates,
  nodes, node groups, DRBD minors and IP address uniqueness.  Problems are
  collected rather than raised.

  @rtype: list
  @return: a list of error messages; a non-empty list signifies
      configuration errors

  """
  # pylint: disable=R0914
  result = []
  # only used for membership tests, hence a set
  seen_macs = set()
  # maps port number to the list of (owner, description) pairs using it
  ports = {}
  data = self._config_data
  cluster = data.cluster
  # these two are filled in by _CheckDiskIDs across all disks
  seen_lids = []
  seen_pids = []

  # global cluster checks
  if not cluster.enabled_hypervisors:
    result.append("enabled hypervisors list doesn't have any entries")
  invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
  if invalid_hvs:
    result.append("enabled hypervisors contains invalid entries: %s" %
                  invalid_hvs)
  missing_hvp = (set(cluster.enabled_hypervisors) -
                 set(cluster.hvparams.keys()))
  if missing_hvp:
    result.append("hypervisor parameters missing for the enabled"
                  " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))

  if cluster.master_node not in data.nodes:
    result.append("cluster has invalid primary node '%s'" %
                  cluster.master_node)

  def _helper(owner, attr, value, template):
    """Record an error if C{value} does not fit the type C{template}."""
    try:
      utils.ForceDictType(value, template)
    except errors.GenericError as err:
      result.append("%s has invalid %s: %s" % (owner, attr, err))

  def _helper_nic(owner, params):
    """Record an error if the NIC parameters have invalid syntax."""
    try:
      objects.NIC.CheckParameterSyntax(params)
    except errors.ConfigurationError as err:
      result.append("%s has invalid nicparams: %s" % (owner, err))

  def _helper_ipolicy(owner, params, check_std):
    """Record an error if the instance policy has invalid syntax."""
    try:
      objects.InstancePolicy.CheckParameterSyntax(params, check_std)
    except errors.ConfigurationError as err:
      result.append("%s has invalid instance policy: %s" % (owner, err))

  def _helper_ispecs(owner, params):
    """Type-check the individual entries of an instance policy."""
    for key, value in params.items():
      if key in constants.IPOLICY_ISPECS:
        fullkey = "ipolicy/" + key
        _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)
      else:
        # FIXME: assuming list type
        if key in constants.IPOLICY_PARAMETERS:
          exp_type = float
        else:
          exp_type = list
        if not isinstance(value, exp_type):
          result.append("%s has invalid instance policy: for %s,"
                        " expecting %s, got %s" %
                        (owner, key, exp_type.__name__, type(value)))

  # check cluster parameters
  _helper("cluster", "beparams", cluster.SimpleFillBE({}),
          constants.BES_PARAMETER_TYPES)
  _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
          constants.NICS_PARAMETER_TYPES)
  _helper_nic("cluster", cluster.SimpleFillNIC({}))
  _helper("cluster", "ndparams", cluster.SimpleFillND({}),
          constants.NDS_PARAMETER_TYPES)
  _helper_ipolicy("cluster", cluster.SimpleFillIPolicy({}), True)
  _helper_ispecs("cluster", cluster.SimpleFillIPolicy({}))

  # per-instance checks
  for instance_name in data.instances:
    instance = data.instances[instance_name]
    if instance.name != instance_name:
      result.append("instance '%s' is indexed by wrong name '%s'" %
                    (instance.name, instance_name))
    if instance.primary_node not in data.nodes:
      result.append("instance '%s' has invalid primary node '%s'" %
                    (instance_name, instance.primary_node))
    for snode in instance.secondary_nodes:
      if snode not in data.nodes:
        result.append("instance '%s' has invalid secondary node '%s'" %
                      (instance_name, snode))
    for idx, nic in enumerate(instance.nics):
      if nic.mac in seen_macs:
        result.append("instance '%s' has NIC %d mac %s duplicate" %
                      (instance_name, idx, nic.mac))
      else:
        seen_macs.add(nic.mac)
      if nic.nicparams:
        filled = cluster.SimpleFillNIC(nic.nicparams)
        owner = "instance %s nic %d" % (instance.name, idx)
        _helper(owner, "nicparams",
                filled, constants.NICS_PARAMETER_TYPES)
        _helper_nic(owner, filled)

    # parameter checks
    if instance.beparams:
      _helper("instance %s" % instance.name, "beparams",
              cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)

    # gather the drbd ports for duplicate checks
    for (idx, dsk) in enumerate(instance.disks):
      if dsk.dev_type in constants.LDS_DRBD:
        tcp_port = dsk.logical_id[2]
        ports.setdefault(tcp_port, []).append((instance.name,
                                               "drbd disk %s" % idx))
    # gather network port reservation
    net_port = getattr(instance, "network_port", None)
    if net_port is not None:
      ports.setdefault(net_port, []).append((instance.name, "network port"))

    # instance disk verify
    for idx, disk in enumerate(instance.disks):
      result.extend(["instance '%s' disk %d error: %s" %
                     (instance.name, idx, msg) for msg in disk.Verify()])
      result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))

    wrong_names = _CheckInstanceDiskIvNames(instance.disks)
    if wrong_names:
      tmp = "; ".join(("name of disk %s should be '%s', but is '%s'" %
                       (idx, exp_name, actual_name))
                      for (idx, exp_name, actual_name) in wrong_names)

      result.append("Instance '%s' has wrongly named disks: %s" %
                    (instance.name, tmp))

  # cluster-wide pool of free ports
  for free_port in cluster.tcpudp_port_pool:
    ports.setdefault(free_port, []).append(("cluster", "port marked as free"))

  # compute tcp/udp duplicate ports
  keys = sorted(ports)
  for pnum in keys:
    pdata = ports[pnum]
    if len(pdata) > 1:
      txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
      result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))

  # highest used tcp port check
  if keys:
    if keys[-1] > cluster.highest_used_port:
      result.append("Highest used port mismatch, saved %s, computed %s" %
                    (cluster.highest_used_port, keys[-1]))

  # an unknown master node has already been reported above; looking it up
  # unconditionally would abort the whole verification with a KeyError
  if (cluster.master_node in data.nodes and
      not data.nodes[cluster.master_node].master_candidate):
    result.append("Master node is not a master candidate")

  # master candidate checks
  mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
  if mc_now < mc_max:
    result.append("Not enough master candidates: actual %d, target %d" %
                  (mc_now, mc_max))

  # node checks
  for node_name, node in data.nodes.items():
    if node.name != node_name:
      result.append("Node '%s' is indexed by wrong name '%s'" %
                    (node.name, node_name))
    # at most one of the three state flags may be set
    if [node.master_candidate, node.drained, node.offline].count(True) > 1:
      result.append("Node %s state is invalid: master_candidate=%s,"
                    " drain=%s, offline=%s" %
                    (node.name, node.master_candidate, node.drained,
                     node.offline))
    if node.group not in data.nodegroups:
      result.append("Node '%s' has invalid group '%s'" %
                    (node.name, node.group))
    else:
      _helper("node %s" % node.name, "ndparams",
              cluster.FillND(node, data.nodegroups[node.group]),
              constants.NDS_PARAMETER_TYPES)

  # nodegroups checks
  nodegroups_names = set()
  for nodegroup_uuid in data.nodegroups:
    nodegroup = data.nodegroups[nodegroup_uuid]
    if nodegroup.uuid != nodegroup_uuid:
      result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
                    % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
    if utils.UUID_RE.match(nodegroup.name.lower()):
      result.append("node group '%s' (uuid: '%s') has uuid-like name" %
                    (nodegroup.name, nodegroup.uuid))
    if nodegroup.name in nodegroups_names:
      result.append("duplicate node group name '%s'" % nodegroup.name)
    else:
      nodegroups_names.add(nodegroup.name)
    group_name = "group %s" % nodegroup.name
    _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy),
                    False)
    _helper_ispecs(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy))
    if nodegroup.ndparams:
      _helper(group_name, "ndparams",
              cluster.SimpleFillND(nodegroup.ndparams),
              constants.NDS_PARAMETER_TYPES)

  # drbd minors check
  _, duplicates = self._UnlockedComputeDRBDMap()
  for node, minor, instance_a, instance_b in duplicates:
    result.append("DRBD minor %d on node %s is assigned twice to instances"
                  " %s and %s" % (minor, node, instance_a, instance_b))

  # IP checks
  default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
  # maps an address (or "link/address" for instance NICs) to its owners
  ips = {}

  def _AddIpAddress(ip, name):
    """Record C{name} as an owner of C{ip}."""
    ips.setdefault(ip, []).append(name)

  _AddIpAddress(cluster.master_ip, "cluster_ip")

  for node in data.nodes.values():
    _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
    if node.secondary_ip != node.primary_ip:
      _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)

  for instance in data.instances.values():
    for idx, nic in enumerate(instance.nics):
      if nic.ip is None:
        continue

      nicparams = objects.FillDict(default_nicparams, nic.nicparams)
      nic_mode = nicparams[constants.NIC_MODE]
      nic_link = nicparams[constants.NIC_LINK]

      if nic_mode == constants.NIC_MODE_BRIDGED:
        link = "bridge:%s" % nic_link
      elif nic_mode == constants.NIC_MODE_ROUTED:
        link = "route:%s" % nic_link
      else:
        raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)

      # instance addresses are only compared within the same link
      _AddIpAddress("%s/%s" % (link, nic.ip),
                    "instance:%s/nic:%d" % (instance.name, idx))

  for ip, owners in ips.items():
    if len(owners) > 1:
      result.append("IP address %s is used by multiple owners: %s" %
                    (ip, utils.CommaJoin(owners)))

  return result
@locking.ssynchronized(_config_lock, shared=1)
def VerifyConfig(self):
  """Verify function.

  This is just a wrapper over L{_UnlockedVerifyConfig}.

  @rtype: list
  @return: a list of error messages; a non-empty list signifies
      configuration errors

  """
  return self._UnlockedVerifyConfig()
def _UnlockedSetDiskID(self, disk, node_name):
  """Convert the unique ID to the ID needed on the target nodes.

  This is used only for drbd, which needs ip/port configuration.

  The routine descends down and updates its children also, because
  this helps when only the top device is passed to the remote
  node.

  This function is for internal use, when the config lock is already held.

  @param disk: the disk whose C{physical_id} is (re)computed in place
  @param node_name: the node for which the physical id is computed
  @raise errors.ConfigurationError: for a DRBD8 disk, if C{node_name} is
      not one of its two nodes or one of those nodes cannot be found

  """
  if disk.children:
    for child in disk.children:
      self._UnlockedSetDiskID(child, node_name)

  # nothing to derive the physical id from; keep the one already set
  if disk.logical_id is None and disk.physical_id is not None:
    return
  if disk.dev_type == constants.LD_DRBD8:
    pnode, snode, port, pminor, sminor, secret = disk.logical_id
    if node_name not in (pnode, snode):
      raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                      node_name)
    pnode_info = self._UnlockedGetNodeInfo(pnode)
    snode_info = self._UnlockedGetNodeInfo(snode)
    if pnode_info is None or snode_info is None:
      raise errors.ConfigurationError("Can't find primary or secondary node"
                                      " for %s" % str(disk))
    p_data = (pnode_info.secondary_ip, port)
    s_data = (snode_info.secondary_ip, port)
    # layout: (own ip, port, peer ip, port, own minor, secret)
    if pnode == node_name:
      disk.physical_id = p_data + s_data + (pminor, secret)
    else: # it must be secondary, we tested above
      disk.physical_id = s_data + p_data + (sminor, secret)
  else:
    disk.physical_id = disk.logical_id
  return
@locking.ssynchronized(_config_lock)
def SetDiskID(self, disk, node_name):
  """Convert the unique ID to the ID needed on the target nodes.

  This is used only for drbd, which needs ip/port configuration.

  The routine descends down and updates its children also, because
  this helps when only the top device is passed to the remote
  node.

  This is just a wrapper over L{_UnlockedSetDiskID}.

  """
  return self._UnlockedSetDiskID(disk, node_name)
@locking.ssynchronized(_config_lock)
def AddTcpUdpPort(self, port):
  """Adds a new port to the available port pool.

  @warning: this method does not "flush" the configuration (via
      L{_WriteConfig}); callers should do that themselves once the
      configuration is stable

  @type port: int
  @param port: the port to add to the cluster's C{tcpudp_port_pool}
  @raise errors.ProgrammerError: if C{port} is not an integer

  """
  if not isinstance(port, int):
    raise errors.ProgrammerError("Invalid type passed for port")

  self._config_data.cluster.tcpudp_port_pool.add(port)
@locking.ssynchronized(_config_lock, shared=1)
def GetPortList(self):
  """Returns a copy of the current port list.

  @return: a copy of the cluster's C{tcpudp_port_pool}

  """
  return self._config_data.cluster.tcpudp_port_pool.copy()
@locking.ssynchronized(_config_lock)
def AllocatePort(self):
  """Allocate a port.

  The port will be taken from the available port pool or from the
  default port range (and in this case we increase
  highest_used_port).

  @return: the allocated port
  @raise errors.ConfigurationError: if the pool is empty and the next port
      would reach C{constants.LAST_DRBD_PORT}

  """
  cluster = self._config_data.cluster
  # If there are TCP/IP ports configured, we use them first.
  if cluster.tcpudp_port_pool:
    port = cluster.tcpudp_port_pool.pop()
  else:
    port = cluster.highest_used_port + 1
    if port >= constants.LAST_DRBD_PORT:
      raise errors.ConfigurationError("The highest used port is greater"
                                      " than %s. Aborting." %
                                      constants.LAST_DRBD_PORT)
    cluster.highest_used_port = port

  # NOTE(review): the tail of this method was not visible in the reviewed
  # listing; writing the config and returning the port are reconstructed --
  # confirm against the original file
  self._WriteConfig()
  return port
def _UnlockedComputeDRBDMap(self):
  """Compute the used DRBD minor/nodes.

  Both the minors recorded in the instances' disks and the temporarily
  reserved ones are taken into account.

  @rtype: (dict, list)
  @return: dictionary of node_name: dict of minor: instance_name;
      the returned dict will have all the nodes in it (even if with
      an empty list), and a list of duplicates; if the duplicates
      list is not empty, the configuration is corrupted and its caller
      should raise an exception

  """
  def _AppendUsedPorts(instance_name, disk, used):
    """Record the minors of a disk tree in C{used}; return the clashes."""
    duplicates = []
    if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
      node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
      for node, port in ((node_a, minor_a), (node_b, minor_b)):
        assert node in used, ("Node '%s' of instance '%s' not found"
                              " in node list" % (node, instance_name))
        if port in used[node]:
          duplicates.append((node, port, instance_name, used[node][port]))
        else:
          used[node][port] = instance_name
    if disk.children:
      for child in disk.children:
        duplicates.extend(_AppendUsedPorts(instance_name, child, used))
    return duplicates

  duplicates = []
  my_dict = dict((node, {}) for node in self._config_data.nodes)
  for instance in self._config_data.instances.values():
    for disk in instance.disks:
      duplicates.extend(_AppendUsedPorts(instance.name, disk, my_dict))
  for (node, minor), instance in self._temporary_drbds.items():
    # a reservation only clashes if the minor belongs to another instance
    if minor in my_dict[node] and my_dict[node][minor] != instance:
      duplicates.append((node, minor, instance, my_dict[node][minor]))
    else:
      my_dict[node][minor] = instance
  return my_dict, duplicates
@locking.ssynchronized(_config_lock)
def ComputeDRBDMap(self):
  """Compute the used DRBD minor/nodes.

  This is just a wrapper over L{_UnlockedComputeDRBDMap}.

  @return: dictionary of node_name: dict of minor: instance_name;
      the returned dict will have all the nodes in it (even if with
      an empty list).
  @raise errors.ConfigurationError: if a minor is assigned more than once

  """
  d_map, duplicates = self._UnlockedComputeDRBDMap()
  if duplicates:
    raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                    str(duplicates))
  return d_map
@locking.ssynchronized(_config_lock)
def AllocateDRBDMinor(self, nodes, instance):
  """Allocate a drbd minor.

  The free minor will be automatically computed from the existing
  devices. A node can be given multiple times in order to allocate
  multiple minors. The result is the list of minors, in the same
  order as the passed nodes.

  @param nodes: the node names to allocate a minor on
  @type instance: string
  @param instance: the instance for which we allocate minors
  @rtype: list
  @return: the allocated minors, one per entry of C{nodes}
  @raise errors.ConfigurationError: if the current minor assignment
      already contains duplicates

  """
  assert isinstance(instance, basestring), \
    "Invalid argument '%s' passed to AllocateDRBDMinor" % instance

  d_map, duplicates = self._UnlockedComputeDRBDMap()
  if duplicates:
    raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                    str(duplicates))
  result = []
  for nname in nodes:
    ndata = d_map[nname]
    if not ndata:
      # no minors used, we can start at 0
      result.append(0)
      ndata[0] = instance
      self._temporary_drbds[(nname, 0)] = instance
      continue
    keys = sorted(ndata)
    ffree = utils.FirstFree(keys)
    if ffree is None:
      # return the next minor
      # TODO: implement high-limit check
      minor = keys[-1] + 1
    else:
      minor = ffree
    # double-check minor against current instances
    assert minor not in d_map[nname], \
      ("Attempt to reuse allocated DRBD minor %d on node %s,"
       " already allocated to instance %s" %
       (minor, nname, d_map[nname][minor]))
    # record it in the map too, so a node given twice gets distinct minors
    ndata[minor] = instance
    # double-check minor against reservation
    r_key = (nname, minor)
    assert r_key not in self._temporary_drbds, \
      ("Attempt to reuse reserved DRBD minor %d on node %s,"
       " reserved for instance %s" %
       (minor, nname, self._temporary_drbds[r_key]))
    self._temporary_drbds[r_key] = instance
    result.append(minor)
  logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                nodes, result)
  return result
def _UnlockedReleaseDRBDMinors(self, instance):
  """Release temporary drbd minors allocated for a given instance.

  @type instance: string
  @param instance: the instance for which temporary minors should be
      released

  """
  assert isinstance(instance, basestring), \
    "Invalid argument passed to ReleaseDRBDMinors"
  # iterate over a snapshot, as entries are deleted while looping
  for key, name in list(self._temporary_drbds.items()):
    if name == instance:
      del self._temporary_drbds[key]
@locking.ssynchronized(_config_lock)
def ReleaseDRBDMinors(self, instance):
  """Release temporary drbd minors allocated for a given instance.

  This should be called on the error paths, on the success paths
  it's automatically called by the ConfigWriter add and update
  functions.

  This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.

  @type instance: string
  @param instance: the instance for which temporary minors should be
      released

  """
  self._UnlockedReleaseDRBDMinors(instance)
@locking.ssynchronized(_config_lock, shared=1)
def GetConfigVersion(self):
  """Get the configuration version.

  @return: Config version

  """
  return self._config_data.version
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterName(self):
  """Get cluster name.

  @return: Cluster name

  """
  return self._config_data.cluster.cluster_name
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNode(self):
  """Get the hostname of the master node for this cluster.

  @return: Master hostname

  """
  return self._config_data.cluster.master_node
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterIP(self):
  """Get the IP of the master node for this cluster.

  @return: Master IP

  """
  return self._config_data.cluster.master_ip
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetdev(self):
  """Get the master network device for this cluster.

  @return: the cluster's C{master_netdev} value

  """
  return self._config_data.cluster.master_netdev
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetmask(self):
  """Get the netmask of the master node for this cluster.

  @return: the cluster's C{master_netmask} value

  """
  return self._config_data.cluster.master_netmask
@locking.ssynchronized(_config_lock, shared=1)
def GetUseExternalMipScript(self):
  """Get flag representing whether to use the external master IP setup script.

  @return: the cluster's C{use_external_mip_script} value

  """
  return self._config_data.cluster.use_external_mip_script
@locking.ssynchronized(_config_lock, shared=1)
def GetFileStorageDir(self):
  """Get the file storage dir for this cluster.

  @return: the cluster's C{file_storage_dir} value

  """
  return self._config_data.cluster.file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetSharedFileStorageDir(self):
  """Get the shared file storage dir for this cluster.

  @return: the cluster's C{shared_file_storage_dir} value

  """
  return self._config_data.cluster.shared_file_storage_dir
@locking.ssynchronized(_config_lock, shared=1)
def GetHypervisorType(self):
  """Get the hypervisor type for this cluster.

  @return: the first entry of the cluster's C{enabled_hypervisors} list

  """
  return self._config_data.cluster.enabled_hypervisors[0]
@locking.ssynchronized(_config_lock, shared=1)
def GetHostKey(self):
  """Return the rsa hostkey from the config.

  @rtype: string
  @return: the rsa hostkey

  """
  return self._config_data.cluster.rsahostkeypub
@locking.ssynchronized(_config_lock, shared=1)
def GetDefaultIAllocator(self):
  """Get the default instance allocator for this cluster.

  @return: the cluster's C{default_iallocator} value

  """
  return self._config_data.cluster.default_iallocator
@locking.ssynchronized(_config_lock, shared=1)
def GetPrimaryIPFamily(self):
  """Get cluster primary ip family.

  @return: primary ip family

  """
  return self._config_data.cluster.primary_ip_family
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterNetworkParameters(self):
  """Get network parameters of the master node.

  @rtype: L{objects.MasterNetworkParameters}
  @return: network parameters of the master node

  """
  cluster = self._config_data.cluster
  result = objects.MasterNetworkParameters(name=cluster.master_node,
                                           ip=cluster.master_ip,
                                           netmask=cluster.master_netmask,
                                           netdev=cluster.master_netdev,
                                           ip_family=cluster.primary_ip_family)

  return result
@locking.ssynchronized(_config_lock)
def AddNodeGroup(self, group, ec_id, check_uuid=True):
  """Add a node group to the configuration.

  This method calls group.UpgradeConfig() to fill any missing attributes
  according to their default values.

  @type group: L{objects.NodeGroup}
  @param group: the NodeGroup object to add
  @type ec_id: string
  @param ec_id: unique id for the job to use when creating a missing UUID
  @type check_uuid: bool
  @param check_uuid: add an UUID to the group if it doesn't have one or, if
                     it does, ensure that it does not exist in the
                     configuration already

  """
  self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
  # NOTE(review): this call was not visible in the reviewed listing and is
  # reconstructed -- confirm against the original file
  self._WriteConfig()
def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
  """Add a node group to the configuration.

  The group's timestamps are set, C{group.UpgradeConfig()} is called and
  the cluster serial number is increased; the configuration is not written
  by this function.

  @param group: the node group object to add
  @param ec_id: unique id for the job to use when creating a missing UUID
  @param check_uuid: whether to run L{_EnsureUUID} on the group
  @raise errors.OpPrereqError: if a group with the same name already exists

  """
  logging.info("Adding node group %s to configuration", group.name)

  # Some code might need to add a node group with a pre-populated UUID
  # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
  # the "does this UUID" exist already check.
  if check_uuid:
    self._EnsureUUID(group, ec_id)

  try:
    existing_uuid = self._UnlockedLookupNodeGroup(group.name)
  except errors.OpPrereqError:
    # the lookup failing is the good case: the name is still free
    pass
  else:
    raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                               " node group (UUID: %s)" %
                               (group.name, existing_uuid),
                               errors.ECODE_EXISTS)

  # NOTE(review): this assignment was not visible in the reviewed listing
  # and is reconstructed -- confirm against the original file
  group.serial_no = 1
  group.ctime = group.mtime = time.time()
  group.UpgradeConfig()

  self._config_data.nodegroups[group.uuid] = group
  self._config_data.cluster.serial_no += 1
@locking.ssynchronized(_config_lock)
def RemoveNodeGroup(self, group_uuid):
  """Drop a node group from the configuration and write it out.

  @type group_uuid: string
  @param group_uuid: the UUID of the node group to remove
  @raise errors.ConfigurationError: if the UUID is not a known node group

  """
  logging.info("Removing node group %s from configuration", group_uuid)

  known_groups = self._config_data.nodegroups
  if group_uuid not in known_groups:
    raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)

  # Sanity check only; it disappears when Python runs with -O
  assert len(known_groups) != 1, \
    "Group '%s' is the only group, cannot be removed" % group_uuid

  del known_groups[group_uuid]
  self._config_data.cluster.serial_no += 1
  self._WriteConfig()
def _UnlockedLookupNodeGroup(self, target):
  """Resolve a node group name or UUID to the group's UUID.

  This function is for internal use, when the config lock is already held.

  @type target: string or None
  @param target: group name or UUID or None to look for the default
  @rtype: string
  @return: nodegroup UUID
  @raises errors.OpPrereqError: when the target group cannot be found

  """
  groups = self._config_data.nodegroups

  if target is None:
    # Without an explicit target only a configuration holding exactly one
    # group gives an unambiguous answer
    if len(groups) == 1:
      return list(groups.keys())[0]
    raise errors.OpPrereqError("More than one node group exists. Target"
                               " group must be specified explicitly.")

  if target in groups:
    # The target already is a UUID
    return target

  for candidate in groups.values():
    if candidate.name == target:
      return candidate.uuid

  raise errors.OpPrereqError("Node group '%s' not found" % target,
                             errors.ECODE_NOENT)
@locking.ssynchronized(_config_lock, shared=1)
def LookupNodeGroup(self, target):
  """Resolve a node group name or UUID to the group's UUID.

  Locked wrapper around L{_UnlockedLookupNodeGroup}.

  @type target: string or None
  @param target: group name or UUID or None to look for the default
  @rtype: string
  @return: nodegroup UUID

  """
  group_uuid = self._UnlockedLookupNodeGroup(target)
  return group_uuid
def _UnlockedGetNodeGroup(self, uuid):
  """Fetch a node group object by UUID.

  This function is for internal use, when the config lock is already held.

  @type uuid: string
  @param uuid: group UUID
  @rtype: L{objects.NodeGroup} or None
  @return: nodegroup object, or None if not found

  """
  # dict.get yields None for unknown UUIDs
  return self._config_data.nodegroups.get(uuid)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroup(self, uuid):
  """Fetch a node group object by UUID.

  Locked wrapper around L{_UnlockedGetNodeGroup}.

  @type uuid: string
  @param uuid: group UUID
  @rtype: L{objects.NodeGroup} or None
  @return: nodegroup object, or None if not found

  """
  group = self._UnlockedGetNodeGroup(uuid)
  return group
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodeGroupsInfo(self):
  """Return all node groups, keyed by UUID.

  @rtype: dict
  @return: a new dict built from the configuration's node group mapping

  """
  all_groups = self._config_data.nodegroups
  return dict(all_groups)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupList(self):
  """Return the keys (UUIDs) of all configured node groups.

  """
  all_groups = self._config_data.nodegroups
  return all_groups.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupMembersByNodes(self, nodes):
  """Get nodes which are member in the same nodegroups as the given nodes.

  @param nodes: iterable of node names
  @rtype: frozenset
  @return: names of all members of the groups the given nodes belong to

  """
  members = []
  for node_name in nodes:
    group_uuid = self._UnlockedGetNodeInfo(node_name).group
    members.extend(self._UnlockedGetNodeGroup(group_uuid).members)
  # Duplicates (several nodes sharing a group) collapse here
  return frozenset(members)
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiNodeGroupInfo(self, group_uuids):
  """Get the configuration of multiple node groups.

  @param group_uuids: List of node group UUIDs
  @rtype: list
  @return: List of tuples of (group_uuid, group_info), in input order;
      group_info is None for unknown UUIDs

  """
  pairs = []
  for uuid in group_uuids:
    pairs.append((uuid, self._UnlockedGetNodeGroup(uuid)))
  return pairs
@locking.ssynchronized(_config_lock)
def AddInstance(self, instance, ec_id):
  """Add an instance to the config.

  This should be used after creating a new instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @param ec_id: execution context id used when a UUID must be generated
  @raise errors.ProgrammerError: if the argument is not an Instance
  @raise errors.ConfigurationError: if one of the instance's MAC addresses
      is already in use

  """
  if not isinstance(instance, objects.Instance):
    raise errors.ProgrammerError("Invalid type passed to AddInstance")

  if instance.disk_template != constants.DT_DISKLESS:
    lv_map = instance.MapLVsByNode()
    logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, lv_map)

  used_macs = self._AllMACs()
  for nic in instance.nics:
    if nic.mac in used_macs:
      raise errors.ConfigurationError("Cannot add instance %s:"
                                      " MAC address '%s' already in use." %
                                      (instance.name, nic.mac))

  self._EnsureUUID(instance, ec_id)

  now = time.time()
  instance.serial_no = 1
  instance.ctime = now
  instance.mtime = now

  cfg = self._config_data
  cfg.instances[instance.name] = instance
  cfg.cluster.serial_no += 1
  self._UnlockedReleaseDRBDMinors(instance.name)
  self._WriteConfig()
def _EnsureUUID(self, item, ec_id):
  """Ensures a given object has a valid UUID.

  An item without UUID gets a freshly generated one; an item that already
  has one is checked against all known (including temporary) IDs.

  @param item: the instance or node to be checked
  @param ec_id: the execution context id for the uuid reservation
  @raise errors.ConfigurationError: if the item's UUID is already in use

  """
  if not item.uuid:
    item.uuid = self._GenerateUniqueID(ec_id)
    return

  if item.uuid in self._AllIDs(include_temporary=True):
    raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
                                    " in use" % (item.name, item.uuid))
def _SetInstanceStatus(self, instance_name, status):
  """Set the instance's status to a given value.

  The configuration is written only when the stored admin state actually
  changes.

  @param instance_name: name of the instance to update
  @param status: one of the values in constants.ADMINST_ALL
  @raise errors.ConfigurationError: if the instance is not known

  """
  assert status in constants.ADMINST_ALL, \
         "Invalid status '%s' passed to SetInstanceStatus" % (status,)

  instances = self._config_data.instances
  if instance_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" %
                                    instance_name)

  inst = instances[instance_name]
  if inst.admin_state == status:
    # Nothing to change, so no serial bump and no write
    return

  inst.admin_state = status
  inst.serial_no += 1
  inst.mtime = time.time()
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceUp(self, instance_name):
  """Set the instance's admin state to constants.ADMINST_UP.

  """
  new_state = constants.ADMINST_UP
  self._SetInstanceStatus(instance_name, new_state)
@locking.ssynchronized(_config_lock)
def MarkInstanceOffline(self, instance_name):
  """Set the instance's admin state to constants.ADMINST_OFFLINE.

  """
  new_state = constants.ADMINST_OFFLINE
  self._SetInstanceStatus(instance_name, new_state)
@locking.ssynchronized(_config_lock)
def RemoveInstance(self, instance_name):
  """Remove the instance from the configuration.

  @param instance_name: name of the instance to remove
  @raise errors.ConfigurationError: if the instance is not known

  """
  cfg = self._config_data
  if instance_name not in cfg.instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  # If a network port has been allocated to the instance,
  # return it to the pool of free ports.
  port = getattr(cfg.instances[instance_name], "network_port", None)
  if port is not None:
    cfg.cluster.tcpudp_port_pool.add(port)

  del cfg.instances[instance_name]
  cfg.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RenameInstance(self, old_name, new_name):
  """Rename an instance.

  This needs to be done in ConfigWriter and not by RemoveInstance
  combined with AddInstance as only we can guarantee an atomic
  rename.

  @param old_name: current name of the instance
  @param new_name: name the instance is stored under afterwards
  @raise errors.ConfigurationError: if old_name is not a known instance

  """
  instances = self._config_data.instances
  if old_name not in instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % old_name)

  # Operate on a copy to not lose the instance object in case of a failure
  renamed = instances[old_name].Copy()
  renamed.name = new_name

  for (idx, disk) in enumerate(renamed.disks):
    if disk.dev_type != constants.LD_FILE:
      continue
    # rename the file paths in logical and physical id: the directory two
    # levels above the disk file is kept, the instance directory replaced
    (driver, old_path) = (disk.logical_id[0], disk.logical_id[1])
    file_storage_dir = os.path.dirname(os.path.dirname(old_path))
    new_path = utils.PathJoin(file_storage_dir, renamed.name,
                              "disk%s" % idx)
    disk.logical_id = (driver, new_path)
    disk.physical_id = disk.logical_id

  # Actually replace instance object
  del instances[old_name]
  instances[renamed.name] = renamed

  # Force update of ssconf files
  self._config_data.cluster.serial_no += 1

  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceDown(self, instance_name):
  """Set the instance's admin state to constants.ADMINST_DOWN.

  """
  new_state = constants.ADMINST_DOWN
  self._SetInstanceStatus(instance_name, new_state)
def _UnlockedGetInstanceList(self):
  """Return the names of all configured instances.

  This function is for internal use, when the config lock is already held.

  """
  instances = self._config_data.instances
  return instances.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceList(self):
  """Return the names of all configured instances.

  Locked wrapper around L{_UnlockedGetInstanceList}.

  @return: array of instances, ex. ['instance2.example.com',
      'instance1.example.com']

  """
  names = self._UnlockedGetInstanceList()
  return names
def ExpandInstanceName(self, short_name):
  """Attempt to expand an incomplete instance name.

  @param short_name: the (possibly partial) instance name
  @return: the result of matching it against all instance names

  """
  # Locking is done in L{ConfigWriter.GetInstanceList}
  known_names = self.GetInstanceList()
  return _MatchNameComponentIgnoreCase(short_name, known_names)
def _UnlockedGetInstanceInfo(self, instance_name):
  """Returns information about an instance.

  This function is for internal use, when the config lock is already held.

  @param instance_name: name of the instance
  @return: the stored instance object, or None if the name is unknown

  """
  # dict.get yields None for unknown names
  return self._config_data.instances.get(instance_name)
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceInfo(self, instance_name):
  """Returns information about an instance.

  It takes the information from the configuration file. Other information of
  an instance are taken from the live systems.

  @param instance_name: name of the instance, e.g.
      I{instance1.example.com}

  @rtype: L{objects.Instance}
  @return: the instance object (None for an unknown name)

  """
  inst = self._UnlockedGetInstanceInfo(instance_name)
  return inst
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceNodeGroups(self, instance_name, primary_only=False):
  """Returns set of node group UUIDs for instance's nodes.

  @param instance_name: name of the instance
  @param primary_only: whether to look at the primary node only instead
      of all nodes of the instance
  @rtype: frozenset
  @raise errors.ConfigurationError: if the instance is not known

  """
  instance = self._UnlockedGetInstanceInfo(instance_name)
  if not instance:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

  nodes = instance.all_nodes
  if primary_only:
    nodes = [instance.primary_node]

  group_uuids = []
  for node_name in nodes:
    group_uuids.append(self._UnlockedGetNodeInfo(node_name).group)
  return frozenset(group_uuids)
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiInstanceInfo(self, instances):
  """Get the configuration of multiple instances.

  @param instances: list of instance names
  @rtype: list
  @return: list of tuples (instance, instance_info), where
      instance_info is what would GetInstanceInfo return for the
      instance, while keeping the original order

  """
  pairs = []
  for name in instances:
    pairs.append((name, self._UnlockedGetInstanceInfo(name)))
  return pairs
@locking.ssynchronized(_config_lock, shared=1)
def GetAllInstancesInfo(self):
  """Get the configuration of all instances.

  @rtype: dict
  @return: dict of (instance, instance_info), where instance_info is what
            would GetInstanceInfo return for the instance

  """
  by_name = {}
  for name in self._UnlockedGetInstanceList():
    by_name[name] = self._UnlockedGetInstanceInfo(name)
  return by_name
@locking.ssynchronized(_config_lock, shared=1)
def GetInstancesInfoByFilter(self, filter_fn):
  """Get instance configuration with a filter.

  @type filter_fn: callable
  @param filter_fn: Filter function receiving instance object as parameter,
    returning boolean. Important: this function is called while the
    configuration lock is held. It must not do any complex work or call
    functions potentially leading to a deadlock. Ideally it doesn't call any
    other functions and just compares instance attributes.
  @rtype: dict
  @return: mapping of instance name to instance object for every instance
    accepted by the filter

  """
  selected = {}
  for (name, inst) in self._config_data.instances.items():
    if filter_fn(inst):
      selected[name] = inst
  return selected
@locking.ssynchronized(_config_lock)
def AddNode(self, node, ec_id):
  """Add a node to the configuration.

  @type node: L{objects.Node}
  @param node: a Node instance
  @param ec_id: execution context id used when a UUID must be generated

  """
  logging.info("Adding node %s to configuration", node.name)

  self._EnsureUUID(node, ec_id)

  now = time.time()
  node.serial_no = 1
  node.ctime = now
  node.mtime = now

  # Group membership is updated first; this fails for unknown groups
  # before the node is stored
  self._UnlockedAddNodeToGroup(node.name, node.group)

  cfg = self._config_data
  cfg.nodes[node.name] = node
  cfg.cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RemoveNode(self, node_name):
  """Remove a node from the configuration.

  @param node_name: name of the node to remove
  @raise errors.ConfigurationError: if the node is not known

  """
  logging.info("Removing node %s from configuration", node_name)

  cfg = self._config_data
  if node_name not in cfg.nodes:
    raise errors.ConfigurationError("Unknown node '%s'" % node_name)

  # Detach from the group's member list before dropping the node itself
  self._UnlockedRemoveNodeFromGroup(cfg.nodes[node_name])
  del cfg.nodes[node_name]
  cfg.cluster.serial_no += 1
  self._WriteConfig()
def ExpandNodeName(self, short_name):
  """Attempt to expand an incomplete node name.

  @param short_name: the (possibly partial) node name
  @return: the result of matching it against all node names

  """
  # Locking is done in L{ConfigWriter.GetNodeList}
  known_names = self.GetNodeList()
  return _MatchNameComponentIgnoreCase(short_name, known_names)
def _UnlockedGetNodeInfo(self, node_name):
  """Get the configuration of a node, as stored in the config.

  This function is for internal use, when the config lock is already
  held.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: L{objects.Node}
  @return: the node object, or None if the name is unknown

  """
  # dict.get yields None for unknown names
  return self._config_data.nodes.get(node_name)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInfo(self, node_name):
  """Get the configuration of a node, as stored in the config.

  This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: L{objects.Node}
  @return: the node object (None for an unknown name)

  """
  node = self._UnlockedGetNodeInfo(node_name)
  return node
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInstances(self, node_name):
  """Get the instances of a node, as stored in the config.

  @param node_name: the node name, e.g. I{node1.example.com}

  @rtype: (list, list)
  @return: a tuple with two lists: the names of the instances having the
      node as primary, and of those having it among their secondaries

  """
  all_insts = list(self._config_data.instances.values())
  pri = [inst.name for inst in all_insts
         if inst.primary_node == node_name]
  sec = [inst.name for inst in all_insts
         if node_name in inst.secondary_nodes]
  return (pri, sec)
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupInstances(self, uuid, primary_only=False):
  """Get the instances of a node group.

  @param uuid: Node group UUID
  @param primary_only: Whether to only consider primary nodes
  @rtype: frozenset
  @return: Names of the instances with at least one considered node in
      the node group

  """
  names = []
  for inst in self._config_data.instances.values():
    if primary_only:
      inst_nodes = [inst.primary_node]
    else:
      inst_nodes = inst.all_nodes
    for node_name in inst_nodes:
      if self._UnlockedGetNodeInfo(node_name).group == uuid:
        names.append(inst.name)
  # An instance with several nodes in the group is reported once
  return frozenset(names)
def _UnlockedGetNodeList(self):
  """Return the list of nodes which are in the configuration.

  This function is for internal use, when the config lock is already
  held.

  @rtype: list

  """
  nodes = self._config_data.nodes
  return nodes.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeList(self):
  """Return the list of nodes which are in the configuration.

  Locked wrapper around L{_UnlockedGetNodeList}.

  """
  names = self._UnlockedGetNodeList()
  return names
def _UnlockedGetOnlineNodeList(self):
  """Return the list of nodes which are online.

  A node counts as online when its "offline" attribute is false.

  """
  online = []
  for name in self._UnlockedGetNodeList():
    info = self._UnlockedGetNodeInfo(name)
    if not info.offline:
      online.append(info.name)
  return online
@locking.ssynchronized(_config_lock, shared=1)
def GetOnlineNodeList(self):
  """Return the list of nodes which are online.

  Locked wrapper around L{_UnlockedGetOnlineNodeList}.

  """
  names = self._UnlockedGetOnlineNodeList()
  return names
@locking.ssynchronized(_config_lock, shared=1)
def GetVmCapableNodeList(self):
  """Return the list of nodes which are vm capable.

  """
  capable = []
  for name in self._UnlockedGetNodeList():
    info = self._UnlockedGetNodeInfo(name)
    if info.vm_capable:
      capable.append(info.name)
  return capable
@locking.ssynchronized(_config_lock, shared=1)
def GetNonVmCapableNodeList(self):
  """Return the list of nodes which are not vm capable.

  """
  not_capable = []
  for name in self._UnlockedGetNodeList():
    info = self._UnlockedGetNodeInfo(name)
    if not info.vm_capable:
      not_capable.append(info.name)
  return not_capable
@locking.ssynchronized(_config_lock, shared=1)
def GetMultiNodeInfo(self, nodes):
  """Get the configuration of multiple nodes.

  @param nodes: list of node names
  @rtype: list
  @return: list of tuples of (node, node_info), where node_info is
      what would GetNodeInfo return for the node, in the original
      order

  """
  pairs = []
  for name in nodes:
    pairs.append((name, self._UnlockedGetNodeInfo(name)))
  return pairs
@locking.ssynchronized(_config_lock, shared=1)
def GetAllNodesInfo(self):
  """Get the configuration of all nodes.

  Locked wrapper around L{_UnlockedGetAllNodesInfo}.

  @rtype: dict
  @return: dict of (node, node_info), where node_info is what
            would GetNodeInfo return for the node

  """
  by_name = self._UnlockedGetAllNodesInfo()
  return by_name
def _UnlockedGetAllNodesInfo(self):
  """Gets configuration of all nodes.

  This function is for internal use, when the config lock is already held.

  @note: See L{GetAllNodesInfo}
  @rtype: dict
  @return: mapping of node name to node object

  """
  by_name = {}
  for name in self._UnlockedGetNodeList():
    by_name[name] = self._UnlockedGetNodeInfo(name)
  return by_name
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeGroupsFromNodes(self, nodes):
  """Returns groups for a list of nodes.

  @type nodes: list of string
  @param nodes: List of node names
  @rtype: frozenset
  @return: the "group" attribute of every given node

  """
  group_uuids = [self._UnlockedGetNodeInfo(name).group for name in nodes]
  return frozenset(group_uuids)
def _UnlockedGetMasterCandidateStats(self, exceptions=None):
  """Get the number of current and maximum desired and possible candidates.

  This function is for internal use, when the config lock is already held.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: tuple of (current, desired and possible, possible)

  """
  mc_now = 0
  mc_max = 0
  for node in self._config_data.nodes.values():
    if exceptions and node.name in exceptions:
      continue
    # Possible candidates: master-capable nodes neither offline nor drained
    if node.master_capable and not node.offline and not node.drained:
      mc_max += 1
    if node.master_candidate:
      mc_now += 1
  # The desired number is capped by what is actually possible
  mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
  return (mc_now, mc_should, mc_max)
@locking.ssynchronized(_config_lock, shared=1)
def GetMasterCandidateStats(self, exceptions=None):
  """Get the number of current, desired and possible candidates.

  This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored
  @rtype: tuple
  @return: whatever L{_UnlockedGetMasterCandidateStats} returns, i.e. a
      tuple of (current, desired and possible, possible)

  """
  stats = self._UnlockedGetMasterCandidateStats(exceptions)
  return stats
@locking.ssynchronized(_config_lock)
def MaintainCandidatePool(self, exceptions):
  """Try to grow the candidate pool to the desired size.

  Nodes are considered in random order; eligible ones are promoted to
  master candidate until the desired pool size is reached. The
  configuration is written only if at least one node was promoted.

  @type exceptions: list
  @param exceptions: if passed, list of nodes that should be ignored;
      None is treated like an empty list
  @rtype: list
  @return: list with the adjusted nodes (L{objects.Node} instances)

  """
  mc_now, mc_should, _ = self._UnlockedGetMasterCandidateStats(exceptions)

  # The stats helper accepts None for "no exceptions"; normalise it here so
  # the membership test below does not raise TypeError for such callers.
  if exceptions is None:
    exceptions = []

  mod_list = []
  if mc_now < mc_should:
    node_list = self._config_data.nodes.keys()
    random.shuffle(node_list)
    for name in node_list:
      if mc_now >= mc_should:
        break
      node = self._config_data.nodes[name]
      if (node.master_candidate or node.offline or node.drained or
          node.name in exceptions or not node.master_capable):
        continue
      mod_list.append(node)
      node.master_candidate = True
      node.serial_no += 1
      mc_now += 1
    if mc_now != mc_should:
      # this should not happen
      logging.warning("Warning: MaintainCandidatePool didn't manage to"
                      " fill the candidate pool (%d/%d)", mc_now, mc_should)
    if mod_list:
      self._config_data.cluster.serial_no += 1
      self._WriteConfig()

  return mod_list
def _UnlockedAddNodeToGroup(self, node_name, nodegroup_uuid):
  """Add a given node to the specified group.

  Adding a node that already is a member is a no-op.

  @param node_name: name of the node to add
  @param nodegroup_uuid: UUID of the target node group
  @raise errors.OpExecError: if the node group is not known

  """
  try:
    group = self._config_data.nodegroups[nodegroup_uuid]
  except KeyError:
    # This can happen if a node group gets deleted between its lookup and
    # when we're adding the first node to it, since we don't keep a lock in
    # the meantime. It's ok though, as we'll fail cleanly if the node group
    # is not found anymore.
    raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)

  if node_name not in group.members:
    group.members.append(node_name)
def _UnlockedRemoveNodeFromGroup(self, node):
  """Remove a given node from its group.

  Inconsistencies (unknown group, node not listed as member) are logged as
  warnings and otherwise ignored, so removing a node never fails here.

  @param node: node object; its "group" and "name" attributes are used

  """
  nodegroup = node.group
  if nodegroup not in self._config_data.nodegroups:
    logging.warning("Warning: node '%s' has unknown node group '%s'"
                    " (while being removed from it)", node.name, nodegroup)
    # There is no group object to update; looking it up anyway would raise
    # KeyError right after the warning
    return

  nodegroup_obj = self._config_data.nodegroups[nodegroup]
  if node.name not in nodegroup_obj.members:
    logging.warning("Warning: node '%s' not a member of its node group '%s'"
                    " (while being removed from it)", node.name, nodegroup)
  else:
    nodegroup_obj.members.remove(node.name)
@locking.ssynchronized(_config_lock)
def AssignGroupNodes(self, mods):
  """Changes the group of a number of nodes.

  All modifications are resolved and checked first; only then are they
  applied and the configuration written.

  @type mods: list of tuples; (node name, new group UUID)
  @param mods: Node membership modifications
  @raise errors.ConfigurationError: if a node, its current group or its
      new group cannot be found

  """
  all_groups = self._config_data.nodegroups
  all_nodes = self._config_data.nodes

  pending = []

  # Try to resolve names/UUIDs first
  for (node_name, new_group_uuid) in mods:
    try:
      node = all_nodes[node_name]
    except KeyError:
      raise errors.ConfigurationError("Unable to find node '%s'" % node_name)

    if node.group == new_group_uuid:
      # Node is being assigned to its current group
      logging.debug("Node '%s' was assigned to its current group (%s)",
                    node_name, node.group)
      continue

    # Try to find current group of node
    try:
      old_group = all_groups[node.group]
    except KeyError:
      raise errors.ConfigurationError("Unable to find old group '%s'" %
                                      node.group)

    # Try to find new group for node
    try:
      new_group = all_groups[new_group_uuid]
    except KeyError:
      raise errors.ConfigurationError("Unable to find new group '%s'" %
                                      new_group_uuid)

    assert node.name in old_group.members, \
      ("Inconsistent configuration: node '%s' not listed in members for its"
       " old group '%s'" % (node.name, old_group.uuid))
    assert node.name not in new_group.members, \
      ("Inconsistent configuration: node '%s' already listed in members for"
       " its new group '%s'" % (node.name, new_group.uuid))

    pending.append((node, old_group, new_group))

  # Apply changes
  for (node, old_group, new_group) in pending:
    # NOTE(review): the first comparison is between the node's own UUID and
    # a group UUID; presumably node.group was meant -- confirm before changing
    assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
      "Assigning to current group is not possible"

    node.group = new_group.uuid

    # Update members of involved groups
    if node.name in old_group.members:
      old_group.members.remove(node.name)
    if node.name not in new_group.members:
      new_group.members.append(node.name)

  # Update timestamps and serials (only once per node/group object)
  now = time.time()
  touched = frozenset(itertools.chain(*pending)) # pylint: disable=W0142
  for obj in touched:
    obj.serial_no += 1
    obj.mtime = now

  # Force ssconf update
  self._config_data.cluster.serial_no += 1

  self._WriteConfig()
def _BumpSerialNo(self):
  """Bump up the serial number of the config.

  The modification time of the configuration is set to the current time.

  """
  cfg = self._config_data
  cfg.serial_no += 1
  cfg.mtime = time.time()
def _AllUUIDObjects(self):
  """Returns all objects with uuid attributes.

  @rtype: list
  @return: all instances, nodes and node groups, followed by the cluster
      object

  """
  cfg = self._config_data
  found = []
  for container in (cfg.instances, cfg.nodes, cfg.nodegroups):
    found.extend(container.values())
  found.append(cfg.cluster)
  return found
1877 def _OpenConfig(self, accept_foreign):
1878 """Read the config data from disk.
1881 raw_data = utils.ReadFile(self._cfg_file)
1884 data = objects.ConfigData.FromDict(serializer.Load(raw_data))
1885 except Exception, err:
1886 raise errors.ConfigurationError(err)
1888 # Make sure the configuration has the right version
1889 _ValidateConfig(data)
1891 if (not hasattr(data, "cluster") or
1892 not hasattr(data.cluster, "rsahostkeypub")):
1893 raise errors.ConfigurationError("Incomplete configuration"
1894 " (missing cluster.rsahostkeypub)")
1896 if data.cluster.master_node != self._my_hostname and not accept_foreign:
1897 msg = ("The configuration denotes node %s as master, while my"
1898 " hostname is %s; opening a foreign configuration is only"
1899 " possible in accept_foreign mode" %
1900 (data.cluster.master_node, self._my_hostname))
1901 raise errors.ConfigurationError(msg)
1903 # Upgrade configuration if needed
1904 data.UpgradeConfig()
1906 self._config_data = data
1907 # reset the last serial as -1 so that the next write will cause
1909 self._last_cluster_serial = -1
1911 # And finally run our (custom) config upgrade sequence
1912 self._UpgradeConfig()
1914 self._cfg_id = utils.GetFileID(path=self._cfg_file)
def _UpgradeConfig(self):
  """Run upgrade steps that cannot be done purely in the objects.

  This is because some data elements need uniqueness across the
  whole configuration, etc.

  @warning: this function will call L{_WriteConfig()}, but also
      L{DropECReservations} so it needs to be called only from a
      "safe" place (the constructor). If one wanted to call it with
      the lock held, a DropECReservationUnlocked would need to be
      created first, to avoid causing deadlock.

  """
  changed = False

  # Every UUID-carrying object must have a UUID
  for item in self._AllUUIDObjects():
    if item.uuid is None:
      item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
      changed = True

  # A configuration without node groups gets the initial one
  if not self._config_data.nodegroups:
    initial_name = constants.INITIAL_NODE_GROUP_NAME
    initial_group = objects.NodeGroup(name=initial_name,
                                      members=[])
    self._UnlockedAddNodeGroup(initial_group, _UPGRADE_CONFIG_JID, True)
    changed = True

  for node in self._config_data.nodes.values():
    if not node.group:
      node.group = self.LookupNodeGroup(None)
      changed = True
    # This is technically *not* an upgrade, but needs to be done both when
    # nodegroups are being added, and upon normally loading the config,
    # because the members list of a node group is discarded upon
    # serializing/deserializing the object.
    self._UnlockedAddNodeToGroup(node.name, node.group)

  if changed:
    self._WriteConfig()
    # This is ok even if it acquires the internal lock, as _UpgradeConfig is
    # only called at config init time, without the lock held
    self.DropECReservations(_UPGRADE_CONFIG_JID)
def _DistributeConfig(self, feedback_fn):
  """Distribute the configuration to the other nodes.

  Currently, this only copies the configuration file. In the future,
  it could be used to encapsulate the 2/3-phase update mechanism.

  @param feedback_fn: optional callable receiving each failure message
  @rtype: boolean
  @return: True if offline or if every copy succeeded, False otherwise

  """
  if self._offline:
    return True

  failed = False
  me = self._my_hostname
  target_names = []
  target_addrs = []
  # we can skip checking whether _UnlockedGetNodeInfo returns None
  # since the node list comes from _UnlockedGetNodeList, and we are
  # called with the lock held, so no modifications should take place
  # in between
  for node_name in self._UnlockedGetNodeList():
    if node_name == me:
      continue
    node_info = self._UnlockedGetNodeInfo(node_name)
    # Only master candidates receive a copy
    if node_info.master_candidate:
      target_names.append(node_info.name)
      target_addrs.append(node_info.primary_ip)

  # TODO: Use dedicated resolver talking to config writer for name resolution
  runner = self._GetRpc(target_addrs)
  result = runner.call_upload_file(target_names, self._cfg_file)
  for to_node, to_result in result.items():
    fail_msg = to_result.fail_msg
    if not fail_msg:
      continue
    msg = ("Copy of file %s to node %s failed: %s" %
           (self._cfg_file, to_node, fail_msg))
    logging.error(msg)

    if feedback_fn:
      feedback_fn(msg)

    failed = True

  return not failed
2000 def _WriteConfig(self, destination=None, feedback_fn=None):
2001 """Write the configuration data to persistent storage.
2004 assert feedback_fn is None or callable(feedback_fn)
2006 # Warn on config errors, but don't abort the save - the
2007 # configuration has already been modified, and we can't revert;
2008 # the best we can do is to warn the user and save as is, leaving
2009 # recovery to the user
2010 config_errors = self._UnlockedVerifyConfig()
2012 errmsg = ("Configuration data is not consistent: %s" %
2013 (utils.CommaJoin(config_errors)))
2014 logging.critical(errmsg)
2018 if destination is None:
2019 destination = self._cfg_file
2020 self._BumpSerialNo()
2021 txt = serializer.Dump(self._config_data.ToDict())
2023 getents = self._getents()
2025 fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
2026 close=False, gid=getents.confd_gid, mode=0640)
2027 except errors.LockError:
2028 raise errors.ConfigurationError("The configuration file has been"
2029 " modified since the last write, cannot"
2032 self._cfg_id = utils.GetFileID(fd=fd)
2036 self.write_count += 1
2038 # and redistribute the config file to master candidates
2039 self._DistributeConfig(feedback_fn)
2041 # Write ssconf files on all nodes (including locally)
2042 if self._last_cluster_serial < self._config_data.cluster.serial_no:
2043 if not self._offline:
2044 result = self._GetRpc(None).call_write_ssconf_files(
2045 self._UnlockedGetOnlineNodeList(),
2046 self._UnlockedGetSsconfValues())
2048 for nname, nresu in result.items():
2049 msg = nresu.fail_msg
2051 errmsg = ("Error while uploading ssconf files to"
2052 " node %s: %s" % (nname, msg))
2053 logging.warning(errmsg)
2058 self._last_cluster_serial = self._config_data.cluster.serial_no
def _UnlockedGetSsconfValues(self):
  """Return the values needed by ssconf.

  This function is for internal use, when the config lock is already held.

  @rtype: dict
  @return: a dictionary with keys the ssconf names and values their
      associated value
  @raise errors.ConfigurationError: if any value is not a string

  """
  join_lines = "\n".join
  cluster = self._config_data.cluster

  instance_names = utils.NiceSort(self._UnlockedGetInstanceList())
  node_names = utils.NiceSort(self._UnlockedGetNodeList())
  node_info = [self._UnlockedGetNodeInfo(name) for name in node_names]

  # One "<name> <ip>" line per node
  node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
                  for ninfo in node_info]
  node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
                  for ninfo in node_info]

  # One "<uuid> <name>" line per node group
  nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in
                self._config_data.nodegroups.values()]

  ssconf_values = {
    constants.SS_CLUSTER_NAME: cluster.cluster_name,
    constants.SS_CLUSTER_TAGS: join_lines(cluster.GetTags()),
    constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
    constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
    constants.SS_MASTER_CANDIDATES:
      join_lines(node.name for node in node_info if node.master_candidate),
    constants.SS_MASTER_CANDIDATES_IPS:
      join_lines(node.primary_ip for node in node_info
                 if node.master_candidate),
    constants.SS_MASTER_IP: cluster.master_ip,
    constants.SS_MASTER_NETDEV: cluster.master_netdev,
    constants.SS_MASTER_NETMASK: str(cluster.master_netmask),
    constants.SS_MASTER_NODE: cluster.master_node,
    constants.SS_NODE_LIST: join_lines(node_names),
    constants.SS_NODE_PRIMARY_IPS: join_lines(node_pri_ips),
    constants.SS_NODE_SECONDARY_IPS: join_lines(node_snd_ips),
    constants.SS_OFFLINE_NODES:
      join_lines(node.name for node in node_info if node.offline),
    constants.SS_ONLINE_NODES:
      join_lines(node.name for node in node_info if not node.offline),
    constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
    constants.SS_INSTANCE_LIST: join_lines(instance_names),
    constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
    constants.SS_HYPERVISOR_LIST: join_lines(cluster.enabled_hypervisors),
    constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
    constants.SS_UID_POOL:
      uidpool.FormatUidPool(cluster.uid_pool, separator="\n"),
    constants.SS_NODEGROUPS: join_lines(utils.NiceSort(nodegroups)),
    }

  # Only string values are accepted in the result
  bad_values = [(k, v) for k, v in ssconf_values.items()
                if not isinstance(v, (str, basestring))]
  if bad_values:
    err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
    raise errors.ConfigurationError("Some ssconf key(s) have non-string"
                                    " values: %s" % err)
  return ssconf_values
@locking.ssynchronized(_config_lock, shared=1)
def GetSsconfValues(self):
  """Wrapper using lock around L{_UnlockedGetSsconfValues}.

  """
  values = self._UnlockedGetSsconfValues()
  return values
@locking.ssynchronized(_config_lock, shared=1)
def GetVGName(self):
  """Return the volume group name stored in the cluster object.

  """
  cluster = self._config_data.cluster
  return cluster.volume_group_name
@locking.ssynchronized(_config_lock)
def SetVGName(self, vg_name):
  """Set the volume group name and write the configuration.

  @param vg_name: the new volume group name

  """
  cluster = self._config_data.cluster
  cluster.volume_group_name = vg_name
  cluster.serial_no += 1
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetDRBDHelper(self):
  """Return the DRBD usermode helper stored in the cluster object.

  """
  cluster = self._config_data.cluster
  return cluster.drbd_usermode_helper
@locking.ssynchronized(_config_lock)
def SetDRBDHelper(self, drbd_helper):
  """Set DRBD usermode helper.

  Stores the new helper on the cluster object, bumps the cluster serial
  number and writes the configuration out.

  @param drbd_helper: new value for the cluster's
      C{drbd_usermode_helper}

  """
  cluster = self._config_data.cluster
  cluster.drbd_usermode_helper = drbd_helper
  cluster.serial_no += 1
  # Persist the modification; without this the new value (and the bumped
  # serial) would only live in memory until some other writer saved.
  self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetMACPrefix(self):
  """Return the mac prefix.

  @return: the C{mac_prefix} attribute of the cluster object held in
      the configuration data

  """
  cluster = self._config_data.cluster
  return cluster.mac_prefix
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterInfo(self):
  """Returns information about the cluster.

  The object handed back is the one stored in the configuration data
  itself, not a copy.

  @rtype: L{objects.Cluster}
  @return: the cluster object

  """
  return self._config_data.cluster
@locking.ssynchronized(_config_lock, shared=1)
def HasAnyDiskOfType(self, dev_type):
  """Check if there is at least one disk of the given type in the config.

  This simply delegates to the C{HasAnyDiskOfType} method of the
  configuration data object.

  @param dev_type: the disk type to look for
  @return: the result of the delegated call, unchanged

  """
  return self._config_data.HasAnyDiskOfType(dev_type)
@locking.ssynchronized(_config_lock)
def Update(self, target, feedback_fn):
  """Notify function to be called after updates.

  This function must be called when an object (as returned by
  GetInstanceInfo, GetNodeInfo, GetClusterInfo) has been updated and the
  caller wants the modifications saved to the backing store. Note
  that all modified objects will be saved, but the target argument
  is the one the caller wants to ensure that it's saved.

  @param target: an instance of either L{objects.Cluster},
      L{objects.Node}, L{objects.Instance} or L{objects.NodeGroup}
      which is already present in the loaded configuration
  @param feedback_fn: Callable feedback function
  @raise errors.ProgrammerError: if the configuration has not been
      read yet, or if C{target} is of an unsupported type
  @raise errors.ConfigurationError: if C{target} is not found in the
      currently loaded configuration

  """
  if self._config_data is None:
    raise errors.ProgrammerError("Configuration file not read,"
                                 " cannot save.")

  # Only node updates also bump the cluster-level serial number.
  update_serial = False
  if isinstance(target, objects.Cluster):
    known = target == self._config_data.cluster
  elif isinstance(target, objects.Node):
    known = target in self._config_data.nodes.values()
    update_serial = True
  elif isinstance(target, objects.Instance):
    known = target in self._config_data.instances.values()
  elif isinstance(target, objects.NodeGroup):
    known = target in self._config_data.nodegroups.values()
  else:
    raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                 " ConfigWriter.Update" % type(target))

  if not known:
    raise errors.ConfigurationError("Configuration updated since object"
                                    " has been read or unknown object")

  target.serial_no += 1
  # Reuse one timestamp so target and cluster get the same mtime.
  target.mtime = now = time.time()

  if update_serial:
    # for node updates, we need to increase the cluster serial too
    self._config_data.cluster.serial_no += 1
    self._config_data.cluster.mtime = now

  if isinstance(target, objects.Instance):
    # Instance updates also release the DRBD minors kept under its name.
    self._UnlockedReleaseDRBDMinors(target.name)

  self._WriteConfig(feedback_fn=feedback_fn)
@locking.ssynchronized(_config_lock)
def DropECReservations(self, ec_id):
  """Drop per-execution-context reservations

  Forwards C{ec_id} to the C{DropECReservations} method of every entry
  in C{self._all_rms}.

  @param ec_id: identifier of the execution context whose reservations
      should be dropped

  """
  for rm in self._all_rms:
    rm.DropECReservations(ec_id)