4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
29 Currently, the data storage format is JSON. YAML was slow and consuming too
import logging
import os
import random
import tempfile

from ganeti import errors
from ganeti import locking
from ganeti import logger
from ganeti import utils
from ganeti import constants
from ganeti import rpc
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
# Module-wide lock used by the locking.ssynchronized decorators on the
# ConfigWriter methods in this file.
_config_lock = locking.SharedLock()
54 sstore = ssconf.SimpleStore()
56 if sstore.GetConfigVersion() != constants.CONFIG_VERSION:
57 raise errors.ConfigurationError("Cluster configuration version"
58 " mismatch, got %s instead of %s" %
59 (sstore.GetConfigVersion(),
60 constants.CONFIG_VERSION))
64 """The interface to the cluster configuration.
def __init__(self, cfg_file=None, offline=False):
    """Create a configuration handle without reading the file yet.

    Args:
      cfg_file: path of the configuration file; when None,
                constants.CLUSTER_CONF_FILE is used
      offline: stored as self._offline

    """
    # NOTE(review): initialisation restored after lossy extraction;
    # _WriteConfig increments this counter, so it has to exist.
    self.write_count = 0
    self._lock = _config_lock
    # Cached config object plus the stat() fingerprint of the file it
    # was loaded from (see _OpenConfig).
    self._config_data = None
    self._config_time = None
    self._config_size = None
    self._config_inode = None
    self._offline = offline
    if cfg_file is None:
        self._cfg_file = constants.CLUSTER_CONF_FILE
    else:
        self._cfg_file = cfg_file
    self._temporary_ids = set()
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = utils.HostInfo().name
86 # this method needs to be static, so that we can call it on the class
89 """Check if the cluster is configured.
92 return os.path.exists(constants.CLUSTER_CONF_FILE)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateMAC(self):
    """Generate a MAC for an instance.

    The address is the cluster's mac_prefix followed by three random
    bytes; candidates already present in _AllMACs() are rejected.

    Returns: the new MAC address as a string

    Raises:
      errors.ConfigurationError: if no unused address was found

    """
    # NOTE(review): the _OpenConfig() call and the retry loop header were
    # lost in extraction; a budget of 64 attempts is assumed -- confirm
    # against upstream.
    self._OpenConfig()
    prefix = self._config_data.cluster.mac_prefix
    used_macs = set(self._AllMACs())
    for _ in range(64):
        byte1 = random.randrange(0, 256)
        byte2 = random.randrange(0, 256)
        byte3 = random.randrange(0, 256)
        mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
        if mac not in used_macs:
            return mac
    raise errors.ConfigurationError("Can't generate unique MAC")
@locking.ssynchronized(_config_lock, shared=1)
def IsMacInUse(self, mac):
    """Predicate: check if the specified MAC is in use in the Ganeti cluster.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    Returns: True if some configured NIC already has this MAC

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()
    return mac in self._AllMACs()
129 def _ComputeAllLVs(self):
130 """Compute the list of all LVs.
135 for instance in self._config_data.instances.values():
136 node_data = instance.MapLVsByNode()
137 for lv_list in node_data.values():
138 lvnames.update(lv_list)
@locking.ssynchronized(_config_lock, shared=1)
def GenerateUniqueID(self, exceptions=None):
    """Generate an unique disk name.

    The new ID is checked against the temporary IDs already handed out,
    all LV names, and the instance and node names in the configuration.

    Args:
      - exceptions: a list with some other names which should be checked
                    for uniqueness (used for example when you want to get
                    more than one id at one time without adding each one in
                    turn to the config file

    Returns: the unique id as a string

    Raises:
      errors.ConfigurationError: if no unused ID could be generated

    """
    existing = set()
    existing.update(self._temporary_ids)
    existing.update(self._ComputeAllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    if exceptions is not None:
        existing.update(exceptions)
    # NOTE(review): the loop header was lost in extraction; a bounded
    # budget of 64 attempts is assumed so a misbehaving generator cannot
    # spin forever.
    unique_id = None
    for _ in range(64):
        unique_id = utils.NewUUID()
        if unique_id is not None and unique_id not in existing:
            break
    else:
        raise errors.ConfigurationError("Not able generate an unique ID"
                                        " (last tried ID: %s)" % unique_id)
    # Remember the ID so that later calls do not hand it out again.
    self._temporary_ids.add(unique_id)
    return unique_id
176 """Return all MACs present in the config.
182 for instance in self._config_data.instances.values():
183 for nic in instance.nics:
184 result.append(nic.mac)
@locking.ssynchronized(_config_lock, shared=1)
def VerifyConfig(self):
    """Stub verify function.

    Checks that every instance references known primary and secondary
    nodes and that no NIC MAC address appears twice.

    Returns: a list of error message strings (empty if nothing was found)

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()
    result = []
    # A set gives constant-time duplicate detection.
    seen_macs = set()
    data = self._config_data
    for instance_name in data.instances:
        instance = data.instances[instance_name]
        if instance.primary_node not in data.nodes:
            result.append("instance '%s' has invalid primary node '%s'" %
                          (instance_name, instance.primary_node))
        for snode in instance.secondary_nodes:
            if snode not in data.nodes:
                result.append("instance '%s' has invalid secondary node '%s'" %
                              (instance_name, snode))
        for idx, nic in enumerate(instance.nics):
            if nic.mac in seen_macs:
                result.append("instance '%s' has NIC %d mac %s duplicate" %
                              (instance_name, idx, nic.mac))
            else:
                seen_macs.add(nic.mac)
    return result
214 def _UnlockedSetDiskID(self, disk, node_name):
215 """Convert the unique ID to the ID needed on the target nodes.
217 This is used only for drbd, which needs ip/port configuration.
219 The routine descends down and updates its children also, because
220 this helps when the only the top device is passed to the remote
223 This function is for internal use, when the config lock is already held.
227 for child in disk.children:
228 self._UnlockedSetDiskID(child, node_name)
230 if disk.logical_id is None and disk.physical_id is not None:
232 if disk.dev_type in constants.LDS_DRBD:
233 pnode, snode, port = disk.logical_id
234 if node_name not in (pnode, snode):
235 raise errors.ConfigurationError("DRBD device not knowing node %s" %
237 pnode_info = self._UnlockedGetNodeInfo(pnode)
238 snode_info = self._UnlockedGetNodeInfo(snode)
239 if pnode_info is None or snode_info is None:
240 raise errors.ConfigurationError("Can't find primary or secondary node"
241 " for %s" % str(disk))
242 if pnode == node_name:
243 disk.physical_id = (pnode_info.secondary_ip, port,
244 snode_info.secondary_ip, port)
245 else: # it must be secondary, we tested above
246 disk.physical_id = (snode_info.secondary_ip, port,
247 pnode_info.secondary_ip, port)
249 disk.physical_id = disk.logical_id
@locking.ssynchronized(_config_lock)
def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote node.

    Locked wrapper around _UnlockedSetDiskID.

    """
    return self._UnlockedSetDiskID(disk, node_name)
@locking.ssynchronized(_config_lock)
def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    Args:
      port: the port number, must be an int

    Raises:
      errors.ProgrammerError: if port is not an int

    """
    if not isinstance(port, int):
        raise errors.ProgrammerError("Invalid type passed for port")

    # NOTE(review): load/save calls restored after lossy extraction --
    # confirm upstream.
    self._OpenConfig()
    self._config_data.cluster.tcpudp_port_pool.add(port)
    self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetPortList(self):
    """Returns a copy of the current port list.

    The copy is taken from the cluster's tcpudp_port_pool, so callers can
    modify it without touching the configuration.

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()
    return self._config_data.cluster.tcpudp_port_pool.copy()
@locking.ssynchronized(_config_lock)
def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    Returns: the allocated port

    Raises:
      errors.ConfigurationError: if the next port of the default range
        would reach constants.LAST_DRBD_PORT

    """
    # NOTE(review): load/save calls and the final return were lost in
    # extraction and are restored here -- confirm upstream.
    self._OpenConfig()
    cluster = self._config_data.cluster

    # If there are TCP/IP ports configured, we use them first.
    if cluster.tcpudp_port_pool:
        port = cluster.tcpudp_port_pool.pop()
    else:
        port = cluster.highest_used_port + 1
        if port >= constants.LAST_DRBD_PORT:
            raise errors.ConfigurationError("The highest used port is greater"
                                            " than %s. Aborting." %
                                            constants.LAST_DRBD_PORT)
        cluster.highest_used_port = port

    self._WriteConfig()
    return port
@locking.ssynchronized(_config_lock, shared=1)
def GetHostKey(self):
    """Return the rsa hostkey from the config.

    Returns: the value of cluster.rsahostkeypub

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()
    return self._config_data.cluster.rsahostkeypub
@locking.ssynchronized(_config_lock)
def AddInstance(self, instance):
    """Add an instance to the config.

    This should be used after creating a new instance.

    Args:
      instance: the instance object

    Raises:
      errors.ProgrammerError: if instance is not an objects.Instance

    """
    if not isinstance(instance, objects.Instance):
        raise errors.ProgrammerError("Invalid type passed to AddInstance")

    # Log the LV layout of disk-backed instances.
    if instance.disk_template != constants.DT_DISKLESS:
        all_lvs = instance.MapLVsByNode()
        logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, all_lvs))

    # NOTE(review): load/save calls restored after lossy extraction --
    # confirm upstream.
    self._OpenConfig()
    self._config_data.instances[instance.name] = instance
    self._WriteConfig()
341 def _SetInstanceStatus(self, instance_name, status):
342 """Set the instance's status to a given value.
345 if status not in ("up", "down"):
346 raise errors.ProgrammerError("Invalid status '%s' passed to"
347 " ConfigWriter._SetInstanceStatus()" %
351 if instance_name not in self._config_data.instances:
352 raise errors.ConfigurationError("Unknown instance '%s'" %
354 instance = self._config_data.instances[instance_name]
355 if instance.status != status:
356 instance.status = status
@locking.ssynchronized(_config_lock)
def MarkInstanceUp(self, instance_name):
    """Mark the instance status to up in the config.

    Locked wrapper around _SetInstanceStatus(instance_name, "up").

    """
    self._SetInstanceStatus(instance_name, "up")
@locking.ssynchronized(_config_lock)
def RemoveInstance(self, instance_name):
    """Remove the instance from the configuration.

    Raises:
      errors.ConfigurationError: if the instance is not in the config

    """
    # NOTE(review): load/save calls restored after lossy extraction --
    # confirm upstream.
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
        raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
    del self._config_data.instances[instance_name]
    self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RenameInstance(self, old_name, new_name):
    """Rename an instance.

    This needs to be done in ConfigWriter and not by RemoveInstance
    combined with AddInstance as only we can guarantee an atomic
    rename.

    File-based disks also get the instance directory component of their
    path replaced.

    Raises:
      errors.ConfigurationError: if old_name is not in the config

    """
    # NOTE(review): load/save calls and the name assignment were lost in
    # extraction and are restored here -- confirm upstream.
    self._OpenConfig()
    if old_name not in self._config_data.instances:
        raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
    inst = self._config_data.instances[old_name]
    del self._config_data.instances[old_name]
    inst.name = new_name

    for disk in inst.disks:
        if disk.dev_type == constants.LD_FILE:
            # rename the file paths in logical and physical id
            old_path = disk.logical_id[1]
            file_storage_dir = os.path.dirname(os.path.dirname(old_path))
            # NOTE(review): the trailing os.path.join() arguments were
            # dropped by extraction; this assumes the layout
            # <storage dir>/<instance name>/<file name> and keeps the
            # existing file name -- verify against upstream.
            disk.physical_id = disk.logical_id = (disk.logical_id[0],
                                                  os.path.join(file_storage_dir,
                                                               inst.name,
                                                               os.path.basename(old_path)))

    self._config_data.instances[inst.name] = inst
    self._WriteConfig()
@locking.ssynchronized(_config_lock)
def MarkInstanceDown(self, instance_name):
    """Mark the status of an instance to down in the configuration.

    Locked wrapper around _SetInstanceStatus(instance_name, "down").

    """
    self._SetInstanceStatus(instance_name, "down")
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceList(self):
    """Get the list of instances.

    Returns:
      array of instances, ex. ['instance2.example.com','instance1.example.com']
      these contains all the instances, also the ones in Admin_down state

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()

    return self._config_data.instances.keys()
@locking.ssynchronized(_config_lock, shared=1)
def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    Returns: the result of utils.MatchNameComponent() applied to
    short_name and the configured instance names

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()

    return utils.MatchNameComponent(short_name,
                                    self._config_data.instances.keys())
@locking.ssynchronized(_config_lock, shared=1)
def GetInstanceInfo(self, instance_name):
    """Returns information about an instance.

    It takes the information from the configuration file. Other information
    about an instance is taken from the live systems.

    Args:
      instance: name of the instance, ex instance1.example.com

    Returns: the instance object stored under that name

    """
    # NOTE(review): the _OpenConfig() call and the unknown-name branch were
    # lost in extraction; returning None for an unknown name is assumed --
    # confirm upstream.
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
        return None

    return self._config_data.instances[instance_name]
@locking.ssynchronized(_config_lock)
def AddNode(self, node):
    """Add a node to the configuration.

    Args:
      node: an object.Node instance

    """
    # Let the logging module do the formatting.
    logging.info("Adding node %s to configuration", node.name)

    # NOTE(review): load/save calls restored after lossy extraction --
    # confirm upstream.
    self._OpenConfig()
    self._config_data.nodes[node.name] = node
    self._WriteConfig()
@locking.ssynchronized(_config_lock)
def RemoveNode(self, node_name):
    """Remove a node from the configuration.

    Raises:
      errors.ConfigurationError: if the node is not in the config

    """
    # Let the logging module do the formatting.
    logging.info("Removing node %s from configuration", node_name)

    # NOTE(review): load/save calls restored after lossy extraction --
    # confirm upstream.
    self._OpenConfig()
    if node_name not in self._config_data.nodes:
        raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del self._config_data.nodes[node_name]
    self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    Returns: the result of utils.MatchNameComponent() applied to
    short_name and the configured node names

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()

    return utils.MatchNameComponent(short_name,
                                    self._config_data.nodes.keys())
495 def _UnlockedGetNodeInfo(self, node_name):
496 """Get the configuration of a node, as stored in the config.
498 This function is for internal use, when the config lock is already held.
500 Args: node: nodename (tuple) of the node
502 Returns: the node object
507 if node_name not in self._config_data.nodes:
510 return self._config_data.nodes[node_name]
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    Locked wrapper around _UnlockedGetNodeInfo.

    Args: node: nodename (tuple) of the node

    Returns: the node object

    """
    return self._UnlockedGetNodeInfo(node_name)
524 def _UnlockedGetNodeList(self):
525 """Return the list of nodes which are in the configuration.
527 This function is for internal use, when the config lock is already held.
531 return self._config_data.nodes.keys()
@locking.ssynchronized(_config_lock, shared=1)
def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    Locked wrapper around _UnlockedGetNodeList.

    """
    return self._UnlockedGetNodeList()
@locking.ssynchronized(_config_lock, shared=1)
def DumpConfig(self):
    """Return the entire configuration of the cluster.

    Returns: the cached configuration object itself, not a copy

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()
    return self._config_data
548 def _BumpSerialNo(self):
549 """Bump up the serial number of the config.
552 self._config_data.cluster.serial_no += 1
554 def _OpenConfig(self):
555 """Read the config data from disk.
557 In case we already have configuration data and the config file has
558 the same mtime as when we read it, we skip the parsing of the
559 file, since de-serialisation could be slow.
563 st = os.stat(self._cfg_file)
565 raise errors.ConfigurationError("Can't stat config file: %s" % err)
566 if (self._config_data is not None and
567 self._config_time is not None and
568 self._config_time == st.st_mtime and
569 self._config_size == st.st_size and
570 self._config_inode == st.st_ino):
571 # data is current, so skip loading of config file
574 # Make sure the configuration has the right version
577 f = open(self._cfg_file, 'r')
580 data = objects.ConfigData.FromDict(serializer.Load(f.read()))
581 except Exception, err:
582 raise errors.ConfigurationError(err)
585 if (not hasattr(data, 'cluster') or
586 not hasattr(data.cluster, 'rsahostkeypub')):
587 raise errors.ConfigurationError("Incomplete configuration"
588 " (missing cluster.rsahostkeypub)")
589 self._config_data = data
590 self._config_time = st.st_mtime
591 self._config_size = st.st_size
592 self._config_inode = st.st_ino
def _DistributeConfig(self):
    """Distribute the configuration to the other nodes.

    Currently, this only copies the configuration file. In the future,
    it could be used to encapsulate the 2/3-phase update mechanism.

    Returns: True if the upload succeeded on every other node
    (NOTE(review): return value assumed, see below)

    """
    # NOTE(review): the lines around the node loop were lost in extraction.
    # The offline short-circuit and the boolean result are assumptions --
    # confirm against upstream.
    if self._offline:
        return True

    myhostname = self._my_hostname
    # Build a new list without ourselves instead of mutating the one
    # returned by _UnlockedGetNodeList().
    nodelist = [name for name in self._UnlockedGetNodeList()
                if name != myhostname]

    result = rpc.call_upload_file(nodelist, self._cfg_file)
    all_ok = True
    for node in nodelist:
        if not result[node]:
            logger.Error("copy of file %s to node %s failed" %
                         (self._cfg_file, node))
            all_ok = False
    return all_ok
620 def _WriteConfig(self, destination=None):
621 """Write the configuration data to persistent storage.
624 if destination is None:
625 destination = self._cfg_file
627 txt = serializer.Dump(self._config_data.ToDict())
628 dir_name, file_name = os.path.split(destination)
629 fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
630 f = os.fdopen(fd, 'w')
636 # we don't need to do os.close(fd) as f.close() did it
637 os.rename(name, destination)
638 self.write_count += 1
639 # re-set our cache as not to re-read the config file
641 st = os.stat(destination)
643 raise errors.ConfigurationError("Can't stat config file: %s" % err)
644 self._config_time = st.st_mtime
645 self._config_size = st.st_size
646 self._config_inode = st.st_ino
647 # and redistribute the config file
648 self._DistributeConfig()
@locking.ssynchronized(_config_lock)
def InitConfig(self, node, primary_ip, secondary_ip,
               hostkeypub, mac_prefix, vg_name, def_bridge):
    """Create the initial cluster configuration.

    It will contain the current node, which will also be the master
    node, and no instances or operating systems.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host or None
                    (None means: use primary_ip)
      hostkeypub: the public hostkey of this host
      mac_prefix: stored as the cluster's mac_prefix
      vg_name: stored as the cluster's volume_group_name
      def_bridge: stored as the cluster's default_bridge

    """
    # Start just below the DRBD range: AllocatePort hands out
    # highest_used_port + 1.
    hu_port = constants.FIRST_DRBD_PORT - 1
    globalconfig = objects.Cluster(serial_no=1,
                                   rsahostkeypub=hostkeypub,
                                   highest_used_port=hu_port,
                                   mac_prefix=mac_prefix,
                                   volume_group_name=vg_name,
                                   default_bridge=def_bridge,
                                   tcpudp_port_pool=set())
    if secondary_ip is None:
        secondary_ip = primary_ip
    nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    # NOTE(review): the "instances" argument and the final _WriteConfig()
    # call were lost in extraction; an empty instances mapping is assumed
    # -- confirm upstream.
    self._config_data = objects.ConfigData(nodes={node: nodeconfig},
                                           instances={},
                                           cluster=globalconfig)
    self._WriteConfig()
683 @locking.ssynchronized(_config_lock, shared=1)
685 """Return the volume group name.
689 return self._config_data.cluster.volume_group_name
@locking.ssynchronized(_config_lock)
def SetVGName(self, vg_name):
    """Set the volume group name.

    Args:
      vg_name: new value for the cluster's volume_group_name

    """
    # NOTE(review): load/save calls restored after lossy extraction --
    # confirm upstream.
    self._OpenConfig()
    self._config_data.cluster.volume_group_name = vg_name
    self._WriteConfig()
@locking.ssynchronized(_config_lock, shared=1)
def GetDefBridge(self):
    """Return the default bridge.

    Returns: the value of cluster.default_bridge

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()
    return self._config_data.cluster.default_bridge
@locking.ssynchronized(_config_lock, shared=1)
def GetMACPrefix(self):
    """Return the mac prefix.

    Returns: the value of cluster.mac_prefix

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()
    return self._config_data.cluster.mac_prefix
@locking.ssynchronized(_config_lock, shared=1)
def GetClusterInfo(self):
    """Returns information about the cluster.

    Returns: the cluster object of the cached configuration (not a copy)

    """
    # NOTE(review): call restored after lossy extraction -- confirm upstream.
    self._OpenConfig()
    return self._config_data.cluster
728 @locking.ssynchronized(_config_lock)
729 def Update(self, target):
730 """Notify function to be called after updates.
732 This function must be called when an object (as returned by
733 GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
734 caller wants the modifications saved to the backing store. Note
735 that all modified objects will be saved, but the target argument
736 is the one the caller wants to ensure that it's saved.
739 if self._config_data is None:
740 raise errors.ProgrammerError("Configuration file not read,"
742 if isinstance(target, objects.Cluster):
743 test = target == self._config_data.cluster
744 elif isinstance(target, objects.Node):
745 test = target in self._config_data.nodes.values()
746 elif isinstance(target, objects.Instance):
747 test = target in self._config_data.instances.values()
749 raise errors.ProgrammerError("Invalid object type (%s) passed to"
750 " ConfigWriter.Update" % type(target))
752 raise errors.ConfigurationError("Configuration updated since object"
753 " has been read or unknown object")