4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
29 Currently, the data storage format is JSON. YAML was slow and consuming too
38 from ganeti import errors
39 from ganeti import logger
40 from ganeti import utils
41 from ganeti import constants
42 from ganeti import rpc
43 from ganeti import objects
47 """The interface to the cluster configuration.
# Constructor: clears the cached configuration object and its cache
# bookkeeping fields (_config_time, _config_size, _config_inode), stores the
# offline flag, chooses the config file path, creates an empty set of
# temporary IDs and records the local hostname.
# NOTE(review): original lines 51, 57 and 59 are absent from this listing, so
# the condition that selects between the two _cfg_file assignments below is
# not visible -- presumably a None check on cfg_file; confirm in the full file.
50 def __init__(self, cfg_file=None, offline=False):
52 self._config_data = None
53 self._config_time = None
54 self._config_size = None
55 self._config_inode = None
56 self._offline = offline
58 self._cfg_file = constants.CLUSTER_CONF_FILE
60 self._cfg_file = cfg_file
61 self._temporary_ids = set()
62 # Note: in order to prevent errors when resolving our name in
63 # _DistributeConfig, we compute it here once and reuse it; it's
64 # better to raise an error before starting to modify the config
65 # file than after it was modified
66 self._my_hostname = utils.HostInfo().name
# Reports whether the default cluster configuration file
# (constants.CLUSTER_CONF_FILE) exists on disk; it does not look at any
# instance-specific path.
# NOTE(review): the decorator/def line of this method (original lines 69-70)
# is not visible in this listing, so its exact name and signature must be
# checked against the full file.
68 # this method needs to be static, so that we can call it on the class
71 """Check if the cluster is configured.
74 return os.path.exists(constants.CLUSTER_CONF_FILE)
# Builds a candidate MAC address from the cluster's mac_prefix followed by
# three random bytes rendered as two-digit hex, and tests it against the MACs
# returned by self._AllMACs(). Raises errors.ConfigurationError when no
# unique MAC could be produced (per the message below).
# NOTE(review): original lines 86-87, 93-95 and 97 are absent from this
# listing, so the retry loop, its bound and the return statement are not
# visible here.
76 def GenerateMAC(self):
77 """Generate a MAC for an instance.
79 This should check the current instances for duplicates.
84 prefix = self._config_data.cluster.mac_prefix
85 all_macs = self._AllMACs()
# each of the three trailing octets is drawn uniformly from 0..255
88 byte1 = random.randrange(0, 256)
89 byte2 = random.randrange(0, 256)
90 byte3 = random.randrange(0, 256)
91 mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
92 if mac not in all_macs:
96 raise errors.ConfigurationError("Can't generate unique MAC")
# Returns True when `mac` is among the MACs reported by self._AllMACs(),
# i.e. those of NICs recorded in this cluster's configuration.
# NOTE(review): original lines 104-107 are absent from this listing; any
# statements between the docstring and the lookup are not visible here.
99 def IsMacInUse(self, mac):
100 """Predicate: check if the specified MAC is in use in the Ganeti cluster.
102 This only checks instances managed by this cluster, it does not
103 check for potential collisions elsewhere.
108 all_macs = self._AllMACs()
109 return mac in all_macs
# Collects the logical-volume names of every configured instance by merging
# all per-node lists returned by instance.MapLVsByNode() into `lvnames`.
# NOTE(review): the creation of `lvnames` (original lines 113-117) and the
# return statement (line 122) are absent from this listing; the .update()
# call suggests `lvnames` is a set -- confirm.
111 def _ComputeAllLVs(self):
112 """Compute the list of all LVs.
118 for instance in self._config_data.instances.values():
119 node_data = instance.MapLVsByNode()
120 for lv_list in node_data.values():
121 lvnames.update(lv_list)
# Produces an ID from utils.NewUUID() that does not collide with any name
# already known: previously handed-out temporary IDs, all LV names, all
# instance names, all node names and the caller-supplied `exceptions`.
# The accepted ID is added to self._temporary_ids so later calls avoid it.
# Raises errors.ConfigurationError when no unique ID could be generated.
# NOTE(review): original lines 137-139, 146-147 and 150-151 plus the return
# after line 154 are absent from this listing, so the creation of `existing`,
# the retry loop and the return statement are not visible here.
# NOTE(review): the error message below lacks a closing parenthesis and reads
# "Not able generate"; it is a runtime string and is left untouched.
124 def GenerateUniqueID(self, exceptions=None):
125 """Generate an unique disk name.
127 This checks the current node, instances and disk names for
131 - exceptions: a list with some other names which should be checked
132 for uniqueness (used for example when you want to get
133 more than one id at one time without adding each one in
134 turn to the config file
136 Returns: the unique id as a string
140 existing.update(self._temporary_ids)
141 existing.update(self._ComputeAllLVs())
142 existing.update(self._config_data.instances.keys())
143 existing.update(self._config_data.nodes.keys())
144 if exceptions is not None:
145 existing.update(exceptions)
148 unique_id = utils.NewUUID()
# a None result from NewUUID() is treated the same as a collision
149 if unique_id not in existing and unique_id is not None:
152 raise errors.ConfigurationError("Not able generate an unique ID"
153 " (last tried ID: %s" % unique_id)
154 self._temporary_ids.add(unique_id)
# Appends the `mac` of every NIC of every configured instance to `result`.
# NOTE(review): the def line (original line 157), the creation of `result`
# (lines 159-164) and the return statement are absent from this listing.
# Other methods in this file call self._AllMACs(), which is presumably this
# method -- confirm against the full file.
158 """Return all MACs present in the config.
165 for instance in self._config_data.instances.values():
166 for nic in instance.nics:
167 result.append(nic.mac)
# Consistency check over the loaded configuration. For every instance it
# records a message in `result` when the primary node or any secondary node
# is not a key of data.nodes, and when a NIC's MAC has already been seen on
# an earlier NIC.
# NOTE(review): original lines 173-178, 193 and 195-196 are absent from this
# listing, so the creation of `result`/`seen_macs`, the branch between the
# duplicate report and the append, and the return statement are not visible.
# NOTE(review): seen_macs is used with .append(), so it is a list and the
# `in` test is linear per NIC.
171 def VerifyConfig(self):
172 """Stub verify function.
179 data = self._config_data
180 for instance_name in data.instances:
181 instance = data.instances[instance_name]
182 if instance.primary_node not in data.nodes:
183 result.append("instance '%s' has invalid primary node '%s'" %
184 (instance_name, instance.primary_node))
185 for snode in instance.secondary_nodes:
186 if snode not in data.nodes:
187 result.append("instance '%s' has invalid secondary node '%s'" %
188 (instance_name, snode))
189 for idx, nic in enumerate(instance.nics):
190 if nic.mac in seen_macs:
191 result.append("instance '%s' has NIC %d mac %s duplicate" %
192 (instance_name, idx, nic.mac))
194 seen_macs.append(nic.mac)
# Fills in disk.physical_id for `disk` as seen from `node_name`, after first
# recursing into every child disk. For DRBD device types the logical_id is
# unpacked as (primary node, secondary node, port) and the physical_id becomes
# (own secondary_ip, port, peer secondary_ip, port), with "own" being whichever
# of the two nodes equals node_name. The final assignment copies logical_id
# to physical_id unchanged.
# Raises errors.ConfigurationError when node_name is neither of the DRBD
# nodes or when either node is unknown to GetNodeInfo().
# NOTE(review): original lines 204-207, 210, 212, 217, 229 and 231 are absent
# from this listing: the action taken when logical_id is None, the format
# argument of the first raise, the branch guarding the last assignment and
# any return statement are not visible.
197 def SetDiskID(self, disk, node_name):
198 """Convert the unique ID to the ID needed on the target nodes.
200 This is used only for drbd, which needs ip/port configuration.
202 The routine descends down and updates its children also, because
203 this helps when the only the top device is passed to the remote
208 for child in disk.children:
209 self.SetDiskID(child, node_name)
211 if disk.logical_id is None and disk.physical_id is not None:
213 if disk.dev_type in constants.LDS_DRBD:
214 pnode, snode, port = disk.logical_id
215 if node_name not in (pnode, snode):
216 raise errors.ConfigurationError("DRBD device not knowing node %s" %
218 pnode_info = self.GetNodeInfo(pnode)
219 snode_info = self.GetNodeInfo(snode)
220 if pnode_info is None or snode_info is None:
221 raise errors.ConfigurationError("Can't find primary or secondary node"
222 " for %s" % str(disk))
# local endpoint first, then the peer; both sides use the same port
223 if pnode == node_name:
224 disk.physical_id = (pnode_info.secondary_ip, port,
225 snode_info.secondary_ip, port)
226 else: # it must be secondary, we tested above
227 disk.physical_id = (snode_info.secondary_ip, port,
228 pnode_info.secondary_ip, port)
230 disk.physical_id = disk.logical_id
# Returns `port` to the cluster's tcpudp_port_pool set.
# Raises errors.ProgrammerError when `port` is not an int.
# NOTE(review): original lines 239-240 and 242 are absent from this listing;
# any statements around the pool update are not visible here.
233 def AddTcpUdpPort(self, port):
234 """Adds a new port to the available port pool.
237 if not isinstance(port, int):
238 raise errors.ProgrammerError("Invalid type passed for port")
241 self._config_data.cluster.tcpudp_port_pool.add(port)
# Returns a copy of the cluster's tcpudp_port_pool, so callers cannot mutate
# the pool stored in the configuration through the returned object.
# NOTE(review): original lines 246-249 are absent from this listing.
244 def GetPortList(self):
245 """Returns a copy of the current port list.
250 return self._config_data.cluster.tcpudp_port_pool.copy()
# Hands out a port: an entry popped from tcpudp_port_pool when the pool is
# non-empty, otherwise highest_used_port + 1, which is then stored back as
# the new highest_used_port.
# Raises errors.ConfigurationError when the computed port is greater than or
# equal to constants.LAST_DRBD_PORT.
# NOTE(review): original lines 253-254, 257-261, 265 and 272-275 are absent
# from this listing: the first docstring line, the branch keyword between
# the pool and range cases, and the return statement are not visible.
# NOTE(review): the check uses >=, so LAST_DRBD_PORT itself is never handed
# out, while the message says "greater than" -- confirm which is intended.
252 def AllocatePort(self):
255 The port will be taken from the available port pool or from the
256 default port range (and in this case we increase
262 # If there are TCP/IP ports configured, we use them first.
263 if self._config_data.cluster.tcpudp_port_pool:
264 port = self._config_data.cluster.tcpudp_port_pool.pop()
266 port = self._config_data.cluster.highest_used_port + 1
267 if port >= constants.LAST_DRBD_PORT:
268 raise errors.ConfigurationError("The highest used port is greater"
269 " than %s. Aborting." %
270 constants.LAST_DRBD_PORT)
271 self._config_data.cluster.highest_used_port = port
# Returns the cluster's rsahostkeypub attribute from the loaded configuration.
# NOTE(review): original lines 278-284 are absent from this listing.
276 def GetHostKey(self):
277 """Return the rsa hostkey from the config.
285 return self._config_data.cluster.rsahostkeypub
# Stores `instance` in the configuration's instances mapping under
# instance.name. For any disk template other than DT_DISKLESS the per-node
# LV layout is logged first.
# Raises errors.ProgrammerError when `instance` is not an objects.Instance.
# NOTE(review): original lines 301-302 and 304-305 are absent from this
# listing; statements before and after the assignment are not visible.
287 def AddInstance(self, instance):
288 """Add an instance to the config.
290 This should be used after creating a new instance.
293 instance: the instance object
295 if not isinstance(instance, objects.Instance):
296 raise errors.ProgrammerError("Invalid type passed to AddInstance")
298 if instance.disk_template != constants.DT_DISKLESS:
299 all_lvs = instance.MapLVsByNode()
300 logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, all_lvs))
303 self._config_data.instances[instance.name] = instance
# Sets the status attribute of the named instance to "up".
# Raises errors.ConfigurationError when the name is not a known instance.
# NOTE(review): original lines 308-311, 314 and 317 are absent from this
# listing: the format argument of the raise and any statement after the
# status change are not visible.
306 def MarkInstanceUp(self, instance_name):
307 """Mark the instance status to up in the config.
312 if instance_name not in self._config_data.instances:
313 raise errors.ConfigurationError("Unknown instance '%s'" %
315 instance = self._config_data.instances[instance_name]
316 instance.status = "up"
# Deletes the named instance from the configuration's instances mapping.
# Raises errors.ConfigurationError when the name is not a known instance.
# NOTE(review): original lines 321-324 and 328 are absent from this listing.
319 def RemoveInstance(self, instance_name):
320 """Remove the instance from the configuration.
325 if instance_name not in self._config_data.instances:
326 raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
327 del self._config_data.instances[instance_name]
# Moves an instance object from the key `old_name` to the key given by its
# own `name` attribute in the instances mapping.
# Raises errors.ConfigurationError when old_name is not a known instance.
# NOTE(review): original line 343 is absent from this listing; since the
# re-insertion uses inst.name, that line presumably assigns new_name to
# inst.name -- confirm against the full file. Lines 335-338 and 345 are
# absent as well.
330 def RenameInstance(self, old_name, new_name):
331 """Rename an instance.
333 This needs to be done in ConfigWriter and not by RemoveInstance
334 combined with AddInstance as only we can guarantee an atomic
339 if old_name not in self._config_data.instances:
340 raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
341 inst = self._config_data.instances[old_name]
342 del self._config_data.instances[old_name]
344 self._config_data.instances[inst.name] = inst
# Sets the status attribute of the named instance to "down".
# Raises errors.ConfigurationError when the name is not a known instance.
# NOTE(review): original lines 349-352 and 357 are absent from this listing.
347 def MarkInstanceDown(self, instance_name):
348 """Mark the status of an instance to down in the configuration.
353 if instance_name not in self._config_data.instances:
354 raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
355 instance = self._config_data.instances[instance_name]
356 instance.status = "down"
# Returns the keys of the configuration's instances mapping, i.e. the
# instance names, regardless of instance status.
# NOTE(review): original lines 361-362 and 365-369 are absent from this
# listing.
359 def GetInstanceList(self):
360 """Get the list of instances.
363 array of instances, ex. ['instance2.example.com','instance1.example.com']
364 these contains all the instances, also the ones in Admin_down state
370 return self._config_data.instances.keys()
# Delegates to utils.MatchNameComponent() to match `short_name` against the
# configured instance names and returns whatever that helper returns.
# NOTE(review): original lines 374-378 are absent from this listing.
372 def ExpandInstanceName(self, short_name):
373 """Attempt to expand an incomplete instance name.
379 return utils.MatchNameComponent(short_name,
380 self._config_data.instances.keys())
# Looks up the instance object stored under `instance_name` in the
# configuration and returns it.
# NOTE(review): original lines 399-400 are absent from this listing, so what
# happens when the name is unknown (the body of the `if` below) is not
# visible. Lines 390-397 are absent as well.
382 def GetInstanceInfo(self, instance_name):
383 """Returns informations about an instance.
385 It takes the information from the configuration file. Other informations of
386 an instance are taken from the live systems.
389 instance: name of the instance, ex instance1.example.com
398 if instance_name not in self._config_data.instances:
401 return self._config_data.instances[instance_name]
# Stores `node` in the configuration's nodes mapping under node.name; an
# existing entry with the same name is overwritten by the assignment.
# NOTE(review): original lines 408-410 and 412 are absent from this listing.
403 def AddNode(self, node):
404 """Add a node to the configuration.
407 node: an object.Node instance
411 self._config_data.nodes[node.name] = node
# Deletes the named node from the configuration's nodes mapping.
# Raises errors.ConfigurationError when the name is not a known node.
# NOTE(review): original lines 416-418, 421 and 423 are absent from this
# listing.
414 def RemoveNode(self, node_name):
415 """Remove a node from the configuration.
419 if node_name not in self._config_data.nodes:
420 raise errors.ConfigurationError("Unknown node '%s'" % node_name)
422 del self._config_data.nodes[node_name]
# Delegates to utils.MatchNameComponent() to match `short_name` against the
# configured node names and returns whatever that helper returns.
# NOTE(review): the docstring below says "instance name" although the lookup
# is over node names. Original lines 427-431 are absent from this listing.
425 def ExpandNodeName(self, short_name):
426 """Attempt to expand an incomplete instance name.
432 return utils.MatchNameComponent(short_name,
433 self._config_data.nodes.keys())
# Looks up the node object stored under `node_name` in the configuration
# and returns it.
# NOTE(review): original lines 447-448 are absent from this listing, so what
# happens when the name is unknown (the body of the `if` below) is not
# visible; SetDiskID in this file tests the result against None, which
# suggests None is returned -- confirm.
# NOTE(review): the docstring names the argument "node" and calls it a
# tuple, while the parameter is node_name and is used as a mapping key.
435 def GetNodeInfo(self, node_name):
436 """Get the configuration of a node, as stored in the config.
438 Args: node: nodename (tuple) of the node
440 Returns: the node object
446 if node_name not in self._config_data.nodes:
449 return self._config_data.nodes[node_name]
# Returns the keys of the configuration's nodes mapping, i.e. the node names.
# NOTE(review): original lines 453-456 are absent from this listing.
451 def GetNodeList(self):
452 """Return the list of nodes which are in the configuration.
457 return self._config_data.nodes.keys()
# Returns the in-memory configuration object itself (not a copy), so changes
# made by the caller affect the cached configuration.
# NOTE(review): original lines 461-463 are absent from this listing.
459 def DumpConfig(self):
460 """Return the entire configuration of the cluster.
464 return self._config_data
# Increments cluster.serial_no of the in-memory configuration by one; the
# change is not written to disk by this method.
# NOTE(review): original lines 468-469 are absent from this listing.
466 def _BumpSerialNo(self):
467 """Bump up the serial number of the config.
470 self._config_data.cluster.serial_no += 1
# Loads the configuration from self._cfg_file into self._config_data unless
# the cached copy is still current. The cache is considered current when
# data is already loaded and the file's mtime, size and inode all equal the
# values remembered from the previous load. After a successful load the
# config version is validated and the three stat values are remembered.
# Raises errors.ConfigurationError when the file cannot be stat'ed, when
# loading fails, when cluster.config_version is missing, or when it differs
# from constants.CONFIG_VERSION.
# NOTE(review): original lines 478-480, 482, 490, 492-493 and 497-498 are
# absent from this listing: the try/except keywords around os.stat, the
# early-return for the cached case, the try around Load and the file close
# are not visible.
# NOTE(review): `except Exception, err` is Python 2-only syntax.
472 def _OpenConfig(self):
473 """Read the config data from disk.
475 In case we already have configuration data and the config file has
476 the same mtime as when we read it, we skip the parsing of the
477 file, since de-serialisation could be slow.
481 st = os.stat(self._cfg_file)
483 raise errors.ConfigurationError("Can't stat config file: %s" % err)
484 if (self._config_data is not None and
485 self._config_time is not None and
486 self._config_time == st.st_mtime and
487 self._config_size == st.st_size and
488 self._config_inode == st.st_ino):
489 # data is current, so skip loading of config file
491 f = open(self._cfg_file, 'r')
494 data = objects.ConfigData.Load(f)
495 except Exception, err:
496 raise errors.ConfigurationError(err)
# refuse configs without a version marker before comparing versions
499 if (not hasattr(data, 'cluster') or
500 not hasattr(data.cluster, 'config_version')):
501 raise errors.ConfigurationError("Incomplete configuration"
502 " (missing cluster.config_version)")
503 if data.cluster.config_version != constants.CONFIG_VERSION:
504 raise errors.ConfigurationError("Cluster configuration version"
505 " mismatch, got %s instead of %s" %
506 (data.cluster.config_version,
507 constants.CONFIG_VERSION))
508 self._config_data = data
509 self._config_time = st.st_mtime
510 self._config_size = st.st_size
511 self._config_inode = st.st_ino
# NOTE(review): only the signature of this method is visible in this
# listing (original lines 514-515 are absent), so its body and docstring
# cannot be described from here.
513 def _ReleaseLock(self):
# Uploads self._cfg_file to other nodes via rpc.call_upload_file(). The
# target list is built from GetNodeList(), comparing each node's name with
# the hostname captured in self._my_hostname; a failed copy is reported
# through logger.Error.
# NOTE(review): original lines 522-526, 529-530, 534, 536, 539 and 542-544
# are absent from this listing: the creation of tgt_list, the statement that
# skips the local node, the per-node failure test on `result` and the return
# value are not visible.
517 def _DistributeConfig(self):
518 """Distribute the configuration to the other nodes.
520 Currently, this only copies the configuration file. In the future,
521 it could be used to encapsulate the 2/3-phase update mechanism.
527 nodelist = self.GetNodeList()
528 myhostname = self._my_hostname
531 for node in nodelist:
532 nodeinfo = self.GetNodeInfo(node)
533 if nodeinfo.name == myhostname:
535 tgt_list.append(node)
537 result = rpc.call_upload_file(tgt_list, self._cfg_file)
538 for node in tgt_list:
540 logger.Error("copy of file %s to node %s failed" %
541 (self._cfg_file, node))
# Serialises self._config_data to `destination` (default: self._cfg_file).
# The data is dumped into a temporary file created by tempfile.mkstemp() in
# the destination's own directory (suffix '.newconfig', prefix = the target
# file name) and then renamed onto the destination. Afterwards write_count
# is incremented, the cached mtime/size/inode are refreshed from a new stat
# of the destination, and _DistributeConfig() is called.
# Raises errors.ConfigurationError when the written file cannot be stat'ed.
# NOTE(review): original lines 547-548, 551, 555, 557-559, 564 and 566 are
# absent from this listing: the try/finally (or equivalent) around Dump, the
# f.close() the comment below refers to, and the try/except keywords around
# os.stat are not visible.
# NOTE(review): the cache is refreshed and the file distributed even when
# `destination` differs from self._cfg_file -- confirm this is intended.
545 def _WriteConfig(self, destination=None):
546 """Write the configuration data to persistent storage.
549 if destination is None:
550 destination = self._cfg_file
552 dir_name, file_name = os.path.split(destination)
553 fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
554 f = os.fdopen(fd, 'w')
556 self._config_data.Dump(f)
560 # we don't need to do os.close(fd) as f.close() did it
561 os.rename(name, destination)
562 self.write_count += 1
563 # re-set our cache as not to re-read the config file
565 st = os.stat(destination)
567 raise errors.ConfigurationError("Can't stat config file: %s" % err)
568 self._config_time = st.st_mtime
569 self._config_size = st.st_size
570 self._config_inode = st.st_ino
571 # and redistribute the config file
572 self._DistributeConfig()
# Builds a fresh in-memory configuration: an objects.Cluster carrying the
# config version, host key, MAC prefix, volume group name, default bridge
# and an empty port pool, plus a single objects.Node for `node`, and stores
# the resulting objects.ConfigData in self._config_data.
# A secondary_ip of None falls back to primary_ip.
# NOTE(review): original lines 590, 601, 603 and 605-606 are absent from this
# listing: one Cluster keyword argument, one ConfigData keyword argument and
# any statement after the assignment (e.g. writing the file) are not
# visible. The docstring also omits mac_prefix, vg_name and def_bridge.
574 def InitConfig(self, node, primary_ip, secondary_ip,
575 hostkeypub, mac_prefix, vg_name, def_bridge):
576 """Create the initial cluster configuration.
578 It will contain the current node, which will also be the master
579 node, and no instances or operating systmes.
582 node: the nodename of the initial node
583 primary_ip: the IP address of the current host
584 secondary_ip: the secondary IP of the current host or None
585 hostkeypub: the public hostkey of this host
# one below the first DRBD port; AllocatePort in this file hands out
# highest_used_port + 1, so the first range allocation is FIRST_DRBD_PORT
588 hu_port = constants.FIRST_DRBD_PORT - 1
589 globalconfig = objects.Cluster(config_version=constants.CONFIG_VERSION,
591 rsahostkeypub=hostkeypub,
592 highest_used_port=hu_port,
593 mac_prefix=mac_prefix,
594 volume_group_name=vg_name,
595 default_bridge=def_bridge,
596 tcpudp_port_pool=set())
597 if secondary_ip is None:
598 secondary_ip = primary_ip
599 nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
600 secondary_ip=secondary_ip)
602 self._config_data = objects.ConfigData(nodes={node: nodeconfig},
604 cluster=globalconfig)
# Returns the cluster's volume_group_name attribute from the loaded
# configuration.
# NOTE(review): the def line of this method (original line 607) and lines
# 609-612 are absent from this listing, so its exact name and signature must
# be checked against the full file.
608 """Return the volume group name.
613 return self._config_data.cluster.volume_group_name
# Returns the cluster's default_bridge attribute from the loaded
# configuration.
# NOTE(review): original lines 617-620 are absent from this listing.
615 def GetDefBridge(self):
616 """Return the default bridge.
621 return self._config_data.cluster.default_bridge
# Returns the cluster's mac_prefix attribute from the loaded configuration.
# NOTE(review): original lines 625-628 are absent from this listing.
623 def GetMACPrefix(self):
624 """Return the mac prefix.
629 return self._config_data.cluster.mac_prefix
# Returns the cluster object held in the loaded configuration (the object
# itself, not a copy).
# NOTE(review): original lines 633-640 are absent from this listing.
631 def GetClusterInfo(self):
632 """Returns informations about the cluster
641 return self._config_data.cluster
643 def Update(self, target):
644 """Notify function to be called after updates.
646 This function must be called when an object (as returned by
647 GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
648 caller wants the modifications saved to the backing store. Note
649 that all modified objects will be saved, but the target argument
650 is the one the caller wants to ensure that it's saved.
653 if self._config_data is None:
654 raise errors.ProgrammerError("Configuration file not read,"
656 if isinstance(target, objects.Cluster):
657 test = target == self._config_data.cluster
658 elif isinstance(target, objects.Node):
659 test = target in self._config_data.nodes.values()
660 elif isinstance(target, objects.Instance):
661 test = target in self._config_data.instances.values()
663 raise errors.ProgrammerError("Invalid object type (%s) passed to"
664 " ConfigWriter.Update" % type(target))
666 raise errors.ConfigurationError("Configuration updated since object"
667 " has been read or unknown object")