4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
29 Currently, the data storage format is JSON. YAML was slow and consuming too
38 from ganeti import errors
39 from ganeti import logger
40 from ganeti import utils
41 from ganeti import constants
42 from ganeti import rpc
43 from ganeti import objects
47 """The interface to the cluster configuration.
def __init__(self, cfg_file=None, offline=False):
  """Set up a configuration handle without reading any data yet.

  Args:
    cfg_file: path of the configuration file; when None,
      constants.CLUSTER_CONF_FILE is used
    offline: stored as-is in self._offline

  """
  # counter incremented by _WriteConfig on every successful write
  self.write_count = 0
  # cached configuration plus the stat() values of the file it was
  # loaded from (used by _OpenConfig to detect changes)
  self._config_data = None
  self._config_time = None
  self._config_size = None
  self._config_inode = None
  self._offline = offline
  if cfg_file is None:
    self._cfg_file = constants.CLUSTER_CONF_FILE
  else:
    self._cfg_file = cfg_file
  self._temporary_ids = set()
  # Note: in order to prevent errors when resolving our name in
  # _DistributeConfig, we compute it here once and reuse it; it's
  # better to raise an error before starting to modify the config
  # file than after it was modified
  self._my_hostname = utils.HostInfo().name
68 # this method needs to be static, so that we can call it on the class
71 """Check if the cluster is configured.
74 return os.path.exists(constants.CLUSTER_CONF_FILE)
def GenerateMAC(self):
  """Generate a MAC address that is not yet used by any instance.

  The address is the cluster MAC prefix followed by three random
  bytes; candidates already returned by _AllMACs() are rejected.

  Returns: the new MAC address as a string

  Raises: errors.ConfigurationError if every attempt collided

  """
  prefix = self._config_data.cluster.mac_prefix
  # a set keeps the repeated membership tests cheap
  used_macs = set(self._AllMACs())
  # bounded number of attempts, so an exhausted prefix cannot hang us
  for _ in range(64):
    byte1 = random.randrange(0, 256)
    byte2 = random.randrange(0, 256)
    byte3 = random.randrange(0, 256)
    mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
    if mac not in used_macs:
      return mac
  raise errors.ConfigurationError("Can't generate unique MAC")
def _ComputeAllLVs(self):
  """Collect the names of all LVs used by the configured instances.

  Returns: a set with every LV name found in the MapLVsByNode()
    result of each instance

  """
  lvnames = set()
  for instance in self._config_data.instances.values():
    # MapLVsByNode() maps to lists of LV names; only the names matter here
    for lv_list in instance.MapLVsByNode().values():
      lvnames.update(lv_list)
  return lvnames
def GenerateUniqueID(self, exceptions=None):
  """Generate a unique disk name.

  The new ID is checked against the temporary IDs handed out earlier,
  all LV names, all instance names and all node names.

  Args:
    exceptions: a list with some other names which should be checked
      for uniqueness (used for example when you want to get more than
      one id at one time without adding each one in turn to the
      config file)

  Returns: the unique id as a string

  Raises: errors.ConfigurationError if no unused ID was found

  """
  existing = set()
  existing.update(self._temporary_ids)
  existing.update(self._ComputeAllLVs())
  existing.update(self._config_data.instances.keys())
  existing.update(self._config_data.nodes.keys())
  if exceptions is not None:
    existing.update(exceptions)
  # bounded number of attempts instead of looping forever
  retries = 64
  while retries > 0:
    unique_id = utils.GetUUID()
    if unique_id not in existing and unique_id is not None:
      break
    retries -= 1
  else:
    raise errors.ConfigurationError("Not able generate an unique ID"
                                    " (last tried ID: %s)" % unique_id)
  # remember the ID so that later calls do not hand it out again
  self._temporary_ids.add(unique_id)
  return unique_id
146 """Return all MACs present in the config.
153 for instance in self._config_data.instances.values():
154 for nic in instance.nics:
155 result.append(nic.mac)
def VerifyConfig(self):
  """Run basic consistency checks on the configuration.

  Checks that the primary and secondary nodes of every instance exist
  in the node list and that no NIC MAC address is used twice.

  Returns: a list of error message strings, empty if nothing was found

  """
  result = []
  # a set keeps the duplicate check cheap
  seen_macs = set()
  data = self._config_data
  for instance_name in data.instances:
    instance = data.instances[instance_name]
    if instance.primary_node not in data.nodes:
      result.append("Instance '%s' has invalid primary node '%s'" %
                    (instance_name, instance.primary_node))
    for snode in instance.secondary_nodes:
      if snode not in data.nodes:
        result.append("Instance '%s' has invalid secondary node '%s'" %
                      (instance_name, snode))
    for idx, nic in enumerate(instance.nics):
      if nic.mac in seen_macs:
        result.append("Instance '%s' has NIC %d mac %s duplicate" %
                      (instance_name, idx, nic.mac))
      else:
        seen_macs.add(nic.mac)
  return result
def SetDiskID(self, disk, node_name):
  """Convert the unique ID to the ID needed on the target nodes.

  This is used only for drbd, which needs ip/port configuration.

  The routine descends down and updates its children also, because
  this helps when only the top device is passed to the remote node.

  Args:
    disk: the disk object, updated in place together with its children
    node_name: name of the node the physical ID is computed for

  Raises: errors.ConfigurationError if a drbd disk does not involve
    node_name, or if one of its two nodes is not in the configuration

  """
  # children may be empty or unset for leaf devices
  if disk.children:
    for child in disk.children:
      self.SetDiskID(child, node_name)

  if disk.logical_id is None and disk.physical_id is not None:
    # nothing to derive a physical ID from; keep the one already set
    return
  if disk.dev_type == "drbd":
    pnode, snode, port = disk.logical_id
    if node_name not in (pnode, snode):
      raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                      node_name)
    pnode_info = self.GetNodeInfo(pnode)
    snode_info = self.GetNodeInfo(snode)
    if pnode_info is None or snode_info is None:
      raise errors.ConfigurationError("Can't find primary or secondary node"
                                      " for %s" % str(disk))
    # the physical ID lists the local endpoint first, then the remote one
    if pnode == node_name:
      disk.physical_id = (pnode_info.secondary_ip, port,
                          snode_info.secondary_ip, port)
    else: # it must be secondary, we tested above
      disk.physical_id = (snode_info.secondary_ip, port,
                          pnode_info.secondary_ip, port)
  else:
    disk.physical_id = disk.logical_id
def AddTcpUdpPort(self, port):
  """Put a port into the pool of available TCP/UDP ports.

  Args:
    port: the port number; must be an int

  Raises: errors.ProgrammerError if port is not an int

  """
  if isinstance(port, int):
    free_ports = self._config_data.cluster.tcpudp_port_pool
    free_ports.add(port)
  else:
    raise errors.ProgrammerError("Invalid type passed for port")
def GetPortList(self):
  """Return a copy of the pool of available TCP/UDP ports.

  Returns: a copy, so changes made by the caller do not touch the
    configuration

  """
  free_ports = self._config_data.cluster.tcpudp_port_pool
  return free_ports.copy()
def AllocatePort(self):
  """Allocate a port.

  The port will be taken from the available port pool or from the
  default port range (and in this case we increase
  highest_used_port).

  Returns: the allocated port number

  Raises: errors.ConfigurationError if the next port from the default
    range would reach constants.LAST_DRBD_PORT

  """
  cluster = self._config_data.cluster
  # If there are TCP/IP ports configured, we use them first.
  if cluster.tcpudp_port_pool:
    port = cluster.tcpudp_port_pool.pop()
  else:
    port = cluster.highest_used_port + 1
    if port >= constants.LAST_DRBD_PORT:
      raise errors.ConfigurationError("The highest used port is greater"
                                      " than %s. Aborting." %
                                      constants.LAST_DRBD_PORT)
    cluster.highest_used_port = port
  return port
def GetHostKey(self):
  """Return the public RSA host key stored in the cluster config.

  Returns: the value of the cluster's rsahostkeypub attribute

  """
  cluster = self._config_data.cluster
  return cluster.rsahostkeypub
def AddInstance(self, instance):
  """Register a new instance in the configuration.

  This should be used after creating a new instance.

  Args:
    instance: the objects.Instance to add; it is stored under its name

  Raises: errors.ProgrammerError if instance is not an objects.Instance

  """
  if not isinstance(instance, objects.Instance):
    raise errors.ProgrammerError("Invalid type passed to AddInstance")

  # the disk layout is only logged for instances that have disks
  has_disks = instance.disk_template != constants.DT_DISKLESS
  if has_disks:
    all_lvs = instance.MapLVsByNode()
    logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, all_lvs))

  known_instances = self._config_data.instances
  known_instances[instance.name] = instance
def MarkInstanceUp(self, instance_name):
  """Set the status of an instance to "up" in the configuration.

  Args:
    instance_name: name of the instance to mark

  Raises: errors.ConfigurationError if the instance is not known

  """
  if instance_name not in self._config_data.instances:
    raise errors.ConfigurationError("Unknown instance '%s'" %
                                    instance_name)
  instance = self._config_data.instances[instance_name]
  instance.status = "up"
def RemoveInstance(self, instance_name):
  """Drop an instance from the configuration.

  Args:
    instance_name: name of the instance to remove

  Raises: errors.ConfigurationError if the instance is not known

  """
  known_instances = self._config_data.instances
  if instance_name in known_instances:
    del known_instances[instance_name]
  else:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
def RenameInstance(self, old_name, new_name):
  """Rename an instance.

  This needs to be done in ConfigWriter and not by RemoveInstance
  combined with AddInstance as only we can guarantee an atomic
  rename.

  Args:
    old_name: current name of the instance
    new_name: name the instance is stored under afterwards

  Raises: errors.ConfigurationError if old_name is not known

  """
  if old_name not in self._config_data.instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
  inst = self._config_data.instances[old_name]
  del self._config_data.instances[old_name]
  # the object carries its own name, which is also the dictionary key
  inst.name = new_name
  self._config_data.instances[inst.name] = inst
def MarkInstanceDown(self, instance_name):
  """Set the status of an instance to "down" in the configuration.

  Args:
    instance_name: name of the instance to mark

  Raises: errors.ConfigurationError if the instance is not known

  """
  known_instances = self._config_data.instances
  if instance_name not in known_instances:
    raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
  known_instances[instance_name].status = "down"
def GetInstanceList(self):
  """Return the names of all instances in the configuration.

  Returns: the keys of the instance dictionary, for example
    ['instance2.example.com', 'instance1.example.com']; no filtering
    by instance status is applied

  """
  known_instances = self._config_data.instances
  return known_instances.keys()
def ExpandInstanceName(self, short_name):
  """Try to expand an incomplete instance name.

  Args:
    short_name: the possibly abbreviated instance name

  Returns: the result of utils.MatchNameComponent for short_name
    against the configured instance names

  """
  candidates = self._config_data.instances.keys()
  return utils.MatchNameComponent(short_name, candidates)
def GetInstanceInfo(self, instance_name):
  """Return information about an instance.

  It takes the information from the configuration file. Other
  information about an instance is taken from the live systems.

  Args:
    instance_name: name of the instance, ex instance1.example.com

  Returns: the instance object, or None if the name is not known

  """
  if instance_name not in self._config_data.instances:
    return None

  return self._config_data.instances[instance_name]
def AddNode(self, node):
  """Register a node in the configuration.

  Args:
    node: the node object; it is stored under node.name

  """
  known_nodes = self._config_data.nodes
  known_nodes[node.name] = node
def RemoveNode(self, node_name):
  """Drop a node from the configuration.

  Args:
    node_name: name of the node to remove

  Raises: errors.ConfigurationError if the node is not known

  """
  known_nodes = self._config_data.nodes
  if node_name in known_nodes:
    del known_nodes[node_name]
  else:
    raise errors.ConfigurationError("Unknown node '%s'" % node_name)
def ExpandNodeName(self, short_name):
  """Try to expand an incomplete node name.

  Args:
    short_name: the possibly abbreviated node name

  Returns: the result of utils.MatchNameComponent for short_name
    against the configured node names

  """
  candidates = self._config_data.nodes.keys()
  return utils.MatchNameComponent(short_name, candidates)
def GetNodeInfo(self, node_name):
  """Get the configuration of a node, as stored in the config.

  Args:
    node_name: name of the node

  Returns: the node object, or None if the name is not known

  """
  if node_name not in self._config_data.nodes:
    return None

  return self._config_data.nodes[node_name]
def GetNodeList(self):
  """Return the names of all nodes in the configuration.

  Returns: the keys of the node dictionary

  """
  known_nodes = self._config_data.nodes
  return known_nodes.keys()
def DumpConfig(self):
  """Return the whole cluster configuration.

  Returns: the configuration data object itself, not a copy

  """
  whole_config = self._config_data
  return whole_config
def _BumpSerialNo(self):
  """Increase the serial number of the cluster configuration by one.

  """
  cluster = self._config_data.cluster
  cluster.serial_no = cluster.serial_no + 1
460 def _OpenConfig(self):
461 """Read the config data from disk.
463 In case we already have configuration data and the config file has
464 the same mtime as when we read it, we skip the parsing of the
465 file, since de-serialisation could be slow.
469 st = os.stat(self._cfg_file)
471 raise errors.ConfigurationError("Can't stat config file: %s" % err)
472 if (self._config_data is not None and
473 self._config_time is not None and
474 self._config_time == st.st_mtime and
475 self._config_size == st.st_size and
476 self._config_inode == st.st_ino):
477 # data is current, so skip loading of config file
479 f = open(self._cfg_file, 'r')
482 data = objects.ConfigData.Load(f)
483 except Exception, err:
484 raise errors.ConfigurationError(err)
487 if (not hasattr(data, 'cluster') or
488 not hasattr(data.cluster, 'config_version')):
489 raise errors.ConfigurationError("Incomplete configuration"
490 " (missing cluster.config_version)")
491 if data.cluster.config_version != constants.CONFIG_VERSION:
492 raise errors.ConfigurationError("Cluster configuration version"
493 " mismatch, got %s instead of %s" %
494 (data.cluster.config_version,
495 constants.CONFIG_VERSION))
496 self._config_data = data
497 self._config_time = st.st_mtime
498 self._config_size = st.st_size
499 self._config_inode = st.st_ino
501 def _ReleaseLock(self):
def _DistributeConfig(self):
  """Distribute the configuration to the other nodes.

  Currently, this only copies the configuration file. In the future,
  it could be used to encapsulate the 2/3-phase update mechanism.

  The file is uploaded to every configured node except the one whose
  name equals the hostname computed in the constructor; failed copies
  are logged.

  """
  nodelist = self.GetNodeList()
  myhostname = self._my_hostname

  tgt_list = []
  for node in nodelist:
    nodeinfo = self.GetNodeInfo(node)
    if nodeinfo.name == myhostname:
      # no need to copy the file to ourselves
      continue
    tgt_list.append(node)

  result = rpc.call_upload_file(tgt_list, self._cfg_file)
  for node in tgt_list:
    # NOTE(review): assumes call_upload_file returns a mapping from node
    # name to a success flag -- confirm against the rpc module
    if not result[node]:
      logger.Error("copy of file %s to node %s failed" %
                   (self._cfg_file, node))
533 def _WriteConfig(self, destination=None):
534 """Write the configuration data to persistent storage.
537 if destination is None:
538 destination = self._cfg_file
540 dir_name, file_name = os.path.split(destination)
541 fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
542 f = os.fdopen(fd, 'w')
544 self._config_data.Dump(f)
548 # we don't need to do os.close(fd) as f.close() did it
549 os.rename(name, destination)
550 self.write_count += 1
551 # re-set our cache as not to re-read the config file
553 st = os.stat(destination)
555 raise errors.ConfigurationError("Can't stat config file: %s" % err)
556 self._config_time = st.st_mtime
557 self._config_size = st.st_size
558 self._config_inode = st.st_ino
559 # and redistribute the config file
560 self._DistributeConfig()
def InitConfig(self, node, primary_ip, secondary_ip,
               hostkeypub, mac_prefix, vg_name, def_bridge):
  """Create the initial cluster configuration.

  The new configuration contains the given node as its only node.

  Args:
    node: the nodename of the initial node
    primary_ip: the IP address of the current host
    secondary_ip: the secondary IP of the current host; when None,
      primary_ip is used instead
    hostkeypub: the public hostkey of this host
    mac_prefix: stored as the cluster's mac_prefix
    vg_name: stored as the cluster's volume_group_name
    def_bridge: stored as the cluster's default_bridge

  """
  if secondary_ip is None:
    secondary_ip = primary_ip
  first_node = objects.Node(name=node, primary_ip=primary_ip,
                            secondary_ip=secondary_ip)

  # highest_used_port starts just below the first DRBD port
  cluster = objects.Cluster(config_version=constants.CONFIG_VERSION,
                            rsahostkeypub=hostkeypub,
                            highest_used_port=constants.FIRST_DRBD_PORT - 1,
                            mac_prefix=mac_prefix,
                            volume_group_name=vg_name,
                            default_bridge=def_bridge,
                            tcpudp_port_pool=set())

  self._config_data = objects.ConfigData(nodes={node: first_node},
                                         cluster=cluster)
596 """Return the volume group name.
601 return self._config_data.cluster.volume_group_name
def GetDefBridge(self):
  """Return the default bridge stored in the cluster config.

  Returns: the value of the cluster's default_bridge attribute

  """
  cluster = self._config_data.cluster
  return cluster.default_bridge
def GetMACPrefix(self):
  """Return the MAC prefix stored in the cluster config.

  Returns: the value of the cluster's mac_prefix attribute

  """
  cluster = self._config_data.cluster
  return cluster.mac_prefix
def GetClusterInfo(self):
  """Return information about the cluster.

  Returns: the cluster object of the configuration itself, not a copy

  """
  cluster = self._config_data.cluster
  return cluster
def Update(self, target):
  """Notify function to be called after updates.

  This function must be called when an object (as returned by
  GetInstanceInfo, GetNodeInfo, GetClusterInfo) has been updated and
  the caller wants the modifications saved to the backing store. Note
  that all modified objects will be saved, but the target argument
  is the one the caller wants to ensure that it's saved.

  Args:
    target: a cluster, node or instance object that is part of the
      currently loaded configuration

  Raises:
    errors.ProgrammerError: if no configuration has been read yet or
      target has an unsupported type
    errors.ConfigurationError: if target is not part of the currently
      loaded configuration

  """
  if self._config_data is None:
    raise errors.ProgrammerError("Configuration file not read,"
                                 " cannot save.")
  # make sure the object still belongs to the configuration we hold
  if isinstance(target, objects.Cluster):
    test = target == self._config_data.cluster
  elif isinstance(target, objects.Node):
    test = target in self._config_data.nodes.values()
  elif isinstance(target, objects.Instance):
    test = target in self._config_data.instances.values()
  else:
    raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                 " ConfigWriter.Update" % type(target))
  if not test:
    raise errors.ConfigurationError("Configuration updated since object"
                                    " has been read or unknown object")
  # save the modifications to the backing store
  self._WriteConfig()