4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
29 Currently, the data storage format is JSON. YAML was slow and consuming too
39 from ganeti import errors
40 from ganeti import logger
41 from ganeti import utils
42 from ganeti import constants
43 from ganeti import rpc
44 from ganeti import objects
45 from ganeti import serializer
49 """The interface to the cluster configuration.
def __init__(self, cfg_file=None, offline=False):
    """Create a configuration handle; no data is read from disk yet.

    Args:
      cfg_file: path of the configuration file; when None,
        constants.CLUSTER_CONF_FILE is used
      offline: stored as self._offline for later use

    """
    # incremented by _WriteConfig after every successful write
    self.write_count = 0
    # in-memory configuration plus the stat() fingerprint of the file it
    # was loaded from (used by _OpenConfig to skip needless re-parsing)
    self._config_data = None
    self._config_time = None
    self._config_size = None
    self._config_inode = None
    self._offline = offline
    if cfg_file is None:
        self._cfg_file = constants.CLUSTER_CONF_FILE
    else:
        self._cfg_file = cfg_file
    self._temporary_ids = set()
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = utils.HostInfo().name
70 # this method needs to be static, so that we can call it on the class
73 """Check if the cluster is configured.
76 return os.path.exists(constants.CLUSTER_CONF_FILE)
def GenerateMAC(self):
    """Generate a MAC for an instance.

    The address is the cluster MAC prefix followed by three random
    bytes; candidates already used by a configured NIC are rejected.

    Returns: the new MAC address as a string
    Raises: errors.ConfigurationError if no unused MAC could be found

    """
    self._OpenConfig()
    self._ReleaseLock()
    prefix = self._config_data.cluster.mac_prefix
    # a set makes every duplicate check O(1)
    used_macs = set(self._AllMACs())
    # NOTE(review): the retry budget (64) is assumed -- confirm
    for _ in range(64):
        byte1 = random.randrange(0, 256)
        byte2 = random.randrange(0, 256)
        byte3 = random.randrange(0, 256)
        mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
        if mac not in used_macs:
            return mac
    raise errors.ConfigurationError("Can't generate unique MAC")
def IsMacInUse(self, mac):
    """Predicate: check if the specified MAC is in use in the Ganeti cluster.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    Args:
      mac: the MAC address to look up
    Returns: True if a configured NIC has this MAC, False otherwise

    """
    self._OpenConfig()
    self._ReleaseLock()
    return mac in self._AllMACs()
def _ComputeAllLVs(self):
    """Compute the list of all LVs.

    Returns: a set with every LV name reported by MapLVsByNode() of any
      configured instance, regardless of node

    """
    self._OpenConfig()
    self._ReleaseLock()
    lvnames = set()
    for instance in self._config_data.instances.values():
        # MapLVsByNode() maps node -> list of LV names
        for lv_list in instance.MapLVsByNode().values():
            lvnames.update(lv_list)
    return lvnames
def GenerateUniqueID(self, exceptions=None):
    """Generate an unique disk name.

    This checks the current node, instances and disk names for
    duplicates, as well as the ids handed out earlier by this object.

    Args:
      - exceptions: a list with some other names which should be checked
                    for uniqueness (used for example when you want to get
                    more than one id at one time without adding each one in
                    turn to the config file

    Returns: the unique id as a string
    Raises: errors.ConfigurationError if no unused id could be generated

    """
    existing = set()
    existing.update(self._temporary_ids)
    existing.update(self._ComputeAllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    if exceptions is not None:
        existing.update(exceptions)
    unique_id = None
    # explicitly bounded, so persistent collisions cannot loop forever
    # NOTE(review): the retry budget (64) is assumed -- confirm
    for _ in range(64):
        unique_id = utils.NewUUID()
        if unique_id is not None and unique_id not in existing:
            break
    else:
        raise errors.ConfigurationError("Not able generate an unique ID"
                                        " (last tried ID: %s)" % unique_id)
    # remember the id so later calls do not hand it out again
    self._temporary_ids.add(unique_id)
    return unique_id
def _AllMACs(self):
    """Return all MACs present in the config.

    Returns: a list with the MAC of every NIC of every configured
      instance (duplicates, if any, are kept)

    """
    self._OpenConfig()
    self._ReleaseLock()

    return [nic.mac
            for instance in self._config_data.instances.values()
            for nic in instance.nics]
def VerifyConfig(self):
    """Stub verify function.

    Checks that every instance references known primary/secondary nodes
    and that no MAC address is used by more than one NIC.

    Returns: a list of human-readable problem descriptions (empty when
      nothing was found)

    """
    self._OpenConfig()
    self._ReleaseLock()

    result = []
    # set membership keeps the duplicate check linear overall
    seen_macs = set()
    data = self._config_data
    for instance_name in data.instances:
        instance = data.instances[instance_name]
        if instance.primary_node not in data.nodes:
            result.append("instance '%s' has invalid primary node '%s'" %
                          (instance_name, instance.primary_node))
        for snode in instance.secondary_nodes:
            if snode not in data.nodes:
                result.append("instance '%s' has invalid secondary node '%s'" %
                              (instance_name, snode))
        for idx, nic in enumerate(instance.nics):
            if nic.mac in seen_macs:
                result.append("instance '%s' has NIC %d mac %s duplicate" %
                              (instance_name, idx, nic.mac))
            else:
                seen_macs.add(nic.mac)
    return result
def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote node.

    Args:
      disk: the disk object whose physical_id is (re)computed in place
      node_name: name of the node the physical id is computed for
    Raises: errors.ConfigurationError if a DRBD disk does not involve
      node_name, or one of its two nodes is not in the configuration

    """
    if disk.children:
        for child in disk.children:
            self.SetDiskID(child, node_name)

    # nothing to derive from; keep the physical id that is already set
    if disk.logical_id is None and disk.physical_id is not None:
        return
    if disk.dev_type in constants.LDS_DRBD:
        pnode, snode, port = disk.logical_id
        if node_name not in (pnode, snode):
            raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                            node_name)
        pnode_info = self.GetNodeInfo(pnode)
        snode_info = self.GetNodeInfo(snode)
        if pnode_info is None or snode_info is None:
            raise errors.ConfigurationError("Can't find primary or secondary node"
                                            " for %s" % str(disk))
        # physical id is (local ip, port, remote ip, port) as seen from
        # node_name
        if pnode == node_name:
            disk.physical_id = (pnode_info.secondary_ip, port,
                                snode_info.secondary_ip, port)
        else: # it must be secondary, we tested above
            disk.physical_id = (snode_info.secondary_ip, port,
                                pnode_info.secondary_ip, port)
    else:
        disk.physical_id = disk.logical_id
    return
def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    Args:
      port: the port number (must be an int)
    Raises: errors.ProgrammerError if port is not an int

    """
    if not isinstance(port, int):
        raise errors.ProgrammerError("Invalid type passed for port")

    self._OpenConfig()
    self._config_data.cluster.tcpudp_port_pool.add(port)
    self._WriteConfig()
def GetPortList(self):
    """Returns a copy of the current port list.

    Returns: a copy of the cluster's tcpudp_port_pool, so callers can
      modify it without touching the configuration

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.tcpudp_port_pool.copy()
def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    Returns: the allocated port number
    Raises: errors.ConfigurationError if the default range is exhausted

    """
    self._OpenConfig()

    cluster = self._config_data.cluster
    # If there are TCP/IP ports configured, we use them first.
    if cluster.tcpudp_port_pool:
        port = cluster.tcpudp_port_pool.pop()
    else:
        port = cluster.highest_used_port + 1
        if port >= constants.LAST_DRBD_PORT:
            raise errors.ConfigurationError("The highest used port is greater"
                                            " than %s. Aborting." %
                                            constants.LAST_DRBD_PORT)
        cluster.highest_used_port = port

    self._WriteConfig()
    return port
def GetHostKey(self):
    """Return the rsa hostkey from the config.

    Returns: the cluster's rsahostkeypub value

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.rsahostkeypub
def AddInstance(self, instance):
    """Add an instance to the config.

    This should be used after creating a new instance.

    Args:
      instance: the instance object
    Raises: errors.ProgrammerError if instance is not an objects.Instance

    """
    if not isinstance(instance, objects.Instance):
        raise errors.ProgrammerError("Invalid type passed to AddInstance")

    # diskless instances have no LVs worth logging
    if instance.disk_template != constants.DT_DISKLESS:
        all_lvs = instance.MapLVsByNode()
        logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, all_lvs))

    self._OpenConfig()
    self._config_data.instances[instance.name] = instance
    self._WriteConfig()
def _SetInstanceStatus(self, instance_name, status):
    """Set the instance's status to a given value.

    Args:
      instance_name: name of the instance to update
      status: the new status, either "up" or "down"
    Raises:
      errors.ProgrammerError: status is not one of the allowed values
      errors.ConfigurationError: the instance is not in the configuration

    """
    if status not in ("up", "down"):
        raise errors.ProgrammerError("Invalid status '%s' passed to"
                                     " ConfigWriter._SetInstanceStatus()" %
                                     status)

    self._OpenConfig()
    if instance_name not in self._config_data.instances:
        raise errors.ConfigurationError("Unknown instance '%s'" %
                                        instance_name)
    instance = self._config_data.instances[instance_name]
    if instance.status != status:
        instance.status = status
        # NOTE(review): assumes the config is only rewritten when the
        # status actually changed -- confirm
        self._WriteConfig()
def MarkInstanceUp(self, instance_name):
    """Mark the instance status to up in the config.

    Args:
      instance_name: name of the instance to mark

    """
    self._SetInstanceStatus(instance_name, "up")
def RemoveInstance(self, instance_name):
    """Remove the instance from the configuration.

    Args:
      instance_name: name of the instance to drop
    Raises: errors.ConfigurationError if the instance is not known

    """
    self._OpenConfig()

    if instance_name not in self._config_data.instances:
        raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
    del self._config_data.instances[instance_name]
    self._WriteConfig()
def RenameInstance(self, old_name, new_name):
    """Rename an instance.

    This needs to be done in ConfigWriter and not by RemoveInstance
    combined with AddInstance as only we can guarantee an atomic
    rename.

    Args:
      old_name: current name of the instance
      new_name: name the instance is stored under afterwards
    Raises: errors.ConfigurationError if old_name is not known

    """
    self._OpenConfig()
    if old_name not in self._config_data.instances:
        raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
    inst = self._config_data.instances[old_name]
    del self._config_data.instances[old_name]
    inst.name = new_name

    for disk in inst.disks:
        if disk.dev_type == constants.LD_FILE:
            # rename the file paths in logical and physical id
            file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
            # NOTE(review): the trailing path components were not visible
            # in the reviewed text; <storage dir>/<instance name>/<iv_name>
            # is assumed -- confirm
            disk.physical_id = disk.logical_id = (disk.logical_id[0],
                                                  os.path.join(file_storage_dir,
                                                               inst.name,
                                                               disk.iv_name))

    self._config_data.instances[inst.name] = inst
    self._WriteConfig()
def MarkInstanceDown(self, instance_name):
    """Mark the status of an instance to down in the configuration.

    Args:
      instance_name: name of the instance to mark

    """
    self._SetInstanceStatus(instance_name, "down")
def GetInstanceList(self):
    """Get the list of instances.

    Returns:
      array of instances, ex. ['instance2.example.com','instance1.example.com']
      these contains all the instances, also the ones in Admin_down state

    """
    self._OpenConfig()
    self._ReleaseLock()

    return self._config_data.instances.keys()
def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    Args:
      short_name: the (possibly abbreviated) instance name
    Returns: whatever utils.MatchNameComponent returns when matching
      short_name against the configured instance names

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    self._config_data.instances.keys())
def GetInstanceInfo(self, instance_name):
    """Returns informations about an instance.

    It takes the information from the configuration file. Other informations of
    an instance are taken from the live systems.

    Args:
      instance: name of the instance, ex instance1.example.com

    Returns: the instance object stored in the configuration, or None if
      the name is not known

    """
    self._OpenConfig()
    self._ReleaseLock()

    if instance_name not in self._config_data.instances:
        return None

    return self._config_data.instances[instance_name]
def AddNode(self, node):
    """Add a node to the configuration.

    Args:
      node: an object.Node instance; it is stored under node.name,
        replacing any existing entry with that name

    """
    self._OpenConfig()
    self._config_data.nodes[node.name] = node
    self._WriteConfig()
def RemoveNode(self, node_name):
    """Remove a node from the configuration.

    Args:
      node_name: name of the node to drop
    Raises: errors.ConfigurationError if the node is not known

    """
    self._OpenConfig()
    if node_name not in self._config_data.nodes:
        raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del self._config_data.nodes[node_name]
    self._WriteConfig()
def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    Args:
      short_name: the (possibly abbreviated) node name
    Returns: whatever utils.MatchNameComponent returns when matching
      short_name against the configured node names

    """
    self._OpenConfig()
    self._ReleaseLock()

    return utils.MatchNameComponent(short_name,
                                    self._config_data.nodes.keys())
def GetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    Args:
      node_name: name of the node to look up

    Returns: the node object, or None if the name is not known

    """
    self._OpenConfig()
    self._ReleaseLock()

    if node_name not in self._config_data.nodes:
        return None

    return self._config_data.nodes[node_name]
def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    Returns: the names (keys) of all configured nodes

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.nodes.keys()
def DumpConfig(self):
    """Return the entire configuration of the cluster.

    Returns: the in-memory configuration object itself (not a copy)

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data
def _BumpSerialNo(self):
    """Bump up the serial number of the config.

    Increments cluster.serial_no of the in-memory configuration by one;
    nothing is written to disk here.

    """
    self._config_data.cluster.serial_no += 1
489 def _OpenConfig(self):
490 """Read the config data from disk.
492 In case we already have configuration data and the config file has
493 the same mtime as when we read it, we skip the parsing of the
494 file, since de-serialisation could be slow.
498 st = os.stat(self._cfg_file)
500 raise errors.ConfigurationError("Can't stat config file: %s" % err)
501 if (self._config_data is not None and
502 self._config_time is not None and
503 self._config_time == st.st_mtime and
504 self._config_size == st.st_size and
505 self._config_inode == st.st_ino):
506 # data is current, so skip loading of config file
508 f = open(self._cfg_file, 'r')
511 data = objects.ConfigData.FromDict(serializer.Load(f.read()))
512 except Exception, err:
513 raise errors.ConfigurationError(err)
516 if (not hasattr(data, 'cluster') or
517 not hasattr(data.cluster, 'config_version')):
518 raise errors.ConfigurationError("Incomplete configuration"
519 " (missing cluster.config_version)")
520 if data.cluster.config_version != constants.CONFIG_VERSION:
521 raise errors.ConfigurationError("Cluster configuration version"
522 " mismatch, got %s instead of %s" %
523 (data.cluster.config_version,
524 constants.CONFIG_VERSION))
525 self._config_data = data
526 self._config_time = st.st_mtime
527 self._config_size = st.st_size
528 self._config_inode = st.st_ino
530 def _ReleaseLock(self):
def _DistributeConfig(self):
    """Distribute the configuration to the other nodes.

    Currently, this only copies the configuration file. In the future,
    it could be used to encapsulate the 2/3-phase update mechanism.

    Returns: True if nothing had to be copied or every copy succeeded,
      False if at least one node reported a failure

    NOTE(review): the offline short-circuit, the per-node result check
    and the boolean return value are assumed (those statements were not
    visible in the reviewed text) -- confirm.

    """
    if self._offline:
        return True

    nodelist = list(self.GetNodeList())
    myhostname = self._my_hostname

    # we already have the file locally; tolerate our own name missing
    # from the node list instead of failing with ValueError
    if myhostname in nodelist:
        nodelist.remove(myhostname)

    result = rpc.call_upload_file(nodelist, self._cfg_file)
    all_ok = True
    for node in nodelist:
        if not result[node]:
            logger.Error("copy of file %s to node %s failed" %
                         (self._cfg_file, node))
            all_ok = False
    return all_ok
557 def _WriteConfig(self, destination=None):
558 """Write the configuration data to persistent storage.
561 if destination is None:
562 destination = self._cfg_file
564 txt = serializer.Dump(self._config_data.ToDict())
565 dir_name, file_name = os.path.split(destination)
566 fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
567 f = os.fdopen(fd, 'w')
573 # we don't need to do os.close(fd) as f.close() did it
574 os.rename(name, destination)
575 self.write_count += 1
576 # re-set our cache as not to re-read the config file
578 st = os.stat(destination)
580 raise errors.ConfigurationError("Can't stat config file: %s" % err)
581 self._config_time = st.st_mtime
582 self._config_size = st.st_size
583 self._config_inode = st.st_ino
584 # and redistribute the config file
585 self._DistributeConfig()
def InitConfig(self, node, primary_ip, secondary_ip,
               hostkeypub, mac_prefix, vg_name, def_bridge):
    """Create the initial cluster configuration.

    It will contain the current node, which will also be the master
    node, and no instances or operating systems.

    Args:
      node: the nodename of the initial node
      primary_ip: the IP address of the current host
      secondary_ip: the secondary IP of the current host or None
      hostkeypub: the public hostkey of this host
      mac_prefix: stored as the cluster's mac_prefix
      vg_name: stored as the cluster's volume_group_name
      def_bridge: stored as the cluster's default_bridge

    """
    # the first allocated port will then be FIRST_DRBD_PORT itself
    hu_port = constants.FIRST_DRBD_PORT - 1
    # NOTE(review): serial_no=1 and instances={} are assumed (those
    # keyword arguments were not visible in the reviewed text) -- confirm
    globalconfig = objects.Cluster(config_version=constants.CONFIG_VERSION,
                                   serial_no=1,
                                   rsahostkeypub=hostkeypub,
                                   highest_used_port=hu_port,
                                   mac_prefix=mac_prefix,
                                   volume_group_name=vg_name,
                                   default_bridge=def_bridge,
                                   tcpudp_port_pool=set())
    if secondary_ip is None:
        secondary_ip = primary_ip
    nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
                              secondary_ip=secondary_ip)

    self._config_data = objects.ConfigData(nodes={node: nodeconfig},
                                           instances={},
                                           cluster=globalconfig)
    self._WriteConfig()
621 """Return the volume group name.
626 return self._config_data.cluster.volume_group_name
def SetVGName(self, vg_name):
    """Set the volume group name.

    Args:
      vg_name: the new value of the cluster's volume_group_name

    """
    self._OpenConfig()
    self._config_data.cluster.volume_group_name = vg_name
    self._WriteConfig()
def GetDefBridge(self):
    """Return the default bridge.

    Returns: the cluster's default_bridge value

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.default_bridge
def GetMACPrefix(self):
    """Return the mac prefix.

    Returns: the cluster's mac_prefix value

    """
    self._OpenConfig()
    self._ReleaseLock()
    return self._config_data.cluster.mac_prefix
def GetClusterInfo(self):
    """Returns informations about the cluster

    Returns: the cluster object of the in-memory configuration (the
      object itself, not a copy)

    """
    self._OpenConfig()
    self._ReleaseLock()

    return self._config_data.cluster
def Update(self, target):
    """Notify function to be called after updates.

    This function must be called when an object (as returned by
    GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
    caller wants the modifications saved to the backing store. Note
    that all modified objects will be saved, but the target argument
    is the one the caller wants to ensure that it's saved.

    Args:
      target: an objects.Cluster, objects.Node or objects.Instance that
        is part of the currently loaded configuration
    Raises:
      errors.ProgrammerError: no configuration is loaded, or target has
        an unsupported type
      errors.ConfigurationError: target is not part of the loaded
        configuration

    """
    if self._config_data is None:
        # NOTE(review): the tail of this message was not visible in the
        # reviewed text; " cannot save." is assumed -- confirm
        raise errors.ProgrammerError("Configuration file not read,"
                                     " cannot save.")
    if isinstance(target, objects.Cluster):
        test = target == self._config_data.cluster
    elif isinstance(target, objects.Node):
        test = target in self._config_data.nodes.values()
    elif isinstance(target, objects.Instance):
        test = target in self._config_data.instances.values()
    else:
        raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                     " ConfigWriter.Update" % type(target))
    if not test:
        raise errors.ConfigurationError("Configuration updated since object"
                                        " has been read or unknown object")
    self._WriteConfig()