4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the Ganeti cluster configuration.
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
29 Currently, the data storage format is JSON. YAML was slow and consuming too
39 from ganeti import errors
40 from ganeti import logger
41 from ganeti import utils
42 from ganeti import constants
43 from ganeti import rpc
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
50 sstore = ssconf.SimpleStore()
52 if sstore.GetConfigVersion() != constants.CONFIG_VERSION:
53 raise errors.ConfigurationError("Cluster configuration version"
54 " mismatch, got %s instead of %s" %
55 (sstore.GetConfigVersion(),
56 constants.CONFIG_VERSION))
60 """The interface to the cluster configuration.
# Constructor: clears the cached configuration together with the
# (mtime, size, inode) fingerprint that _OpenConfig compares against os.stat(),
# stores the offline flag, selects the config file path and resolves this
# host's name once.
63 def __init__(self, cfg_file=None, offline=False):
# NOTE(review): original line 64 is missing from this listing; self.write_count
# (incremented in _WriteConfig) is presumably initialised there -- confirm.
65 self._config_data = None
66 self._config_time = None
67 self._config_size = None
68 self._config_inode = None
69 self._offline = offline
# NOTE(review): original lines 70 and 72 are missing; the two assignments
# below look like the branches of an "if cfg_file is None / else" -- confirm.
71 self._cfg_file = constants.CLUSTER_CONF_FILE
73 self._cfg_file = cfg_file
# IDs already handed out by GenerateUniqueID, consulted there so that the same
# ID is not issued twice.
74 self._temporary_ids = set()
75 # Note: in order to prevent errors when resolving our name in
76 # _DistributeConfig, we compute it here once and reuse it; it's
77 # better to raise an error before starting to modify the config
78 # file than after it was modified
79 self._my_hostname = utils.HostInfo().name
81 # this method needs to be static, so that we can call it on the class
84 """Check if the cluster is configured.
87 return os.path.exists(constants.CLUSTER_CONF_FILE)
# Build a MAC address of the form "<cluster mac_prefix>:xx:xx:xx" from three
# random bytes and test it against the MACs returned by self._AllMACs().
# Raises errors.ConfigurationError("Can't generate unique MAC").
# NOTE(review): original lines 95-96, 99-100, 106-108 and 110 are missing from
# this listing, so the retry loop, its bound and the return statement are not
# visible -- confirm them against the full file before changing this method.
89 def GenerateMAC(self):
90 """Generate a MAC for an instance.
92 This should check the current instances for duplicates.
97 prefix = self._config_data.cluster.mac_prefix
98 all_macs = self._AllMACs()
# Each of the three trailing octets is drawn from 0..255 inclusive.
101 byte1 = random.randrange(0, 256)
102 byte2 = random.randrange(0, 256)
103 byte3 = random.randrange(0, 256)
104 mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
# A candidate is acceptable only when no configured NIC already uses it.
105 if mac not in all_macs:
109 raise errors.ConfigurationError("Can't generate unique MAC")
# Returns True when `mac` is among the values returned by self._AllMACs(),
# False otherwise. The comparison is a plain membership test on the stored
# strings, so no case or format normalisation happens here.
# NOTE(review): original lines 119-120 are missing from this listing; whatever
# runs before the lookup is not visible -- confirm against the full file.
112 def IsMacInUse(self, mac):
113 """Predicate: check if the specified MAC is in use in the Ganeti cluster.
115 This only checks instances managed by this cluster, it does not
116 check for potential collisions elsewhere.
121 all_macs = self._AllMACs()
122 return mac in all_macs
# Walks every configured instance, asks it for its logical volumes grouped by
# node (instance.MapLVsByNode()) and merges all per-node lists into `lvnames`.
# NOTE(review): the creation of `lvnames` (original lines 126-130) and the
# return statement (line 135) are missing from this listing; the use of
# .update() suggests a set -- confirm.
124 def _ComputeAllLVs(self):
125 """Compute the list of all LVs.
131 for instance in self._config_data.instances.values():
132 node_data = instance.MapLVsByNode()
# Only the LV names matter here; the node keys of the mapping are ignored.
133 for lv_list in node_data.values():
134 lvnames.update(lv_list)
# Draws an ID from utils.NewUUID() that is not already taken by a previously
# issued temporary ID, an LV name, an instance name, a node name or one of the
# caller-supplied `exceptions`, and records the accepted ID in
# self._temporary_ids.
# Raises errors.ConfigurationError when no acceptable ID is produced.
# NOTE(review): original lines 152, 159-160, 163-164 and 168 are missing from
# this listing, so the creation of `existing`, the loop header and the return
# are not visible. Only two lines separate the acceptance test from the raise,
# which leaves no obvious room for a retry-counter decrement -- verify that
# the loop is actually bounded.
137 def GenerateUniqueID(self, exceptions=None):
138 """Generate an unique disk name.
140 This checks the current node, instances and disk names for
144 - exceptions: a list with some other names which should be checked
145 for uniqueness (used for example when you want to get
146 more than one id at one time without adding each one in
147 turn to the config file
149 Returns: the unique id as a string
# Everything an ID must not collide with is gathered into one collection.
153 existing.update(self._temporary_ids)
154 existing.update(self._ComputeAllLVs())
155 existing.update(self._config_data.instances.keys())
156 existing.update(self._config_data.nodes.keys())
157 if exceptions is not None:
158 existing.update(exceptions)
161 unique_id = utils.NewUUID()
# A None result from NewUUID() is treated as a failed attempt.
162 if unique_id not in existing and unique_id is not None:
# NOTE(review): the message below reads "Not able generate" and never closes
# the parenthesis opened before "last tried ID"; it is a runtime string and is
# left untouched here.
165 raise errors.ConfigurationError("Not able generate an unique ID"
166 " (last tried ID: %s" % unique_id)
# Remember the ID so a later call cannot return it again.
167 self._temporary_ids.add(unique_id)
171 """Return all MACs present in the config.
178 for instance in self._config_data.instances.values():
179 for nic in instance.nics:
180 result.append(nic.mac)
# Consistency check over the loaded configuration. For every instance it
# appends a human-readable message to `result` when
#   - the primary node is not a configured node,
#   - a secondary node is not a configured node,
#   - one of its NIC MACs was already seen on an earlier NIC.
# NOTE(review): the creation of `result` and `seen_macs` (original lines
# 186-191), line 206 and the return statement (208-209) are missing from this
# listing -- confirm them against the full file.
184 def VerifyConfig(self):
185 """Stub verify function.
192 data = self._config_data
193 for instance_name in data.instances:
194 instance = data.instances[instance_name]
195 if instance.primary_node not in data.nodes:
196 result.append("instance '%s' has invalid primary node '%s'" %
197 (instance_name, instance.primary_node))
198 for snode in instance.secondary_nodes:
199 if snode not in data.nodes:
200 result.append("instance '%s' has invalid secondary node '%s'" %
201 (instance_name, snode))
202 for idx, nic in enumerate(instance.nics):
# NOTE(review): seen_macs is filled with .append(), i.e. it is a list, so this
# membership test is linear in the number of NICs seen so far.
203 if nic.mac in seen_macs:
204 result.append("instance '%s' has NIC %d mac %s duplicate" %
205 (instance_name, idx, nic.mac))
207 seen_macs.append(nic.mac)
# Fills in disk.physical_id for `disk` and, recursively, for its children, as
# seen from node `node_name`.
#   - DRBD devices (dev_type in constants.LDS_DRBD): logical_id is unpacked as
#     (primary node, secondary node, port) and physical_id becomes
#     (local secondary_ip, port, remote secondary_ip, port), where "local" is
#     whichever of the two nodes equals node_name.
#   - otherwise physical_id is set to logical_id.
# Raises errors.ConfigurationError when node_name is neither DRBD peer or when
# one of the peers is not found by GetNodeInfo.
# NOTE(review): original lines 217-220, 225, 230 and 242 are missing from this
# listing (guard around the child loop, the statement under the first `if`,
# the argument of the first error message and, presumably, an `else:`) --
# confirm against the full file.
210 def SetDiskID(self, disk, node_name):
211 """Convert the unique ID to the ID needed on the target nodes.
213 This is used only for drbd, which needs ip/port configuration.
215 The routine descends down and updates its children also, because
216 this helps when only the top device is passed to the remote
221 for child in disk.children:
222 self.SetDiskID(child, node_name)
# Special case: no logical id but an existing physical id.
224 if disk.logical_id is None and disk.physical_id is not None:
226 if disk.dev_type in constants.LDS_DRBD:
227 pnode, snode, port = disk.logical_id
228 if node_name not in (pnode, snode):
229 raise errors.ConfigurationError("DRBD device not knowing node %s" %
231 pnode_info = self.GetNodeInfo(pnode)
232 snode_info = self.GetNodeInfo(snode)
233 if pnode_info is None or snode_info is None:
234 raise errors.ConfigurationError("Can't find primary or secondary node"
235 " for %s" % str(disk))
# The tuple is always ordered (own ip, port, peer ip, port).
236 if pnode == node_name:
237 disk.physical_id = (pnode_info.secondary_ip, port,
238 snode_info.secondary_ip, port)
239 else: # it must be secondary, we tested above
240 disk.physical_id = (snode_info.secondary_ip, port,
241 pnode_info.secondary_ip, port)
243 disk.physical_id = disk.logical_id
# Puts `port` into the cluster's tcpudp_port_pool so it can be handed out
# again by AllocatePort.
# Raises errors.ProgrammerError when `port` is not an int.
# NOTE(review): original lines 252-253 and 255 are missing from this listing;
# what runs around the pool update is not visible -- confirm.
246 def AddTcpUdpPort(self, port):
247 """Adds a new port to the available port pool.
250 if not isinstance(port, int):
251 raise errors.ProgrammerError("Invalid type passed for port")
# The pool supports .add(); InitConfig creates it as a set.
254 self._config_data.cluster.tcpudp_port_pool.add(port)
# Returns a copy (via .copy()) of the cluster's tcpudp_port_pool, so callers
# can modify the result without touching the stored pool.
# NOTE(review): original lines 259-262 are missing from this listing.
257 def GetPortList(self):
258 """Returns a copy of the current port list.
263 return self._config_data.cluster.tcpudp_port_pool.copy()
# Hands out a port: a port from the cluster's tcpudp_port_pool if the pool is
# non-empty, otherwise highest_used_port + 1, which is then stored back as the
# new highest_used_port.
# Raises errors.ConfigurationError when the computed port reaches
# constants.LAST_DRBD_PORT.
# NOTE(review): original lines 266-267, 270-274, 278 and 285-288 are missing
# from this listing (first docstring line, presumably an `else:`, and the
# tail of the method including the return) -- confirm against the full file.
265 def AllocatePort(self):
268 The port will be taken from the available port pool or from the
269 default port range (and in this case we increase
275 # If there are TCP/IP ports configured, we use them first.
276 if self._config_data.cluster.tcpudp_port_pool:
# pop() on a set removes and returns an arbitrary element, so the order in
# which pooled ports are reused is unspecified.
277 port = self._config_data.cluster.tcpudp_port_pool.pop()
279 port = self._config_data.cluster.highest_used_port + 1
# NOTE(review): `>=` also rejects LAST_DRBD_PORT itself, while the message
# says "greater than" -- confirm which of the two is intended.
280 if port >= constants.LAST_DRBD_PORT:
281 raise errors.ConfigurationError("The highest used port is greater"
282 " than %s. Aborting." %
283 constants.LAST_DRBD_PORT)
284 self._config_data.cluster.highest_used_port = port
# Returns the cluster's `rsahostkeypub` attribute as stored in the
# configuration (InitConfig fills it from its `hostkeypub` argument).
# NOTE(review): original lines 291-297 are missing from this listing.
289 def GetHostKey(self):
290 """Return the rsa hostkey from the config.
298 return self._config_data.cluster.rsahostkeypub
# Registers `instance` in the configuration under the key instance.name.
# Raises errors.ProgrammerError when `instance` is not an objects.Instance.
# No check for an existing entry with the same name is visible, so such an
# entry would be replaced by the dict assignment.
# NOTE(review): original lines 310, 314-315 and 317-318 are missing from this
# listing; what runs around the assignment is not visible -- confirm.
300 def AddInstance(self, instance):
301 """Add an instance to the config.
303 This should be used after creating a new instance.
306 instance: the instance object
308 if not isinstance(instance, objects.Instance):
309 raise errors.ProgrammerError("Invalid type passed to AddInstance")
# Diskless instances have no LVs to report; for all others the LV layout is
# logged for reference.
311 if instance.disk_template != constants.DT_DISKLESS:
312 all_lvs = instance.MapLVsByNode()
313 logger.Info("Instance '%s' DISK_LAYOUT: %s" % (instance.name, all_lvs))
316 self._config_data.instances[instance.name] = instance
# Sets instance.status of the named instance to `status`, touching the object
# only when the value actually changes.
# Raises errors.ProgrammerError for a status other than "up" or "down" and
# errors.ConfigurationError for an unknown instance name.
# NOTE(review): original lines 326-328, 331 and 335-336 are missing from this
# listing (the arguments of both error messages and whatever follows the
# status change) -- confirm against the full file.
319 def _SetInstanceStatus(self, instance_name, status):
320 """Set the instance's status to a given value.
323 if status not in ("up", "down"):
324 raise errors.ProgrammerError("Invalid status '%s' passed to"
325 " ConfigWriter._SetInstanceStatus()" %
329 if instance_name not in self._config_data.instances:
330 raise errors.ConfigurationError("Unknown instance '%s'" %
332 instance = self._config_data.instances[instance_name]
# Skip the update when the stored status already matches.
333 if instance.status != status:
334 instance.status = status
# Thin wrapper: delegates to _SetInstanceStatus(instance_name, "up"), which
# raises errors.ConfigurationError for an unknown instance name.
337 def MarkInstanceUp(self, instance_name):
338 """Mark the instance status to up in the config.
341 self._SetInstanceStatus(instance_name, "up")
# Deletes the entry `instance_name` from the configured instances.
# Raises errors.ConfigurationError when no such instance exists.
# NOTE(review): original lines 345-348 and 352-353 are missing from this
# listing; what runs before and after the deletion is not visible -- confirm.
343 def RemoveInstance(self, instance_name):
344 """Remove the instance from the configuration.
349 if instance_name not in self._config_data.instances:
350 raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
351 del self._config_data.instances[instance_name]
# Moves an instance entry from key `old_name` to the key given by inst.name
# and rewrites the paths of file-based disks.
# Raises errors.ConfigurationError when `old_name` is not a known instance.
# NOTE(review): original lines 367-368 and 375-377 are missing from this
# listing. The entry is re-inserted under inst.name, so inst.name is
# presumably set to new_name in the missing lines, and the remaining
# os.path.join() arguments are not visible -- confirm against the full file.
354 def RenameInstance(self, old_name, new_name):
355 """Rename an instance.
357 This needs to be done in ConfigWriter and not by RemoveInstance
358 combined with AddInstance as only we can guarantee an atomic
363 if old_name not in self._config_data.instances:
364 raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
365 inst = self._config_data.instances[old_name]
366 del self._config_data.instances[old_name]
369 for disk in inst.disks:
370 if disk.dev_type == constants.LD_FILE:
371 # rename the file paths in logical and physical id
# Two dirname() calls strip the last two components of the stored path
# (logical_id[1]), leaving the directory the new path is built under.
372 file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
# logical_id[0] is carried over unchanged; physical_id and logical_id end up
# as the same tuple.
373 disk.physical_id = disk.logical_id = (disk.logical_id[0],
374 os.path.join(file_storage_dir,
378 self._config_data.instances[inst.name] = inst
# Thin wrapper: delegates to _SetInstanceStatus(instance_name, "down"), which
# raises errors.ConfigurationError for an unknown instance name.
381 def MarkInstanceDown(self, instance_name):
382 """Mark the status of an instance to down in the configuration.
385 self._SetInstanceStatus(instance_name, "down")
# Returns the keys of the configured instances, i.e. the instance names; no
# filtering by status is applied.
# NOTE(review): original lines 389-390 and 393-397 are missing from this
# listing.
387 def GetInstanceList(self):
388 """Get the list of instances.
391 array of instances, ex. ['instance2.example.com','instance1.example.com']
392 these contains all the instances, also the ones in Admin_down state
398 return self._config_data.instances.keys()
# Passes `short_name` and the list of configured instance names to
# utils.MatchNameComponent and returns its result unchanged.
# NOTE(review): the matching rules and the value returned for "no match" are
# defined by utils.MatchNameComponent, which is not visible here.
400 def ExpandInstanceName(self, short_name):
401 """Attempt to expand an incomplete instance name.
407 return utils.MatchNameComponent(short_name,
408 self._config_data.instances.keys())
# Looks up `instance_name` among the configured instances and returns the
# stored object itself (not a copy).
# NOTE(review): original lines 427-428, i.e. the statement executed when the
# name is unknown, are missing from this listing -- presumably a
# `return None`; confirm against the full file.
410 def GetInstanceInfo(self, instance_name):
411 """Returns informations about an instance.
413 It takes the information from the configuration file. Other informations of
414 an instance are taken from the live systems.
417 instance: name of the instance, ex instance1.example.com
426 if instance_name not in self._config_data.instances:
429 return self._config_data.instances[instance_name]
# Stores `node` in the configuration under the key node.name. No type or
# duplicate check is visible, so an existing entry with the same name would
# be replaced by the dict assignment.
# NOTE(review): original lines 436-438 and 440-441 are missing from this
# listing; what runs around the assignment is not visible -- confirm.
431 def AddNode(self, node):
432 """Add a node to the configuration.
435 node: an object.Node instance
439 self._config_data.nodes[node.name] = node
# Deletes the entry `node_name` from the configured nodes.
# Raises errors.ConfigurationError when no such node exists.
# NOTE(review): original lines 444-446, 449 and 451-452 are missing from this
# listing; what runs before and after the deletion is not visible -- confirm.
442 def RemoveNode(self, node_name):
443 """Remove a node from the configuration.
447 if node_name not in self._config_data.nodes:
448 raise errors.ConfigurationError("Unknown node '%s'" % node_name)
450 del self._config_data.nodes[node_name]
# Passes `short_name` and the list of configured node names to
# utils.MatchNameComponent and returns its result unchanged.
# NOTE(review): the matching rules and the value returned for "no match" are
# defined by utils.MatchNameComponent, which is not visible here.
453 def ExpandNodeName(self, short_name):
454 """Attempt to expand an incomplete node name.
460 return utils.MatchNameComponent(short_name,
461 self._config_data.nodes.keys())
# Looks up `node_name` among the configured nodes and returns the stored
# object itself (not a copy).
# NOTE(review): the docstring names the argument "node" and calls it a tuple,
# while the parameter is `node_name` and is used as a dictionary key --
# confirm the intended type.
# NOTE(review): original lines 475-476, i.e. the statement executed when the
# name is unknown, are missing from this listing. SetDiskID tests the result
# against None, so this is presumably a `return None` -- confirm.
463 def GetNodeInfo(self, node_name):
464 """Get the configuration of a node, as stored in the config.
466 Args: node: nodename (tuple) of the node
468 Returns: the node object
474 if node_name not in self._config_data.nodes:
477 return self._config_data.nodes[node_name]
# Returns the keys of the configured nodes, i.e. the node names.
# _DistributeConfig calls .remove() on the result, so it relies on this being
# a fresh list (true for dict.keys() under Python 2).
# NOTE(review): original lines 481-484 are missing from this listing.
479 def GetNodeList(self):
480 """Return the list of nodes which are in the configuration.
485 return self._config_data.nodes.keys()
# Returns the in-memory configuration object itself, not a copy, so changes
# made by the caller are changes to the ConfigWriter's own data.
# NOTE(review): original lines 489-491 are missing from this listing.
487 def DumpConfig(self):
488 """Return the entire configuration of the cluster.
492 return self._config_data
# Increments cluster.serial_no by one in the in-memory configuration; nothing
# is written to disk here. InitConfig starts the counter at 1.
494 def _BumpSerialNo(self):
495 """Bump up the serial number of the config.
498 self._config_data.cluster.serial_no += 1
# Loads the configuration file into self._config_data unless the cached copy
# is still current, then remembers the file's mtime, size and inode.
# Raises errors.ConfigurationError when the file cannot be stat'ed, when
# loading or deserialising fails, or when the result lacks `cluster` or
# `cluster.rsahostkeypub`.
# NOTE(review): original lines 506-508, 510, 518-519, 521-522, 524-525 and
# 529-530 are missing from this listing (try/except headers, the early return
# of the cache-hit branch, the version check announced by the comment at 520,
# and the file close) -- confirm against the full file.
500 def _OpenConfig(self):
501 """Read the config data from disk.
503 In case we already have configuration data and the config file has
504 the same mtime as when we read it, we skip the parsing of the
505 file, since de-serialisation could be slow.
509 st = os.stat(self._cfg_file)
511 raise errors.ConfigurationError("Can't stat config file: %s" % err)
# The cache counts as current only when mtime, size AND inode all match the
# values recorded at the last load/write (the docstring mentions mtime only).
512 if (self._config_data is not None and
513 self._config_time is not None and
514 self._config_time == st.st_mtime and
515 self._config_size == st.st_size and
516 self._config_inode == st.st_ino):
517 # data is current, so skip loading of config file
520 # Make sure the configuration has the right version
523 f = open(self._cfg_file, 'r')
# The whole file is read and handed to serializer.Load; the resulting dict is
# turned into an objects.ConfigData.
526 data = objects.ConfigData.FromDict(serializer.Load(f.read()))
# NOTE(review): "except X, err" is Python 2-only syntax. Catching Exception
# wraps every load/parse failure into a ConfigurationError.
527 except Exception, err:
528 raise errors.ConfigurationError(err)
# Minimal sanity check of the loaded object before it replaces the cache.
531 if (not hasattr(data, 'cluster') or
532 not hasattr(data.cluster, 'rsahostkeypub')):
533 raise errors.ConfigurationError("Incomplete configuration"
534 " (missing cluster.rsahostkeypub)")
535 self._config_data = data
536 self._config_time = st.st_mtime
537 self._config_size = st.st_size
538 self._config_inode = st.st_ino
# NOTE(review): only the signature survives in this listing (original lines
# 541-543 are missing), so what this method does cannot be documented from
# here -- consult the full file.
540 def _ReleaseLock(self):
# Uploads self._cfg_file to the configured nodes through
# rpc.call_upload_file, with this host's own name removed from the target
# list, and logs an error per node for failed copies.
# NOTE(review): original lines 549-553, 556-557, 559-561, 564 and 567-569 are
# missing from this listing (anything before the node list is built, the
# guard around .remove(), the per-node success test and the return value) --
# confirm against the full file.
544 def _DistributeConfig(self):
545 """Distribute the configuration to the other nodes.
547 Currently, this only copies the configuration file. In the future,
548 it could be used to encapsulate the 2/3-phase update mechanism.
554 nodelist = self.GetNodeList()
# Cached in __init__ so that name resolution cannot fail at this point.
555 myhostname = self._my_hostname
# list.remove() raises ValueError when the name is absent; the handling of
# that case is in the lines missing from this listing.
558 nodelist.remove(myhostname)
562 result = rpc.call_upload_file(nodelist, self._cfg_file)
563 for node in nodelist:
565 logger.Error("copy of file %s to node %s failed" %
566 (self._cfg_file, node))
# Serialises the in-memory configuration and installs it at `destination`
# (default: self._cfg_file). The data goes to a temporary file created in the
# destination's directory, which is then renamed over the destination.
# Afterwards write_count is incremented, the cached mtime/size/inode are
# refreshed from the new file and _DistributeConfig() is called.
# Raises errors.ConfigurationError when the new file cannot be stat'ed.
# NOTE(review): original lines 572-573, 576, 581-585, 590 and 592 are missing
# from this listing (whatever precedes serialisation, the write/flush/close of
# the temp file and the try/except around os.stat) -- confirm.
570 def _WriteConfig(self, destination=None):
571 """Write the configuration data to persistent storage.
574 if destination is None:
575 destination = self._cfg_file
577 txt = serializer.Dump(self._config_data.ToDict())
578 dir_name, file_name = os.path.split(destination)
# mkstemp(suffix, prefix, dir): the temp file is named
# "<file_name><random>.newconfig" and lives next to the destination.
579 fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
580 f = os.fdopen(fd, 'w')
586 # we don't need to do os.close(fd) as f.close() did it
587 os.rename(name, destination)
588 self.write_count += 1
589 # re-set our cache as not to re-read the config file
591 st = os.stat(destination)
593 raise errors.ConfigurationError("Can't stat config file: %s" % err)
594 self._config_time = st.st_mtime
595 self._config_size = st.st_size
596 self._config_inode = st.st_ino
597 # and redistribute the config file
# NOTE(review): _DistributeConfig always uploads self._cfg_file, also when a
# different `destination` was written -- confirm this is intended.
598 self._DistributeConfig()
# Builds a fresh in-memory configuration consisting of one objects.Cluster and
# one objects.Node (the given `node`) and stores it in self._config_data.
# Arguments not covered by the docstring below:
#   mac_prefix - stored as cluster.mac_prefix
#   vg_name    - stored as cluster.volume_group_name
#   def_bridge - stored as cluster.default_bridge
# NOTE(review): original lines 612-613, 626, 628 and 630-631 are missing from
# this listing (one ConfigData argument and whatever follows the assignment,
# e.g. writing the file) -- confirm against the full file.
600 def InitConfig(self, node, primary_ip, secondary_ip,
601 hostkeypub, mac_prefix, vg_name, def_bridge):
602 """Create the initial cluster configuration.
604 It will contain the current node, which will also be the master
605 node, and no instances or operating systems.
608 node: the nodename of the initial node
609 primary_ip: the IP address of the current host
610 secondary_ip: the secondary IP of the current host or None
611 hostkeypub: the public hostkey of this host
# Start one below the first DRBD port, so the first AllocatePort() call
# (highest_used_port + 1) yields FIRST_DRBD_PORT.
614 hu_port = constants.FIRST_DRBD_PORT - 1
615 globalconfig = objects.Cluster(serial_no=1,
616 rsahostkeypub=hostkeypub,
617 highest_used_port=hu_port,
618 mac_prefix=mac_prefix,
619 volume_group_name=vg_name,
620 default_bridge=def_bridge,
621 tcpudp_port_pool=set())
# Without a dedicated secondary IP the primary address is used for both.
622 if secondary_ip is None:
623 secondary_ip = primary_ip
624 nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
625 secondary_ip=secondary_ip)
627 self._config_data = objects.ConfigData(nodes={node: nodeconfig},
629 cluster=globalconfig)
633 """Return the volume group name.
638 return self._config_data.cluster.volume_group_name
# Stores `vg_name` as cluster.volume_group_name in the in-memory
# configuration; the value is not validated here.
# NOTE(review): original lines 642-644 and 646-647 are missing from this
# listing; whether and how the change is persisted is not visible -- confirm.
640 def SetVGName(self, vg_name):
641 """Set the volume group name.
645 self._config_data.cluster.volume_group_name = vg_name
# Returns cluster.default_bridge as stored in the configuration (InitConfig
# fills it from its `def_bridge` argument).
# NOTE(review): original lines 650-653 are missing from this listing.
648 def GetDefBridge(self):
649 """Return the default bridge.
654 return self._config_data.cluster.default_bridge
# Returns cluster.mac_prefix as stored in the configuration; GenerateMAC
# prepends this value to the three random octets it generates.
# NOTE(review): original lines 658-661 are missing from this listing.
656 def GetMACPrefix(self):
657 """Return the mac prefix.
662 return self._config_data.cluster.mac_prefix
# Returns the stored cluster object itself (not a copy), so attribute changes
# made by the caller affect the in-memory configuration.
# NOTE(review): original lines 666-673 are missing from this listing.
664 def GetClusterInfo(self):
665 """Returns informations about the cluster
674 return self._config_data.cluster
676 def Update(self, target):
677 """Notify function to be called after updates.
679 This function must be called when an object (as returned by
680 GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
681 caller wants the modifications saved to the backing store. Note
682 that all modified objects will be saved, but the target argument
683 is the one the caller wants to ensure that it's saved.
686 if self._config_data is None:
687 raise errors.ProgrammerError("Configuration file not read,"
689 if isinstance(target, objects.Cluster):
690 test = target == self._config_data.cluster
691 elif isinstance(target, objects.Node):
692 test = target in self._config_data.nodes.values()
693 elif isinstance(target, objects.Instance):
694 test = target in self._config_data.instances.values()
696 raise errors.ProgrammerError("Invalid object type (%s) passed to"
697 " ConfigWriter.Update" % type(target))
699 raise errors.ConfigurationError("Configuration updated since object"
700 " has been read or unknown object")