4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Configuration management for Ganeti
24 This module provides the interface to the ganeti cluster configuration.
27 The configuration data is stored on every node but is updated on the
28 master only. After each update, the master distributes the data to the
31 Currently the data storage format is pickle: yaml was initially not
32 available; we later used it, but it was slow and memory-hungry, so
33 we reverted to pickle using custom Unpicklers.
42 from ganeti import errors
43 from ganeti import logger
44 from ganeti import utils
45 from ganeti import constants
46 from ganeti import rpc
47 from ganeti import objects
51 """The interface to the cluster configuration"""
53 def __init__(self, cfg_file=None, offline=False):
54 self._config_data = None
55 self._config_time = None
56 self._offline = offline
58 self._cfg_file = constants.CLUSTER_CONF_FILE
60 self._cfg_file = cfg_file
62 # this method needs to be static, so that we can call it on the class
65 """Check if the cluster is configured.
68 return os.path.exists(constants.CLUSTER_CONF_FILE)
70 def GenerateMAC(self):
71 """Generate a MAC for an instance.
73 This should check the current instances for duplicates.
78 prefix = self._config_data.cluster.mac_prefix
79 all_macs = self._AllMACs()
82 byte1 = random.randrange(0, 256)
83 byte2 = random.randrange(0, 256)
84 byte3 = random.randrange(0, 256)
85 mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
86 if mac not in all_macs:
90 raise errors.ConfigurationError, ("Can't generate unique MAC")
94 """Return all MACs present in the config.
101 for instance in self._config_data.instances.values():
102 for nic in instance.nics:
103 result.append(nic.mac)
107 def VerifyConfig(self):
108 """Stub verify function.
115 data = self._config_data
116 for instance_name in data.instances:
117 instance = data.instances[instance_name]
118 if instance.primary_node not in data.nodes:
119 result.append("Instance '%s' has invalid primary node '%s'" %
120 (instance_name, instance.primary_node))
121 for snode in instance.secondary_nodes:
122 if snode not in data.nodes:
123 result.append("Instance '%s' has invalid secondary node '%s'" %
124 (instance_name, snode))
125 for idx, nic in enumerate(instance.nics):
126 if nic.mac in seen_macs:
127 result.append("Instance '%s' has NIC %d mac %s duplicate" %
128 (instance_name, idx, nic.mac))
130 seen_macs.append(nic.mac)
134 def SetDiskID(self, disk, node_name):
135 """Convert the unique ID to the ID needed on the target nodes.
137 This is used only for drbd, which needs ip/port configuration.
139 The routine descends down and updates its children also, because
140 this helps when only the top device is passed to the remote
145 for child in disk.children:
146 self.SetDiskID(child, node_name)
148 if disk.logical_id is None and disk.physical_id is not None:
150 if disk.dev_type == "drbd":
151 pnode, snode, port = disk.logical_id
152 if node_name not in (pnode, snode):
153 raise errors.ConfigurationError, ("DRBD device not knowing node %s" %
155 pnode_info = self.GetNodeInfo(pnode)
156 snode_info = self.GetNodeInfo(snode)
157 if pnode_info is None or snode_info is None:
158 raise errors.ConfigurationError("Can't find primary or secondary node"
159 " for %s" % str(disk))
160 if pnode == node_name:
161 disk.physical_id = (pnode_info.secondary_ip, port,
162 snode_info.secondary_ip, port)
163 else: # it must be secondary, we tested above
164 disk.physical_id = (snode_info.secondary_ip, port,
165 pnode_info.secondary_ip, port)
167 disk.physical_id = disk.logical_id
170 def AllocatePort(self):
173 The port will be recorded in the cluster config.
178 self._config_data.cluster.highest_used_port += 1
179 if self._config_data.cluster.highest_used_port >= constants.LAST_DRBD_PORT:
180 raise errors.ConfigurationError, ("The highest used port is greater"
181 " than %s. Aborting." %
182 constants.LAST_DRBD_PORT)
183 port = self._config_data.cluster.highest_used_port
188 def GetHostKey(self):
189 """Return the rsa hostkey from the config.
197 return self._config_data.cluster.rsahostkeypub
199 def AddInstance(self, instance):
200 """Add an instance to the config.
202 This should be used after creating a new instance.
205 instance: the instance object
207 if not isinstance(instance, objects.Instance):
208 raise errors.ProgrammerError("Invalid type passed to AddInstance")
211 self._config_data.instances[instance.name] = instance
214 def MarkInstanceUp(self, instance_name):
215 """Mark the instance status to up in the config.
220 if instance_name not in self._config_data.instances:
221 raise errors.ConfigurationError, ("Unknown instance '%s'" %
223 instance = self._config_data.instances[instance_name]
224 instance.status = "up"
227 def RemoveInstance(self, instance_name):
228 """Remove the instance from the configuration.
233 if instance_name not in self._config_data.instances:
234 raise errors.ConfigurationError, ("Unknown instance '%s'" %
236 del self._config_data.instances[instance_name]
239 def MarkInstanceDown(self, instance_name):
240 """Mark the status of an instance to down in the configuration.
246 if instance_name not in self._config_data.instances:
247 raise errors.ConfigurationError, ("Unknown instance '%s'" %
249 instance = self._config_data.instances[instance_name]
250 instance.status = "down"
253 def GetInstanceList(self):
254 """Get the list of instances.
257 array of instances, ex. ['instance2.example.com','instance1.example.com']
258 these contain all the instances, including the ones in Admin_down state
264 return self._config_data.instances.keys()
266 def ExpandInstanceName(self, short_name):
267 """Attempt to expand an incomplete instance name.
273 return utils.MatchNameComponent(short_name,
274 self._config_data.instances.keys())
276 def GetInstanceInfo(self, instance_name):
277 Returns information about an instance.
279 It takes the information from the configuration file. Other information about
280 an instance is taken from the live systems.
283 instance_name: name of the instance, e.g. instance1.example.com
292 if instance_name not in self._config_data.instances:
295 return self._config_data.instances[instance_name]
297 def AddNode(self, node):
298 """Add a node to the configuration.
301 node: an objects.Node instance
305 self._config_data.nodes[node.name] = node
308 def RemoveNode(self, node_name):
309 """Remove a node from the configuration.
313 if node_name not in self._config_data.nodes:
314 raise errors.ConfigurationError, ("Unknown node '%s'" % node_name)
316 del self._config_data.nodes[node_name]
319 def ExpandNodeName(self, short_name):
320 Attempt to expand an incomplete node name.
326 return utils.MatchNameComponent(short_name,
327 self._config_data.nodes.keys())
329 def GetNodeInfo(self, node_name):
330 """Get the configuration of a node, as stored in the config.
332 Args: node: nodename (tuple) of the node
334 Returns: the node object
340 if node_name not in self._config_data.nodes:
343 return self._config_data.nodes[node_name]
345 def GetNodeList(self):
346 """Return the list of nodes which are in the configuration.
351 return self._config_data.nodes.keys()
353 def DumpConfig(self):
354 """Return the entire configuration of the cluster.
358 return self._config_data
360 def _BumpSerialNo(self):
361 """Bump up the serial number of the config.
364 self._config_data.cluster.serial_no += 1
366 def _OpenConfig(self):
367 """Read the config data from disk.
369 In case we already have configuration data and the config file has
370 the same mtime as when we read it, we skip the parsing of the
371 file, since de-serialisation could be slow.
375 st = os.stat(self._cfg_file)
377 raise errors.ConfigurationError, "Can't stat config file: %s" % err
378 if (self._config_data is not None and
379 self._config_time is not None and
380 self._config_time == st.st_mtime):
381 # data is current, so skip loading of config file
383 f = open(self._cfg_file, 'r')
386 data = objects.ConfigObject.Load(f)
387 except Exception, err:
388 raise errors.ConfigurationError, err
391 if (not hasattr(data, 'cluster') or
392 not hasattr(data.cluster, 'config_version')):
393 raise errors.ConfigurationError, ("Incomplete configuration"
394 " (missing cluster.config_version)")
395 if data.cluster.config_version != constants.CONFIG_VERSION:
396 raise errors.ConfigurationError, ("Cluster configuration version"
397 " mismatch, got %s instead of %s" %
398 (data.cluster.config_version,
399 constants.CONFIG_VERSION))
400 self._config_data = data
401 self._config_time = st.st_mtime
403 def _ReleaseLock(self):
407 def _DistributeConfig(self):
408 """Distribute the configuration to the other nodes.
410 Currently, this only copies the configuration file. In the future,
411 it could be used to encapsulate the 2/3-phase update mechanism.
417 nodelist = self.GetNodeList()
418 myhostname = socket.gethostname()
421 for node in nodelist:
422 nodeinfo = self.GetNodeInfo(node)
423 if nodeinfo.name == myhostname:
425 tgt_list.append(node)
427 result = rpc.call_upload_file(tgt_list, self._cfg_file)
428 for node in tgt_list:
430 logger.Error("copy of file %s to node %s failed" %
431 (self._cfg_file, node))
435 def _WriteConfig(self, destination=None):
436 """Write the configuration data to persistent storage.
439 if destination is None:
440 destination = self._cfg_file
442 dir_name, file_name = os.path.split(destination)
443 fd, name = tempfile.mkstemp('.newconfig', file_name, dir_name)
444 f = os.fdopen(fd, 'w')
446 self._config_data.Dump(f)
450 # we don't need to do os.close(fd) as f.close() did it
451 os.rename(name, destination)
452 self._DistributeConfig()
454 def InitConfig(self, node, primary_ip, secondary_ip,
455 clustername, hostkeypub, mac_prefix, vg_name, def_bridge):
456 """Create the initial cluster configuration.
458 It will contain the current node, which will also be the master
459 node, and no instances or operating systems.
462 node: the nodename of the initial node
463 primary_ip: the IP address of the current host
464 secondary_ip: the secondary IP of the current host or None
465 clustername: the name of the cluster
466 hostkeypub: the public hostkey of this host
469 hu_port = constants.FIRST_DRBD_PORT - 1
470 globalconfig = objects.Cluster(config_version=constants.CONFIG_VERSION,
471 serial_no=1, master_node=node,
473 rsahostkeypub=hostkeypub,
474 highest_used_port=hu_port,
475 mac_prefix=mac_prefix,
476 volume_group_name=vg_name,
477 default_bridge=def_bridge)
478 if secondary_ip is None:
479 secondary_ip = primary_ip
480 nodeconfig = objects.Node(name=node, primary_ip=primary_ip,
481 secondary_ip=secondary_ip)
483 self._config_data = objects.ConfigData(nodes={node: nodeconfig},
485 cluster=globalconfig)
488 def GetClusterName(self):
489 """Return the cluster name.
494 return self._config_data.cluster.name
497 """Return the volume group name.
502 return self._config_data.cluster.volume_group_name
504 def GetDefBridge(self):
505 """Return the default bridge.
510 return self._config_data.cluster.default_bridge
512 def GetMACPrefix(self):
513 """Return the mac prefix.
518 return self._config_data.cluster.mac_prefix
521 """Get the name of the master.
526 return self._config_data.cluster.master_node
528 def SetMaster(self, master_node):
529 """Change the master of the cluster.
531 As with all changes, the configuration data will be distributed to
534 This function is used for manual master failover.
538 self._config_data.cluster.master_node = master_node