#
#
-# Copyright (C) 2006, 2007 Google Inc.
+# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
from ganeti import objects
from ganeti import opcodes
from ganeti import ssconf
+from ganeti import serializer
class LogicalUnit(object):
"""Verifies the cluster status.
"""
- _OP_REQP = []
+ _OP_REQP = ["skip_checks"]
def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
remote_version, feedback_fn):
feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
return bad
- def _VerifyInstance(self, instance, node_vol_is, node_instance, feedback_fn):
+ def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
+ node_instance, feedback_fn):
"""Verify an instance.
This function checks to see if the required block devices are
"""
bad = False
- instancelist = self.cfg.GetInstanceList()
- if not instance in instancelist:
- feedback_fn(" - ERROR: instance %s not in instance list %s" %
- (instance, instancelist))
- bad = True
-
- instanceconfig = self.cfg.GetInstanceInfo(instance)
node_current = instanceconfig.primary_node
node_vol_should = {}
bad = True
if not instanceconfig.status == 'down':
- if not instance in node_instance[node_current]:
+ if (node_current not in node_instance or
+ not instance in node_instance[node_current]):
feedback_fn(" - ERROR: instance %s not running on node %s" %
(instance, node_current))
bad = True
bad = True
return bad
+ def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
+ """Verify N+1 Memory Resilience.
+
+ Check that if one single node dies we can still start all the instances it
+ was primary for.
+
+ """
+ bad = False
+
+ for node, nodeinfo in node_info.iteritems():
+ # This code checks that every node which is now listed as secondary has
+ # enough memory to host all instances it is supposed to should a single
+ # other node in the cluster fail.
+ # FIXME: not ready for failover to an arbitrary node
+ # FIXME: does not support file-backed instances
+ # WARNING: we currently take into account down instances as well as up
+ # ones, considering that even if they're down someone might want to start
+ # them even in the event of a node failure.
+ for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
+ needed_mem = 0
+ for instance in instances:
+ needed_mem += instance_cfg[instance].memory
+ if nodeinfo['mfree'] < needed_mem:
+ feedback_fn(" - ERROR: not enough memory on node %s to accomodate"
+ " failovers should node %s fail" % (node, prinode))
+ bad = True
+ return bad
+
def CheckPrereq(self):
"""Check prerequisites.
- This has no prerequisites.
+ Transform the list of checks we're going to skip into a set and check that
+ all its members are valid.
"""
- pass
+ self.skip_set = frozenset(self.op.skip_checks)
+ if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
+ raise errors.OpPrereqError("Invalid checks to be skipped specified")
def Exec(self, feedback_fn):
"""Verify integrity of cluster, performing various test on nodes.
vg_name = self.cfg.GetVGName()
nodelist = utils.NiceSort(self.cfg.GetNodeList())
instancelist = utils.NiceSort(self.cfg.GetInstanceList())
+ i_non_redundant = [] # Non redundant instances
node_volume = {}
node_instance = {}
+ node_info = {}
+ instance_cfg = {}
# FIXME: verify OS list
# do local checksums
}
all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
all_rversion = rpc.call_version(nodelist)
+ all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
for node in nodelist:
feedback_fn("* Verifying node %s" % node)
node_instance[node] = nodeinstance
+ # node_info
+ nodeinfo = all_ninfo[node]
+ if not isinstance(nodeinfo, dict):
+ feedback_fn(" - ERROR: connection to %s failed" % (node,))
+ bad = True
+ continue
+
+ try:
+ node_info[node] = {
+ "mfree": int(nodeinfo['memory_free']),
+ "dfree": int(nodeinfo['vg_free']),
+ "pinst": [],
+ "sinst": [],
+ # dictionary holding all instances this node is secondary for,
+ # grouped by their primary node. Each key is a cluster node, and each
+ # value is a list of instances which have the key as primary and the
+ # current node as secondary. this is handy to calculate N+1 memory
+ # availability if you can only failover from a primary to its
+ # secondary.
+ "sinst-by-pnode": {},
+ }
+ except ValueError:
+ feedback_fn(" - ERROR: invalid value returned from node %s" % (node,))
+ bad = True
+ continue
+
node_vol_should = {}
for instance in instancelist:
feedback_fn("* Verifying instance %s" % instance)
- result = self._VerifyInstance(instance, node_volume, node_instance,
- feedback_fn)
- bad = bad or result
-
inst_config = self.cfg.GetInstanceInfo(instance)
+ result = self._VerifyInstance(instance, inst_config, node_volume,
+ node_instance, feedback_fn)
+ bad = bad or result
inst_config.MapLVsByNode(node_vol_should)
+ instance_cfg[instance] = inst_config
+
+ pnode = inst_config.primary_node
+ if pnode in node_info:
+ node_info[pnode]['pinst'].append(instance)
+ else:
+ feedback_fn(" - ERROR: instance %s, connection to primary node"
+ " %s failed" % (instance, pnode))
+ bad = True
+
+ # If the instance is non-redundant we cannot survive losing its primary
+ # node, so we are not N+1 compliant. On the other hand we have no disk
+ # templates with more than one secondary so that situation is not well
+ # supported either.
+ # FIXME: does not support file-backed instances
+ if len(inst_config.secondary_nodes) == 0:
+ i_non_redundant.append(instance)
+ elif len(inst_config.secondary_nodes) > 1:
+ feedback_fn(" - WARNING: multiple secondaries for instance %s"
+ % instance)
+
+ for snode in inst_config.secondary_nodes:
+ if snode in node_info:
+ node_info[snode]['sinst'].append(instance)
+ if pnode not in node_info[snode]['sinst-by-pnode']:
+ node_info[snode]['sinst-by-pnode'][pnode] = []
+ node_info[snode]['sinst-by-pnode'][pnode].append(instance)
+ else:
+ feedback_fn(" - ERROR: instance %s, connection to secondary node"
+ " %s failed" % (instance, snode))
+
feedback_fn("* Verifying orphan volumes")
result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
feedback_fn)
feedback_fn)
bad = bad or result
+ if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
+ feedback_fn("* Verifying N+1 Memory redundancy")
+ result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
+ bad = bad or result
+
+ feedback_fn("* Other Notes")
+ if i_non_redundant:
+ feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
+ % len(i_non_redundant))
+
return int(bad)
"""Logical unit for OS diagnose/query.
"""
- _OP_REQP = []
+ _OP_REQP = ["output_fields", "names"]
def CheckPrereq(self):
"""Check prerequisites.
This always succeeds, since this is a pure query LU.
"""
- return
+ if self.op.names:
+ raise errors.OpPrereqError("Selective OS query not supported")
+
+ self.dynamic_fields = frozenset(["name", "valid", "node_status"])
+ _CheckOutputFields(static=[],
+ dynamic=self.dynamic_fields,
+ selected=self.op.output_fields)
+
+ @staticmethod
+ def _DiagnoseByOS(node_list, rlist):
+ """Remaps a per-node return list into an a per-os per-node dictionary
+
+ Args:
+ node_list: a list with the names of all nodes
+ rlist: a map with node names as keys and OS objects as values
+
+ Returns:
+ map: a map with osnames as keys and as value another map, with
+ nodes as
+ keys and list of OS objects as values
+ e.g. {"debian-etch": {"node1": [<object>,...],
+ "node2": [<object>,]}
+ }
+
+ """
+ all_os = {}
+ for node_name, nr in rlist.iteritems():
+ if not nr:
+ continue
+ for os in nr:
+ if os.name not in all_os:
+ # build a list of nodes for this os containing empty lists
+ # for each node in node_list
+ all_os[os.name] = {}
+ for nname in node_list:
+ all_os[os.name][nname] = []
+ all_os[os.name][node_name].append(os)
+ return all_os
def Exec(self, feedback_fn):
"""Compute the list of OSes.
node_data = rpc.call_os_diagnose(node_list)
if node_data == False:
raise errors.OpExecError("Can't gather the list of OSes")
- return node_data
+ pol = self._DiagnoseByOS(node_list, node_data)
+ output = []
+ for os_name, os_data in pol.iteritems():
+ row = []
+ for field in self.op.output_fields:
+ if field == "name":
+ val = os_name
+ elif field == "valid":
+ val = utils.all([osl and osl[0] for osl in os_data.values()])
+ elif field == "node_status":
+ val = {}
+ for node_name, nos_list in os_data.iteritems():
+ val[node_name] = [(v.status, v.path) for v in nos_list]
+ else:
+ raise errors.ParameterError(field)
+ row.append(val)
+ output.append(row)
+
+ return output
class LURemoveNode(LogicalUnit):
if not utils.IsValidIP(secondary_ip):
raise errors.OpPrereqError("Invalid secondary IP given")
self.op.secondary_ip = secondary_ip
+
node_list = cfg.GetNodeList()
- if node in node_list:
- raise errors.OpPrereqError("Node %s is already in the configuration"
- % node)
+ if not self.op.readd and node in node_list:
+ raise errors.OpPrereqError("Node %s is already in the configuration" %
+ node)
+ elif self.op.readd and node not in node_list:
+ raise errors.OpPrereqError("Node %s is not in the configuration" % node)
for existing_node_name in node_list:
existing_node = cfg.GetNodeInfo(existing_node_name)
+
+ if self.op.readd and node == existing_node_name:
+ if (existing_node.primary_ip != primary_ip or
+ existing_node.secondary_ip != secondary_ip):
+ raise errors.OpPrereqError("Readded node doesn't have the same IP"
+ " address configuration as before")
+ continue
+
if (existing_node.primary_ip == primary_ip or
existing_node.secondary_ip == primary_ip or
existing_node.primary_ip == secondary_ip or
if not self.ssh.CopyFileToNode(node, fname):
logger.Error("could not copy file %s to node %s" % (fname, node))
- logger.Info("adding node %s to cluster.conf" % node)
- self.cfg.AddNode(new_node)
+ if not self.op.readd:
+ logger.Info("adding node %s to cluster.conf" % node)
+ self.cfg.AddNode(new_node)
class LUMasterFailover(LogicalUnit):
instance_list = self.cfg.GetInstanceList()
if new_name in instance_list:
raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
- instance_name)
+ new_name)
if not getattr(self.op, "ignore_ip", False):
command = ["fping", "-q", name_info.ip]
file_storage_dir)
if not result:
- logger.Error("Could not connect to node '%s'" % inst.primary_node)
+ logger.Error("Could not connect to node '%s'" % instance.primary_node)
return False
if not result[0]:
return result
+def _ComputeDiskSize(disk_template, disk_size, swap_size):
+ """Compute disk size requirements in the volume group
+
+ This is currently hard-coded for the two-drive layout.
+
+ """
+ # Required free disk space as a function of disk and swap space
+ req_size_dict = {
+ constants.DT_DISKLESS: None,
+ constants.DT_PLAIN: disk_size + swap_size,
+ # 256 MB are added for drbd metadata, 128MB for each drbd device
+ constants.DT_DRBD8: disk_size + swap_size + 256,
+ constants.DT_FILE: None,
+ }
+
+ if disk_template not in req_size_dict:
+ raise errors.ProgrammerError("Disk template '%s' size requirement"
+ " is unknown" % disk_template)
+
+ return req_size_dict[disk_template]
+
+
class LUCreateInstance(LogicalUnit):
"""Create an instance.
"""
HPATH = "instance-add"
HTYPE = constants.HTYPE_INSTANCE
- _OP_REQP = ["instance_name", "mem_size", "disk_size", "pnode",
+ _OP_REQP = ["instance_name", "mem_size", "disk_size",
"disk_template", "swap_size", "mode", "start", "vcpus",
"wait_for_sync", "ip_check", "mac"]
+ def _RunAllocator(self):
+ """Run the allocator based on input opcode.
+
+ """
+ disks = [{"size": self.op.disk_size, "mode": "w"},
+ {"size": self.op.swap_size, "mode": "w"}]
+ nics = [{"mac": self.op.mac, "ip": getattr(self.op, "ip", None),
+ "bridge": self.op.bridge}]
+ ial = IAllocator(self.cfg, self.sstore,
+ name=self.op.instance_name,
+ disk_template=self.op.disk_template,
+ tags=[],
+ os=self.op.os_type,
+ vcpus=self.op.vcpus,
+ mem_size=self.op.mem_size,
+ disks=disks,
+ nics=nics,
+ mode=constants.IALLOCATOR_MODE_ALLOC)
+
+ ial.Run(self.op.iallocator)
+
+ if not ial.success:
+ raise errors.OpPrereqError("Can't compute nodes using"
+ " iallocator '%s': %s" % (self.op.iallocator,
+ ial.info))
+ if len(ial.nodes) != ial.required_nodes:
+ raise errors.OpPrereqError("iallocator '%s' returned invalid number"
+ " of nodes (%s), required %s" %
+ (len(ial.nodes), ial.required_nodes))
+ self.op.pnode = ial.nodes[0]
+ logger.ToStdout("Selected nodes for the instance: %s" %
+ (", ".join(ial.nodes),))
+ logger.Info("Selected nodes for instance %s via iallocator %s: %s" %
+ (self.op.instance_name, self.op.iallocator, ial.nodes))
+ if ial.required_nodes == 2:
+ self.op.snode = ial.nodes[1]
+
def BuildHooksEnv(self):
"""Build hooks env.
"""Check prerequisites.
"""
- for attr in ["kernel_path", "initrd_path", "hvm_boot_order"]:
+ # set optional parameters to none if they don't exist
+ for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode",
+ "iallocator"]:
if not hasattr(self.op, attr):
setattr(self.op, attr, None)
if getattr(self.op, "os_type", None) is None:
raise errors.OpPrereqError("No guest OS specified")
- # check primary node
- pnode = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.pnode))
- if pnode is None:
- raise errors.OpPrereqError("Primary node '%s' is unknown" %
- self.op.pnode)
- self.op.pnode = pnode.name
- self.pnode = pnode
- self.secondaries = []
+ #### instance parameters check
+
# disk template and mirror node verification
if self.op.disk_template not in constants.DISK_TEMPLATES:
raise errors.OpPrereqError("Invalid disk template name")
+ # instance name verification
+ hostname1 = utils.HostInfo(self.op.instance_name)
+
+ self.op.instance_name = instance_name = hostname1.name
+ instance_list = self.cfg.GetInstanceList()
+ if instance_name in instance_list:
+ raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
+ instance_name)
+
+ # ip validity checks
+ ip = getattr(self.op, "ip", None)
+ if ip is None or ip.lower() == "none":
+ inst_ip = None
+ elif ip.lower() == "auto":
+ inst_ip = hostname1.ip
+ else:
+ if not utils.IsValidIP(ip):
+ raise errors.OpPrereqError("given IP address '%s' doesn't look"
+ " like a valid IP" % ip)
+ inst_ip = ip
+ self.inst_ip = self.op.ip = inst_ip
+
+ if self.op.start and not self.op.ip_check:
+ raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
+ " adding an instance in start mode")
+
+ if self.op.ip_check:
+ if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT):
+ raise errors.OpPrereqError("IP %s of instance %s already in use" %
+ (hostname1.ip, instance_name))
+
+ # MAC address verification
+ if self.op.mac != "auto":
+ if not utils.IsValidMac(self.op.mac.lower()):
+ raise errors.OpPrereqError("invalid MAC address specified: %s" %
+ self.op.mac)
+
+ # bridge verification
+ bridge = getattr(self.op, "bridge", None)
+ if bridge is None:
+ self.op.bridge = self.cfg.GetDefBridge()
+ else:
+ self.op.bridge = bridge
+
+ # boot order verification
+ if self.op.hvm_boot_order is not None:
+ if len(self.op.hvm_boot_order.strip("acdn")) != 0:
+ raise errors.OpPrereqError("invalid boot order specified,"
+ " must be one or more of [acdn]")
+ # file storage checks
if (self.op.file_driver and
not self.op.file_driver in constants.FILE_DRIVER):
raise errors.OpPrereqError("Invalid file driver name '%s'" %
if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
raise errors.OpPrereqError("File storage directory not a relative"
" path")
+ #### allocator run
+ if [self.op.iallocator, self.op.pnode].count(None) != 1:
+ raise errors.OpPrereqError("One and only one of iallocator and primary"
+ " node must be given")
+
+ if self.op.iallocator is not None:
+ self._RunAllocator()
+
+ #### node related checks
+
+ # check primary node
+ pnode = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.pnode))
+ if pnode is None:
+ raise errors.OpPrereqError("Primary node '%s' is unknown" %
+ self.op.pnode)
+ self.op.pnode = pnode.name
+ self.pnode = pnode
+ self.secondaries = []
+
+ # mirror node verification
if self.op.disk_template in constants.DTS_NET_MIRROR:
if getattr(self.op, "snode", None) is None:
raise errors.OpPrereqError("The networked disk templates need"
" the primary node.")
self.secondaries.append(snode_name)
- # Required free disk space as a function of disk and swap space
- req_size_dict = {
- constants.DT_DISKLESS: None,
- constants.DT_PLAIN: self.op.disk_size + self.op.swap_size,
- # 256 MB are added for drbd metadata, 128MB for each drbd device
- constants.DT_DRBD8: self.op.disk_size + self.op.swap_size + 256,
- constants.DT_FILE: None,
- }
-
- if self.op.disk_template not in req_size_dict:
- raise errors.ProgrammerError("Disk template '%s' size requirement"
- " is unknown" % self.op.disk_template)
-
- req_size = req_size_dict[self.op.disk_template]
+ req_size = _ComputeDiskSize(self.op.disk_template,
+ self.op.disk_size, self.op.swap_size)
# Check lv size requirements
if req_size is not None:
if self.op.kernel_path == constants.VALUE_NONE:
raise errors.OpPrereqError("Can't set instance kernel to none")
- # instance verification
- hostname1 = utils.HostInfo(self.op.instance_name)
-
- self.op.instance_name = instance_name = hostname1.name
- instance_list = self.cfg.GetInstanceList()
- if instance_name in instance_list:
- raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
- instance_name)
-
- ip = getattr(self.op, "ip", None)
- if ip is None or ip.lower() == "none":
- inst_ip = None
- elif ip.lower() == "auto":
- inst_ip = hostname1.ip
- else:
- if not utils.IsValidIP(ip):
- raise errors.OpPrereqError("given IP address '%s' doesn't look"
- " like a valid IP" % ip)
- inst_ip = ip
- self.inst_ip = inst_ip
-
- if self.op.start and not self.op.ip_check:
- raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
- " adding an instance in start mode")
-
- if self.op.ip_check:
- if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT):
- raise errors.OpPrereqError("IP %s of instance %s already in use" %
- (hostname1.ip, instance_name))
-
- # MAC address verification
- if self.op.mac != "auto":
- if not utils.IsValidMac(self.op.mac.lower()):
- raise errors.OpPrereqError("invalid MAC address specified: %s" %
- self.op.mac)
-
- # bridge verification
- bridge = getattr(self.op, "bridge", None)
- if bridge is None:
- self.op.bridge = self.cfg.GetDefBridge()
- else:
- self.op.bridge = bridge
+ # bridge check on primary node
if not rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]):
raise errors.OpPrereqError("target bridge '%s' does not exist on"
" destination node '%s'" %
(self.op.bridge, pnode.name))
- # boot order verification
- if self.op.hvm_boot_order is not None:
- if len(self.op.hvm_boot_order.strip("acdn")) != 0:
- raise errors.OpPrereqError("invalid boot order specified,"
- " must be one or more of [acdn]")
-
if self.op.start:
self.instance_status = 'up'
else:
def CheckPrereq(self):
"""Check prerequisites.
- This checks that the instance name is a valid one.
+ This checks that the instance and node names are valid.
"""
instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
# shutdown the instance, but not the disks
if not rpc.call_instance_shutdown(src_node, instance):
raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, source_node))
+ (instance.name, src_node))
vgname = self.cfg.GetVGName()
" on node %s" % (instance.name, node))
+class LURemoveExport(NoHooksLU):
+ """Remove exports related to the named instance.
+
+ """
+ _OP_REQP = ["instance_name"]
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+ """
+ pass
+
+ def Exec(self, feedback_fn):
+ """Remove any export.
+
+ """
+ instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
+ # If the instance was not found we'll try with the name that was passed in.
+ # This will only work if it was an FQDN, though.
+ fqdn_warn = False
+ if not instance_name:
+ fqdn_warn = True
+ instance_name = self.op.instance_name
+
+ op = opcodes.OpQueryExports(nodes=[])
+ exportlist = self.proc.ChainOpCode(op)
+ found = False
+ for node in exportlist:
+ if instance_name in exportlist[node]:
+ found = True
+ if not rpc.call_export_remove(node, instance_name):
+ logger.Error("could not remove export for instance %s"
+ " on node %s" % (instance_name, node))
+
+ if fqdn_warn and not found:
+ feedback_fn("Export not found. If trying to remove an export belonging"
+ " to a deleted instance please use its Fully Qualified"
+ " Domain Name.")
+
+
class TagsLU(NoHooksLU):
"""Generic tags LU.
if not node_result:
raise errors.OpExecError("Failure during rpc call to node %s,"
" result: %s" % (node, node_result))
+
+
+class IAllocator(object):
+ """IAllocator framework.
+
+ An IAllocator instance has three sets of attributes:
+ - cfg/sstore that are needed to query the cluster
+ - input data (all members of the _KEYS class attribute are required)
+ - four buffer attributes (in|out_data|text), that represent the
+ input (to the external script) in text and data structure format,
+ and the output from it, again in two formats
+ - the result variables from the script (success, info, nodes) for
+ easy usage
+
+ """
+ _KEYS = [
+ "mode", "name",
+ "mem_size", "disks", "disk_template",
+ "os", "tags", "nics", "vcpus",
+ ]
+
+ def __init__(self, cfg, sstore, **kwargs):
+ self.cfg = cfg
+ self.sstore = sstore
+ # init buffer variables
+ self.in_text = self.out_text = self.in_data = self.out_data = None
+ # init all input fields so that pylint is happy
+ self.mode = self.name = None
+ self.mem_size = self.disks = self.disk_template = None
+ self.os = self.tags = self.nics = self.vcpus = None
+ # computed fields
+ self.required_nodes = None
+ # init result fields
+ self.success = self.info = self.nodes = None
+ for key in kwargs:
+ if key not in self._KEYS:
+ raise errors.ProgrammerError("Invalid input parameter '%s' to"
+ " IAllocator" % key)
+ setattr(self, key, kwargs[key])
+ for key in self._KEYS:
+ if key not in kwargs:
+ raise errors.ProgrammerError("Missing input parameter '%s' to"
+ " IAllocator" % key)
+ self._BuildInputData()
+
+ def _ComputeClusterData(self):
+ """Compute the generic allocator input data.
+
+ This is the data that is independent of the actual operation.
+
+ """
+ cfg = self.cfg
+ # cluster data
+ data = {
+ "version": 1,
+ "cluster_name": self.sstore.GetClusterName(),
+ "cluster_tags": list(cfg.GetClusterInfo().GetTags()),
+ # we don't have job IDs
+ }
+
+ # node data
+ node_results = {}
+ node_list = cfg.GetNodeList()
+ node_data = rpc.call_node_info(node_list, cfg.GetVGName())
+ for nname in node_list:
+ ninfo = cfg.GetNodeInfo(nname)
+ if nname not in node_data or not isinstance(node_data[nname], dict):
+ raise errors.OpExecError("Can't get data for node %s" % nname)
+ remote_info = node_data[nname]
+ for attr in ['memory_total', 'memory_free',
+ 'vg_size', 'vg_free']:
+ if attr not in remote_info:
+ raise errors.OpExecError("Node '%s' didn't return attribute '%s'" %
+ (nname, attr))
+ try:
+ int(remote_info[attr])
+ except ValueError, err:
+ raise errors.OpExecError("Node '%s' returned invalid value for '%s':"
+ " %s" % (nname, attr, str(err)))
+ pnr = {
+ "tags": list(ninfo.GetTags()),
+ "total_memory": utils.TryConvert(int, remote_info['memory_total']),
+ "free_memory": utils.TryConvert(int, remote_info['memory_free']),
+ "total_disk": utils.TryConvert(int, remote_info['vg_size']),
+ "free_disk": utils.TryConvert(int, remote_info['vg_free']),
+ "primary_ip": ninfo.primary_ip,
+ "secondary_ip": ninfo.secondary_ip,
+ }
+ node_results[nname] = pnr
+ data["nodes"] = node_results
+
+ # instance data
+ instance_data = {}
+ i_list = cfg.GetInstanceList()
+ for iname in i_list:
+ iinfo = cfg.GetInstanceInfo(iname)
+ nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
+ for n in iinfo.nics]
+ pir = {
+ "tags": list(iinfo.GetTags()),
+ "should_run": iinfo.status == "up",
+ "vcpus": iinfo.vcpus,
+ "memory": iinfo.memory,
+ "os": iinfo.os,
+ "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
+ "nics": nic_data,
+ "disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
+ "disk_template": iinfo.disk_template,
+ }
+ instance_data[iname] = pir
+
+ data["instances"] = instance_data
+
+ self.in_data = data
+
+ def _AddNewInstance(self):
+ """Add new instance data to allocator structure.
+
+ This in combination with _AllocatorGetClusterData will create the
+ correct structure needed as input for the allocator.
+
+ The checks for the completeness of the opcode must have already been
+ done.
+
+ """
+ data = self.in_data
+ if len(self.disks) != 2:
+ raise errors.OpExecError("Only two-disk configurations supported")
+
+ disk_space = _ComputeDiskSize(self.disk_template,
+ self.disks[0]["size"], self.disks[1]["size"])
+
+ if self.disk_template in constants.DTS_NET_MIRROR:
+ self.required_nodes = 2
+ else:
+ self.required_nodes = 1
+ request = {
+ "type": "allocate",
+ "name": self.name,
+ "disk_template": self.disk_template,
+ "tags": self.tags,
+ "os": self.os,
+ "vcpus": self.vcpus,
+ "memory": self.mem_size,
+ "disks": self.disks,
+ "disk_space_total": disk_space,
+ "nics": self.nics,
+ "required_nodes": self.required_nodes,
+ }
+ data["request"] = request
+
+ def _AddRelocateInstance(self):
+ """Add relocate instance data to allocator structure.
+
+ This in combination with _IAllocatorGetClusterData will create the
+ correct structure needed as input for the allocator.
+
+ The checks for the completeness of the opcode must have already been
+ done.
+
+ """
+ instance = self.cfg.GetInstanceInfo(self.name)
+ if instance is None:
+ raise errors.ProgrammerError("Unknown instance '%s' passed to"
+ " IAllocator" % self.name)
+
+ if instance.disk_template not in constants.DTS_NET_MIRROR:
+ raise errors.OpPrereqError("Can't relocate non-mirrored instances")
+
+ if len(instance.secondary_nodes) != 1:
+ raise errors.OpPrereqError("Instance has not exactly one secondary node")
+
+ self.required_nodes = 1
+
+ disk_space = _ComputeDiskSize(instance.disk_template,
+ instance.disks[0].size,
+ instance.disks[1].size)
+
+ request = {
+ "type": "relocate",
+ "name": self.name,
+ "disk_space_total": disk_space,
+ "required_nodes": self.required_nodes,
+ "nodes": list(instance.secondary_nodes),
+ }
+ self.in_data["request"] = request
+
+ def _BuildInputData(self):
+ """Build input data structures.
+
+ """
+ self._ComputeClusterData()
+
+ if self.mode == constants.IALLOCATOR_MODE_ALLOC:
+ self._AddNewInstance()
+ else:
+ self._AddRelocateInstance()
+
+ self.in_text = serializer.Dump(self.in_data)
+
+ def Run(self, name, validate=True):
+ """Run an instance allocator and return the results.
+
+ """
+ data = self.in_text
+
+ alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
+ os.path.isfile)
+ if alloc_script is None:
+ raise errors.OpExecError("Can't find allocator '%s'" % name)
+
+ fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
+ try:
+ os.write(fd, data)
+ os.close(fd)
+ result = utils.RunCmd([alloc_script, fin_name])
+ if result.failed:
+ raise errors.OpExecError("Instance allocator call failed: %s,"
+ " output: %s" %
+ (result.fail_reason, result.output))
+ finally:
+ os.unlink(fin_name)
+ self.out_text = result.stdout
+ if validate:
+ self._ValidateResult()
+
+ def _ValidateResult(self):
+ """Process the allocator results.
+
+ This will process and if successful save the result in
+ self.out_data and the other parameters.
+
+ """
+ try:
+ rdict = serializer.Load(self.out_text)
+ except Exception, err:
+ raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
+
+ if not isinstance(rdict, dict):
+ raise errors.OpExecError("Can't parse iallocator results: not a dict")
+
+ for key in "success", "info", "nodes":
+ if key not in rdict:
+ raise errors.OpExecError("Can't parse iallocator results:"
+ " missing key '%s'" % key)
+ setattr(self, key, rdict[key])
+
+ if not isinstance(rdict["nodes"], list):
+ raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
+ " is not a list")
+ self.out_data = rdict
+
+
+class LUTestAllocator(NoHooksLU):
+ """Run allocator tests.
+
+ This LU runs the allocator tests
+
+ """
+ _OP_REQP = ["direction", "mode", "name"]
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks the opcode parameters depending on the director and mode test.
+
+ """
+ if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
+ for attr in ["name", "mem_size", "disks", "disk_template",
+ "os", "tags", "nics", "vcpus"]:
+ if not hasattr(self.op, attr):
+ raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
+ attr)
+ iname = self.cfg.ExpandInstanceName(self.op.name)
+ if iname is not None:
+ raise errors.OpPrereqError("Instance '%s' already in the cluster" %
+ iname)
+ if not isinstance(self.op.nics, list):
+ raise errors.OpPrereqError("Invalid parameter 'nics'")
+ for row in self.op.nics:
+ if (not isinstance(row, dict) or
+ "mac" not in row or
+ "ip" not in row or
+ "bridge" not in row):
+ raise errors.OpPrereqError("Invalid contents of the"
+ " 'nics' parameter")
+ if not isinstance(self.op.disks, list):
+ raise errors.OpPrereqError("Invalid parameter 'disks'")
+ if len(self.op.disks) != 2:
+ raise errors.OpPrereqError("Only two-disk configurations supported")
+ for row in self.op.disks:
+ if (not isinstance(row, dict) or
+ "size" not in row or
+ not isinstance(row["size"], int) or
+ "mode" not in row or
+ row["mode"] not in ['r', 'w']):
+ raise errors.OpPrereqError("Invalid contents of the"
+ " 'disks' parameter")
+ elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
+ if not hasattr(self.op, "name"):
+ raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
+ fname = self.cfg.ExpandInstanceName(self.op.name)
+ if fname is None:
+ raise errors.OpPrereqError("Instance '%s' not found for relocation" %
+ self.op.name)
+ self.op.name = fname
+ else:
+ raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
+ self.op.mode)
+
+ if self.op.direction == constants.IALLOCATOR_DIR_OUT:
+ if not hasattr(self.op, "allocator") or self.op.allocator is None:
+ raise errors.OpPrereqError("Missing allocator name")
+ elif self.op.direction != constants.IALLOCATOR_DIR_IN:
+ raise errors.OpPrereqError("Wrong allocator test '%s'" %
+ self.op.direction)
+
+ def Exec(self, feedback_fn):
+ """Run the allocator test.
+
+ """
+ ial = IAllocator(self.cfg, self.sstore,
+ mode=self.op.mode,
+ name=self.op.name,
+ mem_size=self.op.mem_size,
+ disks=self.op.disks,
+ disk_template=self.op.disk_template,
+ os=self.op.os,
+ tags=self.op.tags,
+ nics=self.op.nics,
+ vcpus=self.op.vcpus,
+ )
+
+ if self.op.direction == constants.IALLOCATOR_DIR_IN:
+ result = ial.in_text
+ else:
+ ial.Run(self.op.allocator, validate=False)
+ result = ial.out_text
+ return result