4 # Copyright (C) 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the iallocator code."""
24 from ganeti import compat
25 from ganeti import constants
26 from ganeti import errors
28 from ganeti import outils
29 from ganeti import opcodes
30 from ganeti import rpc
31 from ganeti import serializer
32 from ganeti import utils
34 import ganeti.masterd.instance as gmi
# Reusable "ht" validators for iallocator request parameters and results.
#
# NOTE(review): the reviewed extract had lost indentation and several lines
# of this section (the "_NEVAC_MOVED"/"_NEVAC_FAILED" assignment lines, the
# closing brackets and one item of each TItems list). They are reconstructed
# from how the names are used in _NEVAC_RESULT -- confirm against upstream.

#: List of strings
_STRING_LIST = ht.TListOf(ht.TString)

#: List of lists of opcode dictionaries; only the three opcodes named below
#: are accepted as "OP_ID"
_JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
  # pylint: disable=E1101
  # Class '...' has no 'OP_ID' member
  "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                       opcodes.OpInstanceMigrate.OP_ID,
                       opcodes.OpInstanceReplaceDisks.OP_ID]),
  })))

#: List of 3-item entries; the last item is a list of non-empty strings
#: (presumably instance name, group name, node names -- TODO confirm)
_NEVAC_MOVED = \
  ht.TListOf(ht.TAnd(ht.TIsLength(3),
                     ht.TItems([ht.TNonEmptyString,
                                ht.TNonEmptyString,
                                ht.TListOf(ht.TNonEmptyString),
                                ])))

#: List of 2-item entries (presumably instance name and an optional failure
#: reason -- TODO confirm the second item's validator)
_NEVAC_FAILED = \
  ht.TListOf(ht.TAnd(ht.TIsLength(2),
                     ht.TItems([ht.TNonEmptyString,
                                ht.TMaybeString,
                                ])))

#: Result shared by the node-evacuation and group-change requests: a 3-item
#: sequence of (moved, failed, jobs)
_NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                        ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

#: Request parameter definitions reused by several request classes
_INST_NAME = ("name", ht.TNonEmptyString)
_INST_UUID = ("inst_uuid", ht.TNonEmptyString)
class _AutoReqParam(outils.AutoSlots):
  """Meta class deriving the slots of a request class from REQ_PARAMS.

  """
  # NOTE(review): the decorator line was missing from the reviewed extract;
  # "mcs" as first parameter implies a classmethod -- confirm against upstream.
  @classmethod
  def _GetSlots(mcs, attrs):
    """Extract the slot names out of REQ_PARAMS.

    @type attrs: dict
    @param attrs: the attributes of the class being created; an empty
      "REQ_PARAMS" list is stored in it if the key is not present yet
    @rtype: list
    @return: the first element of every (name, validator) pair in REQ_PARAMS

    """
    declared = attrs.setdefault("REQ_PARAMS", [])
    return [name for (name, _) in declared]
class IARequestBase(outils.ValidatedSlots):
  """A generic IAllocator request object.

  Subclasses set MODE, list their parameters as (name, validator) pairs in
  REQ_PARAMS and provide a validator for the allocator result in REQ_RESULT.

  """
  # NOTE(review): the reviewed extract had dropped the "MODE"/"REQ_PARAMS"
  # defaults, the "Validate" method header and its call from the constructor,
  # and the error-code arguments of both OpPrereqError calls. They are
  # reconstructed from the surrounding code -- confirm against upstream.
  __metaclass__ = _AutoReqParam

  MODE = NotImplemented
  REQ_PARAMS = []
  REQ_RESULT = NotImplemented

  def __init__(self, **kwargs):
    """Constructor for IARequestBase.

    The constructor takes only keyword arguments and will set
    attributes on this object based on the passed arguments. As such,
    it means that you should not pass arguments which are not in the
    REQ_PARAMS attribute for this class.

    """
    outils.ValidatedSlots.__init__(self, **kwargs)

    self.Validate()

  def Validate(self):
    """Validates all parameters of the request.

    @raise errors.OpPrereqError: if a parameter listed in REQ_PARAMS is
      missing or rejected by its validator

    """
    # pylint: disable=E1101
    assert self.MODE in constants.VALID_IALLOCATOR_MODES

    for (param, validator) in self.REQ_PARAMS:
      if not hasattr(self, param):
        raise errors.OpPrereqError("Request is missing '%s' parameter" % param,
                                   errors.ECODE_INVAL)

      value = getattr(self, param)
      if not validator(value):
        raise errors.OpPrereqError(("Request parameter '%s' has invalid"
                                    " type %s/value %s") %
                                   (param, type(value), value),
                                   errors.ECODE_INVAL)

  def GetRequest(self, cfg):
    """Gets the request data dict.

    Must be implemented by subclasses.

    @param cfg: The configuration instance

    """
    raise NotImplementedError

  def ValidateResult(self, ia, result):
    """Validates the result of an request.

    The result is only checked if the allocator run was successful.

    @param ia: The IAllocator instance
    @param result: The IAllocator run result
    @raises ResultValidationError: If validation fails

    """
    if ia.success and not self.REQ_RESULT(result):
      raise errors.ResultValidationError("iallocator returned invalid result,"
                                         " expected %s, got %s" %
                                         (self.REQ_RESULT, result))
class IAReqInstanceAlloc(IARequestBase):
  """An instance allocation request.

  """
  # NOTE(review): the reviewed extract had dropped the REQ_PARAMS brackets and
  # three of its entries, the return statements of RequiredNodes and half of
  # the request dict. The restored entries ("name", "os", "vcpus", "tags",
  # "disks", "nics") follow the gaps in the original numbering -- confirm
  # against upstream.
  # pylint: disable=E1101
  MODE = constants.IALLOCATOR_MODE_ALLOC
  REQ_PARAMS = [
    _INST_NAME,
    ("memory", ht.TNonNegativeInt),
    ("spindle_use", ht.TNonNegativeInt),
    ("disks", ht.TListOf(ht.TDict)),
    ("disk_template", ht.TString),
    ("os", ht.TString),
    ("tags", _STRING_LIST),
    ("nics", ht.TListOf(ht.TDict)),
    ("vcpus", ht.TInt),
    ("hypervisor", ht.TString),
    ("node_whitelist", ht.TMaybeListOf(ht.TNonEmptyString)),
    ]
  REQ_RESULT = ht.TList

  def RequiredNodes(self):
    """Calculates the required nodes based on the disk_template.

    @rtype: int
    @return: 2 for disk templates in constants.DTS_INT_MIRROR, 1 otherwise

    """
    if self.disk_template in constants.DTS_INT_MIRROR:
      return 2
    return 1

  def GetRequest(self, cfg):
    """Requests a new instance.

    The checks for the completeness of the opcode must have already been
    done.

    @param cfg: The configuration instance (not used by this request)
    @rtype: dict
    @return: the request dictionary for the allocator input

    """
    disk_space = gmi.ComputeDiskSize(self.disk_template, self.disks)

    return {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "spindle_use": self.spindle_use,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.RequiredNodes(),
      "hypervisor": self.hypervisor,
      }

  def ValidateResult(self, ia, result):
    """Validates an single instance allocation request.

    In addition to the generic check, a successful result must contain
    exactly as many nodes as L{RequiredNodes} returns.

    @raises ResultValidationError: If validation fails

    """
    IARequestBase.ValidateResult(self, ia, result)

    required = self.RequiredNodes()
    if ia.success and len(result) != required:
      raise errors.ResultValidationError("iallocator returned invalid number"
                                         " of nodes (%s), required %s" %
                                         (len(result), required))
class IAReqMultiInstanceAlloc(IARequestBase):
  """An multi instance allocation request.

  """
  # NOTE(review): the REQ_PARAMS brackets, the "_MASUCCESS" assignment line
  # and the dict braces in GetRequest were missing from the reviewed extract;
  # "_MASUCCESS" is implied by its use in REQ_RESULT -- confirm upstream.
  # pylint: disable=E1101
  MODE = constants.IALLOCATOR_MODE_MULTI_ALLOC
  REQ_PARAMS = [
    ("instances", ht.TListOf(ht.TInstanceOf(IAReqInstanceAlloc))),
    ]
  # List of 2-item entries: a non-empty string and a list of non-empty strings
  # (presumably instance name and its node names -- TODO confirm)
  _MASUCCESS = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  # List of non-empty strings (presumably the instances that failed)
  _MAFAILED = ht.TListOf(ht.TNonEmptyString)
  REQ_RESULT = ht.TAnd(ht.TList, ht.TIsLength(2),
                       ht.TItems([_MASUCCESS, _MAFAILED]))

  def GetRequest(self, cfg):
    """Builds the request from the requests of all contained instances.

    @param cfg: The configuration instance, passed on to every sub-request
    @rtype: dict
    @return: dict with one key, "instances", holding the list of the
      per-instance request dicts

    """
    return {
      "instances": [iareq.GetRequest(cfg) for iareq in self.instances],
      }
class IAReqRelocate(IARequestBase):
  """A relocation request.

  """
  # NOTE(review): the reviewed extract had dropped several lines of this class
  # (the "_INST_UUID" entry, the "instance is None" test, the error codes, the
  # "required_nodes" entry, the tail of the group error message and the
  # try/except skeleton of _NodesToGroups). They are reconstructed from the
  # surrounding code and comments -- confirm against upstream.
  # pylint: disable=E1101
  MODE = constants.IALLOCATOR_MODE_RELOC
  REQ_PARAMS = [
    _INST_UUID,
    ("relocate_from_node_uuids", _STRING_LIST),
    ]
  REQ_RESULT = ht.TList

  def GetRequest(self, cfg):
    """Request an relocation of an instance

    The checks for the completeness of the opcode must have already been
    done.

    @param cfg: The configuration instance
    @rtype: dict
    @return: the request dictionary for the allocator input
    @raise errors.ProgrammerError: if the instance is not in the configuration
    @raise errors.OpPrereqError: if the instance is not mirrored or, for
      internally mirrored templates, has not exactly one secondary node

    """
    instance = cfg.GetInstanceInfo(self.inst_uuid)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.inst_uuid)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if (instance.disk_template in constants.DTS_INT_MIRROR and
        len(instance.secondary_nodes) != 1):
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = gmi.ComputeDiskSize(instance.disk_template, disk_sizes)

    return {
      "name": instance.name,
      "disk_space_total": disk_space,
      "required_nodes": 1,
      "relocate_from": cfg.GetNodeNames(self.relocate_from_node_uuids),
      }

  def ValidateResult(self, ia, result):
    """Validates the result of an relocation request.

    On top of the generic check, the groups of the returned nodes (plus the
    instance's primary node) must be a subset of the groups of the nodes the
    instance is relocated from (plus the primary node).

    @param ia: The IAllocator instance
    @param result: The IAllocator run result
    @raises ResultValidationError: If validation fails

    """
    IARequestBase.ValidateResult(self, ia, result)

    if not ia.success:
      # Nothing is validated for a failed run; returning here also avoids
      # concatenating a result that is not guaranteed to be a list
      return

    node2group = dict((name, ndata["group"])
                      for (name, ndata) in ia.in_data["nodes"].items())

    fn = compat.partial(self._NodesToGroups, node2group,
                        ia.in_data["nodegroups"])

    instance = ia.cfg.GetInstanceInfo(self.inst_uuid)
    primary_names = ia.cfg.GetNodeNames([instance.primary_node])
    request_groups = fn(ia.cfg.GetNodeNames(self.relocate_from_node_uuids) +
                        primary_names)
    result_groups = fn(result + primary_names)

    if not set(result_groups).issubset(request_groups):
      raise errors.ResultValidationError("Groups of nodes returned by"
                                         " iallocator (%s) differ from original"
                                         " groups (%s)" %
                                         (utils.CommaJoin(result_groups),
                                          utils.CommaJoin(request_groups)))

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names
    @rtype: list
    @return: sorted list of group names; a group UUID is used in place of
      the name for groups missing from C{groups}

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue

      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find group, let's use UUID
        group_name = group_uuid
      else:
        group_name = group["name"]

      result.add(group_name)

    return sorted(result)
class IAReqNodeEvac(IARequestBase):
  """A node evacuation request.

  """
  # NOTE(review): only brackets/braces were missing from the reviewed extract.
  # pylint: disable=E1101
  MODE = constants.IALLOCATOR_MODE_NODE_EVAC
  REQ_PARAMS = [
    ("instances", _STRING_LIST),
    ("evac_mode", ht.TEvacMode),
    ]
  REQ_RESULT = _NEVAC_RESULT

  def GetRequest(self, cfg):
    """Get data for node-evacuate requests.

    @param cfg: The configuration instance (not used by this request)
    @rtype: dict
    @return: dict with the "instances" and "evac_mode" request parameters

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }
class IAReqGroupChange(IARequestBase):
  """A group change request.

  """
  # NOTE(review): only brackets/braces were missing from the reviewed extract.
  # pylint: disable=E1101
  MODE = constants.IALLOCATOR_MODE_CHG_GROUP
  REQ_PARAMS = [
    ("instances", _STRING_LIST),
    ("target_groups", _STRING_LIST),
    ]
  REQ_RESULT = _NEVAC_RESULT

  def GetRequest(self, cfg):
    """Get data for group change requests.

    @param cfg: The configuration instance (not used by this request)
    @rtype: dict
    @return: dict with the "instances" and "target_groups" request parameters

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg, rpc and req: the configuration, the RPC runner and the request
      object this instance was built for
    - the input buffers in_data and in_text: the input to the external
      script as data structure and as serialized text
    - the output buffers out_text and out_data: the output of the script
      as text and as parsed data structure
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # NOTE(review): the reviewed extract had lost indentation and a number of
  # lines of this class (decorators, try/else/return lines, dict braces, a few
  # dict entries and continuation lines). They are reconstructed from the
  # surrounding code -- confirm against upstream before merging.
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, req):
    """Stores the parameters and builds the allocator input.

    @param cfg: the configuration instance
    @param rpc_runner: the RPC runner used for the node queries
    @param req: the request object (an L{IARequestBase} subclass instance)

    """
    self.cfg = cfg
    self.rpc = rpc_runner
    self.req = req
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init result fields
    self.success = self.info = self.result = None

    self._BuildInputData(req)

  def _ComputeClusterDataNodeInfo(self, node_list, cluster_info,
                                  hypervisor_name):
    """Prepare and execute node info call.

    @type node_list: list of strings
    @param node_list: list of nodes' UUIDs
    @type cluster_info: L{objects.Cluster}
    @param cluster_info: the cluster's information from the config
    @type hypervisor_name: string
    @param hypervisor_name: the hypervisor name
    @rtype: same as the result of the node info RPC call
    @return: the result of the node info RPC call

    """
    storage_units_raw = utils.storage.GetStorageUnitsOfCluster(
      self.cfg, include_spindles=True)
    storage_units = rpc.PrepareStorageUnitsForNodes(self.cfg, storage_units_raw,
                                                    node_list)
    hvspecs = [(hypervisor_name, cluster_info.hvparams[hypervisor_name])]
    return self.rpc.call_node_info(node_list, storage_units, hvspecs)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation. The
    result is stored in C{self.in_data}.

    """
    cluster_info = self.cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": self.cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      "ipolicy": cluster_info.ipolicy,
      }
    ninfo = self.cfg.GetAllNodesInfo()
    iinfo = self.cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data; only vm_capable nodes are queried
    node_list = [n.uuid for n in ninfo.values() if n.vm_capable]

    # The hypervisor to query depends on the request type; only allocation
    # requests carry a node whitelist
    if isinstance(self.req, IAReqInstanceAlloc):
      hypervisor_name = self.req.hypervisor
      node_whitelist = self.req.node_whitelist
    elif isinstance(self.req, IAReqRelocate):
      hypervisor_name = self.cfg.GetInstanceInfo(self.req.inst_uuid).hypervisor
      node_whitelist = None
    else:
      hypervisor_name = cluster_info.primary_hypervisor
      node_whitelist = None

    has_lvm = utils.storage.IsLvmEnabled(cluster_info.enabled_disk_templates)
    node_data = self._ComputeClusterDataNodeInfo(node_list, cluster_info,
                                                 hypervisor_name)

    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors,
                                       cluster_info.hvparams)

    data["nodegroups"] = self._ComputeNodeGroupData(self.cfg)

    config_ndata = self._ComputeBasicNodeData(self.cfg, ninfo, node_whitelist)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata, has_lvm)
    assert len(data["nodes"]) == len(ninfo), \
      "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(self.cfg, cluster_info,
                                                  i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    @param cfg: the configuration instance
    @rtype: dict
    @return: dict mapping group UUID to a dict describing the group

    """
    cluster = cfg.GetClusterInfo()
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      "networks": [net_uuid for net_uuid, _ in gdata.networks.items()],
      "ipolicy": gmi.CalculateGroupIPolicy(cluster, gdata),
      "tags": list(gdata.GetTags()),
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(cfg, node_cfg, node_whitelist):
    """Compute global node data.

    @param cfg: the configuration instance
    @param node_cfg: dict of node objects, as iterated via C{values()}
    @param node_whitelist: None, or a container of node names; nodes not in
      it are reported as offline
    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": (ninfo.offline or
                  not (node_whitelist is None or
                       ninfo.name in node_whitelist)),
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      "ndparams": cfg.GetNdParams(ninfo),
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _GetAttributeFromHypervisorNodeData(hv_info, node_name, attr):
    """Extract an attribute from the hypervisor's node information.

    This is a helper function to extract data from the hypervisor's information
    about the node, as part of the result of a node_info query.

    @type hv_info: dict of strings
    @param hv_info: dictionary of node information from the hypervisor
    @type node_name: string
    @param node_name: name of the node
    @type attr: string
    @param attr: key of the attribute in the hv_info dictionary
    @rtype: integer
    @return: the value of the attribute
    @raises errors.OpExecError: if key not in dictionary or value not
      integer

    """
    if attr not in hv_info:
      raise errors.OpExecError("Node '%s' didn't return attribute"
                               " '%s'" % (node_name, attr))
    value = hv_info[attr]
    if not isinstance(value, int):
      raise errors.OpExecError("Node '%s' returned invalid value"
                               " for '%s': %s" %
                               (node_name, attr, value))
    return value

  @staticmethod
  def _ComputeStorageDataFromSpaceInfo(space_info, node_name, has_lvm):
    """Extract storage data from node info.

    @type space_info: see result of the RPC call node info
    @param space_info: the storage reporting part of the result of the RPC call
      node info
    @type node_name: string
    @param node_name: the node's name
    @type has_lvm: boolean
    @param has_lvm: whether or not LVM storage information is requested
    @rtype: 4-tuple of integers
    @return: tuple of storage info (total_disk, free_disk, total_spindles,
       free_spindles)
    @raises errors.OpExecError: if LVM data was requested but is missing

    """
    # TODO: replace this with proper storage reporting
    if has_lvm:
      lvm_vg_info = utils.storage.LookupSpaceInfoByStorageType(
        space_info, constants.ST_LVM_VG)
      if not lvm_vg_info:
        raise errors.OpExecError("Node '%s' didn't return LVM vg space info."
                                 % (node_name))
      total_disk = lvm_vg_info["storage_size"]
      free_disk = lvm_vg_info["storage_free"]
      lvm_pv_info = utils.storage.LookupSpaceInfoByStorageType(
        space_info, constants.ST_LVM_PV)
      if not lvm_pv_info:
        raise errors.OpExecError("Node '%s' didn't return LVM pv space info."
                                 % (node_name))
      total_spindles = lvm_pv_info["storage_size"]
      free_spindles = lvm_pv_info["storage_free"]
    else:
      # we didn't even ask the node for VG status, so use zeros
      total_disk = free_disk = 0
      total_spindles = free_spindles = 0
    return (total_disk, free_disk, total_spindles, free_spindles)

  @staticmethod
  def _ComputeInstanceMemory(instance_list, node_instances_info, node_uuid,
                             input_mem_free):
    """Compute memory used by primary instances.

    @param instance_list: list of (instance, filled backend parameters) pairs
    @param node_instances_info: per-node results of the instance info query,
      indexed by node UUID
    @param node_uuid: UUID of the node to compute the values for
    @param input_mem_free: free memory as reported by the node
    @rtype: tuple (int, int, int)
    @returns: A tuple of three integers: 1. the sum of memory used by primary
       instances on the node (including the ones that are currently down), 2.
       the sum of memory used by primary instances of the node that are up, 3.
       the amount of memory that is free on the node considering the current
       usage of the instances.

    """
    i_p_mem = i_p_up_mem = 0
    mem_free = input_mem_free
    for iinfo, beinfo in instance_list:
      if iinfo.primary_node != node_uuid:
        continue

      max_mem = beinfo[constants.BE_MAXMEM]
      i_p_mem += max_mem

      running = node_instances_info[node_uuid].payload
      if iinfo.name not in running:
        # instance not reported by the node: none of its memory is in use
        i_used_mem = 0
      else:
        i_used_mem = int(running[iinfo.name]["memory"])

      # Memory the instance may still claim is not really free
      mem_free -= max(0, max_mem - i_used_mem)

      if iinfo.admin_state == constants.ADMINST_UP:
        i_p_up_mem += max_mem
    return (i_p_mem, i_p_up_mem, mem_free)

  def _ComputeDynamicNodeData(self, node_cfg, node_data, node_iinfo, i_list,
                              node_results, has_lvm):
    """Compute global node data.

    Nodes that are offline or drained keep only their basic data.

    @param node_cfg: dict of node objects, indexed by node UUID
    @param node_data: results of the node info RPC call, by node UUID
    @param node_iinfo: results of the instance info RPC call, by node UUID
    @param i_list: list of (instance, filled backend parameters) pairs
    @param node_results: the basic node structures as filled from the config
    @param has_lvm: whether LVM storage information was requested
    @rtype: dict
    @return: a copy of C{node_results} with the dynamic data merged in

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nuuid, nresult in node_data.items():
      ninfo = node_cfg[nuuid]
      assert ninfo.name in node_results, "Missing basic data for node %s" % \
                                         ninfo.name

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % ninfo.name)
        node_iinfo[nuuid].Raise("Can't get node instance info from node %s" %
                                ninfo.name)
        (_, space_info, (hv_info, )) = nresult.payload

        mem_free = self._GetAttributeFromHypervisorNodeData(hv_info, ninfo.name,
                                                            "memory_free")

        (i_p_mem, i_p_up_mem, mem_free) = self._ComputeInstanceMemory(
          i_list, node_iinfo, nuuid, mem_free)
        (total_disk, free_disk, total_spindles, free_spindles) = \
          self._ComputeStorageDataFromSpaceInfo(space_info, ninfo.name,
                                                has_lvm)

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": self._GetAttributeFromHypervisorNodeData(
            hv_info, ninfo.name, "memory_total"),
          "reserved_memory": self._GetAttributeFromHypervisorNodeData(
            hv_info, ninfo.name, "memory_dom0"),
          "free_memory": mem_free,
          "total_disk": total_disk,
          "free_disk": free_disk,
          "total_spindles": total_spindles,
          "free_spindles": free_spindles,
          "total_cpus": self._GetAttributeFromHypervisorNodeData(
            hv_info, ninfo.name, "cpu_total"),
          "reserved_cpus": self._GetAttributeFromHypervisorNodeData(
            hv_info, ninfo.name, "cpu_dom0"),
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        # basic (config) values take precedence over the dynamic ones
        pnr_dyn.update(node_results[ninfo.name])
        node_results[ninfo.name] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cfg, cluster_info, i_list):
    """Compute global instance data.

    @param cfg: the configuration instance, used to resolve node names
    @param cluster_info: the cluster object, used to fill the NIC parameters
    @param i_list: list of (instance, filled backend parameters) pairs
    @rtype: dict
    @return: dict mapping instance name to a dict describing the instance

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "spindle_use": beinfo[constants.BE_SPINDLE_USE],
        "os": iinfo.os,
        "nodes": [cfg.GetNodeName(iinfo.primary_node)] +
                 cfg.GetNodeNames(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode,
                   constants.IDISK_SPINDLES: dsk.spindles}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "disks_active": iinfo.disks_active,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = gmi.ComputeDiskSize(iinfo.disk_template,
                                                    pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _BuildInputData(self, req):
    """Build input data structures.

    Fills C{self.in_data} with the cluster data plus the request and
    C{self.in_text} with its serialized form.

    @param req: the request object

    """
    self._ComputeClusterData()

    request = req.GetRequest(self.cfg)
    request["type"] = req.MODE
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    @param name: the name of the allocator, passed on to the runner
    @param validate: whether to parse and validate the script output
    @param call_fn: function used to run the allocator; defaults to the
      C{call_iallocator_runner} RPC

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    @raises errors.OpExecError: if the output can't be parsed, is not a
      dictionary or lacks one of the required keys

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception as err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    self.req.ValidateResult(self, self.result)
    self.out_data = rdict