4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import ssconf
51 from ganeti import uidpool
52 from ganeti import compat
53 from ganeti import masterd
54 from ganeti import netutils
55 from ganeti import query
56 from ganeti import qlang
57 from ganeti import opcodes
59 from ganeti import rpc
60 from ganeti import runtime
61 from ganeti import pathutils
62 from ganeti import vcluster
63 from ganeti import network
64 from ganeti.masterd import iallocator
66 from ganeti.cmdlib.base import ResultWithJobs, LogicalUnit, NoHooksLU, \
68 from ganeti.cmdlib.common import _ExpandInstanceName, _ExpandItemName, \
69 _ExpandNodeName, _ShareAll
70 from ganeti.cmdlib.tags import LUTagsGet, LUTagsSearch, LUTagsSet, LUTagsDel
72 import ganeti.masterd.instance # pylint: disable=W0611
76 INSTANCE_DOWN = [constants.ADMINST_DOWN]
77 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
78 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
80 #: Instance status in which an instance can be marked as offline/online
81 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
82 constants.ADMINST_OFFLINE,
86 def _AnnotateDiskParams(instance, devs, cfg):
87 """Little helper wrapper to the rpc annotation method.
89 @param instance: The instance object
90 @type devs: List of L{objects.Disk}
91 @param devs: The root devices (not any of its children!)
92 @param cfg: The config object
93 @returns The annotated disk copies
94 @see L{rpc.AnnotateDiskParams}
97 return rpc.AnnotateDiskParams(instance.disk_template, devs,
98 cfg.GetInstanceDiskParams(instance))
101 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
103 """Checks if node groups for locked instances are still correct.
105 @type cfg: L{config.ConfigWriter}
106 @param cfg: Cluster configuration
107 @type instances: dict; string as key, L{objects.Instance} as value
108 @param instances: Dictionary, instance name as key, instance object as value
109 @type owned_groups: iterable of string
110 @param owned_groups: List of owned groups
111 @type owned_nodes: iterable of string
112 @param owned_nodes: List of owned nodes
113 @type cur_group_uuid: string or None
114 @param cur_group_uuid: Optional group UUID to check against instance's groups
117 for (name, inst) in instances.items():
118 assert owned_nodes.issuperset(inst.all_nodes), \
119 "Instance %s's nodes changed while we kept the lock" % name
121 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
123 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
124 "Instance %s has no node in group %s" % (name, cur_group_uuid)
127 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
129 """Checks if the owned node groups are still correct for an instance.
131 @type cfg: L{config.ConfigWriter}
132 @param cfg: The cluster configuration
133 @type instance_name: string
134 @param instance_name: Instance name
135 @type owned_groups: set or frozenset
136 @param owned_groups: List of currently owned node groups
137 @type primary_only: boolean
138 @param primary_only: Whether to check node groups for only the primary node
141 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
143 if not owned_groups.issuperset(inst_groups):
144 raise errors.OpPrereqError("Instance %s's node groups changed since"
145 " locks were acquired, current groups are"
146 " are '%s', owning groups '%s'; retry the"
149 utils.CommaJoin(inst_groups),
150 utils.CommaJoin(owned_groups)),
156 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
157 """Checks if the instances in a node group are still correct.
159 @type cfg: L{config.ConfigWriter}
160 @param cfg: The cluster configuration
161 @type group_uuid: string
162 @param group_uuid: Node group UUID
163 @type owned_instances: set or frozenset
164 @param owned_instances: List of currently owned instances
167 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
168 if owned_instances != wanted_instances:
169 raise errors.OpPrereqError("Instances in node group '%s' changed since"
170 " locks were acquired, wanted '%s', have '%s';"
171 " retry the operation" %
173 utils.CommaJoin(wanted_instances),
174 utils.CommaJoin(owned_instances)),
177 return wanted_instances
180 def _SupportsOob(cfg, node):
181 """Tells if node supports OOB.
183 @type cfg: L{config.ConfigWriter}
184 @param cfg: The cluster configuration
185 @type node: L{objects.Node}
186 @param node: The node
187 @return: The OOB script if supported or an empty string otherwise
190 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
193 def _IsExclusiveStorageEnabledNode(cfg, node):
194 """Whether exclusive_storage is in effect for the given node.
196 @type cfg: L{config.ConfigWriter}
197 @param cfg: The cluster configuration
198 @type node: L{objects.Node}
199 @param node: The node
201 @return: The effective value of exclusive_storage
204 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
207 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
208 """Whether exclusive_storage is in effect for the given node.
210 @type cfg: L{config.ConfigWriter}
211 @param cfg: The cluster configuration
212 @type nodename: string
213 @param nodename: The node
215 @return: The effective value of exclusive_storage
216 @raise errors.OpPrereqError: if no node exists with the given name
219 ni = cfg.GetNodeInfo(nodename)
221 raise errors.OpPrereqError("Invalid node name %s" % nodename,
223 return _IsExclusiveStorageEnabledNode(cfg, ni)
226 def _CopyLockList(names):
227 """Makes a copy of a list of lock names.
229 Handles L{locking.ALL_SET} correctly.
232 if names == locking.ALL_SET:
233 return locking.ALL_SET
238 def _GetWantedNodes(lu, nodes):
239 """Returns list of checked and expanded node names.
241 @type lu: L{LogicalUnit}
242 @param lu: the logical unit on whose behalf we execute
244 @param nodes: list of node names or None for all nodes
246 @return: the list of nodes, sorted
247 @raise errors.ProgrammerError: if the nodes parameter is wrong type
251 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
253 return utils.NiceSort(lu.cfg.GetNodeList())
256 def _GetWantedInstances(lu, instances):
257 """Returns list of checked and expanded instance names.
259 @type lu: L{LogicalUnit}
260 @param lu: the logical unit on whose behalf we execute
261 @type instances: list
262 @param instances: list of instance names or None for all instances
264 @return: the list of instances, sorted
265 @raise errors.OpPrereqError: if the instances parameter is wrong type
266 @raise errors.OpPrereqError: if any of the passed instances is not found
270 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
272 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
276 def _GetUpdatedParams(old_params, update_dict,
277 use_default=True, use_none=False):
278 """Return the new version of a parameter dictionary.
280 @type old_params: dict
281 @param old_params: old parameters
282 @type update_dict: dict
283 @param update_dict: dict containing new parameter values, or
284 constants.VALUE_DEFAULT to reset the parameter to its default
286 @param use_default: boolean
287 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
288 values as 'to be deleted' values
289 @param use_none: boolean
290 @type use_none: whether to recognise C{None} values as 'to be
293 @return: the new parameter dictionary
296 params_copy = copy.deepcopy(old_params)
297 for key, val in update_dict.iteritems():
298 if ((use_default and val == constants.VALUE_DEFAULT) or
299 (use_none and val is None)):
305 params_copy[key] = val
309 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
310 """Return the new version of an instance policy.
312 @param group_policy: whether this policy applies to a group and thus
313 we should support removal of policy entries
316 ipolicy = copy.deepcopy(old_ipolicy)
317 for key, value in new_ipolicy.items():
318 if key not in constants.IPOLICY_ALL_KEYS:
319 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
321 if (not value or value == [constants.VALUE_DEFAULT] or
322 value == constants.VALUE_DEFAULT):
327 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
328 " on the cluster'" % key,
331 if key in constants.IPOLICY_PARAMETERS:
332 # FIXME: we assume all such values are float
334 ipolicy[key] = float(value)
335 except (TypeError, ValueError), err:
336 raise errors.OpPrereqError("Invalid value for attribute"
337 " '%s': '%s', error: %s" %
338 (key, value, err), errors.ECODE_INVAL)
339 elif key == constants.ISPECS_MINMAX:
341 for k in minmax.keys():
342 utils.ForceDictType(minmax[k], constants.ISPECS_PARAMETER_TYPES)
344 elif key == constants.ISPECS_STD:
346 msg = "%s cannot appear in group instance specs" % key
347 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
348 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
349 use_none=False, use_default=False)
350 utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
352 # FIXME: we assume all others are lists; this should be redone
354 ipolicy[key] = list(value)
356 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
357 except errors.ConfigurationError, err:
358 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
363 def _UpdateAndVerifySubDict(base, updates, type_check):
364 """Updates and verifies a dict with sub dicts of the same type.
366 @param base: The dict with the old data
367 @param updates: The dict with the new data
368 @param type_check: Dict suitable to ForceDictType to verify correct types
369 @returns: A new dict with updated and verified values
373 new = _GetUpdatedParams(old, value)
374 utils.ForceDictType(new, type_check)
377 ret = copy.deepcopy(base)
378 ret.update(dict((key, fn(base.get(key, {}), value))
379 for key, value in updates.items()))
383 def _MergeAndVerifyHvState(op_input, obj_input):
384 """Combines the hv state from an opcode with the one of the object
386 @param op_input: The input dict from the opcode
387 @param obj_input: The input dict from the objects
388 @return: The verified and updated dict
392 invalid_hvs = set(op_input) - constants.HYPER_TYPES
394 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
395 " %s" % utils.CommaJoin(invalid_hvs),
397 if obj_input is None:
399 type_check = constants.HVSTS_PARAMETER_TYPES
400 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
405 def _MergeAndVerifyDiskState(op_input, obj_input):
406 """Combines the disk state from an opcode with the one of the object
408 @param op_input: The input dict from the opcode
409 @param obj_input: The input dict from the objects
410 @return: The verified and updated dict
413 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
415 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
416 utils.CommaJoin(invalid_dst),
418 type_check = constants.DSS_PARAMETER_TYPES
419 if obj_input is None:
421 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
423 for key, value in op_input.items())
428 def _ReleaseLocks(lu, level, names=None, keep=None):
429 """Releases locks owned by an LU.
431 @type lu: L{LogicalUnit}
432 @param level: Lock level
433 @type names: list or None
434 @param names: Names of locks to release
435 @type keep: list or None
436 @param keep: Names of locks to retain
439 assert not (keep is not None and names is not None), \
440 "Only one of the 'names' and the 'keep' parameters can be given"
442 if names is not None:
443 should_release = names.__contains__
445 should_release = lambda name: name not in keep
447 should_release = None
449 owned = lu.owned_locks(level)
451 # Not owning any lock at this level, do nothing
458 # Determine which locks to release
460 if should_release(name):
465 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
467 # Release just some locks
468 lu.glm.release(level, names=release)
470 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
473 lu.glm.release(level)
475 assert not lu.glm.is_owned(level), "No locks should be owned"
478 def _MapInstanceDisksToNodes(instances):
479 """Creates a map from (node, volume) to instance name.
481 @type instances: list of L{objects.Instance}
482 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
485 return dict(((node, vol), inst.name)
486 for inst in instances
487 for (node, vols) in inst.MapLVsByNode().items()
491 def _RunPostHook(lu, node_name):
492 """Runs the post-hook for an opcode on a single node.
495 hm = lu.proc.BuildHooksManager(lu)
497 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
498 except Exception, err: # pylint: disable=W0703
499 lu.LogWarning("Errors occurred running hooks on %s: %s",
503 def _CheckOutputFields(static, dynamic, selected):
504 """Checks whether all selected fields are valid.
506 @type static: L{utils.FieldSet}
507 @param static: static fields set
508 @type dynamic: L{utils.FieldSet}
509 @param dynamic: dynamic fields set
516 delta = f.NonMatching(selected)
518 raise errors.OpPrereqError("Unknown output fields selected: %s"
519 % ",".join(delta), errors.ECODE_INVAL)
522 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
523 """Make sure that none of the given paramters is global.
525 If a global parameter is found, an L{errors.OpPrereqError} exception is
526 raised. This is used to avoid setting global parameters for individual nodes.
528 @type params: dictionary
529 @param params: Parameters to check
530 @type glob_pars: dictionary
531 @param glob_pars: Forbidden parameters
533 @param kind: Kind of parameters (e.g. "node")
534 @type bad_levels: string
535 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
537 @type good_levels: strings
538 @param good_levels: Level(s) at which the parameters are allowed (e.g.
542 used_globals = glob_pars.intersection(params)
544 msg = ("The following %s parameters are global and cannot"
545 " be customized at %s level, please modify them at"
547 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
548 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
551 def _CheckNodeOnline(lu, node, msg=None):
552 """Ensure that a given node is online.
554 @param lu: the LU on behalf of which we make the check
555 @param node: the node to check
556 @param msg: if passed, should be a message to replace the default one
557 @raise errors.OpPrereqError: if the node is offline
561 msg = "Can't use offline node"
562 if lu.cfg.GetNodeInfo(node).offline:
563 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
566 def _CheckNodeNotDrained(lu, node):
567 """Ensure that a given node is not drained.
569 @param lu: the LU on behalf of which we make the check
570 @param node: the node to check
571 @raise errors.OpPrereqError: if the node is drained
574 if lu.cfg.GetNodeInfo(node).drained:
575 raise errors.OpPrereqError("Can't use drained node %s" % node,
579 def _CheckNodeVmCapable(lu, node):
580 """Ensure that a given node is vm capable.
582 @param lu: the LU on behalf of which we make the check
583 @param node: the node to check
584 @raise errors.OpPrereqError: if the node is not vm capable
587 if not lu.cfg.GetNodeInfo(node).vm_capable:
588 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
592 def _CheckNodeHasOS(lu, node, os_name, force_variant):
593 """Ensure that a node supports a given OS.
595 @param lu: the LU on behalf of which we make the check
596 @param node: the node to check
597 @param os_name: the OS to query about
598 @param force_variant: whether to ignore variant errors
599 @raise errors.OpPrereqError: if the node is not supporting the OS
602 result = lu.rpc.call_os_get(node, os_name)
603 result.Raise("OS '%s' not in supported OS list for node %s" %
605 prereq=True, ecode=errors.ECODE_INVAL)
606 if not force_variant:
607 _CheckOSVariant(result.payload, os_name)
610 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
611 """Ensure that a node has the given secondary ip.
613 @type lu: L{LogicalUnit}
614 @param lu: the LU on behalf of which we make the check
616 @param node: the node to check
617 @type secondary_ip: string
618 @param secondary_ip: the ip to check
619 @type prereq: boolean
620 @param prereq: whether to throw a prerequisite or an execute error
621 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
622 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
625 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
626 result.Raise("Failure checking secondary ip on node %s" % node,
627 prereq=prereq, ecode=errors.ECODE_ENVIRON)
628 if not result.payload:
629 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
630 " please fix and re-run this command" % secondary_ip)
632 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
634 raise errors.OpExecError(msg)
637 def _CheckNodePVs(nresult, exclusive_storage):
641 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
642 if pvlist_dict is None:
643 return (["Can't get PV list from node"], None)
644 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
646 # check that ':' is not present in PV names, since it's a
647 # special character for lvcreate (denotes the range of PEs to
651 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
652 (pv.name, pv.vg_name))
654 if exclusive_storage:
655 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
656 errlist.extend(errmsgs)
657 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
659 for (pvname, lvlist) in shared_pvs:
660 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
661 errlist.append("PV %s is shared among unrelated LVs (%s)" %
662 (pvname, utils.CommaJoin(lvlist)))
663 return (errlist, es_pvinfo)
666 def _GetClusterDomainSecret():
667 """Reads the cluster domain secret.
670 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
674 def _CheckInstanceState(lu, instance, req_states, msg=None):
675 """Ensure that an instance is in one of the required states.
677 @param lu: the LU on behalf of which we make the check
678 @param instance: the instance to check
679 @param msg: if passed, should be a message to replace the default one
680 @raise errors.OpPrereqError: if the instance is not in the required state
684 msg = ("can't use instance from outside %s states" %
685 utils.CommaJoin(req_states))
686 if instance.admin_state not in req_states:
687 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
688 (instance.name, instance.admin_state, msg),
691 if constants.ADMINST_UP not in req_states:
692 pnode = instance.primary_node
693 if not lu.cfg.GetNodeInfo(pnode).offline:
694 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
695 ins_l.Raise("Can't contact node %s for instance information" % pnode,
696 prereq=True, ecode=errors.ECODE_ENVIRON)
697 if instance.name in ins_l.payload:
698 raise errors.OpPrereqError("Instance %s is running, %s" %
699 (instance.name, msg), errors.ECODE_STATE)
701 lu.LogWarning("Primary node offline, ignoring check that instance"
705 def _ComputeMinMaxSpec(name, qualifier, ispecs, value):
706 """Computes if value is in the desired range.
708 @param name: name of the parameter for which we perform the check
709 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
711 @param ispecs: dictionary containing min and max values
712 @param value: actual value that we want to use
713 @return: None or an error string
716 if value in [None, constants.VALUE_AUTO]:
718 max_v = ispecs[constants.ISPECS_MAX].get(name, value)
719 min_v = ispecs[constants.ISPECS_MIN].get(name, value)
720 if value > max_v or min_v > value:
722 fqn = "%s/%s" % (name, qualifier)
725 return ("%s value %s is not in range [%s, %s]" %
726 (fqn, value, min_v, max_v))
730 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
731 nic_count, disk_sizes, spindle_use,
733 _compute_fn=_ComputeMinMaxSpec):
734 """Verifies ipolicy against provided specs.
737 @param ipolicy: The ipolicy
739 @param mem_size: The memory size
741 @param cpu_count: Used cpu cores
742 @type disk_count: int
743 @param disk_count: Number of disks used
745 @param nic_count: Number of nics used
746 @type disk_sizes: list of ints
747 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
748 @type spindle_use: int
749 @param spindle_use: The number of spindles this instance uses
750 @type disk_template: string
751 @param disk_template: The disk template of the instance
752 @param _compute_fn: The compute function (unittest only)
753 @return: A list of violations, or an empty list of no violations are found
756 assert disk_count == len(disk_sizes)
759 (constants.ISPEC_MEM_SIZE, "", mem_size),
760 (constants.ISPEC_CPU_COUNT, "", cpu_count),
761 (constants.ISPEC_NIC_COUNT, "", nic_count),
762 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
763 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
764 for idx, d in enumerate(disk_sizes)]
765 if disk_template != constants.DT_DISKLESS:
766 # This check doesn't make sense for diskless instances
767 test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))
769 allowed_dts = ipolicy[constants.IPOLICY_DTS]
770 if disk_template not in allowed_dts:
771 ret.append("Disk template %s is not allowed (allowed templates: %s)" %
772 (disk_template, utils.CommaJoin(allowed_dts)))
775 for minmax in ipolicy[constants.ISPECS_MINMAX]:
777 (_compute_fn(name, qualifier, minmax, value)
778 for (name, qualifier, value) in test_settings))
779 if min_errs is None or len(errs) < len(min_errs):
781 assert min_errs is not None
782 return ret + min_errs
785 def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
786 _compute_fn=_ComputeIPolicySpecViolation):
787 """Compute if instance meets the specs of ipolicy.
790 @param ipolicy: The ipolicy to verify against
791 @type instance: L{objects.Instance}
792 @param instance: The instance to verify
793 @type cfg: L{config.ConfigWriter}
794 @param cfg: Cluster configuration
795 @param _compute_fn: The function to verify ipolicy (unittest only)
796 @see: L{_ComputeIPolicySpecViolation}
799 be_full = cfg.GetClusterInfo().FillBE(instance)
800 mem_size = be_full[constants.BE_MAXMEM]
801 cpu_count = be_full[constants.BE_VCPUS]
802 spindle_use = be_full[constants.BE_SPINDLE_USE]
803 disk_count = len(instance.disks)
804 disk_sizes = [disk.size for disk in instance.disks]
805 nic_count = len(instance.nics)
806 disk_template = instance.disk_template
808 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
809 disk_sizes, spindle_use, disk_template)
812 def _ComputeIPolicyInstanceSpecViolation(
813 ipolicy, instance_spec, disk_template,
814 _compute_fn=_ComputeIPolicySpecViolation):
815 """Compute if instance specs meets the specs of ipolicy.
818 @param ipolicy: The ipolicy to verify against
819 @param instance_spec: dict
820 @param instance_spec: The instance spec to verify
821 @type disk_template: string
822 @param disk_template: the disk template of the instance
823 @param _compute_fn: The function to verify ipolicy (unittest only)
824 @see: L{_ComputeIPolicySpecViolation}
827 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
828 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
829 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
830 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
831 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
832 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
834 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
835 disk_sizes, spindle_use, disk_template)
838 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
840 _compute_fn=_ComputeIPolicyInstanceViolation):
841 """Compute if instance meets the specs of the new target group.
843 @param ipolicy: The ipolicy to verify
844 @param instance: The instance object to verify
845 @param current_group: The current group of the instance
846 @param target_group: The new group of the instance
847 @type cfg: L{config.ConfigWriter}
848 @param cfg: Cluster configuration
849 @param _compute_fn: The function to verify ipolicy (unittest only)
850 @see: L{_ComputeIPolicySpecViolation}
853 if current_group == target_group:
856 return _compute_fn(ipolicy, instance, cfg)
859 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
860 _compute_fn=_ComputeIPolicyNodeViolation):
861 """Checks that the target node is correct in terms of instance policy.
863 @param ipolicy: The ipolicy to verify
864 @param instance: The instance object to verify
865 @param node: The new node to relocate
866 @type cfg: L{config.ConfigWriter}
867 @param cfg: Cluster configuration
868 @param ignore: Ignore violations of the ipolicy
869 @param _compute_fn: The function to verify ipolicy (unittest only)
870 @see: L{_ComputeIPolicySpecViolation}
873 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
874 res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)
877 msg = ("Instance does not meet target node group's (%s) instance"
878 " policy: %s") % (node.group, utils.CommaJoin(res))
882 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
885 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
886 """Computes a set of any instances that would violate the new ipolicy.
888 @param old_ipolicy: The current (still in-place) ipolicy
889 @param new_ipolicy: The new (to become) ipolicy
890 @param instances: List of instances to verify
891 @type cfg: L{config.ConfigWriter}
892 @param cfg: Cluster configuration
893 @return: A list of instances which violates the new ipolicy but
897 return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
898 _ComputeViolatingInstances(old_ipolicy, instances, cfg))
901 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
903 """Builds network related env variables for hooks
905 This builds the hook environment from individual variables.
908 @param name: the name of the network
910 @param subnet: the ipv4 subnet
911 @type gateway: string
912 @param gateway: the ipv4 gateway
913 @type network6: string
914 @param network6: the ipv6 subnet
915 @type gateway6: string
916 @param gateway6: the ipv6 gateway
917 @type mac_prefix: string
918 @param mac_prefix: the mac_prefix
920 @param tags: the tags of the network
925 env["NETWORK_NAME"] = name
927 env["NETWORK_SUBNET"] = subnet
929 env["NETWORK_GATEWAY"] = gateway
931 env["NETWORK_SUBNET6"] = network6
933 env["NETWORK_GATEWAY6"] = gateway6
935 env["NETWORK_MAC_PREFIX"] = mac_prefix
937 env["NETWORK_TAGS"] = " ".join(tags)
942 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
943 minmem, maxmem, vcpus, nics, disk_template, disks,
944 bep, hvp, hypervisor_name, tags):
945 """Builds instance related env variables for hooks
947 This builds the hook environment from individual variables.
950 @param name: the name of the instance
951 @type primary_node: string
952 @param primary_node: the name of the instance's primary node
953 @type secondary_nodes: list
954 @param secondary_nodes: list of secondary nodes as strings
955 @type os_type: string
956 @param os_type: the name of the instance's OS
958 @param status: the desired status of the instance
960 @param minmem: the minimum memory size of the instance
962 @param maxmem: the maximum memory size of the instance
964 @param vcpus: the count of VCPUs the instance has
966 @param nics: list of tuples (name, uuid, ip, mac, mode, link, net, netinfo)
967 representing the NICs the instance has
968 @type disk_template: string
969 @param disk_template: the disk template of the instance
971 @param disks: list of tuples (name, uuid, size, mode)
973 @param bep: the backend parameters for the instance
975 @param hvp: the hypervisor parameters for the instance
976 @type hypervisor_name: string
977 @param hypervisor_name: the hypervisor for the instance
979 @param tags: list of instance tags as strings
981 @return: the hook environment for this instance
986 "INSTANCE_NAME": name,
987 "INSTANCE_PRIMARY": primary_node,
988 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
989 "INSTANCE_OS_TYPE": os_type,
990 "INSTANCE_STATUS": status,
991 "INSTANCE_MINMEM": minmem,
992 "INSTANCE_MAXMEM": maxmem,
993 # TODO(2.9) remove deprecated "memory" value
994 "INSTANCE_MEMORY": maxmem,
995 "INSTANCE_VCPUS": vcpus,
996 "INSTANCE_DISK_TEMPLATE": disk_template,
997 "INSTANCE_HYPERVISOR": hypervisor_name,
1000 nic_count = len(nics)
1001 for idx, (name, _, ip, mac, mode, link, net, netinfo) in enumerate(nics):
1004 env["INSTANCE_NIC%d_NAME" % idx] = name
1005 env["INSTANCE_NIC%d_IP" % idx] = ip
1006 env["INSTANCE_NIC%d_MAC" % idx] = mac
1007 env["INSTANCE_NIC%d_MODE" % idx] = mode
1008 env["INSTANCE_NIC%d_LINK" % idx] = link
1010 nobj = objects.Network.FromDict(netinfo)
1011 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1013 # FIXME: broken network reference: the instance NIC specifies a
1014 # network, but the relevant network entry was not in the config. This
1015 # should be made impossible.
1016 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1017 if mode == constants.NIC_MODE_BRIDGED:
1018 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1022 env["INSTANCE_NIC_COUNT"] = nic_count
1025 disk_count = len(disks)
1026 for idx, (name, size, mode) in enumerate(disks):
1027 env["INSTANCE_DISK%d_NAME" % idx] = name
1028 env["INSTANCE_DISK%d_SIZE" % idx] = size
1029 env["INSTANCE_DISK%d_MODE" % idx] = mode
1033 env["INSTANCE_DISK_COUNT"] = disk_count
1038 env["INSTANCE_TAGS"] = " ".join(tags)
1040 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1041 for key, value in source.items():
1042 env["INSTANCE_%s_%s" % (kind, key)] = value
1047 def _NICToTuple(lu, nic):
1048 """Build a tupple of nic information.
1050 @type lu: L{LogicalUnit}
1051 @param lu: the logical unit on whose behalf we execute
1052 @type nic: L{objects.NIC}
1053 @param nic: nic to convert to hooks tuple
1056 cluster = lu.cfg.GetClusterInfo()
1057 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1058 mode = filled_params[constants.NIC_MODE]
1059 link = filled_params[constants.NIC_LINK]
1062 nobj = lu.cfg.GetNetwork(nic.network)
1063 netinfo = objects.Network.ToDict(nobj)
1064 return (nic.name, nic.uuid, nic.ip, nic.mac, mode, link, nic.network, netinfo)
1067 def _NICListToTuple(lu, nics):
1068 """Build a list of nic information tuples.
1070 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1071 value in LUInstanceQueryData.
1073 @type lu: L{LogicalUnit}
1074 @param lu: the logical unit on whose behalf we execute
1075 @type nics: list of L{objects.NIC}
1076 @param nics: list of nics to convert to hooks tuples
1081 hooks_nics.append(_NICToTuple(lu, nic))
1085 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1086 """Builds instance related env variables for hooks from an object.
1088 @type lu: L{LogicalUnit}
1089 @param lu: the logical unit on whose behalf we execute
1090 @type instance: L{objects.Instance}
1091 @param instance: the instance for which we should build the
1093 @type override: dict
1094 @param override: dictionary with key/values that will override
1097 @return: the hook environment dictionary
1100 cluster = lu.cfg.GetClusterInfo()
1101 bep = cluster.FillBE(instance)
1102 hvp = cluster.FillHV(instance)
1104 "name": instance.name,
1105 "primary_node": instance.primary_node,
1106 "secondary_nodes": instance.secondary_nodes,
1107 "os_type": instance.os,
1108 "status": instance.admin_state,
1109 "maxmem": bep[constants.BE_MAXMEM],
1110 "minmem": bep[constants.BE_MINMEM],
1111 "vcpus": bep[constants.BE_VCPUS],
1112 "nics": _NICListToTuple(lu, instance.nics),
1113 "disk_template": instance.disk_template,
1114 "disks": [(disk.name, disk.size, disk.mode)
1115 for disk in instance.disks],
1118 "hypervisor_name": instance.hypervisor,
1119 "tags": instance.tags,
1122 args.update(override)
1123 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1126 def _AdjustCandidatePool(lu, exceptions):
1127 """Adjust the candidate pool after node operations.
1130 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1132 lu.LogInfo("Promoted nodes to master candidate role: %s",
1133 utils.CommaJoin(node.name for node in mod_list))
1134 for name in mod_list:
1135 lu.context.ReaddNode(name)
1136 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1138 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1142 def _DecideSelfPromotion(lu, exceptions=None):
1143 """Decide whether I should promote myself as a master candidate.
1146 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1147 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1148 # the new node will increase mc_max with one, so:
1149 mc_should = min(mc_should + 1, cp_size)
1150 return mc_now < mc_should
1153 def _ComputeViolatingInstances(ipolicy, instances, cfg):
1154 """Computes a set of instances who violates given ipolicy.
1156 @param ipolicy: The ipolicy to verify
1157 @type instances: L{objects.Instance}
1158 @param instances: List of instances to verify
1159 @type cfg: L{config.ConfigWriter}
1160 @param cfg: Cluster configuration
1161 @return: A frozenset of instance names violating the ipolicy
1164 return frozenset([inst.name for inst in instances
1165 if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])
1168 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1169 """Check that the brigdes needed by a list of nics exist.
1172 cluster = lu.cfg.GetClusterInfo()
1173 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1174 brlist = [params[constants.NIC_LINK] for params in paramslist
1175 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1177 result = lu.rpc.call_bridges_exist(target_node, brlist)
1178 result.Raise("Error checking bridges on destination node '%s'" %
1179 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1182 def _CheckInstanceBridgesExist(lu, instance, node=None):
1183 """Check that the brigdes needed by an instance exist.
1187 node = instance.primary_node
1188 _CheckNicsBridgesExist(lu, instance.nics, node)
1191 def _CheckOSVariant(os_obj, name):
1192 """Check whether an OS name conforms to the os variants specification.
1194 @type os_obj: L{objects.OS}
1195 @param os_obj: OS object to check
1197 @param name: OS name passed by the user, to check for validity
1200 variant = objects.OS.GetVariant(name)
1201 if not os_obj.supported_variants:
1203 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1204 " passed)" % (os_obj.name, variant),
1208 raise errors.OpPrereqError("OS name must include a variant",
1211 if variant not in os_obj.supported_variants:
1212 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1215 def _GetNodeInstancesInner(cfg, fn):
1216 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1219 def _GetNodeInstances(cfg, node_name):
1220 """Returns a list of all primary and secondary instances on a node.
1224 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1227 def _GetNodePrimaryInstances(cfg, node_name):
1228 """Returns primary instances on a node.
1231 return _GetNodeInstancesInner(cfg,
1232 lambda inst: node_name == inst.primary_node)
1235 def _GetNodeSecondaryInstances(cfg, node_name):
1236 """Returns secondary instances on a node.
1239 return _GetNodeInstancesInner(cfg,
1240 lambda inst: node_name in inst.secondary_nodes)
1243 def _GetStorageTypeArgs(cfg, storage_type):
1244 """Returns the arguments for a storage type.
1247 # Special case for file storage
1248 if storage_type == constants.ST_FILE:
1249 # storage.FileStorage wants a list of storage directories
1250 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1255 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1258 for dev in instance.disks:
1259 cfg.SetDiskID(dev, node_name)
1261 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1263 result.Raise("Failed to get disk status from node %s" % node_name,
1264 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1266 for idx, bdev_status in enumerate(result.payload):
1267 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1273 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1274 """Check the sanity of iallocator and node arguments and use the
1275 cluster-wide iallocator if appropriate.
1277 Check that at most one of (iallocator, node) is specified. If none is
1278 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1279 then the LU's opcode's iallocator slot is filled with the cluster-wide
1282 @type iallocator_slot: string
1283 @param iallocator_slot: the name of the opcode iallocator slot
1284 @type node_slot: string
1285 @param node_slot: the name of the opcode target node slot
1288 node = getattr(lu.op, node_slot, None)
1289 ialloc = getattr(lu.op, iallocator_slot, None)
1293 if node is not None and ialloc is not None:
1294 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1296 elif ((node is None and ialloc is None) or
1297 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1298 default_iallocator = lu.cfg.GetDefaultIAllocator()
1299 if default_iallocator:
1300 setattr(lu.op, iallocator_slot, default_iallocator)
1302 raise errors.OpPrereqError("No iallocator or node given and no"
1303 " cluster-wide default iallocator found;"
1304 " please specify either an iallocator or a"
1305 " node, or set a cluster-wide default"
1306 " iallocator", errors.ECODE_INVAL)
1309 def _GetDefaultIAllocator(cfg, ialloc):
1310 """Decides on which iallocator to use.
1312 @type cfg: L{config.ConfigWriter}
1313 @param cfg: Cluster configuration object
1314 @type ialloc: string or None
1315 @param ialloc: Iallocator specified in opcode
1317 @return: Iallocator name
1321 # Use default iallocator
1322 ialloc = cfg.GetDefaultIAllocator()
1325 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1326 " opcode nor as a cluster-wide default",
1332 def _CheckHostnameSane(lu, name):
1333 """Ensures that a given hostname resolves to a 'sane' name.
1335 The given name is required to be a prefix of the resolved hostname,
1336 to prevent accidental mismatches.
1338 @param lu: the logical unit on behalf of which we're checking
1339 @param name: the name we should resolve and check
1340 @return: the resolved hostname object
1343 hostname = netutils.GetHostname(name=name)
1344 if hostname.name != name:
1345 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1346 if not utils.MatchNameComponent(name, [hostname.name]):
1347 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1348 " same as given hostname '%s'") %
1349 (hostname.name, name), errors.ECODE_INVAL)
1353 class LUClusterPostInit(LogicalUnit):
1354 """Logical unit for running hooks after cluster initialization.
1357 HPATH = "cluster-init"
1358 HTYPE = constants.HTYPE_CLUSTER
1360 def BuildHooksEnv(self):
1365 "OP_TARGET": self.cfg.GetClusterName(),
1368 def BuildHooksNodes(self):
1369 """Build hooks nodes.
1372 return ([], [self.cfg.GetMasterNode()])
1374 def Exec(self, feedback_fn):
1381 class LUClusterDestroy(LogicalUnit):
1382 """Logical unit for destroying the cluster.
1385 HPATH = "cluster-destroy"
1386 HTYPE = constants.HTYPE_CLUSTER
1388 def BuildHooksEnv(self):
1393 "OP_TARGET": self.cfg.GetClusterName(),
1396 def BuildHooksNodes(self):
1397 """Build hooks nodes.
1402 def CheckPrereq(self):
1403 """Check prerequisites.
1405 This checks whether the cluster is empty.
1407 Any errors are signaled by raising errors.OpPrereqError.
1410 master = self.cfg.GetMasterNode()
1412 nodelist = self.cfg.GetNodeList()
1413 if len(nodelist) != 1 or nodelist[0] != master:
1414 raise errors.OpPrereqError("There are still %d node(s) in"
1415 " this cluster." % (len(nodelist) - 1),
1417 instancelist = self.cfg.GetInstanceList()
1419 raise errors.OpPrereqError("There are still %d instance(s) in"
1420 " this cluster." % len(instancelist),
1423 def Exec(self, feedback_fn):
1424 """Destroys the cluster.
1427 master_params = self.cfg.GetMasterNetworkParameters()
1429 # Run post hooks on master node before it's removed
1430 _RunPostHook(self, master_params.name)
1432 ems = self.cfg.GetUseExternalMipScript()
1433 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1436 self.LogWarning("Error disabling the master IP address: %s",
1439 return master_params.name
1442 def _VerifyCertificate(filename):
1443 """Verifies a certificate for L{LUClusterVerifyConfig}.
1445 @type filename: string
1446 @param filename: Path to PEM file
1450 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1451 utils.ReadFile(filename))
1452 except Exception, err: # pylint: disable=W0703
1453 return (LUClusterVerifyConfig.ETYPE_ERROR,
1454 "Failed to load X509 certificate %s: %s" % (filename, err))
1457 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1458 constants.SSL_CERT_EXPIRATION_ERROR)
1461 fnamemsg = "While verifying %s: %s" % (filename, msg)
1466 return (None, fnamemsg)
1467 elif errcode == utils.CERT_WARNING:
1468 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1469 elif errcode == utils.CERT_ERROR:
1470 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1472 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1475 def _GetAllHypervisorParameters(cluster, instances):
1476 """Compute the set of all hypervisor parameters.
1478 @type cluster: L{objects.Cluster}
1479 @param cluster: the cluster object
1480 @param instances: list of L{objects.Instance}
1481 @param instances: additional instances from which to obtain parameters
1482 @rtype: list of (origin, hypervisor, parameters)
1483 @return: a list with all parameters found, indicating the hypervisor they
1484 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1489 for hv_name in cluster.enabled_hypervisors:
1490 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1492 for os_name, os_hvp in cluster.os_hvp.items():
1493 for hv_name, hv_params in os_hvp.items():
1495 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1496 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1498 # TODO: collapse identical parameter values in a single one
1499 for instance in instances:
1500 if instance.hvparams:
1501 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1502 cluster.FillHV(instance)))
1507 class _VerifyErrors(object):
1508 """Mix-in for cluster/group verify LUs.
1510 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1511 self.op and self._feedback_fn to be available.)
1515 ETYPE_FIELD = "code"
1516 ETYPE_ERROR = "ERROR"
1517 ETYPE_WARNING = "WARNING"
1519 def _Error(self, ecode, item, msg, *args, **kwargs):
1520 """Format an error message.
1522 Based on the opcode's error_codes parameter, either format a
1523 parseable error code, or a simpler error string.
1525 This must be called only from Exec and functions called from Exec.
1528 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1529 itype, etxt, _ = ecode
1530 # If the error code is in the list of ignored errors, demote the error to a
1532 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1533 ltype = self.ETYPE_WARNING
1534 # first complete the msg
1537 # then format the whole message
1538 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1539 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1545 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1546 # and finally report it via the feedback_fn
1547 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1548 # do not mark the operation as failed for WARN cases only
1549 if ltype == self.ETYPE_ERROR:
1552 def _ErrorIf(self, cond, *args, **kwargs):
1553 """Log an error message if the passed condition is True.
1557 or self.op.debug_simulate_errors): # pylint: disable=E1101
1558 self._Error(*args, **kwargs)
1561 class LUClusterVerify(NoHooksLU):
1562 """Submits all jobs necessary to verify the cluster.
1567 def ExpandNames(self):
1568 self.needed_locks = {}
1570 def Exec(self, feedback_fn):
1573 if self.op.group_name:
1574 groups = [self.op.group_name]
1575 depends_fn = lambda: None
1577 groups = self.cfg.GetNodeGroupList()
1579 # Verify global configuration
1581 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
1584 # Always depend on global verification
1585 depends_fn = lambda: [(-len(jobs), [])]
1588 [opcodes.OpClusterVerifyGroup(group_name=group,
1589 ignore_errors=self.op.ignore_errors,
1590 depends=depends_fn())]
1591 for group in groups)
1593 # Fix up all parameters
1594 for op in itertools.chain(*jobs): # pylint: disable=W0142
1595 op.debug_simulate_errors = self.op.debug_simulate_errors
1596 op.verbose = self.op.verbose
1597 op.error_codes = self.op.error_codes
1599 op.skip_checks = self.op.skip_checks
1600 except AttributeError:
1601 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1603 return ResultWithJobs(jobs)
1606 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1607 """Verifies the cluster config.
1612 def _VerifyHVP(self, hvp_data):
1613 """Verifies locally the syntax of the hypervisor parameters.
1616 for item, hv_name, hv_params in hvp_data:
1617 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1620 hv_class = hypervisor.GetHypervisorClass(hv_name)
1621 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1622 hv_class.CheckParameterSyntax(hv_params)
1623 except errors.GenericError, err:
1624 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1626 def ExpandNames(self):
1627 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1628 self.share_locks = _ShareAll()
1630 def CheckPrereq(self):
1631 """Check prerequisites.
1634 # Retrieve all information
1635 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1636 self.all_node_info = self.cfg.GetAllNodesInfo()
1637 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1639 def Exec(self, feedback_fn):
1640 """Verify integrity of cluster, performing various test on nodes.
1644 self._feedback_fn = feedback_fn
1646 feedback_fn("* Verifying cluster config")
1648 for msg in self.cfg.VerifyConfig():
1649 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1651 feedback_fn("* Verifying cluster certificate files")
1653 for cert_filename in pathutils.ALL_CERT_FILES:
1654 (errcode, msg) = _VerifyCertificate(cert_filename)
1655 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1657 feedback_fn("* Verifying hypervisor parameters")
1659 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1660 self.all_inst_info.values()))
1662 feedback_fn("* Verifying all nodes belong to an existing group")
1664 # We do this verification here because, should this bogus circumstance
1665 # occur, it would never be caught by VerifyGroup, which only acts on
1666 # nodes/instances reachable from existing node groups.
1668 dangling_nodes = set(node.name for node in self.all_node_info.values()
1669 if node.group not in self.all_group_info)
1671 dangling_instances = {}
1672 no_node_instances = []
1674 for inst in self.all_inst_info.values():
1675 if inst.primary_node in dangling_nodes:
1676 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1677 elif inst.primary_node not in self.all_node_info:
1678 no_node_instances.append(inst.name)
1683 utils.CommaJoin(dangling_instances.get(node.name,
1685 for node in dangling_nodes]
1687 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1689 "the following nodes (and their instances) belong to a non"
1690 " existing group: %s", utils.CommaJoin(pretty_dangling))
1692 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1694 "the following instances have a non-existing primary-node:"
1695 " %s", utils.CommaJoin(no_node_instances))
1700 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1701 """Verifies the status of a node group.
1704 HPATH = "cluster-verify"
1705 HTYPE = constants.HTYPE_CLUSTER
1708 _HOOKS_INDENT_RE = re.compile("^", re.M)
1710 class NodeImage(object):
1711 """A class representing the logical and physical status of a node.
1714 @ivar name: the node name to which this object refers
1715 @ivar volumes: a structure as returned from
1716 L{ganeti.backend.GetVolumeList} (runtime)
1717 @ivar instances: a list of running instances (runtime)
1718 @ivar pinst: list of configured primary instances (config)
1719 @ivar sinst: list of configured secondary instances (config)
1720 @ivar sbp: dictionary of {primary-node: list of instances} for all
1721 instances for which this node is secondary (config)
1722 @ivar mfree: free memory, as reported by hypervisor (runtime)
1723 @ivar dfree: free disk, as reported by the node (runtime)
1724 @ivar offline: the offline status (config)
1725 @type rpc_fail: boolean
1726 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1727 not whether the individual keys were correct) (runtime)
1728 @type lvm_fail: boolean
1729 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1730 @type hyp_fail: boolean
1731 @ivar hyp_fail: whether the RPC call didn't return the instance list
1732 @type ghost: boolean
1733 @ivar ghost: whether this is a known node or not (config)
1734 @type os_fail: boolean
1735 @ivar os_fail: whether the RPC call didn't return valid OS data
1737 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1738 @type vm_capable: boolean
1739 @ivar vm_capable: whether the node can host instances
1741 @ivar pv_min: size in MiB of the smallest PVs
1743 @ivar pv_max: size in MiB of the biggest PVs
1746 def __init__(self, offline=False, name=None, vm_capable=True):
1755 self.offline = offline
1756 self.vm_capable = vm_capable
1757 self.rpc_fail = False
1758 self.lvm_fail = False
1759 self.hyp_fail = False
1761 self.os_fail = False
1766 def ExpandNames(self):
1767 # This raises errors.OpPrereqError on its own:
1768 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1770 # Get instances in node group; this is unsafe and needs verification later
1772 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1774 self.needed_locks = {
1775 locking.LEVEL_INSTANCE: inst_names,
1776 locking.LEVEL_NODEGROUP: [self.group_uuid],
1777 locking.LEVEL_NODE: [],
1779 # This opcode is run by watcher every five minutes and acquires all nodes
1780 # for a group. It doesn't run for a long time, so it's better to acquire
1781 # the node allocation lock as well.
1782 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
1785 self.share_locks = _ShareAll()
1787 def DeclareLocks(self, level):
1788 if level == locking.LEVEL_NODE:
1789 # Get members of node group; this is unsafe and needs verification later
1790 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1792 all_inst_info = self.cfg.GetAllInstancesInfo()
1794 # In Exec(), we warn about mirrored instances that have primary and
1795 # secondary living in separate node groups. To fully verify that
1796 # volumes for these instances are healthy, we will need to do an
1797 # extra call to their secondaries. We ensure here those nodes will
1799 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1800 # Important: access only the instances whose lock is owned
1801 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1802 nodes.update(all_inst_info[inst].secondary_nodes)
1804 self.needed_locks[locking.LEVEL_NODE] = nodes
1806 def CheckPrereq(self):
1807 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1808 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1810 group_nodes = set(self.group_info.members)
1812 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1815 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1817 unlocked_instances = \
1818 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1821 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1822 utils.CommaJoin(unlocked_nodes),
1825 if unlocked_instances:
1826 raise errors.OpPrereqError("Missing lock for instances: %s" %
1827 utils.CommaJoin(unlocked_instances),
1830 self.all_node_info = self.cfg.GetAllNodesInfo()
1831 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1833 self.my_node_names = utils.NiceSort(group_nodes)
1834 self.my_inst_names = utils.NiceSort(group_instances)
1836 self.my_node_info = dict((name, self.all_node_info[name])
1837 for name in self.my_node_names)
1839 self.my_inst_info = dict((name, self.all_inst_info[name])
1840 for name in self.my_inst_names)
1842 # We detect here the nodes that will need the extra RPC calls for verifying
1843 # split LV volumes; they should be locked.
1844 extra_lv_nodes = set()
1846 for inst in self.my_inst_info.values():
1847 if inst.disk_template in constants.DTS_INT_MIRROR:
1848 for nname in inst.all_nodes:
1849 if self.all_node_info[nname].group != self.group_uuid:
1850 extra_lv_nodes.add(nname)
1852 unlocked_lv_nodes = \
1853 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1855 if unlocked_lv_nodes:
1856 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1857 utils.CommaJoin(unlocked_lv_nodes),
1859 self.extra_lv_nodes = list(extra_lv_nodes)
1861 def _VerifyNode(self, ninfo, nresult):
1862 """Perform some basic validation on data returned from a node.
1864 - check the result data structure is well formed and has all the
1866 - check ganeti version
1868 @type ninfo: L{objects.Node}
1869 @param ninfo: the node to check
1870 @param nresult: the results from the node
1872 @return: whether overall this call was successful (and we can expect
1873 reasonable values in the respose)
1877 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1879 # main result, nresult should be a non-empty dict
1880 test = not nresult or not isinstance(nresult, dict)
1881 _ErrorIf(test, constants.CV_ENODERPC, node,
1882 "unable to verify node: no data returned")
1886 # compares ganeti version
1887 local_version = constants.PROTOCOL_VERSION
1888 remote_version = nresult.get("version", None)
1889 test = not (remote_version and
1890 isinstance(remote_version, (list, tuple)) and
1891 len(remote_version) == 2)
1892 _ErrorIf(test, constants.CV_ENODERPC, node,
1893 "connection to node returned invalid data")
1897 test = local_version != remote_version[0]
1898 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1899 "incompatible protocol versions: master %s,"
1900 " node %s", local_version, remote_version[0])
1904 # node seems compatible, we can actually try to look into its results
1906 # full package version
1907 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1908 constants.CV_ENODEVERSION, node,
1909 "software version mismatch: master %s, node %s",
1910 constants.RELEASE_VERSION, remote_version[1],
1911 code=self.ETYPE_WARNING)
1913 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1914 if ninfo.vm_capable and isinstance(hyp_result, dict):
1915 for hv_name, hv_result in hyp_result.iteritems():
1916 test = hv_result is not None
1917 _ErrorIf(test, constants.CV_ENODEHV, node,
1918 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1920 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1921 if ninfo.vm_capable and isinstance(hvp_result, list):
1922 for item, hv_name, hv_result in hvp_result:
1923 _ErrorIf(True, constants.CV_ENODEHV, node,
1924 "hypervisor %s parameter verify failure (source %s): %s",
1925 hv_name, item, hv_result)
1927 test = nresult.get(constants.NV_NODESETUP,
1928 ["Missing NODESETUP results"])
1929 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1934 def _VerifyNodeTime(self, ninfo, nresult,
1935 nvinfo_starttime, nvinfo_endtime):
1936 """Check the node time.
1938 @type ninfo: L{objects.Node}
1939 @param ninfo: the node to check
1940 @param nresult: the remote results for the node
1941 @param nvinfo_starttime: the start time of the RPC call
1942 @param nvinfo_endtime: the end time of the RPC call
1946 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1948 ntime = nresult.get(constants.NV_TIME, None)
1950 ntime_merged = utils.MergeTime(ntime)
1951 except (ValueError, TypeError):
1952 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1955 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1956 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1957 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1958 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1962 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1963 "Node time diverges by at least %s from master node time",
1966 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
1967 """Check the node LVM results and update info for cross-node checks.
1969 @type ninfo: L{objects.Node}
1970 @param ninfo: the node to check
1971 @param nresult: the remote results for the node
1972 @param vg_name: the configured VG name
1973 @type nimg: L{NodeImage}
1974 @param nimg: node image
1981 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1983 # checks vg existence and size > 20G
1984 vglist = nresult.get(constants.NV_VGLIST, None)
1986 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1988 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1989 constants.MIN_VG_SIZE)
1990 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1993 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
1995 self._Error(constants.CV_ENODELVM, node, em)
1996 if pvminmax is not None:
1997 (nimg.pv_min, nimg.pv_max) = pvminmax
1999 def _VerifyGroupLVM(self, node_image, vg_name):
2000 """Check cross-node consistency in LVM.
2002 @type node_image: dict
2003 @param node_image: info about nodes, mapping from node to names to
2004 L{NodeImage} objects
2005 @param vg_name: the configured VG name
2011 # Only exlcusive storage needs this kind of checks
2012 if not self._exclusive_storage:
2015 # exclusive_storage wants all PVs to have the same size (approximately),
2016 # if the smallest and the biggest ones are okay, everything is fine.
2017 # pv_min is None iff pv_max is None
2018 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2021 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2022 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2023 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2024 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2025 "PV sizes differ too much in the group; smallest (%s MB) is"
2026 " on %s, biggest (%s MB) is on %s",
2027 pvmin, minnode, pvmax, maxnode)
2029 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2030 """Check the node bridges.
2032 @type ninfo: L{objects.Node}
2033 @param ninfo: the node to check
2034 @param nresult: the remote results for the node
2035 @param bridges: the expected list of bridges
2042 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2044 missing = nresult.get(constants.NV_BRIDGES, None)
2045 test = not isinstance(missing, list)
2046 _ErrorIf(test, constants.CV_ENODENET, node,
2047 "did not return valid bridge information")
2049 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2050 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2052 def _VerifyNodeUserScripts(self, ninfo, nresult):
2053 """Check the results of user scripts presence and executability on the node
2055 @type ninfo: L{objects.Node}
2056 @param ninfo: the node to check
2057 @param nresult: the remote results for the node
2062 test = not constants.NV_USERSCRIPTS in nresult
2063 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2064 "did not return user scripts information")
2066 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2068 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2069 "user scripts not present or not executable: %s" %
2070 utils.CommaJoin(sorted(broken_scripts)))
2072 def _VerifyNodeNetwork(self, ninfo, nresult):
2073 """Check the node network connectivity results.
2075 @type ninfo: L{objects.Node}
2076 @param ninfo: the node to check
2077 @param nresult: the remote results for the node
2081 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2083 test = constants.NV_NODELIST not in nresult
2084 _ErrorIf(test, constants.CV_ENODESSH, node,
2085 "node hasn't returned node ssh connectivity data")
2087 if nresult[constants.NV_NODELIST]:
2088 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2089 _ErrorIf(True, constants.CV_ENODESSH, node,
2090 "ssh communication with node '%s': %s", a_node, a_msg)
2092 test = constants.NV_NODENETTEST not in nresult
2093 _ErrorIf(test, constants.CV_ENODENET, node,
2094 "node hasn't returned node tcp connectivity data")
2096 if nresult[constants.NV_NODENETTEST]:
2097 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2099 _ErrorIf(True, constants.CV_ENODENET, node,
2100 "tcp communication with node '%s': %s",
2101 anode, nresult[constants.NV_NODENETTEST][anode])
2103 test = constants.NV_MASTERIP not in nresult
2104 _ErrorIf(test, constants.CV_ENODENET, node,
2105 "node hasn't returned node master IP reachability data")
2107 if not nresult[constants.NV_MASTERIP]:
2108 if node == self.master_node:
2109 msg = "the master node cannot reach the master IP (not configured?)"
2111 msg = "cannot reach the master IP"
2112 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2114 def _VerifyInstance(self, instance, inst_config, node_image,
2116 """Verify an instance.
2118 This function checks to see if the required block devices are
2119 available on the instance's node, and that the nodes are in the correct
2123 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2124 pnode = inst_config.primary_node
2125 pnode_img = node_image[pnode]
2126 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2128 node_vol_should = {}
2129 inst_config.MapLVsByNode(node_vol_should)
2131 cluster = self.cfg.GetClusterInfo()
2132 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2134 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
2135 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2136 code=self.ETYPE_WARNING)
2138 for node in node_vol_should:
2139 n_img = node_image[node]
2140 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2141 # ignore missing volumes on offline or broken nodes
2143 for volume in node_vol_should[node]:
2144 test = volume not in n_img.volumes
2145 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2146 "volume %s missing on node %s", volume, node)
2148 if inst_config.admin_state == constants.ADMINST_UP:
2149 test = instance not in pnode_img.instances and not pnode_img.offline
2150 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2151 "instance not running on its primary node %s",
2153 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2154 "instance is marked as running and lives on offline node %s",
2157 diskdata = [(nname, success, status, idx)
2158 for (nname, disks) in diskstatus.items()
2159 for idx, (success, status) in enumerate(disks)]
2161 for nname, success, bdev_status, idx in diskdata:
2162 # the 'ghost node' construction in Exec() ensures that we have a
2164 snode = node_image[nname]
2165 bad_snode = snode.ghost or snode.offline
2166 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2167 not success and not bad_snode,
2168 constants.CV_EINSTANCEFAULTYDISK, instance,
2169 "couldn't retrieve status for disk/%s on %s: %s",
2170 idx, nname, bdev_status)
2171 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2172 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2173 constants.CV_EINSTANCEFAULTYDISK, instance,
2174 "disk/%s on %s is faulty", idx, nname)
2176 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2177 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2178 " primary node failed", instance)
2180 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2181 constants.CV_EINSTANCELAYOUT,
2182 instance, "instance has multiple secondary nodes: %s",
2183 utils.CommaJoin(inst_config.secondary_nodes),
2184 code=self.ETYPE_WARNING)
2186 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2187 # Disk template not compatible with exclusive_storage: no instance
2188 # node should have the flag set
2189 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2190 inst_config.all_nodes)
2191 es_nodes = [n for (n, es) in es_flags.items()
2193 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2194 "instance has template %s, which is not supported on nodes"
2195 " that have exclusive storage set: %s",
2196 inst_config.disk_template, utils.CommaJoin(es_nodes))
2198 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2199 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2200 instance_groups = {}
2202 for node in instance_nodes:
2203 instance_groups.setdefault(self.all_node_info[node].group,
2207 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2208 # Sort so that we always list the primary node first.
2209 for group, nodes in sorted(instance_groups.items(),
2210 key=lambda (_, nodes): pnode in nodes,
2213 self._ErrorIf(len(instance_groups) > 1,
2214 constants.CV_EINSTANCESPLITGROUPS,
2215 instance, "instance has primary and secondary nodes in"
2216 " different groups: %s", utils.CommaJoin(pretty_list),
2217 code=self.ETYPE_WARNING)
2219 inst_nodes_offline = []
2220 for snode in inst_config.secondary_nodes:
2221 s_img = node_image[snode]
2222 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2223 snode, "instance %s, connection to secondary node failed",
2227 inst_nodes_offline.append(snode)
2229 # warn that the instance lives on offline nodes
2230 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2231 "instance has offline secondary node(s) %s",
2232 utils.CommaJoin(inst_nodes_offline))
2233 # ... or ghost/non-vm_capable nodes
2234 for node in inst_config.all_nodes:
2235 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2236 instance, "instance lives on ghost node %s", node)
2237 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2238 instance, "instance lives on non-vm_capable node %s", node)
2240 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2241 """Verify if there are any unknown volumes in the cluster.
2243 The .os, .swap and backup volumes are ignored. All other volumes are
2244 reported as unknown.
2246 @type reserved: L{ganeti.utils.FieldSet}
2247 @param reserved: a FieldSet of reserved volume names
2250 for node, n_img in node_image.items():
2251 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2252 self.all_node_info[node].group != self.group_uuid):
2253 # skip non-healthy nodes
2255 for volume in n_img.volumes:
2256 test = ((node not in node_vol_should or
2257 volume not in node_vol_should[node]) and
2258 not reserved.Matches(volume))
2259 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2260 "volume %s is unknown", volume)
2262 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2263 """Verify N+1 Memory Resilience.
2265 Check that if one single node dies we can still start all the
2266 instances it was primary for.
2269 cluster_info = self.cfg.GetClusterInfo()
2270 for node, n_img in node_image.items():
2271 # This code checks that every node which is now listed as
2272 # secondary has enough memory to host all instances it is
2273 # supposed to should a single other node in the cluster fail.
2274 # FIXME: not ready for failover to an arbitrary node
2275 # FIXME: does not support file-backed instances
2276 # WARNING: we currently take into account down instances as well
2277 # as up ones, considering that even if they're down someone
2278 # might want to start them even in the event of a node failure.
2279 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2280 # we're skipping nodes marked offline and nodes in other groups from
2281 # the N+1 warning, since most likely we don't have good memory
2282 # infromation from them; we already list instances living on such
2283 # nodes, and that's enough warning
2285 #TODO(dynmem): also consider ballooning out other instances
2286 for prinode, instances in n_img.sbp.items():
2288 for instance in instances:
2289 bep = cluster_info.FillBE(instance_cfg[instance])
2290 if bep[constants.BE_AUTO_BALANCE]:
2291 needed_mem += bep[constants.BE_MINMEM]
2292 test = n_img.mfree < needed_mem
2293 self._ErrorIf(test, constants.CV_ENODEN1, node,
2294 "not enough memory to accomodate instance failovers"
2295 " should node %s fail (%dMiB needed, %dMiB available)",
2296 prinode, needed_mem, n_img.mfree)
2299 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2300 (files_all, files_opt, files_mc, files_vm)):
2301 """Verifies file checksums collected from all nodes.
2303 @param errorif: Callback for reporting errors
2304 @param nodeinfo: List of L{objects.Node} objects
2305 @param master_node: Name of master node
2306 @param all_nvinfo: RPC results
2309 # Define functions determining which nodes to consider for a file
2312 (files_mc, lambda node: (node.master_candidate or
2313 node.name == master_node)),
2314 (files_vm, lambda node: node.vm_capable),
2317 # Build mapping from filename to list of nodes which should have the file
2319 for (files, fn) in files2nodefn:
2321 filenodes = nodeinfo
2323 filenodes = filter(fn, nodeinfo)
2324 nodefiles.update((filename,
2325 frozenset(map(operator.attrgetter("name"), filenodes)))
2326 for filename in files)
2328 assert set(nodefiles) == (files_all | files_mc | files_vm)
2330 fileinfo = dict((filename, {}) for filename in nodefiles)
2331 ignore_nodes = set()
2333 for node in nodeinfo:
2335 ignore_nodes.add(node.name)
2338 nresult = all_nvinfo[node.name]
2340 if nresult.fail_msg or not nresult.payload:
2343 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2344 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2345 for (key, value) in fingerprints.items())
2348 test = not (node_files and isinstance(node_files, dict))
2349 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2350 "Node did not return file checksum data")
2352 ignore_nodes.add(node.name)
2355 # Build per-checksum mapping from filename to nodes having it
2356 for (filename, checksum) in node_files.items():
2357 assert filename in nodefiles
2358 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2360 for (filename, checksums) in fileinfo.items():
2361 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2363 # Nodes having the file
2364 with_file = frozenset(node_name
2365 for nodes in fileinfo[filename].values()
2366 for node_name in nodes) - ignore_nodes
2368 expected_nodes = nodefiles[filename] - ignore_nodes
2370 # Nodes missing file
2371 missing_file = expected_nodes - with_file
2373 if filename in files_opt:
2375 errorif(missing_file and missing_file != expected_nodes,
2376 constants.CV_ECLUSTERFILECHECK, None,
2377 "File %s is optional, but it must exist on all or no"
2378 " nodes (not found on %s)",
2379 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2381 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2382 "File %s is missing from node(s) %s", filename,
2383 utils.CommaJoin(utils.NiceSort(missing_file)))
2385 # Warn if a node has a file it shouldn't
2386 unexpected = with_file - expected_nodes
2388 constants.CV_ECLUSTERFILECHECK, None,
2389 "File %s should not exist on node(s) %s",
2390 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2392 # See if there are multiple versions of the file
2393 test = len(checksums) > 1
2395 variants = ["variant %s on %s" %
2396 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2397 for (idx, (checksum, nodes)) in
2398 enumerate(sorted(checksums.items()))]
2402 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2403 "File %s found with %s different checksums (%s)",
2404 filename, len(checksums), "; ".join(variants))
2406 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2408 """Verifies and the node DRBD status.
2410 @type ninfo: L{objects.Node}
2411 @param ninfo: the node to check
2412 @param nresult: the remote results for the node
2413 @param instanceinfo: the dict of instances
2414 @param drbd_helper: the configured DRBD usermode helper
2415 @param drbd_map: the DRBD map as returned by
2416 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2420 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2423 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2424 test = (helper_result is None)
2425 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2426 "no drbd usermode helper returned")
2428 status, payload = helper_result
2430 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2431 "drbd usermode helper check unsuccessful: %s", payload)
2432 test = status and (payload != drbd_helper)
2433 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2434 "wrong drbd usermode helper: %s", payload)
2436 # compute the DRBD minors
2438 for minor, instance in drbd_map[node].items():
2439 test = instance not in instanceinfo
2440 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2441 "ghost instance '%s' in temporary DRBD map", instance)
2442 # ghost instance should not be running, but otherwise we
2443 # don't give double warnings (both ghost instance and
2444 # unallocated minor in use)
2446 node_drbd[minor] = (instance, False)
2448 instance = instanceinfo[instance]
2449 node_drbd[minor] = (instance.name,
2450 instance.admin_state == constants.ADMINST_UP)
2452 # and now check them
2453 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2454 test = not isinstance(used_minors, (tuple, list))
2455 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2456 "cannot parse drbd status file: %s", str(used_minors))
2458 # we cannot check drbd status
2461 for minor, (iname, must_exist) in node_drbd.items():
2462 test = minor not in used_minors and must_exist
2463 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2464 "drbd minor %d of instance %s is not active", minor, iname)
2465 for minor in used_minors:
2466 test = minor not in node_drbd
2467 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2468 "unallocated drbd minor %d is in use", minor)
2470 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2471 """Builds the node OS structures.
2473 @type ninfo: L{objects.Node}
2474 @param ninfo: the node to check
2475 @param nresult: the remote results for the node
2476 @param nimg: the node image object
2480 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2482 remote_os = nresult.get(constants.NV_OSLIST, None)
2483 test = (not isinstance(remote_os, list) or
2484 not compat.all(isinstance(v, list) and len(v) == 7
2485 for v in remote_os))
2487 _ErrorIf(test, constants.CV_ENODEOS, node,
2488 "node hasn't returned valid OS data")
2497 for (name, os_path, status, diagnose,
2498 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2500 if name not in os_dict:
2503 # parameters is a list of lists instead of list of tuples due to
2504 # JSON lacking a real tuple type, fix it:
2505 parameters = [tuple(v) for v in parameters]
2506 os_dict[name].append((os_path, status, diagnose,
2507 set(variants), set(parameters), set(api_ver)))
2509 nimg.oslist = os_dict
2511 def _VerifyNodeOS(self, ninfo, nimg, base):
2512 """Verifies the node OS list.
2514 @type ninfo: L{objects.Node}
2515 @param ninfo: the node to check
2516 @param nimg: the node image object
2517 @param base: the 'template' node we match against (e.g. from the master)
2521 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2523 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2525 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2526 for os_name, os_data in nimg.oslist.items():
2527 assert os_data, "Empty OS status for OS %s?!" % os_name
2528 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2529 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2530 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2531 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2532 "OS '%s' has multiple entries (first one shadows the rest): %s",
2533 os_name, utils.CommaJoin([v[0] for v in os_data]))
2534 # comparisons with the 'base' image
2535 test = os_name not in base.oslist
2536 _ErrorIf(test, constants.CV_ENODEOS, node,
2537 "Extra OS %s not present on reference node (%s)",
2541 assert base.oslist[os_name], "Base node has empty OS status?"
2542 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2544 # base OS is invalid, skipping
2546 for kind, a, b in [("API version", f_api, b_api),
2547 ("variants list", f_var, b_var),
2548 ("parameters", beautify_params(f_param),
2549 beautify_params(b_param))]:
2550 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2551 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2552 kind, os_name, base.name,
2553 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2555 # check any missing OSes
2556 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2557 _ErrorIf(missing, constants.CV_ENODEOS, node,
2558 "OSes present on reference node %s but missing on this node: %s",
2559 base.name, utils.CommaJoin(missing))
2561 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2562 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2564 @type ninfo: L{objects.Node}
2565 @param ninfo: the node to check
2566 @param nresult: the remote results for the node
2567 @type is_master: bool
2568 @param is_master: Whether node is the master node
2574 (constants.ENABLE_FILE_STORAGE or
2575 constants.ENABLE_SHARED_FILE_STORAGE)):
2577 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2579 # This should never happen
2580 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2581 "Node did not return forbidden file storage paths")
2583 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2584 "Found forbidden file storage paths: %s",
2585 utils.CommaJoin(fspaths))
2587 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2588 constants.CV_ENODEFILESTORAGEPATHS, node,
2589 "Node should not have returned forbidden file storage"
2592 def _VerifyOob(self, ninfo, nresult):
2593 """Verifies out of band functionality of a node.
2595 @type ninfo: L{objects.Node}
2596 @param ninfo: the node to check
2597 @param nresult: the remote results for the node
2601 # We just have to verify the paths on master and/or master candidates
2602 # as the oob helper is invoked on the master
2603 if ((ninfo.master_candidate or ninfo.master_capable) and
2604 constants.NV_OOB_PATHS in nresult):
2605 for path_result in nresult[constants.NV_OOB_PATHS]:
2606 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2608 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2609 """Verifies and updates the node volume data.
2611 This function will update a L{NodeImage}'s internal structures
2612 with data from the remote call.
2614 @type ninfo: L{objects.Node}
2615 @param ninfo: the node to check
2616 @param nresult: the remote results for the node
2617 @param nimg: the node image object
2618 @param vg_name: the configured VG name
2622 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2624 nimg.lvm_fail = True
2625 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2628 elif isinstance(lvdata, basestring):
2629 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2630 utils.SafeEncode(lvdata))
2631 elif not isinstance(lvdata, dict):
2632 _ErrorIf(True, constants.CV_ENODELVM, node,
2633 "rpc call to node failed (lvlist)")
2635 nimg.volumes = lvdata
2636 nimg.lvm_fail = False
2638 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2639 """Verifies and updates the node instance list.
2641 If the listing was successful, then updates this node's instance
2642 list. Otherwise, it marks the RPC call as failed for the instance
2645 @type ninfo: L{objects.Node}
2646 @param ninfo: the node to check
2647 @param nresult: the remote results for the node
2648 @param nimg: the node image object
2651 idata = nresult.get(constants.NV_INSTANCELIST, None)
2652 test = not isinstance(idata, list)
2653 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2654 "rpc call to node failed (instancelist): %s",
2655 utils.SafeEncode(str(idata)))
2657 nimg.hyp_fail = True
2659 nimg.instances = idata
2661 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2662 """Verifies and computes a node information map
2664 @type ninfo: L{objects.Node}
2665 @param ninfo: the node to check
2666 @param nresult: the remote results for the node
2667 @param nimg: the node image object
2668 @param vg_name: the configured VG name
2672 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2674 # try to read free memory (from the hypervisor)
2675 hv_info = nresult.get(constants.NV_HVINFO, None)
2676 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2677 _ErrorIf(test, constants.CV_ENODEHV, node,
2678 "rpc call to node failed (hvinfo)")
2681 nimg.mfree = int(hv_info["memory_free"])
2682 except (ValueError, TypeError):
2683 _ErrorIf(True, constants.CV_ENODERPC, node,
2684 "node returned invalid nodeinfo, check hypervisor")
2686 # FIXME: devise a free space model for file based instances as well
2687 if vg_name is not None:
2688 test = (constants.NV_VGLIST not in nresult or
2689 vg_name not in nresult[constants.NV_VGLIST])
2690 _ErrorIf(test, constants.CV_ENODELVM, node,
2691 "node didn't return data for the volume group '%s'"
2692 " - it is either missing or broken", vg_name)
2695 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2696 except (ValueError, TypeError):
2697 _ErrorIf(True, constants.CV_ENODERPC, node,
2698 "node returned invalid LVM info, check LVM status")
2700 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2701 """Gets per-disk status information for all instances.
2703 @type nodelist: list of strings
2704 @param nodelist: Node names
2705 @type node_image: dict of (name, L{objects.Node})
2706 @param node_image: Node objects
2707 @type instanceinfo: dict of (name, L{objects.Instance})
2708 @param instanceinfo: Instance objects
2709 @rtype: {instance: {node: [(succes, payload)]}}
2710 @return: a dictionary of per-instance dictionaries with nodes as
2711 keys and disk information as values; the disk information is a
2712 list of tuples (success, payload)
2715 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2718 node_disks_devonly = {}
2719 diskless_instances = set()
2720 diskless = constants.DT_DISKLESS
2722 for nname in nodelist:
2723 node_instances = list(itertools.chain(node_image[nname].pinst,
2724 node_image[nname].sinst))
2725 diskless_instances.update(inst for inst in node_instances
2726 if instanceinfo[inst].disk_template == diskless)
2727 disks = [(inst, disk)
2728 for inst in node_instances
2729 for disk in instanceinfo[inst].disks]
2732 # No need to collect data
2735 node_disks[nname] = disks
2737 # _AnnotateDiskParams makes already copies of the disks
2739 for (inst, dev) in disks:
2740 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2741 self.cfg.SetDiskID(anno_disk, nname)
2742 devonly.append(anno_disk)
2744 node_disks_devonly[nname] = devonly
2746 assert len(node_disks) == len(node_disks_devonly)
2748 # Collect data from all nodes with disks
2749 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2752 assert len(result) == len(node_disks)
2756 for (nname, nres) in result.items():
2757 disks = node_disks[nname]
2760 # No data from this node
2761 data = len(disks) * [(False, "node offline")]
2764 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2765 "while getting disk information: %s", msg)
2767 # No data from this node
2768 data = len(disks) * [(False, msg)]
2771 for idx, i in enumerate(nres.payload):
2772 if isinstance(i, (tuple, list)) and len(i) == 2:
2775 logging.warning("Invalid result from node %s, entry %d: %s",
2777 data.append((False, "Invalid result from the remote node"))
2779 for ((inst, _), status) in zip(disks, data):
2780 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2782 # Add empty entries for diskless instances.
2783 for inst in diskless_instances:
2784 assert inst not in instdisk
2787 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2788 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2789 compat.all(isinstance(s, (tuple, list)) and
2790 len(s) == 2 for s in statuses)
2791 for inst, nnames in instdisk.items()
2792 for nname, statuses in nnames.items())
2794 instdisk_keys = set(instdisk)
2795 instanceinfo_keys = set(instanceinfo)
2796 assert instdisk_keys == instanceinfo_keys, \
2797 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
2798 (instdisk_keys, instanceinfo_keys))
2803 def _SshNodeSelector(group_uuid, all_nodes):
2804 """Create endless iterators for all potential SSH check hosts.
2807 nodes = [node for node in all_nodes
2808 if (node.group != group_uuid and
2810 keyfunc = operator.attrgetter("group")
2812 return map(itertools.cycle,
2813 [sorted(map(operator.attrgetter("name"), names))
2814 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2818 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2819 """Choose which nodes should talk to which other nodes.
2821 We will make nodes contact all nodes in their group, and one node from
2824 @warning: This algorithm has a known issue if one node group is much
2825 smaller than others (e.g. just one node). In such a case all other
2826 nodes will talk to the single node.
2829 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2830 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2832 return (online_nodes,
2833 dict((name, sorted([i.next() for i in sel]))
2834 for name in online_nodes))
2836 def BuildHooksEnv(self):
2839 Cluster-Verify hooks just ran in the post phase and their failure makes
2840 the output be logged in the verify output and the verification to fail.
2844 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
2847 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2848 for node in self.my_node_info.values())
2852 def BuildHooksNodes(self):
2853 """Build hooks nodes.
2856 return ([], self.my_node_names)
2858 def Exec(self, feedback_fn):
2859 """Verify integrity of the node group, performing various test on nodes.
2862 # This method has too many local variables. pylint: disable=R0914
2863 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2865 if not self.my_node_names:
2867 feedback_fn("* Empty node group, skipping verification")
2871 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2872 verbose = self.op.verbose
2873 self._feedback_fn = feedback_fn
2875 vg_name = self.cfg.GetVGName()
2876 drbd_helper = self.cfg.GetDRBDHelper()
2877 cluster = self.cfg.GetClusterInfo()
2878 hypervisors = cluster.enabled_hypervisors
2879 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2881 i_non_redundant = [] # Non redundant instances
2882 i_non_a_balanced = [] # Non auto-balanced instances
2883 i_offline = 0 # Count of offline instances
2884 n_offline = 0 # Count of offline nodes
2885 n_drained = 0 # Count of nodes being drained
2886 node_vol_should = {}
2888 # FIXME: verify OS list
2891 filemap = _ComputeAncillaryFiles(cluster, False)
2893 # do local checksums
2894 master_node = self.master_node = self.cfg.GetMasterNode()
2895 master_ip = self.cfg.GetMasterIP()
2897 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2900 if self.cfg.GetUseExternalMipScript():
2901 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
2903 node_verify_param = {
2904 constants.NV_FILELIST:
2905 map(vcluster.MakeVirtualPath,
2906 utils.UniqueSequence(filename
2907 for files in filemap
2908 for filename in files)),
2909 constants.NV_NODELIST:
2910 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2911 self.all_node_info.values()),
2912 constants.NV_HYPERVISOR: hypervisors,
2913 constants.NV_HVPARAMS:
2914 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2915 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2916 for node in node_data_list
2917 if not node.offline],
2918 constants.NV_INSTANCELIST: hypervisors,
2919 constants.NV_VERSION: None,
2920 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2921 constants.NV_NODESETUP: None,
2922 constants.NV_TIME: None,
2923 constants.NV_MASTERIP: (master_node, master_ip),
2924 constants.NV_OSLIST: None,
2925 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2926 constants.NV_USERSCRIPTS: user_scripts,
2929 if vg_name is not None:
2930 node_verify_param[constants.NV_VGLIST] = None
2931 node_verify_param[constants.NV_LVLIST] = vg_name
2932 node_verify_param[constants.NV_PVLIST] = [vg_name]
2935 node_verify_param[constants.NV_DRBDLIST] = None
2936 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2938 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
2939 # Load file storage paths only from master node
2940 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
2943 # FIXME: this needs to be changed per node-group, not cluster-wide
2945 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2946 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2947 bridges.add(default_nicpp[constants.NIC_LINK])
2948 for instance in self.my_inst_info.values():
2949 for nic in instance.nics:
2950 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2951 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2952 bridges.add(full_nic[constants.NIC_LINK])
2955 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2957 # Build our expected cluster state
2958 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2960 vm_capable=node.vm_capable))
2961 for node in node_data_list)
2965 for node in self.all_node_info.values():
2966 path = _SupportsOob(self.cfg, node)
2967 if path and path not in oob_paths:
2968 oob_paths.append(path)
2971 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2973 for instance in self.my_inst_names:
2974 inst_config = self.my_inst_info[instance]
2975 if inst_config.admin_state == constants.ADMINST_OFFLINE:
2978 for nname in inst_config.all_nodes:
2979 if nname not in node_image:
2980 gnode = self.NodeImage(name=nname)
2981 gnode.ghost = (nname not in self.all_node_info)
2982 node_image[nname] = gnode
2984 inst_config.MapLVsByNode(node_vol_should)
2986 pnode = inst_config.primary_node
2987 node_image[pnode].pinst.append(instance)
2989 for snode in inst_config.secondary_nodes:
2990 nimg = node_image[snode]
2991 nimg.sinst.append(instance)
2992 if pnode not in nimg.sbp:
2993 nimg.sbp[pnode] = []
2994 nimg.sbp[pnode].append(instance)
2996 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
2997 # The value of exclusive_storage should be the same across the group, so if
2998 # it's True for at least a node, we act as if it were set for all the nodes
2999 self._exclusive_storage = compat.any(es_flags.values())
3000 if self._exclusive_storage:
3001 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3003 # At this point, we have the in-memory data structures complete,
3004 # except for the runtime information, which we'll gather next
3006 # Due to the way our RPC system works, exact response times cannot be
3007 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3008 # time before and after executing the request, we can at least have a time
3010 nvinfo_starttime = time.time()
3011 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3013 self.cfg.GetClusterName())
3014 nvinfo_endtime = time.time()
3016 if self.extra_lv_nodes and vg_name is not None:
3018 self.rpc.call_node_verify(self.extra_lv_nodes,
3019 {constants.NV_LVLIST: vg_name},
3020 self.cfg.GetClusterName())
3022 extra_lv_nvinfo = {}
3024 all_drbd_map = self.cfg.ComputeDRBDMap()
3026 feedback_fn("* Gathering disk information (%s nodes)" %
3027 len(self.my_node_names))
3028 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3031 feedback_fn("* Verifying configuration file consistency")
3033 # If not all nodes are being checked, we need to make sure the master node
3034 # and a non-checked vm_capable node are in the list.
3035 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3037 vf_nvinfo = all_nvinfo.copy()
3038 vf_node_info = list(self.my_node_info.values())
3039 additional_nodes = []
3040 if master_node not in self.my_node_info:
3041 additional_nodes.append(master_node)
3042 vf_node_info.append(self.all_node_info[master_node])
3043 # Add the first vm_capable node we find which is not included,
3044 # excluding the master node (which we already have)
3045 for node in absent_nodes:
3046 nodeinfo = self.all_node_info[node]
3047 if (nodeinfo.vm_capable and not nodeinfo.offline and
3048 node != master_node):
3049 additional_nodes.append(node)
3050 vf_node_info.append(self.all_node_info[node])
3052 key = constants.NV_FILELIST
3053 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3054 {key: node_verify_param[key]},
3055 self.cfg.GetClusterName()))
3057 vf_nvinfo = all_nvinfo
3058 vf_node_info = self.my_node_info.values()
3060 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3062 feedback_fn("* Verifying node status")
3066 for node_i in node_data_list:
3068 nimg = node_image[node]
3072 feedback_fn("* Skipping offline node %s" % (node,))
3076 if node == master_node:
3078 elif node_i.master_candidate:
3079 ntype = "master candidate"
3080 elif node_i.drained:
3086 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3088 msg = all_nvinfo[node].fail_msg
3089 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3092 nimg.rpc_fail = True
3095 nresult = all_nvinfo[node].payload
3097 nimg.call_ok = self._VerifyNode(node_i, nresult)
3098 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3099 self._VerifyNodeNetwork(node_i, nresult)
3100 self._VerifyNodeUserScripts(node_i, nresult)
3101 self._VerifyOob(node_i, nresult)
3102 self._VerifyFileStoragePaths(node_i, nresult,
3103 node == master_node)
3106 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3107 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3110 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3111 self._UpdateNodeInstances(node_i, nresult, nimg)
3112 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3113 self._UpdateNodeOS(node_i, nresult, nimg)
3115 if not nimg.os_fail:
3116 if refos_img is None:
3118 self._VerifyNodeOS(node_i, nimg, refos_img)
3119 self._VerifyNodeBridges(node_i, nresult, bridges)
3121 # Check whether all running instancies are primary for the node. (This
3122 # can no longer be done from _VerifyInstance below, since some of the
3123 # wrong instances could be from other node groups.)
3124 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3126 for inst in non_primary_inst:
3127 test = inst in self.all_inst_info
3128 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3129 "instance should not run on node %s", node_i.name)
3130 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3131 "node is running unknown instance %s", inst)
3133 self._VerifyGroupLVM(node_image, vg_name)
3135 for node, result in extra_lv_nvinfo.items():
3136 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3137 node_image[node], vg_name)
3139 feedback_fn("* Verifying instance status")
3140 for instance in self.my_inst_names:
3142 feedback_fn("* Verifying instance %s" % instance)
3143 inst_config = self.my_inst_info[instance]
3144 self._VerifyInstance(instance, inst_config, node_image,
3147 # If the instance is non-redundant we cannot survive losing its primary
3148 # node, so we are not N+1 compliant.
3149 if inst_config.disk_template not in constants.DTS_MIRRORED:
3150 i_non_redundant.append(instance)
3152 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3153 i_non_a_balanced.append(instance)
3155 feedback_fn("* Verifying orphan volumes")
3156 reserved = utils.FieldSet(*cluster.reserved_lvs)
3158 # We will get spurious "unknown volume" warnings if any node of this group
3159 # is secondary for an instance whose primary is in another group. To avoid
3160 # them, we find these instances and add their volumes to node_vol_should.
3161 for inst in self.all_inst_info.values():
3162 for secondary in inst.secondary_nodes:
3163 if (secondary in self.my_node_info
3164 and inst.name not in self.my_inst_info):
3165 inst.MapLVsByNode(node_vol_should)
3168 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3170 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3171 feedback_fn("* Verifying N+1 Memory redundancy")
3172 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3174 feedback_fn("* Other Notes")
3176 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3177 % len(i_non_redundant))
3179 if i_non_a_balanced:
3180 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3181 % len(i_non_a_balanced))
3184 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3187 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3190 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3194 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3195 """Analyze the post-hooks' result
3197 This method analyses the hook result, handles it, and sends some
3198 nicely-formatted feedback back to the user.
3200 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3201 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3202 @param hooks_results: the results of the multi-node hooks rpc call
3203 @param feedback_fn: function used send feedback back to the caller
3204 @param lu_result: previous Exec result
3205 @return: the new Exec result, based on the previous result
3209 # We only really run POST phase hooks, only for non-empty groups,
3210 # and are only interested in their results
3211 if not self.my_node_names:
3214 elif phase == constants.HOOKS_PHASE_POST:
3215 # Used to change hooks' output to proper indentation
3216 feedback_fn("* Hooks Results")
3217 assert hooks_results, "invalid result from hooks"
3219 for node_name in hooks_results:
3220 res = hooks_results[node_name]
3222 test = msg and not res.offline
3223 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3224 "Communication failure in hooks execution: %s", msg)
3225 if res.offline or msg:
3226 # No need to investigate payload if node is offline or gave
3229 for script, hkr, output in res.payload:
3230 test = hkr == constants.HKR_FAIL
3231 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3232 "Script %s failed, output:", script)
3234 output = self._HOOKS_INDENT_RE.sub(" ", output)
3235 feedback_fn("%s" % output)
3241 class LUClusterVerifyDisks(NoHooksLU):
3242 """Verifies the cluster disks status.
3247 def ExpandNames(self):
3248 self.share_locks = _ShareAll()
3249 self.needed_locks = {
3250 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3253 def Exec(self, feedback_fn):
3254 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3256 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3257 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3258 for group in group_names])
3261 class LUGroupVerifyDisks(NoHooksLU):
3262 """Verifies the status of all disks in a node group.
3267 def ExpandNames(self):
3268 # Raises errors.OpPrereqError on its own if group can't be found
3269 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3271 self.share_locks = _ShareAll()
3272 self.needed_locks = {
3273 locking.LEVEL_INSTANCE: [],
3274 locking.LEVEL_NODEGROUP: [],
3275 locking.LEVEL_NODE: [],
3277 # This opcode is acquires all node locks in a group. LUClusterVerifyDisks
3278 # starts one instance of this opcode for every group, which means all
3279 # nodes will be locked for a short amount of time, so it's better to
3280 # acquire the node allocation lock as well.
3281 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3284 def DeclareLocks(self, level):
3285 if level == locking.LEVEL_INSTANCE:
3286 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3288 # Lock instances optimistically, needs verification once node and group
3289 # locks have been acquired
3290 self.needed_locks[locking.LEVEL_INSTANCE] = \
3291 self.cfg.GetNodeGroupInstances(self.group_uuid)
3293 elif level == locking.LEVEL_NODEGROUP:
3294 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3296 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3297 set([self.group_uuid] +
3298 # Lock all groups used by instances optimistically; this requires
3299 # going via the node before it's locked, requiring verification
3302 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3303 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3305 elif level == locking.LEVEL_NODE:
3306 # This will only lock the nodes in the group to be verified which contain
3308 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3309 self._LockInstancesNodes()
3311 # Lock all nodes in group to be verified
3312 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3313 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3314 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3316 def CheckPrereq(self):
3317 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3318 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3319 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3321 assert self.group_uuid in owned_groups
3323 # Check if locked instances are still correct
3324 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3326 # Get instance information
3327 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3329 # Check if node groups for locked instances are still correct
3330 _CheckInstancesNodeGroups(self.cfg, self.instances,
3331 owned_groups, owned_nodes, self.group_uuid)
3333 def Exec(self, feedback_fn):
3334 """Verify integrity of cluster disks.
3336 @rtype: tuple of three items
3337 @return: a tuple of (dict of node-to-node_error, list of instances
3338 which need activate-disks, dict of instance: (node, volume) for
3343 res_instances = set()
3346 nv_dict = _MapInstanceDisksToNodes(
3347 [inst for inst in self.instances.values()
3348 if inst.admin_state == constants.ADMINST_UP])
3351 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3352 set(self.cfg.GetVmCapableNodeList()))
3354 node_lvs = self.rpc.call_lv_list(nodes, [])
3356 for (node, node_res) in node_lvs.items():
3357 if node_res.offline:
3360 msg = node_res.fail_msg
3362 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3363 res_nodes[node] = msg
3366 for lv_name, (_, _, lv_online) in node_res.payload.items():
3367 inst = nv_dict.pop((node, lv_name), None)
3368 if not (lv_online or inst is None):
3369 res_instances.add(inst)
3371 # any leftover items in nv_dict are missing LVs, let's arrange the data
3373 for key, inst in nv_dict.iteritems():
3374 res_missing.setdefault(inst, []).append(list(key))
3376 return (res_nodes, list(res_instances), res_missing)
3379 class LUClusterRepairDiskSizes(NoHooksLU):
3380 """Verifies the cluster disks sizes.
3385 def ExpandNames(self):
3386 if self.op.instances:
3387 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3388 # Not getting the node allocation lock as only a specific set of
3389 # instances (and their nodes) is going to be acquired
3390 self.needed_locks = {
3391 locking.LEVEL_NODE_RES: [],
3392 locking.LEVEL_INSTANCE: self.wanted_names,
3394 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3396 self.wanted_names = None
3397 self.needed_locks = {
3398 locking.LEVEL_NODE_RES: locking.ALL_SET,
3399 locking.LEVEL_INSTANCE: locking.ALL_SET,
3401 # This opcode is acquires the node locks for all instances
3402 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3405 self.share_locks = {
3406 locking.LEVEL_NODE_RES: 1,
3407 locking.LEVEL_INSTANCE: 0,
3408 locking.LEVEL_NODE_ALLOC: 1,
3411 def DeclareLocks(self, level):
3412 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3413 self._LockInstancesNodes(primary_only=True, level=level)
3415 def CheckPrereq(self):
3416 """Check prerequisites.
3418 This only checks the optional instance list against the existing names.
3421 if self.wanted_names is None:
3422 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3424 self.wanted_instances = \
3425 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3427 def _EnsureChildSizes(self, disk):
3428 """Ensure children of the disk have the needed disk size.
3430 This is valid mainly for DRBD8 and fixes an issue where the
3431 children have smaller disk size.
3433 @param disk: an L{ganeti.objects.Disk} object
3436 if disk.dev_type == constants.LD_DRBD8:
3437 assert disk.children, "Empty children for DRBD8?"
3438 fchild = disk.children[0]
3439 mismatch = fchild.size < disk.size
3441 self.LogInfo("Child disk has size %d, parent %d, fixing",
3442 fchild.size, disk.size)
3443 fchild.size = disk.size
3445 # and we recurse on this child only, not on the metadev
3446 return self._EnsureChildSizes(fchild) or mismatch
3450 def Exec(self, feedback_fn):
3451 """Verify the size of cluster disks.
3454 # TODO: check child disks too
3455 # TODO: check differences in size between primary/secondary nodes
3457 for instance in self.wanted_instances:
3458 pnode = instance.primary_node
3459 if pnode not in per_node_disks:
3460 per_node_disks[pnode] = []
3461 for idx, disk in enumerate(instance.disks):
3462 per_node_disks[pnode].append((instance, idx, disk))
3464 assert not (frozenset(per_node_disks.keys()) -
3465 self.owned_locks(locking.LEVEL_NODE_RES)), \
3466 "Not owning correct locks"
3467 assert not self.owned_locks(locking.LEVEL_NODE)
3470 for node, dskl in per_node_disks.items():
3471 newl = [v[2].Copy() for v in dskl]
3473 self.cfg.SetDiskID(dsk, node)
3474 result = self.rpc.call_blockdev_getsize(node, newl)
3476 self.LogWarning("Failure in blockdev_getsize call to node"
3477 " %s, ignoring", node)
3479 if len(result.payload) != len(dskl):
3480 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3481 " result.payload=%s", node, len(dskl), result.payload)
3482 self.LogWarning("Invalid result from node %s, ignoring node results",
3485 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3487 self.LogWarning("Disk %d of instance %s did not return size"
3488 " information, ignoring", idx, instance.name)
3490 if not isinstance(size, (int, long)):
3491 self.LogWarning("Disk %d of instance %s did not return valid"
3492 " size information, ignoring", idx, instance.name)
3495 if size != disk.size:
3496 self.LogInfo("Disk %d of instance %s has mismatched size,"
3497 " correcting: recorded %d, actual %d", idx,
3498 instance.name, disk.size, size)
3500 self.cfg.Update(instance, feedback_fn)
3501 changed.append((instance.name, idx, size))
3502 if self._EnsureChildSizes(disk):
3503 self.cfg.Update(instance, feedback_fn)
3504 changed.append((instance.name, idx, disk.size))
3508 class LUClusterRename(LogicalUnit):
3509 """Rename the cluster.
3512 HPATH = "cluster-rename"
3513 HTYPE = constants.HTYPE_CLUSTER
3515 def BuildHooksEnv(self):
3520 "OP_TARGET": self.cfg.GetClusterName(),
3521 "NEW_NAME": self.op.name,
3524 def BuildHooksNodes(self):
3525 """Build hooks nodes.
3528 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3530 def CheckPrereq(self):
3531 """Verify that the passed name is a valid one.
3534 hostname = netutils.GetHostname(name=self.op.name,
3535 family=self.cfg.GetPrimaryIPFamily())
3537 new_name = hostname.name
3538 self.ip = new_ip = hostname.ip
3539 old_name = self.cfg.GetClusterName()
3540 old_ip = self.cfg.GetMasterIP()
3541 if new_name == old_name and new_ip == old_ip:
3542 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3543 " cluster has changed",
3545 if new_ip != old_ip:
3546 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3547 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3548 " reachable on the network" %
3549 new_ip, errors.ECODE_NOTUNIQUE)
3551 self.op.name = new_name
3553 def Exec(self, feedback_fn):
3554 """Rename the cluster.
3557 clustername = self.op.name
3560 # shutdown the master IP
3561 master_params = self.cfg.GetMasterNetworkParameters()
3562 ems = self.cfg.GetUseExternalMipScript()
3563 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3565 result.Raise("Could not disable the master role")
3568 cluster = self.cfg.GetClusterInfo()
3569 cluster.cluster_name = clustername
3570 cluster.master_ip = new_ip
3571 self.cfg.Update(cluster, feedback_fn)
3573 # update the known hosts file
3574 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3575 node_list = self.cfg.GetOnlineNodeList()
3577 node_list.remove(master_params.name)
3580 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3582 master_params.ip = new_ip
3583 result = self.rpc.call_node_activate_master_ip(master_params.name,
3585 msg = result.fail_msg
3587 self.LogWarning("Could not re-enable the master role on"
3588 " the master, please restart manually: %s", msg)
3593 def _ValidateNetmask(cfg, netmask):
3594 """Checks if a netmask is valid.
3596 @type cfg: L{config.ConfigWriter}
3597 @param cfg: The cluster configuration
3599 @param netmask: the netmask to be verified
3600 @raise errors.OpPrereqError: if the validation fails
3603 ip_family = cfg.GetPrimaryIPFamily()
3605 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3606 except errors.ProgrammerError:
3607 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3608 ip_family, errors.ECODE_INVAL)
3609 if not ipcls.ValidateNetmask(netmask):
3610 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3611 (netmask), errors.ECODE_INVAL)
3614 class LUClusterSetParams(LogicalUnit):
3615 """Change the parameters of the cluster.
3618 HPATH = "cluster-modify"
3619 HTYPE = constants.HTYPE_CLUSTER
3622 def CheckArguments(self):
3626 if self.op.uid_pool:
3627 uidpool.CheckUidPool(self.op.uid_pool)
3629 if self.op.add_uids:
3630 uidpool.CheckUidPool(self.op.add_uids)
3632 if self.op.remove_uids:
3633 uidpool.CheckUidPool(self.op.remove_uids)
3635 if self.op.master_netmask is not None:
3636 _ValidateNetmask(self.cfg, self.op.master_netmask)
3638 if self.op.diskparams:
3639 for dt_params in self.op.diskparams.values():
3640 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3642 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3643 except errors.OpPrereqError, err:
3644 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3647 def ExpandNames(self):
3648 # FIXME: in the future maybe other cluster params won't require checking on
3649 # all nodes to be modified.
3650 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
3651 # resource locks the right thing, shouldn't it be the BGL instead?
3652 self.needed_locks = {
3653 locking.LEVEL_NODE: locking.ALL_SET,
3654 locking.LEVEL_INSTANCE: locking.ALL_SET,
3655 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3656 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3658 self.share_locks = _ShareAll()
3660 def BuildHooksEnv(self):
3665 "OP_TARGET": self.cfg.GetClusterName(),
3666 "NEW_VG_NAME": self.op.vg_name,
3669 def BuildHooksNodes(self):
3670 """Build hooks nodes.
3673 mn = self.cfg.GetMasterNode()
3676 def CheckPrereq(self):
3677 """Check prerequisites.
3679 This checks whether the given params don't conflict and
3680 if the given volume group is valid.
3683 if self.op.vg_name is not None and not self.op.vg_name:
3684 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3685 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3686 " instances exist", errors.ECODE_INVAL)
3688 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3689 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3690 raise errors.OpPrereqError("Cannot disable drbd helper while"
3691 " drbd-based instances exist",
3694 node_list = self.owned_locks(locking.LEVEL_NODE)
3696 vm_capable_nodes = [node.name
3697 for node in self.cfg.GetAllNodesInfo().values()
3698 if node.name in node_list and node.vm_capable]
3700 # if vg_name not None, checks given volume group on all nodes
3702 vglist = self.rpc.call_vg_list(vm_capable_nodes)
3703 for node in vm_capable_nodes:
3704 msg = vglist[node].fail_msg
3706 # ignoring down node
3707 self.LogWarning("Error while gathering data on node %s"
3708 " (ignoring node): %s", node, msg)
3710 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3712 constants.MIN_VG_SIZE)
3714 raise errors.OpPrereqError("Error on node '%s': %s" %
3715 (node, vgstatus), errors.ECODE_ENVIRON)
3717 if self.op.drbd_helper:
3718 # checks given drbd helper on all nodes
3719 helpers = self.rpc.call_drbd_helper(node_list)
3720 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3722 self.LogInfo("Not checking drbd helper on offline node %s", node)
3724 msg = helpers[node].fail_msg
3726 raise errors.OpPrereqError("Error checking drbd helper on node"
3727 " '%s': %s" % (node, msg),
3728 errors.ECODE_ENVIRON)
3729 node_helper = helpers[node].payload
3730 if node_helper != self.op.drbd_helper:
3731 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3732 (node, node_helper), errors.ECODE_ENVIRON)
3734 self.cluster = cluster = self.cfg.GetClusterInfo()
3735 # validate params changes
3736 if self.op.beparams:
3737 objects.UpgradeBeParams(self.op.beparams)
3738 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3739 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3741 if self.op.ndparams:
3742 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3743 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3745 # TODO: we need a more general way to handle resetting
3746 # cluster-level parameters to default values
3747 if self.new_ndparams["oob_program"] == "":
3748 self.new_ndparams["oob_program"] = \
3749 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3751 if self.op.hv_state:
3752 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3753 self.cluster.hv_state_static)
3754 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3755 for hv, values in new_hv_state.items())
3757 if self.op.disk_state:
3758 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3759 self.cluster.disk_state_static)
3760 self.new_disk_state = \
3761 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3762 for name, values in svalues.items()))
3763 for storage, svalues in new_disk_state.items())
3766 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3769 all_instances = self.cfg.GetAllInstancesInfo().values()
3771 for group in self.cfg.GetAllNodeGroupsInfo().values():
3772 instances = frozenset([inst for inst in all_instances
3773 if compat.any(node in group.members
3774 for node in inst.all_nodes)])
3775 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3776 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3777 new = _ComputeNewInstanceViolations(ipol,
3778 new_ipolicy, instances, self.cfg)
3780 violations.update(new)
3783 self.LogWarning("After the ipolicy change the following instances"
3784 " violate them: %s",
3785 utils.CommaJoin(utils.NiceSort(violations)))
3787 if self.op.nicparams:
3788 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3789 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3790 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3793 # check all instances for consistency
3794 for instance in self.cfg.GetAllInstancesInfo().values():
3795 for nic_idx, nic in enumerate(instance.nics):
3796 params_copy = copy.deepcopy(nic.nicparams)
3797 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3799 # check parameter syntax
3801 objects.NIC.CheckParameterSyntax(params_filled)
3802 except errors.ConfigurationError, err:
3803 nic_errors.append("Instance %s, nic/%d: %s" %
3804 (instance.name, nic_idx, err))
3806 # if we're moving instances to routed, check that they have an ip
3807 target_mode = params_filled[constants.NIC_MODE]
3808 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3809 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3810 " address" % (instance.name, nic_idx))
3812 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3813 "\n".join(nic_errors), errors.ECODE_INVAL)
3815 # hypervisor list/parameters
3816 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3817 if self.op.hvparams:
3818 for hv_name, hv_dict in self.op.hvparams.items():
3819 if hv_name not in self.new_hvparams:
3820 self.new_hvparams[hv_name] = hv_dict
3822 self.new_hvparams[hv_name].update(hv_dict)
3824 # disk template parameters
3825 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3826 if self.op.diskparams:
3827 for dt_name, dt_params in self.op.diskparams.items():
3828 if dt_name not in self.op.diskparams:
3829 self.new_diskparams[dt_name] = dt_params
3831 self.new_diskparams[dt_name].update(dt_params)
3833 # os hypervisor parameters
3834 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3836 for os_name, hvs in self.op.os_hvp.items():
3837 if os_name not in self.new_os_hvp:
3838 self.new_os_hvp[os_name] = hvs
3840 for hv_name, hv_dict in hvs.items():
3842 # Delete if it exists
3843 self.new_os_hvp[os_name].pop(hv_name, None)
3844 elif hv_name not in self.new_os_hvp[os_name]:
3845 self.new_os_hvp[os_name][hv_name] = hv_dict
3847 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3850 self.new_osp = objects.FillDict(cluster.osparams, {})
3851 if self.op.osparams:
3852 for os_name, osp in self.op.osparams.items():
3853 if os_name not in self.new_osp:
3854 self.new_osp[os_name] = {}
3856 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3859 if not self.new_osp[os_name]:
3860 # we removed all parameters
3861 del self.new_osp[os_name]
3863 # check the parameter validity (remote check)
3864 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3865 os_name, self.new_osp[os_name])
3867 # changes to the hypervisor list
3868 if self.op.enabled_hypervisors is not None:
3869 self.hv_list = self.op.enabled_hypervisors
3870 for hv in self.hv_list:
3871 # if the hypervisor doesn't already exist in the cluster
3872 # hvparams, we initialize it to empty, and then (in both
3873 # cases) we make sure to fill the defaults, as we might not
3874 # have a complete defaults list if the hypervisor wasn't
3876 if hv not in new_hvp:
3878 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3879 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3881 self.hv_list = cluster.enabled_hypervisors
3883 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3884 # either the enabled list has changed, or the parameters have, validate
3885 for hv_name, hv_params in self.new_hvparams.items():
3886 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3887 (self.op.enabled_hypervisors and
3888 hv_name in self.op.enabled_hypervisors)):
3889 # either this is a new hypervisor, or its parameters have changed
3890 hv_class = hypervisor.GetHypervisorClass(hv_name)
3891 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3892 hv_class.CheckParameterSyntax(hv_params)
3893 _CheckHVParams(self, node_list, hv_name, hv_params)
3895 self._CheckDiskTemplateConsistency()
3898 # no need to check any newly-enabled hypervisors, since the
3899 # defaults have already been checked in the above code-block
3900 for os_name, os_hvp in self.new_os_hvp.items():
3901 for hv_name, hv_params in os_hvp.items():
3902 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3903 # we need to fill in the new os_hvp on top of the actual hv_p
3904 cluster_defaults = self.new_hvparams.get(hv_name, {})
3905 new_osp = objects.FillDict(cluster_defaults, hv_params)
3906 hv_class = hypervisor.GetHypervisorClass(hv_name)
3907 hv_class.CheckParameterSyntax(new_osp)
3908 _CheckHVParams(self, node_list, hv_name, new_osp)
3910 if self.op.default_iallocator:
3911 alloc_script = utils.FindFile(self.op.default_iallocator,
3912 constants.IALLOCATOR_SEARCH_PATH,
3914 if alloc_script is None:
3915 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3916 " specified" % self.op.default_iallocator,
3919 def _CheckDiskTemplateConsistency(self):
3920 """Check whether the disk templates that are going to be disabled
3921 are still in use by some instances.
3924 if self.op.enabled_disk_templates:
3925 cluster = self.cfg.GetClusterInfo()
3926 instances = self.cfg.GetAllInstancesInfo()
3928 disk_templates_to_remove = set(cluster.enabled_disk_templates) \
3929 - set(self.op.enabled_disk_templates)
3930 for instance in instances.itervalues():
3931 if instance.disk_template in disk_templates_to_remove:
3932 raise errors.OpPrereqError("Cannot disable disk template '%s',"
3933 " because instance '%s' is using it." %
3934 (instance.disk_template, instance.name))
3936 def Exec(self, feedback_fn):
3937 """Change the parameters of the cluster.
3940 if self.op.vg_name is not None:
3941 new_volume = self.op.vg_name
3944 if new_volume != self.cfg.GetVGName():
3945 self.cfg.SetVGName(new_volume)
3947 feedback_fn("Cluster LVM configuration already in desired"
3948 " state, not changing")
3949 if self.op.drbd_helper is not None:
3950 new_helper = self.op.drbd_helper
3953 if new_helper != self.cfg.GetDRBDHelper():
3954 self.cfg.SetDRBDHelper(new_helper)
3956 feedback_fn("Cluster DRBD helper already in desired state,"
3958 if self.op.hvparams:
3959 self.cluster.hvparams = self.new_hvparams
3961 self.cluster.os_hvp = self.new_os_hvp
3962 if self.op.enabled_hypervisors is not None:
3963 self.cluster.hvparams = self.new_hvparams
3964 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3965 if self.op.enabled_disk_templates:
3966 self.cluster.enabled_disk_templates = \
3967 list(set(self.op.enabled_disk_templates))
3968 if self.op.beparams:
3969 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3970 if self.op.nicparams:
3971 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3973 self.cluster.ipolicy = self.new_ipolicy
3974 if self.op.osparams:
3975 self.cluster.osparams = self.new_osp
3976 if self.op.ndparams:
3977 self.cluster.ndparams = self.new_ndparams
3978 if self.op.diskparams:
3979 self.cluster.diskparams = self.new_diskparams
3980 if self.op.hv_state:
3981 self.cluster.hv_state_static = self.new_hv_state
3982 if self.op.disk_state:
3983 self.cluster.disk_state_static = self.new_disk_state
3985 if self.op.candidate_pool_size is not None:
3986 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3987 # we need to update the pool size here, otherwise the save will fail
3988 _AdjustCandidatePool(self, [])
3990 if self.op.maintain_node_health is not None:
3991 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3992 feedback_fn("Note: CONFD was disabled at build time, node health"
3993 " maintenance is not useful (still enabling it)")
3994 self.cluster.maintain_node_health = self.op.maintain_node_health
3996 if self.op.prealloc_wipe_disks is not None:
3997 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3999 if self.op.add_uids is not None:
4000 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4002 if self.op.remove_uids is not None:
4003 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4005 if self.op.uid_pool is not None:
4006 self.cluster.uid_pool = self.op.uid_pool
4008 if self.op.default_iallocator is not None:
4009 self.cluster.default_iallocator = self.op.default_iallocator
4011 if self.op.reserved_lvs is not None:
4012 self.cluster.reserved_lvs = self.op.reserved_lvs
4014 if self.op.use_external_mip_script is not None:
4015 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4017 def helper_os(aname, mods, desc):
4019 lst = getattr(self.cluster, aname)
4020 for key, val in mods:
4021 if key == constants.DDM_ADD:
4023 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4026 elif key == constants.DDM_REMOVE:
4030 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4032 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4034 if self.op.hidden_os:
4035 helper_os("hidden_os", self.op.hidden_os, "hidden")
4037 if self.op.blacklisted_os:
4038 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4040 if self.op.master_netdev:
4041 master_params = self.cfg.GetMasterNetworkParameters()
4042 ems = self.cfg.GetUseExternalMipScript()
4043 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4044 self.cluster.master_netdev)
4045 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4047 result.Raise("Could not disable the master ip")
4048 feedback_fn("Changing master_netdev from %s to %s" %
4049 (master_params.netdev, self.op.master_netdev))
4050 self.cluster.master_netdev = self.op.master_netdev
4052 if self.op.master_netmask:
4053 master_params = self.cfg.GetMasterNetworkParameters()
4054 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4055 result = self.rpc.call_node_change_master_netmask(master_params.name,
4056 master_params.netmask,
4057 self.op.master_netmask,
4059 master_params.netdev)
4061 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4064 self.cluster.master_netmask = self.op.master_netmask
4066 self.cfg.Update(self.cluster, feedback_fn)
4068 if self.op.master_netdev:
4069 master_params = self.cfg.GetMasterNetworkParameters()
4070 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4071 self.op.master_netdev)
4072 ems = self.cfg.GetUseExternalMipScript()
4073 result = self.rpc.call_node_activate_master_ip(master_params.name,
4076 self.LogWarning("Could not re-enable the master ip on"
4077 " the master, please restart manually: %s",
4081 def _UploadHelper(lu, nodes, fname):
4082 """Helper for uploading a file and showing warnings.
4085 if os.path.exists(fname):
4086 result = lu.rpc.call_upload_file(nodes, fname)
4087 for to_node, to_result in result.items():
4088 msg = to_result.fail_msg
4090 msg = ("Copy of file %s to node %s failed: %s" %
4091 (fname, to_node, msg))
4095 def _ComputeAncillaryFiles(cluster, redist):
4096 """Compute files external to Ganeti which need to be consistent.
4098 @type redist: boolean
4099 @param redist: Whether to include files which need to be redistributed
4102 # Compute files for all nodes
4104 pathutils.SSH_KNOWN_HOSTS_FILE,
4105 pathutils.CONFD_HMAC_KEY,
4106 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4107 pathutils.SPICE_CERT_FILE,
4108 pathutils.SPICE_CACERT_FILE,
4109 pathutils.RAPI_USERS_FILE,
4113 # we need to ship at least the RAPI certificate
4114 files_all.add(pathutils.RAPI_CERT_FILE)
4116 files_all.update(pathutils.ALL_CERT_FILES)
4117 files_all.update(ssconf.SimpleStore().GetFileList())
4119 if cluster.modify_etc_hosts:
4120 files_all.add(pathutils.ETC_HOSTS)
4122 if cluster.use_external_mip_script:
4123 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4125 # Files which are optional, these must:
4126 # - be present in one other category as well
4127 # - either exist or not exist on all nodes of that category (mc, vm all)
4129 pathutils.RAPI_USERS_FILE,
4132 # Files which should only be on master candidates
4136 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4140 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4141 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4142 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4144 # Files which should only be on VM-capable nodes
4147 for hv_name in cluster.enabled_hypervisors
4149 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4153 for hv_name in cluster.enabled_hypervisors
4155 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4157 # Filenames in each category must be unique
4158 all_files_set = files_all | files_mc | files_vm
4159 assert (len(all_files_set) ==
4160 sum(map(len, [files_all, files_mc, files_vm]))), \
4161 "Found file listed in more than one file list"
4163 # Optional files must be present in one other category
4164 assert all_files_set.issuperset(files_opt), \
4165 "Optional file not in a different required list"
4167 # This one file should never ever be re-distributed via RPC
4168 assert not (redist and
4169 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4171 return (files_all, files_opt, files_mc, files_vm)
4174 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4175 """Distribute additional files which are part of the cluster configuration.
4177 ConfigWriter takes care of distributing the config and ssconf files, but
4178 there are more files which should be distributed to all nodes. This function
4179 makes sure those are copied.
4181 @param lu: calling logical unit
4182 @param additional_nodes: list of nodes not in the config to distribute to
4183 @type additional_vm: boolean
4184 @param additional_vm: whether the additional nodes are vm-capable or not
4187 # Gather target nodes
4188 cluster = lu.cfg.GetClusterInfo()
4189 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4191 online_nodes = lu.cfg.GetOnlineNodeList()
4192 online_set = frozenset(online_nodes)
4193 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4195 if additional_nodes is not None:
4196 online_nodes.extend(additional_nodes)
4198 vm_nodes.extend(additional_nodes)
4200 # Never distribute to master node
4201 for nodelist in [online_nodes, vm_nodes]:
4202 if master_info.name in nodelist:
4203 nodelist.remove(master_info.name)
4206 (files_all, _, files_mc, files_vm) = \
4207 _ComputeAncillaryFiles(cluster, True)
4209 # Never re-distribute configuration file from here
4210 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4211 pathutils.CLUSTER_CONF_FILE in files_vm)
4212 assert not files_mc, "Master candidates not handled in this function"
4215 (online_nodes, files_all),
4216 (vm_nodes, files_vm),
4220 for (node_list, files) in filemap:
4222 _UploadHelper(lu, node_list, fname)
4225 class LUClusterRedistConf(NoHooksLU):
4226 """Force the redistribution of cluster configuration.
4228 This is a very simple LU.
4233 def ExpandNames(self):
4234 self.needed_locks = {
4235 locking.LEVEL_NODE: locking.ALL_SET,
4236 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4238 self.share_locks = _ShareAll()
4240 def Exec(self, feedback_fn):
4241 """Redistribute the configuration.
4244 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4245 _RedistributeAncillaryFiles(self)
4248 class LUClusterActivateMasterIp(NoHooksLU):
4249 """Activate the master IP on the master node.
4252 def Exec(self, feedback_fn):
4253 """Activate the master IP.
4256 master_params = self.cfg.GetMasterNetworkParameters()
4257 ems = self.cfg.GetUseExternalMipScript()
4258 result = self.rpc.call_node_activate_master_ip(master_params.name,
4260 result.Raise("Could not activate the master IP")
4263 class LUClusterDeactivateMasterIp(NoHooksLU):
4264 """Deactivate the master IP on the master node.
4267 def Exec(self, feedback_fn):
4268 """Deactivate the master IP.
4271 master_params = self.cfg.GetMasterNetworkParameters()
4272 ems = self.cfg.GetUseExternalMipScript()
4273 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4275 result.Raise("Could not deactivate the master IP")
4278 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4279 """Sleep and poll for an instance's disk to sync.
4282 if not instance.disks or disks is not None and not disks:
4285 disks = _ExpandCheckDisks(instance, disks)
4288 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4290 node = instance.primary_node
4293 lu.cfg.SetDiskID(dev, node)
4295 # TODO: Convert to utils.Retry
4298 degr_retries = 10 # in seconds, as we sleep 1 second each time
4302 cumul_degraded = False
4303 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4304 msg = rstats.fail_msg
4306 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4309 raise errors.RemoteError("Can't contact node %s for mirror data,"
4310 " aborting." % node)
4313 rstats = rstats.payload
4315 for i, mstat in enumerate(rstats):
4317 lu.LogWarning("Can't compute data for node %s/%s",
4318 node, disks[i].iv_name)
4321 cumul_degraded = (cumul_degraded or
4322 (mstat.is_degraded and mstat.sync_percent is None))
4323 if mstat.sync_percent is not None:
4325 if mstat.estimated_time is not None:
4326 rem_time = ("%s remaining (estimated)" %
4327 utils.FormatSeconds(mstat.estimated_time))
4328 max_time = mstat.estimated_time
4330 rem_time = "no time estimate"
4331 lu.LogInfo("- device %s: %5.2f%% done, %s",
4332 disks[i].iv_name, mstat.sync_percent, rem_time)
4334 # if we're done but degraded, let's do a few small retries, to
4335 # make sure we see a stable and not transient situation; therefore
4336 # we force restart of the loop
4337 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4338 logging.info("Degraded disks found, %d retries left", degr_retries)
4346 time.sleep(min(60, max_time))
4349 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4351 return not cumul_degraded
4354 def _BlockdevFind(lu, node, dev, instance):
4355 """Wrapper around call_blockdev_find to annotate diskparams.
4357 @param lu: A reference to the lu object
4358 @param node: The node to call out
4359 @param dev: The device to find
4360 @param instance: The instance object the device belongs to
4361 @returns The result of the rpc call
4364 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4365 return lu.rpc.call_blockdev_find(node, disk)
4368 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4369 """Wrapper around L{_CheckDiskConsistencyInner}.
4372 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4373 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4377 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4379 """Check that mirrors are not degraded.
4381 @attention: The device has to be annotated already.
4383 The ldisk parameter, if True, will change the test from the
4384 is_degraded attribute (which represents overall non-ok status for
4385 the device(s)) to the ldisk (representing the local storage status).
4388 lu.cfg.SetDiskID(dev, node)
4392 if on_primary or dev.AssembleOnSecondary():
4393 rstats = lu.rpc.call_blockdev_find(node, dev)
4394 msg = rstats.fail_msg
4396 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4398 elif not rstats.payload:
4399 lu.LogWarning("Can't find disk on node %s", node)
4403 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4405 result = result and not rstats.payload.is_degraded
4408 for child in dev.children:
4409 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4415 class LUOobCommand(NoHooksLU):
4416 """Logical unit for OOB handling.
4420 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4422 def ExpandNames(self):
4423 """Gather locks we need.
4426 if self.op.node_names:
4427 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4428 lock_names = self.op.node_names
4430 lock_names = locking.ALL_SET
4432 self.needed_locks = {
4433 locking.LEVEL_NODE: lock_names,
4436 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4438 if not self.op.node_names:
4439 # Acquire node allocation lock only if all nodes are affected
4440 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4442 def CheckPrereq(self):
4443 """Check prerequisites.
4446 - the node exists in the configuration
4449 Any errors are signaled by raising errors.OpPrereqError.
4453 self.master_node = self.cfg.GetMasterNode()
4455 assert self.op.power_delay >= 0.0
4457 if self.op.node_names:
4458 if (self.op.command in self._SKIP_MASTER and
4459 self.master_node in self.op.node_names):
4460 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4461 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4463 if master_oob_handler:
4464 additional_text = ("run '%s %s %s' if you want to operate on the"
4465 " master regardless") % (master_oob_handler,
4469 additional_text = "it does not support out-of-band operations"
4471 raise errors.OpPrereqError(("Operating on the master node %s is not"
4472 " allowed for %s; %s") %
4473 (self.master_node, self.op.command,
4474 additional_text), errors.ECODE_INVAL)
4476 self.op.node_names = self.cfg.GetNodeList()
4477 if self.op.command in self._SKIP_MASTER:
4478 self.op.node_names.remove(self.master_node)
4480 if self.op.command in self._SKIP_MASTER:
4481 assert self.master_node not in self.op.node_names
4483 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4485 raise errors.OpPrereqError("Node %s not found" % node_name,
4488 self.nodes.append(node)
4490 if (not self.op.ignore_status and
4491 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4492 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4493 " not marked offline") % node_name,
4496 def Exec(self, feedback_fn):
4497 """Execute OOB and return result if we expect any.
4500 master_node = self.master_node
4503 for idx, node in enumerate(utils.NiceSort(self.nodes,
4504 key=lambda node: node.name)):
4505 node_entry = [(constants.RS_NORMAL, node.name)]
4506 ret.append(node_entry)
4508 oob_program = _SupportsOob(self.cfg, node)
4511 node_entry.append((constants.RS_UNAVAIL, None))
4514 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4515 self.op.command, oob_program, node.name)
4516 result = self.rpc.call_run_oob(master_node, oob_program,
4517 self.op.command, node.name,
4521 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4522 node.name, result.fail_msg)
4523 node_entry.append((constants.RS_NODATA, None))
4526 self._CheckPayload(result)
4527 except errors.OpExecError, err:
4528 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4530 node_entry.append((constants.RS_NODATA, None))
4532 if self.op.command == constants.OOB_HEALTH:
4533 # For health we should log important events
4534 for item, status in result.payload:
4535 if status in [constants.OOB_STATUS_WARNING,
4536 constants.OOB_STATUS_CRITICAL]:
4537 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4538 item, node.name, status)
4540 if self.op.command == constants.OOB_POWER_ON:
4542 elif self.op.command == constants.OOB_POWER_OFF:
4543 node.powered = False
4544 elif self.op.command == constants.OOB_POWER_STATUS:
4545 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4546 if powered != node.powered:
4547 logging.warning(("Recorded power state (%s) of node '%s' does not"
4548 " match actual power state (%s)"), node.powered,
4551 # For configuration changing commands we should update the node
4552 if self.op.command in (constants.OOB_POWER_ON,
4553 constants.OOB_POWER_OFF):
4554 self.cfg.Update(node, feedback_fn)
4556 node_entry.append((constants.RS_NORMAL, result.payload))
4558 if (self.op.command == constants.OOB_POWER_ON and
4559 idx < len(self.nodes) - 1):
4560 time.sleep(self.op.power_delay)
4564 def _CheckPayload(self, result):
4565 """Checks if the payload is valid.
4567 @param result: RPC result
4568 @raises errors.OpExecError: If payload is not valid
4572 if self.op.command == constants.OOB_HEALTH:
4573 if not isinstance(result.payload, list):
4574 errs.append("command 'health' is expected to return a list but got %s" %
4575 type(result.payload))
4577 for item, status in result.payload:
4578 if status not in constants.OOB_STATUSES:
4579 errs.append("health item '%s' has invalid status '%s'" %
4582 if self.op.command == constants.OOB_POWER_STATUS:
4583 if not isinstance(result.payload, dict):
4584 errs.append("power-status is expected to return a dict but got %s" %
4585 type(result.payload))
4587 if self.op.command in [
4588 constants.OOB_POWER_ON,
4589 constants.OOB_POWER_OFF,
4590 constants.OOB_POWER_CYCLE,
4592 if result.payload is not None:
4593 errs.append("%s is expected to not return payload but got '%s'" %
4594 (self.op.command, result.payload))
4597 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4598 utils.CommaJoin(errs))
4601 class _OsQuery(_QueryBase):
4602 FIELDS = query.OS_FIELDS
4604 def ExpandNames(self, lu):
4605 # Lock all nodes in shared mode
4606 # Temporary removal of locks, should be reverted later
4607 # TODO: reintroduce locks when they are lighter-weight
4608 lu.needed_locks = {}
4609 #self.share_locks[locking.LEVEL_NODE] = 1
4610 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4612 # The following variables interact with _QueryBase._GetNames
4614 self.wanted = self.names
4616 self.wanted = locking.ALL_SET
4618 self.do_locking = self.use_locking
4620 def DeclareLocks(self, lu, level):
4624 def _DiagnoseByOS(rlist):
4625 """Remaps a per-node return list into an a per-os per-node dictionary
4627 @param rlist: a map with node names as keys and OS objects as values
4630 @return: a dictionary with osnames as keys and as value another
4631 map, with nodes as keys and tuples of (path, status, diagnose,
4632 variants, parameters, api_versions) as values, eg::
4634 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4635 (/srv/..., False, "invalid api")],
4636 "node2": [(/srv/..., True, "", [], [])]}
4641 # we build here the list of nodes that didn't fail the RPC (at RPC
4642 # level), so that nodes with a non-responding node daemon don't
4643 # make all OSes invalid
4644 good_nodes = [node_name for node_name in rlist
4645 if not rlist[node_name].fail_msg]
4646 for node_name, nr in rlist.items():
4647 if nr.fail_msg or not nr.payload:
4649 for (name, path, status, diagnose, variants,
4650 params, api_versions) in nr.payload:
4651 if name not in all_os:
4652 # build a list of nodes for this os containing empty lists
4653 # for each node in node_list
4655 for nname in good_nodes:
4656 all_os[name][nname] = []
4657 # convert params from [name, help] to (name, help)
4658 params = [tuple(v) for v in params]
4659 all_os[name][node_name].append((path, status, diagnose,
4660 variants, params, api_versions))
4663 def _GetQueryData(self, lu):
4664 """Computes the list of nodes and their attributes.
4667 # Locking is not used
4668 assert not (compat.any(lu.glm.is_owned(level)
4669 for level in locking.LEVELS
4670 if level != locking.LEVEL_CLUSTER) or
4671 self.do_locking or self.use_locking)
4673 valid_nodes = [node.name
4674 for node in lu.cfg.GetAllNodesInfo().values()
4675 if not node.offline and node.vm_capable]
4676 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4677 cluster = lu.cfg.GetClusterInfo()
4681 for (os_name, os_data) in pol.items():
4682 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4683 hidden=(os_name in cluster.hidden_os),
4684 blacklisted=(os_name in cluster.blacklisted_os))
4688 api_versions = set()
4690 for idx, osl in enumerate(os_data.values()):
4691 info.valid = bool(info.valid and osl and osl[0][1])
4695 (node_variants, node_params, node_api) = osl[0][3:6]
4698 variants.update(node_variants)
4699 parameters.update(node_params)
4700 api_versions.update(node_api)
4702 # Filter out inconsistent values
4703 variants.intersection_update(node_variants)
4704 parameters.intersection_update(node_params)
4705 api_versions.intersection_update(node_api)
4707 info.variants = list(variants)
4708 info.parameters = list(parameters)
4709 info.api_versions = list(api_versions)
4711 data[os_name] = info
4713 # Prepare data in requested order
4714 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4718 class LUOsDiagnose(NoHooksLU):
4719 """Logical unit for OS diagnose/query.
4725 def _BuildFilter(fields, names):
4726 """Builds a filter for querying OSes.
4729 name_filter = qlang.MakeSimpleFilter("name", names)
4731 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4732 # respective field is not requested
4733 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4734 for fname in ["hidden", "blacklisted"]
4735 if fname not in fields]
4736 if "valid" not in fields:
4737 status_filter.append([qlang.OP_TRUE, "valid"])
4740 status_filter.insert(0, qlang.OP_AND)
4742 status_filter = None
4744 if name_filter and status_filter:
4745 return [qlang.OP_AND, name_filter, status_filter]
4749 return status_filter
4751 def CheckArguments(self):
4752 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4753 self.op.output_fields, False)
4755 def ExpandNames(self):
4756 self.oq.ExpandNames(self)
4758 def Exec(self, feedback_fn):
4759 return self.oq.OldStyleQuery(self)
4762 class _ExtStorageQuery(_QueryBase):
4763 FIELDS = query.EXTSTORAGE_FIELDS
4765 def ExpandNames(self, lu):
4766 # Lock all nodes in shared mode
4767 # Temporary removal of locks, should be reverted later
4768 # TODO: reintroduce locks when they are lighter-weight
4769 lu.needed_locks = {}
4770 #self.share_locks[locking.LEVEL_NODE] = 1
4771 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4773 # The following variables interact with _QueryBase._GetNames
4775 self.wanted = self.names
4777 self.wanted = locking.ALL_SET
4779 self.do_locking = self.use_locking
4781 def DeclareLocks(self, lu, level):
4785 def _DiagnoseByProvider(rlist):
4786 """Remaps a per-node return list into an a per-provider per-node dictionary
4788 @param rlist: a map with node names as keys and ExtStorage objects as values
4791 @return: a dictionary with extstorage providers as keys and as
4792 value another map, with nodes as keys and tuples of
4793 (path, status, diagnose, parameters) as values, eg::
4795 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
4796 "node2": [(/srv/..., False, "missing file")]
4797 "node3": [(/srv/..., True, "", [])]
4802 # we build here the list of nodes that didn't fail the RPC (at RPC
4803 # level), so that nodes with a non-responding node daemon don't
4804 # make all OSes invalid
4805 good_nodes = [node_name for node_name in rlist
4806 if not rlist[node_name].fail_msg]
4807 for node_name, nr in rlist.items():
4808 if nr.fail_msg or not nr.payload:
4810 for (name, path, status, diagnose, params) in nr.payload:
4811 if name not in all_es:
4812 # build a list of nodes for this os containing empty lists
4813 # for each node in node_list
4815 for nname in good_nodes:
4816 all_es[name][nname] = []
4817 # convert params from [name, help] to (name, help)
4818 params = [tuple(v) for v in params]
4819 all_es[name][node_name].append((path, status, diagnose, params))
4822 def _GetQueryData(self, lu):
4823 """Computes the list of nodes and their attributes.
4826 # Locking is not used
4827 assert not (compat.any(lu.glm.is_owned(level)
4828 for level in locking.LEVELS
4829 if level != locking.LEVEL_CLUSTER) or
4830 self.do_locking or self.use_locking)
4832 valid_nodes = [node.name
4833 for node in lu.cfg.GetAllNodesInfo().values()
4834 if not node.offline and node.vm_capable]
4835 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
4839 nodegroup_list = lu.cfg.GetNodeGroupList()
4841 for (es_name, es_data) in pol.items():
4842 # For every provider compute the nodegroup validity.
4843 # To do this we need to check the validity of each node in es_data
4844 # and then construct the corresponding nodegroup dict:
4845 # { nodegroup1: status
4846 # nodegroup2: status
4849 for nodegroup in nodegroup_list:
4850 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
4852 nodegroup_nodes = ndgrp.members
4853 nodegroup_name = ndgrp.name
4856 for node in nodegroup_nodes:
4857 if node in valid_nodes:
4858 if es_data[node] != []:
4859 node_status = es_data[node][0][1]
4860 node_statuses.append(node_status)
4862 node_statuses.append(False)
4864 if False in node_statuses:
4865 ndgrp_data[nodegroup_name] = False
4867 ndgrp_data[nodegroup_name] = True
4869 # Compute the provider's parameters
4871 for idx, esl in enumerate(es_data.values()):
4872 valid = bool(esl and esl[0][1])
4876 node_params = esl[0][3]
4879 parameters.update(node_params)
4881 # Filter out inconsistent values
4882 parameters.intersection_update(node_params)
4884 params = list(parameters)
4886 # Now fill all the info for this provider
4887 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
4888 nodegroup_status=ndgrp_data,
4891 data[es_name] = info
4893 # Prepare data in requested order
4894 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4898 class LUExtStorageDiagnose(NoHooksLU):
4899 """Logical unit for ExtStorage diagnose/query.
4904 def CheckArguments(self):
4905 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
4906 self.op.output_fields, False)
4908 def ExpandNames(self):
4909 self.eq.ExpandNames(self)
4911 def Exec(self, feedback_fn):
4912 return self.eq.OldStyleQuery(self)
4915 class LUNodeRemove(LogicalUnit):
4916 """Logical unit for removing a node.
4919 HPATH = "node-remove"
4920 HTYPE = constants.HTYPE_NODE
4922 def BuildHooksEnv(self):
4927 "OP_TARGET": self.op.node_name,
4928 "NODE_NAME": self.op.node_name,
4931 def BuildHooksNodes(self):
4932 """Build hooks nodes.
4934 This doesn't run on the target node in the pre phase as a failed
4935 node would then be impossible to remove.
4938 all_nodes = self.cfg.GetNodeList()
4940 all_nodes.remove(self.op.node_name)
4943 return (all_nodes, all_nodes)
4945 def CheckPrereq(self):
4946 """Check prerequisites.
4949 - the node exists in the configuration
4950 - it does not have primary or secondary instances
4951 - it's not the master
4953 Any errors are signaled by raising errors.OpPrereqError.
4956 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4957 node = self.cfg.GetNodeInfo(self.op.node_name)
4958 assert node is not None
4960 masternode = self.cfg.GetMasterNode()
4961 if node.name == masternode:
4962 raise errors.OpPrereqError("Node is the master node, failover to another"
4963 " node is required", errors.ECODE_INVAL)
4965 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4966 if node.name in instance.all_nodes:
4967 raise errors.OpPrereqError("Instance %s is still running on the node,"
4968 " please remove first" % instance_name,
4970 self.op.node_name = node.name
4973 def Exec(self, feedback_fn):
4974 """Removes the node from the cluster.
4978 logging.info("Stopping the node daemon and removing configs from node %s",
4981 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4983 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4986 # Promote nodes to master candidate as needed
4987 _AdjustCandidatePool(self, exceptions=[node.name])
4988 self.context.RemoveNode(node.name)
4990 # Run post hooks on the node before it's removed
4991 _RunPostHook(self, node.name)
4993 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4994 msg = result.fail_msg
4996 self.LogWarning("Errors encountered on the remote node while leaving"
4997 " the cluster: %s", msg)
4999 # Remove node from our /etc/hosts
5000 if self.cfg.GetClusterInfo().modify_etc_hosts:
5001 master_node = self.cfg.GetMasterNode()
5002 result = self.rpc.call_etc_hosts_modify(master_node,
5003 constants.ETC_HOSTS_REMOVE,
5005 result.Raise("Can't update hosts file with new host data")
5006 _RedistributeAncillaryFiles(self)
5009 class _NodeQuery(_QueryBase):
5010 FIELDS = query.NODE_FIELDS
5012 def ExpandNames(self, lu):
5013 lu.needed_locks = {}
5014 lu.share_locks = _ShareAll()
5017 self.wanted = _GetWantedNodes(lu, self.names)
5019 self.wanted = locking.ALL_SET
5021 self.do_locking = (self.use_locking and
5022 query.NQ_LIVE in self.requested_data)
5025 # If any non-static field is requested we need to lock the nodes
5026 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5027 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5029 def DeclareLocks(self, lu, level):
5032 def _GetQueryData(self, lu):
5033 """Computes the list of nodes and their attributes.
5036 all_info = lu.cfg.GetAllNodesInfo()
5038 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5040 # Gather data as requested
5041 if query.NQ_LIVE in self.requested_data:
5042 # filter out non-vm_capable nodes
5043 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5045 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5046 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5047 [lu.cfg.GetHypervisorType()], es_flags)
5048 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5049 for (name, nresult) in node_data.items()
5050 if not nresult.fail_msg and nresult.payload)
5054 if query.NQ_INST in self.requested_data:
5055 node_to_primary = dict([(name, set()) for name in nodenames])
5056 node_to_secondary = dict([(name, set()) for name in nodenames])
5058 inst_data = lu.cfg.GetAllInstancesInfo()
5060 for inst in inst_data.values():
5061 if inst.primary_node in node_to_primary:
5062 node_to_primary[inst.primary_node].add(inst.name)
5063 for secnode in inst.secondary_nodes:
5064 if secnode in node_to_secondary:
5065 node_to_secondary[secnode].add(inst.name)
5067 node_to_primary = None
5068 node_to_secondary = None
5070 if query.NQ_OOB in self.requested_data:
5071 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5072 for name, node in all_info.iteritems())
5076 if query.NQ_GROUP in self.requested_data:
5077 groups = lu.cfg.GetAllNodeGroupsInfo()
5081 return query.NodeQueryData([all_info[name] for name in nodenames],
5082 live_data, lu.cfg.GetMasterNode(),
5083 node_to_primary, node_to_secondary, groups,
5084 oob_support, lu.cfg.GetClusterInfo())
5087 class LUNodeQuery(NoHooksLU):
5088 """Logical unit for querying nodes.
5091 # pylint: disable=W0142
5094 def CheckArguments(self):
5095 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5096 self.op.output_fields, self.op.use_locking)
5098 def ExpandNames(self):
5099 self.nq.ExpandNames(self)
5101 def DeclareLocks(self, level):
5102 self.nq.DeclareLocks(self, level)
5104 def Exec(self, feedback_fn):
5105 return self.nq.OldStyleQuery(self)
5108 class LUNodeQueryvols(NoHooksLU):
5109 """Logical unit for getting volumes on node(s).
5113 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5114 _FIELDS_STATIC = utils.FieldSet("node")
5116 def CheckArguments(self):
5117 _CheckOutputFields(static=self._FIELDS_STATIC,
5118 dynamic=self._FIELDS_DYNAMIC,
5119 selected=self.op.output_fields)
5121 def ExpandNames(self):
5122 self.share_locks = _ShareAll()
5125 self.needed_locks = {
5126 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5129 self.needed_locks = {
5130 locking.LEVEL_NODE: locking.ALL_SET,
5131 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5134 def Exec(self, feedback_fn):
5135 """Computes the list of nodes and their attributes.
5138 nodenames = self.owned_locks(locking.LEVEL_NODE)
5139 volumes = self.rpc.call_node_volumes(nodenames)
5141 ilist = self.cfg.GetAllInstancesInfo()
5142 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5145 for node in nodenames:
5146 nresult = volumes[node]
5149 msg = nresult.fail_msg
5151 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5154 node_vols = sorted(nresult.payload,
5155 key=operator.itemgetter("dev"))
5157 for vol in node_vols:
5159 for field in self.op.output_fields:
5162 elif field == "phys":
5166 elif field == "name":
5168 elif field == "size":
5169 val = int(float(vol["size"]))
5170 elif field == "instance":
5171 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5173 raise errors.ParameterError(field)
5174 node_output.append(str(val))
5176 output.append(node_output)
5181 class LUNodeQueryStorage(NoHooksLU):
5182 """Logical unit for getting information on storage units on node(s).
5185 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5188 def CheckArguments(self):
5189 _CheckOutputFields(static=self._FIELDS_STATIC,
5190 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5191 selected=self.op.output_fields)
5193 def ExpandNames(self):
5194 self.share_locks = _ShareAll()
5197 self.needed_locks = {
5198 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5201 self.needed_locks = {
5202 locking.LEVEL_NODE: locking.ALL_SET,
5203 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5206 def Exec(self, feedback_fn):
5207 """Computes the list of nodes and their attributes.
5210 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5212 # Always get name to sort by
5213 if constants.SF_NAME in self.op.output_fields:
5214 fields = self.op.output_fields[:]
5216 fields = [constants.SF_NAME] + self.op.output_fields
5218 # Never ask for node or type as it's only known to the LU
5219 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5220 while extra in fields:
5221 fields.remove(extra)
5223 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5224 name_idx = field_idx[constants.SF_NAME]
5226 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5227 data = self.rpc.call_storage_list(self.nodes,
5228 self.op.storage_type, st_args,
5229 self.op.name, fields)
5233 for node in utils.NiceSort(self.nodes):
5234 nresult = data[node]
5238 msg = nresult.fail_msg
5240 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5243 rows = dict([(row[name_idx], row) for row in nresult.payload])
5245 for name in utils.NiceSort(rows.keys()):
5250 for field in self.op.output_fields:
5251 if field == constants.SF_NODE:
5253 elif field == constants.SF_TYPE:
5254 val = self.op.storage_type
5255 elif field in field_idx:
5256 val = row[field_idx[field]]
5258 raise errors.ParameterError(field)
5267 class _InstanceQuery(_QueryBase):
5268 FIELDS = query.INSTANCE_FIELDS
5270 def ExpandNames(self, lu):
5271 lu.needed_locks = {}
5272 lu.share_locks = _ShareAll()
5275 self.wanted = _GetWantedInstances(lu, self.names)
5277 self.wanted = locking.ALL_SET
5279 self.do_locking = (self.use_locking and
5280 query.IQ_LIVE in self.requested_data)
5282 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5283 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5284 lu.needed_locks[locking.LEVEL_NODE] = []
5285 lu.needed_locks[locking.LEVEL_NETWORK] = []
5286 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5288 self.do_grouplocks = (self.do_locking and
5289 query.IQ_NODES in self.requested_data)
5291 def DeclareLocks(self, lu, level):
5293 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5294 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5296 # Lock all groups used by instances optimistically; this requires going
5297 # via the node before it's locked, requiring verification later on
5298 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5300 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5301 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5302 elif level == locking.LEVEL_NODE:
5303 lu._LockInstancesNodes() # pylint: disable=W0212
5305 elif level == locking.LEVEL_NETWORK:
5306 lu.needed_locks[locking.LEVEL_NETWORK] = \
5308 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5309 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
5312 def _CheckGroupLocks(lu):
5313 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5314 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5316 # Check if node groups for locked instances are still correct
5317 for instance_name in owned_instances:
5318 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5320 def _GetQueryData(self, lu):
5321 """Computes the list of instances and their attributes.
5324 if self.do_grouplocks:
5325 self._CheckGroupLocks(lu)
5327 cluster = lu.cfg.GetClusterInfo()
5328 all_info = lu.cfg.GetAllInstancesInfo()
5330 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5332 instance_list = [all_info[name] for name in instance_names]
5333 nodes = frozenset(itertools.chain(*(inst.all_nodes
5334 for inst in instance_list)))
5335 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5338 wrongnode_inst = set()
5340 # Gather data as requested
5341 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5343 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5345 result = node_data[name]
5347 # offline nodes will be in both lists
5348 assert result.fail_msg
5349 offline_nodes.append(name)
5351 bad_nodes.append(name)
5352 elif result.payload:
5353 for inst in result.payload:
5354 if inst in all_info:
5355 if all_info[inst].primary_node == name:
5356 live_data.update(result.payload)
5358 wrongnode_inst.add(inst)
5360 # orphan instance; we don't list it here as we don't
5361 # handle this case yet in the output of instance listing
5362 logging.warning("Orphan instance '%s' found on node %s",
5364 # else no instance is alive
5368 if query.IQ_DISKUSAGE in self.requested_data:
5369 gmi = ganeti.masterd.instance
5370 disk_usage = dict((inst.name,
5371 gmi.ComputeDiskSize(inst.disk_template,
5372 [{constants.IDISK_SIZE: disk.size}
5373 for disk in inst.disks]))
5374 for inst in instance_list)
5378 if query.IQ_CONSOLE in self.requested_data:
5380 for inst in instance_list:
5381 if inst.name in live_data:
5382 # Instance is running
5383 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5385 consinfo[inst.name] = None
5386 assert set(consinfo.keys()) == set(instance_names)
5390 if query.IQ_NODES in self.requested_data:
5391 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5393 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5394 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5395 for uuid in set(map(operator.attrgetter("group"),
5401 if query.IQ_NETWORKS in self.requested_data:
5402 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5403 for i in instance_list))
5404 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5408 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5409 disk_usage, offline_nodes, bad_nodes,
5410 live_data, wrongnode_inst, consinfo,
5411 nodes, groups, networks)
5414 class LUQuery(NoHooksLU):
5415 """Query for resources/items of a certain kind.
5418 # pylint: disable=W0142
5421 def CheckArguments(self):
5422 qcls = _GetQueryImplementation(self.op.what)
5424 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5426 def ExpandNames(self):
5427 self.impl.ExpandNames(self)
5429 def DeclareLocks(self, level):
5430 self.impl.DeclareLocks(self, level)
5432 def Exec(self, feedback_fn):
5433 return self.impl.NewStyleQuery(self)
5436 class LUQueryFields(NoHooksLU):
5437 """Query for resources/items of a certain kind.
5440 # pylint: disable=W0142
5443 def CheckArguments(self):
5444 self.qcls = _GetQueryImplementation(self.op.what)
5446 def ExpandNames(self):
5447 self.needed_locks = {}
5449 def Exec(self, feedback_fn):
5450 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5453 class LUNodeModifyStorage(NoHooksLU):
5454 """Logical unit for modifying a storage volume on a node.
5459 def CheckArguments(self):
5460 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5462 storage_type = self.op.storage_type
5465 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5467 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5468 " modified" % storage_type,
5471 diff = set(self.op.changes.keys()) - modifiable
5473 raise errors.OpPrereqError("The following fields can not be modified for"
5474 " storage units of type '%s': %r" %
5475 (storage_type, list(diff)),
5478 def ExpandNames(self):
5479 self.needed_locks = {
5480 locking.LEVEL_NODE: self.op.node_name,
5483 def Exec(self, feedback_fn):
5484 """Computes the list of nodes and their attributes.
5487 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5488 result = self.rpc.call_storage_modify(self.op.node_name,
5489 self.op.storage_type, st_args,
5490 self.op.name, self.op.changes)
5491 result.Raise("Failed to modify storage unit '%s' on %s" %
5492 (self.op.name, self.op.node_name))
5495 class LUNodeAdd(LogicalUnit):
5496 """Logical unit for adding node to the cluster.
5500 HTYPE = constants.HTYPE_NODE
5501 _NFLAGS = ["master_capable", "vm_capable"]
5503 def CheckArguments(self):
5504 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5505 # validate/normalize the node name
5506 self.hostname = netutils.GetHostname(name=self.op.node_name,
5507 family=self.primary_ip_family)
5508 self.op.node_name = self.hostname.name
5510 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5511 raise errors.OpPrereqError("Cannot readd the master node",
5514 if self.op.readd and self.op.group:
5515 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5516 " being readded", errors.ECODE_INVAL)
5518 def BuildHooksEnv(self):
5521 This will run on all nodes before, and on all nodes + the new node after.
5525 "OP_TARGET": self.op.node_name,
5526 "NODE_NAME": self.op.node_name,
5527 "NODE_PIP": self.op.primary_ip,
5528 "NODE_SIP": self.op.secondary_ip,
5529 "MASTER_CAPABLE": str(self.op.master_capable),
5530 "VM_CAPABLE": str(self.op.vm_capable),
5533 def BuildHooksNodes(self):
5534 """Build hooks nodes.
5537 # Exclude added node
5538 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5539 post_nodes = pre_nodes + [self.op.node_name, ]
5541 return (pre_nodes, post_nodes)
5543 def CheckPrereq(self):
5544 """Check prerequisites.
5547 - the new node is not already in the config
5549 - its parameters (single/dual homed) matches the cluster
5551 Any errors are signaled by raising errors.OpPrereqError.
5555 hostname = self.hostname
5556 node = hostname.name
5557 primary_ip = self.op.primary_ip = hostname.ip
5558 if self.op.secondary_ip is None:
5559 if self.primary_ip_family == netutils.IP6Address.family:
5560 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5561 " IPv4 address must be given as secondary",
5563 self.op.secondary_ip = primary_ip
5565 secondary_ip = self.op.secondary_ip
5566 if not netutils.IP4Address.IsValid(secondary_ip):
5567 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5568 " address" % secondary_ip, errors.ECODE_INVAL)
5570 node_list = cfg.GetNodeList()
5571 if not self.op.readd and node in node_list:
5572 raise errors.OpPrereqError("Node %s is already in the configuration" %
5573 node, errors.ECODE_EXISTS)
5574 elif self.op.readd and node not in node_list:
5575 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5578 self.changed_primary_ip = False
5580 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5581 if self.op.readd and node == existing_node_name:
5582 if existing_node.secondary_ip != secondary_ip:
5583 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5584 " address configuration as before",
5586 if existing_node.primary_ip != primary_ip:
5587 self.changed_primary_ip = True
5591 if (existing_node.primary_ip == primary_ip or
5592 existing_node.secondary_ip == primary_ip or
5593 existing_node.primary_ip == secondary_ip or
5594 existing_node.secondary_ip == secondary_ip):
5595 raise errors.OpPrereqError("New node ip address(es) conflict with"
5596 " existing node %s" % existing_node.name,
5597 errors.ECODE_NOTUNIQUE)
5599 # After this 'if' block, None is no longer a valid value for the
5600 # _capable op attributes
5602 old_node = self.cfg.GetNodeInfo(node)
5603 assert old_node is not None, "Can't retrieve locked node %s" % node
5604 for attr in self._NFLAGS:
5605 if getattr(self.op, attr) is None:
5606 setattr(self.op, attr, getattr(old_node, attr))
5608 for attr in self._NFLAGS:
5609 if getattr(self.op, attr) is None:
5610 setattr(self.op, attr, True)
5612 if self.op.readd and not self.op.vm_capable:
5613 pri, sec = cfg.GetNodeInstances(node)
5615 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5616 " flag set to false, but it already holds"
5617 " instances" % node,
5620 # check that the type of the node (single versus dual homed) is the
5621 # same as for the master
5622 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5623 master_singlehomed = myself.secondary_ip == myself.primary_ip
5624 newbie_singlehomed = secondary_ip == primary_ip
5625 if master_singlehomed != newbie_singlehomed:
5626 if master_singlehomed:
5627 raise errors.OpPrereqError("The master has no secondary ip but the"
5628 " new node has one",
5631 raise errors.OpPrereqError("The master has a secondary ip but the"
5632 " new node doesn't have one",
5635 # checks reachability
5636 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5637 raise errors.OpPrereqError("Node not reachable by ping",
5638 errors.ECODE_ENVIRON)
5640 if not newbie_singlehomed:
5641 # check reachability from my secondary ip to newbie's secondary ip
5642 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5643 source=myself.secondary_ip):
5644 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5645 " based ping to node daemon port",
5646 errors.ECODE_ENVIRON)
5653 if self.op.master_capable:
5654 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5656 self.master_candidate = False
5659 self.new_node = old_node
5661 node_group = cfg.LookupNodeGroup(self.op.group)
5662 self.new_node = objects.Node(name=node,
5663 primary_ip=primary_ip,
5664 secondary_ip=secondary_ip,
5665 master_candidate=self.master_candidate,
5666 offline=False, drained=False,
5667 group=node_group, ndparams={})
5669 if self.op.ndparams:
5670 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5671 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
5672 "node", "cluster or group")
5674 if self.op.hv_state:
5675 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5677 if self.op.disk_state:
5678 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5680 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5681 # it a property on the base class.
5682 rpcrunner = rpc.DnsOnlyRunner()
5683 result = rpcrunner.call_version([node])[node]
5684 result.Raise("Can't get version information from node %s" % node)
5685 if constants.PROTOCOL_VERSION == result.payload:
5686 logging.info("Communication to node %s fine, sw version %s match",
5687 node, result.payload)
5689 raise errors.OpPrereqError("Version mismatch master version %s,"
5690 " node version %s" %
5691 (constants.PROTOCOL_VERSION, result.payload),
5692 errors.ECODE_ENVIRON)
5694 vg_name = cfg.GetVGName()
5695 if vg_name is not None:
5696 vparams = {constants.NV_PVLIST: [vg_name]}
5697 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
5698 cname = self.cfg.GetClusterName()
5699 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
5700 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
5702 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
5703 "; ".join(errmsgs), errors.ECODE_ENVIRON)
5705 def Exec(self, feedback_fn):
5706 """Adds the new node to the cluster.
5709 new_node = self.new_node
5710 node = new_node.name
5712 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5715 # We adding a new node so we assume it's powered
5716 new_node.powered = True
5718 # for re-adds, reset the offline/drained/master-candidate flags;
5719 # we need to reset here, otherwise offline would prevent RPC calls
5720 # later in the procedure; this also means that if the re-add
5721 # fails, we are left with a non-offlined, broken node
5723 new_node.drained = new_node.offline = False # pylint: disable=W0201
5724 self.LogInfo("Readding a node, the offline/drained flags were reset")
5725 # if we demote the node, we do cleanup later in the procedure
5726 new_node.master_candidate = self.master_candidate
5727 if self.changed_primary_ip:
5728 new_node.primary_ip = self.op.primary_ip
5730 # copy the master/vm_capable flags
5731 for attr in self._NFLAGS:
5732 setattr(new_node, attr, getattr(self.op, attr))
5734 # notify the user about any possible mc promotion
5735 if new_node.master_candidate:
5736 self.LogInfo("Node will be a master candidate")
5738 if self.op.ndparams:
5739 new_node.ndparams = self.op.ndparams
5741 new_node.ndparams = {}
5743 if self.op.hv_state:
5744 new_node.hv_state_static = self.new_hv_state
5746 if self.op.disk_state:
5747 new_node.disk_state_static = self.new_disk_state
5749 # Add node to our /etc/hosts, and add key to known_hosts
5750 if self.cfg.GetClusterInfo().modify_etc_hosts:
5751 master_node = self.cfg.GetMasterNode()
5752 result = self.rpc.call_etc_hosts_modify(master_node,
5753 constants.ETC_HOSTS_ADD,
5756 result.Raise("Can't update hosts file with new host data")
5758 if new_node.secondary_ip != new_node.primary_ip:
5759 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5762 node_verify_list = [self.cfg.GetMasterNode()]
5763 node_verify_param = {
5764 constants.NV_NODELIST: ([node], {}),
5765 # TODO: do a node-net-test as well?
5768 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5769 self.cfg.GetClusterName())
5770 for verifier in node_verify_list:
5771 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5772 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5774 for failed in nl_payload:
5775 feedback_fn("ssh/hostname verification failed"
5776 " (checking from %s): %s" %
5777 (verifier, nl_payload[failed]))
5778 raise errors.OpExecError("ssh/hostname verification failed")
5781 _RedistributeAncillaryFiles(self)
5782 self.context.ReaddNode(new_node)
5783 # make sure we redistribute the config
5784 self.cfg.Update(new_node, feedback_fn)
5785 # and make sure the new node will not have old files around
5786 if not new_node.master_candidate:
5787 result = self.rpc.call_node_demote_from_mc(new_node.name)
5788 msg = result.fail_msg
5790 self.LogWarning("Node failed to demote itself from master"
5791 " candidate status: %s" % msg)
5793 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5794 additional_vm=self.op.vm_capable)
5795 self.context.AddNode(new_node, self.proc.GetECId())
5798 class LUNodeSetParams(LogicalUnit):
5799 """Modifies the parameters of a node.
5801 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5802 to the node role (as _ROLE_*)
5803 @cvar _R2F: a dictionary from node role to tuples of flags
5804 @cvar _FLAGS: a list of attribute names corresponding to the flags
5807 HPATH = "node-modify"
5808 HTYPE = constants.HTYPE_NODE
5810 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5812 (True, False, False): _ROLE_CANDIDATE,
5813 (False, True, False): _ROLE_DRAINED,
5814 (False, False, True): _ROLE_OFFLINE,
5815 (False, False, False): _ROLE_REGULAR,
5817 _R2F = dict((v, k) for k, v in _F2R.items())
5818 _FLAGS = ["master_candidate", "drained", "offline"]
5820 def CheckArguments(self):
5821 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5822 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5823 self.op.master_capable, self.op.vm_capable,
5824 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5826 if all_mods.count(None) == len(all_mods):
5827 raise errors.OpPrereqError("Please pass at least one modification",
5829 if all_mods.count(True) > 1:
5830 raise errors.OpPrereqError("Can't set the node into more than one"
5831 " state at the same time",
5834 # Boolean value that tells us whether we might be demoting from MC
5835 self.might_demote = (self.op.master_candidate is False or
5836 self.op.offline is True or
5837 self.op.drained is True or
5838 self.op.master_capable is False)
5840 if self.op.secondary_ip:
5841 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5842 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5843 " address" % self.op.secondary_ip,
5846 self.lock_all = self.op.auto_promote and self.might_demote
5847 self.lock_instances = self.op.secondary_ip is not None
5849 def _InstanceFilter(self, instance):
5850 """Filter for getting affected instances.
5853 return (instance.disk_template in constants.DTS_INT_MIRROR and
5854 self.op.node_name in instance.all_nodes)
5856 def ExpandNames(self):
5858 self.needed_locks = {
5859 locking.LEVEL_NODE: locking.ALL_SET,
5861 # Block allocations when all nodes are locked
5862 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5865 self.needed_locks = {
5866 locking.LEVEL_NODE: self.op.node_name,
5869 # Since modifying a node can have severe effects on currently running
5870 # operations the resource lock is at least acquired in shared mode
5871 self.needed_locks[locking.LEVEL_NODE_RES] = \
5872 self.needed_locks[locking.LEVEL_NODE]
5874 # Get all locks except nodes in shared mode; they are not used for anything
5875 # but read-only access
5876 self.share_locks = _ShareAll()
5877 self.share_locks[locking.LEVEL_NODE] = 0
5878 self.share_locks[locking.LEVEL_NODE_RES] = 0
5879 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
5881 if self.lock_instances:
5882 self.needed_locks[locking.LEVEL_INSTANCE] = \
5883 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5885 def BuildHooksEnv(self):
5888 This runs on the master node.
5892 "OP_TARGET": self.op.node_name,
5893 "MASTER_CANDIDATE": str(self.op.master_candidate),
5894 "OFFLINE": str(self.op.offline),
5895 "DRAINED": str(self.op.drained),
5896 "MASTER_CAPABLE": str(self.op.master_capable),
5897 "VM_CAPABLE": str(self.op.vm_capable),
5900 def BuildHooksNodes(self):
5901 """Build hooks nodes.
5904 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5907 def CheckPrereq(self):
5908 """Check prerequisites.
5910 This only checks the instance list against the existing names.
5913 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5915 if self.lock_instances:
5916 affected_instances = \
5917 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5919 # Verify instance locks
5920 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5921 wanted_instances = frozenset(affected_instances.keys())
5922 if wanted_instances - owned_instances:
5923 raise errors.OpPrereqError("Instances affected by changing node %s's"
5924 " secondary IP address have changed since"
5925 " locks were acquired, wanted '%s', have"
5926 " '%s'; retry the operation" %
5928 utils.CommaJoin(wanted_instances),
5929 utils.CommaJoin(owned_instances)),
5932 affected_instances = None
5934 if (self.op.master_candidate is not None or
5935 self.op.drained is not None or
5936 self.op.offline is not None):
5937 # we can't change the master's node flags
5938 if self.op.node_name == self.cfg.GetMasterNode():
5939 raise errors.OpPrereqError("The master role can be changed"
5940 " only via master-failover",
5943 if self.op.master_candidate and not node.master_capable:
5944 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5945 " it a master candidate" % node.name,
5948 if self.op.vm_capable is False:
5949 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5951 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5952 " the vm_capable flag" % node.name,
5955 if node.master_candidate and self.might_demote and not self.lock_all:
5956 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5957 # check if after removing the current node, we're missing master
5959 (mc_remaining, mc_should, _) = \
5960 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5961 if mc_remaining < mc_should:
5962 raise errors.OpPrereqError("Not enough master candidates, please"
5963 " pass auto promote option to allow"
5964 " promotion (--auto-promote or RAPI"
5965 " auto_promote=True)", errors.ECODE_STATE)
5967 self.old_flags = old_flags = (node.master_candidate,
5968 node.drained, node.offline)
5969 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5970 self.old_role = old_role = self._F2R[old_flags]
5972 # Check for ineffective changes
5973 for attr in self._FLAGS:
5974 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5975 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5976 setattr(self.op, attr, None)
5978 # Past this point, any flag change to False means a transition
5979 # away from the respective state, as only real changes are kept
5981 # TODO: We might query the real power state if it supports OOB
5982 if _SupportsOob(self.cfg, node):
5983 if self.op.offline is False and not (node.powered or
5984 self.op.powered is True):
5985 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5986 " offline status can be reset") %
5987 self.op.node_name, errors.ECODE_STATE)
5988 elif self.op.powered is not None:
5989 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5990 " as it does not support out-of-band"
5991 " handling") % self.op.node_name,
5994 # If we're being deofflined/drained, we'll MC ourself if needed
5995 if (self.op.drained is False or self.op.offline is False or
5996 (self.op.master_capable and not node.master_capable)):
5997 if _DecideSelfPromotion(self):
5998 self.op.master_candidate = True
5999 self.LogInfo("Auto-promoting node to master candidate")
6001 # If we're no longer master capable, we'll demote ourselves from MC
6002 if self.op.master_capable is False and node.master_candidate:
6003 self.LogInfo("Demoting from master candidate")
6004 self.op.master_candidate = False
6007 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6008 if self.op.master_candidate:
6009 new_role = self._ROLE_CANDIDATE
6010 elif self.op.drained:
6011 new_role = self._ROLE_DRAINED
6012 elif self.op.offline:
6013 new_role = self._ROLE_OFFLINE
6014 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6015 # False is still in new flags, which means we're un-setting (the
6017 new_role = self._ROLE_REGULAR
6018 else: # no new flags, nothing, keep old role
6021 self.new_role = new_role
6023 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6024 # Trying to transition out of offline status
6025 result = self.rpc.call_version([node.name])[node.name]
6027 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6028 " to report its version: %s" %
6029 (node.name, result.fail_msg),
6032 self.LogWarning("Transitioning node from offline to online state"
6033 " without using re-add. Please make sure the node"
6036 # When changing the secondary ip, verify if this is a single-homed to
6037 # multi-homed transition or vice versa, and apply the relevant
6039 if self.op.secondary_ip:
6040 # Ok even without locking, because this can't be changed by any LU
6041 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6042 master_singlehomed = master.secondary_ip == master.primary_ip
6043 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6044 if self.op.force and node.name == master.name:
6045 self.LogWarning("Transitioning from single-homed to multi-homed"
6046 " cluster; all nodes will require a secondary IP"
6049 raise errors.OpPrereqError("Changing the secondary ip on a"
6050 " single-homed cluster requires the"
6051 " --force option to be passed, and the"
6052 " target node to be the master",
6054 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6055 if self.op.force and node.name == master.name:
6056 self.LogWarning("Transitioning from multi-homed to single-homed"
6057 " cluster; secondary IP addresses will have to be"
6060 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6061 " same as the primary IP on a multi-homed"
6062 " cluster, unless the --force option is"
6063 " passed, and the target node is the"
6064 " master", errors.ECODE_INVAL)
6066 assert not (frozenset(affected_instances) -
6067 self.owned_locks(locking.LEVEL_INSTANCE))
6070 if affected_instances:
6071 msg = ("Cannot change secondary IP address: offline node has"
6072 " instances (%s) configured to use it" %
6073 utils.CommaJoin(affected_instances.keys()))
6074 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6076 # On online nodes, check that no instances are running, and that
6077 # the node has the new ip and we can reach it.
6078 for instance in affected_instances.values():
6079 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6080 msg="cannot change secondary ip")
6082 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6083 if master.name != node.name:
6084 # check reachability from master secondary ip to new secondary ip
6085 if not netutils.TcpPing(self.op.secondary_ip,
6086 constants.DEFAULT_NODED_PORT,
6087 source=master.secondary_ip):
6088 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6089 " based ping to node daemon port",
6090 errors.ECODE_ENVIRON)
6092 if self.op.ndparams:
6093 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6094 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6095 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6096 "node", "cluster or group")
6097 self.new_ndparams = new_ndparams
6099 if self.op.hv_state:
6100 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6101 self.node.hv_state_static)
6103 if self.op.disk_state:
6104 self.new_disk_state = \
6105 _MergeAndVerifyDiskState(self.op.disk_state,
6106 self.node.disk_state_static)
6108 def Exec(self, feedback_fn):
6113 old_role = self.old_role
6114 new_role = self.new_role
6118 if self.op.ndparams:
6119 node.ndparams = self.new_ndparams
6121 if self.op.powered is not None:
6122 node.powered = self.op.powered
6124 if self.op.hv_state:
6125 node.hv_state_static = self.new_hv_state
6127 if self.op.disk_state:
6128 node.disk_state_static = self.new_disk_state
6130 for attr in ["master_capable", "vm_capable"]:
6131 val = getattr(self.op, attr)
6133 setattr(node, attr, val)
6134 result.append((attr, str(val)))
6136 if new_role != old_role:
6137 # Tell the node to demote itself, if no longer MC and not offline
6138 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6139 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6141 self.LogWarning("Node failed to demote itself: %s", msg)
6143 new_flags = self._R2F[new_role]
6144 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6146 result.append((desc, str(nf)))
6147 (node.master_candidate, node.drained, node.offline) = new_flags
6149 # we locked all nodes, we adjust the CP before updating this node
6151 _AdjustCandidatePool(self, [node.name])
6153 if self.op.secondary_ip:
6154 node.secondary_ip = self.op.secondary_ip
6155 result.append(("secondary_ip", self.op.secondary_ip))
6157 # this will trigger configuration file update, if needed
6158 self.cfg.Update(node, feedback_fn)
6160 # this will trigger job queue propagation or cleanup if the mc
6162 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6163 self.context.ReaddNode(node)
6168 class LUNodePowercycle(NoHooksLU):
6169 """Powercycles a node.
6174 def CheckArguments(self):
6175 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6176 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6177 raise errors.OpPrereqError("The node is the master and the force"
6178 " parameter was not set",
6181 def ExpandNames(self):
6182 """Locking for PowercycleNode.
6184 This is a last-resort option and shouldn't block on other
6185 jobs. Therefore, we grab no locks.
6188 self.needed_locks = {}
6190 def Exec(self, feedback_fn):
6194 result = self.rpc.call_node_powercycle(self.op.node_name,
6195 self.cfg.GetHypervisorType())
6196 result.Raise("Failed to schedule the reboot")
6197 return result.payload
6200 class LUClusterQuery(NoHooksLU):
6201 """Query cluster configuration.
6206 def ExpandNames(self):
6207 self.needed_locks = {}
6209 def Exec(self, feedback_fn):
6210 """Return cluster config.
6213 cluster = self.cfg.GetClusterInfo()
6216 # Filter just for enabled hypervisors
6217 for os_name, hv_dict in cluster.os_hvp.items():
6218 os_hvp[os_name] = {}
6219 for hv_name, hv_params in hv_dict.items():
6220 if hv_name in cluster.enabled_hypervisors:
6221 os_hvp[os_name][hv_name] = hv_params
6223 # Convert ip_family to ip_version
6224 primary_ip_version = constants.IP4_VERSION
6225 if cluster.primary_ip_family == netutils.IP6Address.family:
6226 primary_ip_version = constants.IP6_VERSION
6229 "software_version": constants.RELEASE_VERSION,
6230 "protocol_version": constants.PROTOCOL_VERSION,
6231 "config_version": constants.CONFIG_VERSION,
6232 "os_api_version": max(constants.OS_API_VERSIONS),
6233 "export_version": constants.EXPORT_VERSION,
6234 "architecture": runtime.GetArchInfo(),
6235 "name": cluster.cluster_name,
6236 "master": cluster.master_node,
6237 "default_hypervisor": cluster.primary_hypervisor,
6238 "enabled_hypervisors": cluster.enabled_hypervisors,
6239 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6240 for hypervisor_name in cluster.enabled_hypervisors]),
6242 "beparams": cluster.beparams,
6243 "osparams": cluster.osparams,
6244 "ipolicy": cluster.ipolicy,
6245 "nicparams": cluster.nicparams,
6246 "ndparams": cluster.ndparams,
6247 "diskparams": cluster.diskparams,
6248 "candidate_pool_size": cluster.candidate_pool_size,
6249 "master_netdev": cluster.master_netdev,
6250 "master_netmask": cluster.master_netmask,
6251 "use_external_mip_script": cluster.use_external_mip_script,
6252 "volume_group_name": cluster.volume_group_name,
6253 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6254 "file_storage_dir": cluster.file_storage_dir,
6255 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6256 "maintain_node_health": cluster.maintain_node_health,
6257 "ctime": cluster.ctime,
6258 "mtime": cluster.mtime,
6259 "uuid": cluster.uuid,
6260 "tags": list(cluster.GetTags()),
6261 "uid_pool": cluster.uid_pool,
6262 "default_iallocator": cluster.default_iallocator,
6263 "reserved_lvs": cluster.reserved_lvs,
6264 "primary_ip_version": primary_ip_version,
6265 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6266 "hidden_os": cluster.hidden_os,
6267 "blacklisted_os": cluster.blacklisted_os,
6273 class LUClusterConfigQuery(NoHooksLU):
6274 """Return configuration values.
6279 def CheckArguments(self):
6280 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6282 def ExpandNames(self):
6283 self.cq.ExpandNames(self)
6285 def DeclareLocks(self, level):
6286 self.cq.DeclareLocks(self, level)
6288 def Exec(self, feedback_fn):
6289 result = self.cq.OldStyleQuery(self)
6291 assert len(result) == 1
6296 class _ClusterQuery(_QueryBase):
6297 FIELDS = query.CLUSTER_FIELDS
6299 #: Do not sort (there is only one item)
6302 def ExpandNames(self, lu):
6303 lu.needed_locks = {}
6305 # The following variables interact with _QueryBase._GetNames
6306 self.wanted = locking.ALL_SET
6307 self.do_locking = self.use_locking
6310 raise errors.OpPrereqError("Can not use locking for cluster queries",
6313 def DeclareLocks(self, lu, level):
6316 def _GetQueryData(self, lu):
6317 """Computes the list of nodes and their attributes.
6320 # Locking is not used
6321 assert not (compat.any(lu.glm.is_owned(level)
6322 for level in locking.LEVELS
6323 if level != locking.LEVEL_CLUSTER) or
6324 self.do_locking or self.use_locking)
6326 if query.CQ_CONFIG in self.requested_data:
6327 cluster = lu.cfg.GetClusterInfo()
6329 cluster = NotImplemented
6331 if query.CQ_QUEUE_DRAINED in self.requested_data:
6332 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6334 drain_flag = NotImplemented
6336 if query.CQ_WATCHER_PAUSE in self.requested_data:
6337 master_name = lu.cfg.GetMasterNode()
6339 result = lu.rpc.call_get_watcher_pause(master_name)
6340 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6343 watcher_pause = result.payload
6345 watcher_pause = NotImplemented
6347 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6350 class LUInstanceActivateDisks(NoHooksLU):
6351 """Bring up an instance's disks.
6356 def ExpandNames(self):
6357 self._ExpandAndLockInstance()
6358 self.needed_locks[locking.LEVEL_NODE] = []
6359 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6361 def DeclareLocks(self, level):
6362 if level == locking.LEVEL_NODE:
6363 self._LockInstancesNodes()
6365 def CheckPrereq(self):
6366 """Check prerequisites.
6368 This checks that the instance is in the cluster.
6371 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6372 assert self.instance is not None, \
6373 "Cannot retrieve locked instance %s" % self.op.instance_name
6374 _CheckNodeOnline(self, self.instance.primary_node)
6376 def Exec(self, feedback_fn):
6377 """Activate the disks.
6380 disks_ok, disks_info = \
6381 _AssembleInstanceDisks(self, self.instance,
6382 ignore_size=self.op.ignore_size)
6384 raise errors.OpExecError("Cannot activate block devices")
6386 if self.op.wait_for_sync:
6387 if not _WaitForSync(self, self.instance):
6388 raise errors.OpExecError("Some disks of the instance are degraded!")
6393 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6395 """Prepare the block devices for an instance.
6397 This sets up the block devices on all nodes.
6399 @type lu: L{LogicalUnit}
6400 @param lu: the logical unit on whose behalf we execute
6401 @type instance: L{objects.Instance}
6402 @param instance: the instance for whose disks we assemble
6403 @type disks: list of L{objects.Disk} or None
6404 @param disks: which disks to assemble (or all, if None)
6405 @type ignore_secondaries: boolean
6406 @param ignore_secondaries: if true, errors on secondary nodes
6407 won't result in an error return from the function
6408 @type ignore_size: boolean
6409 @param ignore_size: if true, the current known size of the disk
6410 will not be used during the disk activation, useful for cases
6411 when the size is wrong
6412 @return: False if the operation failed, otherwise a list of
6413 (host, instance_visible_name, node_visible_name)
6414 with the mapping from node devices to instance devices
6419 iname = instance.name
6420 disks = _ExpandCheckDisks(instance, disks)
6422 # With the two passes mechanism we try to reduce the window of
6423 # opportunity for the race condition of switching DRBD to primary
6424 # before handshaking occured, but we do not eliminate it
6426 # The proper fix would be to wait (with some limits) until the
6427 # connection has been made and drbd transitions from WFConnection
6428 # into any other network-connected state (Connected, SyncTarget,
6431 # 1st pass, assemble on all nodes in secondary mode
6432 for idx, inst_disk in enumerate(disks):
6433 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6435 node_disk = node_disk.Copy()
6436 node_disk.UnsetSize()
6437 lu.cfg.SetDiskID(node_disk, node)
6438 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6440 msg = result.fail_msg
6442 is_offline_secondary = (node in instance.secondary_nodes and
6444 lu.LogWarning("Could not prepare block device %s on node %s"
6445 " (is_primary=False, pass=1): %s",
6446 inst_disk.iv_name, node, msg)
6447 if not (ignore_secondaries or is_offline_secondary):
6450 # FIXME: race condition on drbd migration to primary
6452 # 2nd pass, do only the primary node
6453 for idx, inst_disk in enumerate(disks):
6456 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6457 if node != instance.primary_node:
6460 node_disk = node_disk.Copy()
6461 node_disk.UnsetSize()
6462 lu.cfg.SetDiskID(node_disk, node)
6463 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6465 msg = result.fail_msg
6467 lu.LogWarning("Could not prepare block device %s on node %s"
6468 " (is_primary=True, pass=2): %s",
6469 inst_disk.iv_name, node, msg)
6472 dev_path = result.payload
6474 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6476 # leave the disks configured for the primary node
6477 # this is a workaround that would be fixed better by
6478 # improving the logical/physical id handling
6480 lu.cfg.SetDiskID(disk, instance.primary_node)
6482 return disks_ok, device_info
6485 def _StartInstanceDisks(lu, instance, force):
6486 """Start the disks of an instance.
6489 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6490 ignore_secondaries=force)
6492 _ShutdownInstanceDisks(lu, instance)
6493 if force is not None and not force:
6495 hint=("If the message above refers to a secondary node,"
6496 " you can retry the operation using '--force'"))
6497 raise errors.OpExecError("Disk consistency error")
6500 class LUInstanceDeactivateDisks(NoHooksLU):
6501 """Shutdown an instance's disks.
6506 def ExpandNames(self):
6507 self._ExpandAndLockInstance()
6508 self.needed_locks[locking.LEVEL_NODE] = []
6509 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6511 def DeclareLocks(self, level):
6512 if level == locking.LEVEL_NODE:
6513 self._LockInstancesNodes()
6515 def CheckPrereq(self):
6516 """Check prerequisites.
6518 This checks that the instance is in the cluster.
6521 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6522 assert self.instance is not None, \
6523 "Cannot retrieve locked instance %s" % self.op.instance_name
6525 def Exec(self, feedback_fn):
6526 """Deactivate the disks
6529 instance = self.instance
6531 _ShutdownInstanceDisks(self, instance)
6533 _SafeShutdownInstanceDisks(self, instance)
6536 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6537 """Shutdown block devices of an instance.
6539 This function checks if an instance is running, before calling
6540 _ShutdownInstanceDisks.
6543 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6544 _ShutdownInstanceDisks(lu, instance, disks=disks)
6547 def _ExpandCheckDisks(instance, disks):
6548 """Return the instance disks selected by the disks list
6550 @type disks: list of L{objects.Disk} or None
6551 @param disks: selected disks
6552 @rtype: list of L{objects.Disk}
6553 @return: selected instance disks to act on
6557 return instance.disks
6559 if not set(disks).issubset(instance.disks):
6560 raise errors.ProgrammerError("Can only act on disks belonging to the"
6565 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6566 """Shutdown block devices of an instance.
6568 This does the shutdown on all nodes of the instance.
6570 If the ignore_primary is false, errors on the primary node are
6575 disks = _ExpandCheckDisks(instance, disks)
6578 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6579 lu.cfg.SetDiskID(top_disk, node)
6580 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6581 msg = result.fail_msg
6583 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6584 disk.iv_name, node, msg)
6585 if ((node == instance.primary_node and not ignore_primary) or
6586 (node != instance.primary_node and not result.offline)):
6591 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6592 """Checks if a node has enough free memory.
6594 This function checks if a given node has the needed amount of free
6595 memory. In case the node has less memory or we cannot get the
6596 information from the node, this function raises an OpPrereqError
6599 @type lu: C{LogicalUnit}
6600 @param lu: a logical unit from which we get configuration data
6602 @param node: the node to check
6603 @type reason: C{str}
6604 @param reason: string to use in the error message
6605 @type requested: C{int}
6606 @param requested: the amount of memory in MiB to check for
6607 @type hypervisor_name: C{str}
6608 @param hypervisor_name: the hypervisor to ask for memory stats
6610 @return: node current free memory
6611 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6612 we cannot check the node
6615 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
6616 nodeinfo[node].Raise("Can't get data from node %s" % node,
6617 prereq=True, ecode=errors.ECODE_ENVIRON)
6618 (_, _, (hv_info, )) = nodeinfo[node].payload
6620 free_mem = hv_info.get("memory_free", None)
6621 if not isinstance(free_mem, int):
6622 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6623 " was '%s'" % (node, free_mem),
6624 errors.ECODE_ENVIRON)
6625 if requested > free_mem:
6626 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6627 " needed %s MiB, available %s MiB" %
6628 (node, reason, requested, free_mem),
6633 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6634 """Checks if nodes have enough free disk space in all the VGs.
6636 This function checks if all given nodes have the needed amount of
6637 free disk. In case any node has less disk or we cannot get the
6638 information from the node, this function raises an OpPrereqError
6641 @type lu: C{LogicalUnit}
6642 @param lu: a logical unit from which we get configuration data
6643 @type nodenames: C{list}
6644 @param nodenames: the list of node names to check
6645 @type req_sizes: C{dict}
6646 @param req_sizes: the hash of vg and corresponding amount of disk in
6648 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6649 or we cannot check the node
6652 for vg, req_size in req_sizes.items():
6653 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6656 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6657 """Checks if nodes have enough free disk space in the specified VG.
6659 This function checks if all given nodes have the needed amount of
6660 free disk. In case any node has less disk or we cannot get the
6661 information from the node, this function raises an OpPrereqError
6664 @type lu: C{LogicalUnit}
6665 @param lu: a logical unit from which we get configuration data
6666 @type nodenames: C{list}
6667 @param nodenames: the list of node names to check
6669 @param vg: the volume group to check
6670 @type requested: C{int}
6671 @param requested: the amount of disk in MiB to check for
6672 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6673 or we cannot check the node
6676 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
6677 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
6678 for node in nodenames:
6679 info = nodeinfo[node]
6680 info.Raise("Cannot get current information from node %s" % node,
6681 prereq=True, ecode=errors.ECODE_ENVIRON)
6682 (_, (vg_info, ), _) = info.payload
6683 vg_free = vg_info.get("vg_free", None)
6684 if not isinstance(vg_free, int):
6685 raise errors.OpPrereqError("Can't compute free disk space on node"
6686 " %s for vg %s, result was '%s'" %
6687 (node, vg, vg_free), errors.ECODE_ENVIRON)
6688 if requested > vg_free:
6689 raise errors.OpPrereqError("Not enough disk space on target node %s"
6690 " vg %s: required %d MiB, available %d MiB" %
6691 (node, vg, requested, vg_free),
6695 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6696 """Checks if nodes have enough physical CPUs
6698 This function checks if all given nodes have the needed number of
6699 physical CPUs. In case any node has less CPUs or we cannot get the
6700 information from the node, this function raises an OpPrereqError
6703 @type lu: C{LogicalUnit}
6704 @param lu: a logical unit from which we get configuration data
6705 @type nodenames: C{list}
6706 @param nodenames: the list of node names to check
6707 @type requested: C{int}
6708 @param requested: the minimum acceptable number of physical CPUs
6709 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6710 or we cannot check the node
6713 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
6714 for node in nodenames:
6715 info = nodeinfo[node]
6716 info.Raise("Cannot get current information from node %s" % node,
6717 prereq=True, ecode=errors.ECODE_ENVIRON)
6718 (_, _, (hv_info, )) = info.payload
6719 num_cpus = hv_info.get("cpu_total", None)
6720 if not isinstance(num_cpus, int):
6721 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6722 " on node %s, result was '%s'" %
6723 (node, num_cpus), errors.ECODE_ENVIRON)
6724 if requested > num_cpus:
6725 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6726 "required" % (node, num_cpus, requested),
6730 class LUInstanceStartup(LogicalUnit):
6731 """Starts an instance.
6734 HPATH = "instance-start"
6735 HTYPE = constants.HTYPE_INSTANCE
6738 def CheckArguments(self):
6740 if self.op.beparams:
6741 # fill the beparams dict
6742 objects.UpgradeBeParams(self.op.beparams)
6743 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6745 def ExpandNames(self):
6746 self._ExpandAndLockInstance()
6747 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6749 def DeclareLocks(self, level):
6750 if level == locking.LEVEL_NODE_RES:
6751 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6753 def BuildHooksEnv(self):
6756 This runs on master, primary and secondary nodes of the instance.
6760 "FORCE": self.op.force,
6763 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6767 def BuildHooksNodes(self):
6768 """Build hooks nodes.
6771 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6774 def CheckPrereq(self):
6775 """Check prerequisites.
6777 This checks that the instance is in the cluster.
6780 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6781 assert self.instance is not None, \
6782 "Cannot retrieve locked instance %s" % self.op.instance_name
6785 if self.op.hvparams:
6786 # check hypervisor parameter syntax (locally)
6787 cluster = self.cfg.GetClusterInfo()
6788 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6789 filled_hvp = cluster.FillHV(instance)
6790 filled_hvp.update(self.op.hvparams)
6791 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
6792 hv_type.CheckParameterSyntax(filled_hvp)
6793 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6795 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6797 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6799 if self.primary_offline and self.op.ignore_offline_nodes:
6800 self.LogWarning("Ignoring offline primary node")
6802 if self.op.hvparams or self.op.beparams:
6803 self.LogWarning("Overridden parameters are ignored")
6805 _CheckNodeOnline(self, instance.primary_node)
6807 bep = self.cfg.GetClusterInfo().FillBE(instance)
6808 bep.update(self.op.beparams)
6810 # check bridges existence
6811 _CheckInstanceBridgesExist(self, instance)
6813 remote_info = self.rpc.call_instance_info(instance.primary_node,
6815 instance.hypervisor)
6816 remote_info.Raise("Error checking node %s" % instance.primary_node,
6817 prereq=True, ecode=errors.ECODE_ENVIRON)
6818 if not remote_info.payload: # not running already
6819 _CheckNodeFreeMemory(self, instance.primary_node,
6820 "starting instance %s" % instance.name,
6821 bep[constants.BE_MINMEM], instance.hypervisor)
6823 def Exec(self, feedback_fn):
6824 """Start the instance.
6827 instance = self.instance
6828 force = self.op.force
6829 reason = self.op.reason
6831 if not self.op.no_remember:
6832 self.cfg.MarkInstanceUp(instance.name)
6834 if self.primary_offline:
6835 assert self.op.ignore_offline_nodes
6836 self.LogInfo("Primary node offline, marked instance as started")
6838 node_current = instance.primary_node
6840 _StartInstanceDisks(self, instance, force)
6843 self.rpc.call_instance_start(node_current,
6844 (instance, self.op.hvparams,
6846 self.op.startup_paused, reason)
6847 msg = result.fail_msg
6849 _ShutdownInstanceDisks(self, instance)
6850 raise errors.OpExecError("Could not start instance: %s" % msg)
6853 class LUInstanceReboot(LogicalUnit):
6854 """Reboot an instance.
6857 HPATH = "instance-reboot"
6858 HTYPE = constants.HTYPE_INSTANCE
6861 def ExpandNames(self):
6862 self._ExpandAndLockInstance()
6864 def BuildHooksEnv(self):
6867 This runs on master, primary and secondary nodes of the instance.
6871 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6872 "REBOOT_TYPE": self.op.reboot_type,
6873 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6876 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6880 def BuildHooksNodes(self):
6881 """Build hooks nodes.
6884 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6887 def CheckPrereq(self):
6888 """Check prerequisites.
6890 This checks that the instance is in the cluster.
6893 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6894 assert self.instance is not None, \
6895 "Cannot retrieve locked instance %s" % self.op.instance_name
6896 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6897 _CheckNodeOnline(self, instance.primary_node)
6899 # check bridges existence
6900 _CheckInstanceBridgesExist(self, instance)
6902 def Exec(self, feedback_fn):
6903 """Reboot the instance.
6906 instance = self.instance
6907 ignore_secondaries = self.op.ignore_secondaries
6908 reboot_type = self.op.reboot_type
6909 reason = self.op.reason
6911 remote_info = self.rpc.call_instance_info(instance.primary_node,
6913 instance.hypervisor)
6914 remote_info.Raise("Error checking node %s" % instance.primary_node)
6915 instance_running = bool(remote_info.payload)
6917 node_current = instance.primary_node
6919 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6920 constants.INSTANCE_REBOOT_HARD]:
6921 for disk in instance.disks:
6922 self.cfg.SetDiskID(disk, node_current)
6923 result = self.rpc.call_instance_reboot(node_current, instance,
6925 self.op.shutdown_timeout, reason)
6926 result.Raise("Could not reboot instance")
6928 if instance_running:
6929 result = self.rpc.call_instance_shutdown(node_current, instance,
6930 self.op.shutdown_timeout,
6932 result.Raise("Could not shutdown instance for full reboot")
6933 _ShutdownInstanceDisks(self, instance)
6935 self.LogInfo("Instance %s was already stopped, starting now",
6937 _StartInstanceDisks(self, instance, ignore_secondaries)
6938 result = self.rpc.call_instance_start(node_current,
6939 (instance, None, None), False,
6941 msg = result.fail_msg
6943 _ShutdownInstanceDisks(self, instance)
6944 raise errors.OpExecError("Could not start instance for"
6945 " full reboot: %s" % msg)
6947 self.cfg.MarkInstanceUp(instance.name)
6950 class LUInstanceShutdown(LogicalUnit):
6951 """Shutdown an instance.
6954 HPATH = "instance-stop"
6955 HTYPE = constants.HTYPE_INSTANCE
6958 def ExpandNames(self):
6959 self._ExpandAndLockInstance()
6961 def BuildHooksEnv(self):
6964 This runs on master, primary and secondary nodes of the instance.
6967 env = _BuildInstanceHookEnvByObject(self, self.instance)
6968 env["TIMEOUT"] = self.op.timeout
6971 def BuildHooksNodes(self):
6972 """Build hooks nodes.
6975 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6978 def CheckPrereq(self):
6979 """Check prerequisites.
6981 This checks that the instance is in the cluster.
6984 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6985 assert self.instance is not None, \
6986 "Cannot retrieve locked instance %s" % self.op.instance_name
6988 if not self.op.force:
6989 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6991 self.LogWarning("Ignoring offline instance check")
6993 self.primary_offline = \
6994 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6996 if self.primary_offline and self.op.ignore_offline_nodes:
6997 self.LogWarning("Ignoring offline primary node")
6999 _CheckNodeOnline(self, self.instance.primary_node)
7001 def Exec(self, feedback_fn):
7002 """Shutdown the instance.
7005 instance = self.instance
7006 node_current = instance.primary_node
7007 timeout = self.op.timeout
7008 reason = self.op.reason
7010 # If the instance is offline we shouldn't mark it as down, as that
7011 # resets the offline flag.
7012 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7013 self.cfg.MarkInstanceDown(instance.name)
7015 if self.primary_offline:
7016 assert self.op.ignore_offline_nodes
7017 self.LogInfo("Primary node offline, marked instance as stopped")
7019 result = self.rpc.call_instance_shutdown(node_current, instance, timeout,
7021 msg = result.fail_msg
7023 self.LogWarning("Could not shutdown instance: %s", msg)
7025 _ShutdownInstanceDisks(self, instance)
7028 class LUInstanceReinstall(LogicalUnit):
7029 """Reinstall an instance.
7032 HPATH = "instance-reinstall"
7033 HTYPE = constants.HTYPE_INSTANCE
7036 def ExpandNames(self):
7037 self._ExpandAndLockInstance()
7039 def BuildHooksEnv(self):
7042 This runs on master, primary and secondary nodes of the instance.
7045 return _BuildInstanceHookEnvByObject(self, self.instance)
7047 def BuildHooksNodes(self):
7048 """Build hooks nodes.
7051 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7054 def CheckPrereq(self):
7055 """Check prerequisites.
7057 This checks that the instance is in the cluster and is not running.
7060 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7061 assert instance is not None, \
7062 "Cannot retrieve locked instance %s" % self.op.instance_name
7063 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7064 " offline, cannot reinstall")
7066 if instance.disk_template == constants.DT_DISKLESS:
7067 raise errors.OpPrereqError("Instance '%s' has no disks" %
7068 self.op.instance_name,
7070 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7072 if self.op.os_type is not None:
7074 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7075 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7076 instance_os = self.op.os_type
7078 instance_os = instance.os
7080 nodelist = list(instance.all_nodes)
7082 if self.op.osparams:
7083 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7084 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7085 self.os_inst = i_osdict # the new dict (without defaults)
7089 self.instance = instance
7091 def Exec(self, feedback_fn):
7092 """Reinstall the instance.
7095 inst = self.instance
7097 if self.op.os_type is not None:
7098 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7099 inst.os = self.op.os_type
7100 # Write to configuration
7101 self.cfg.Update(inst, feedback_fn)
7103 _StartInstanceDisks(self, inst, None)
7105 feedback_fn("Running the instance OS create scripts...")
7106 # FIXME: pass debug option from opcode to backend
7107 result = self.rpc.call_instance_os_add(inst.primary_node,
7108 (inst, self.os_inst), True,
7109 self.op.debug_level)
7110 result.Raise("Could not install OS for instance %s on node %s" %
7111 (inst.name, inst.primary_node))
7113 _ShutdownInstanceDisks(self, inst)
7116 class LUInstanceRecreateDisks(LogicalUnit):
7117 """Recreate an instance's missing disks.
7120 HPATH = "instance-recreate-disks"
7121 HTYPE = constants.HTYPE_INSTANCE
7124 _MODIFYABLE = compat.UniqueFrozenset([
7125 constants.IDISK_SIZE,
7126 constants.IDISK_MODE,
7129 # New or changed disk parameters may have different semantics
7130 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7131 constants.IDISK_ADOPT,
7133 # TODO: Implement support changing VG while recreating
7135 constants.IDISK_METAVG,
7136 constants.IDISK_PROVIDER,
7137 constants.IDISK_NAME,
7140 def _RunAllocator(self):
7141 """Run the allocator based on input opcode.
7144 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7147 # The allocator should actually run in "relocate" mode, but current
7148 # allocators don't support relocating all the nodes of an instance at
7149 # the same time. As a workaround we use "allocate" mode, but this is
7150 # suboptimal for two reasons:
7151 # - The instance name passed to the allocator is present in the list of
7152 # existing instances, so there could be a conflict within the
7153 # internal structures of the allocator. This doesn't happen with the
7154 # current allocators, but it's a liability.
7155 # - The allocator counts the resources used by the instance twice: once
7156 # because the instance exists already, and once because it tries to
7157 # allocate a new instance.
7158 # The allocator could choose some of the nodes on which the instance is
7159 # running, but that's not a problem. If the instance nodes are broken,
7160 # they should be already be marked as drained or offline, and hence
7161 # skipped by the allocator. If instance disks have been lost for other
7162 # reasons, then recreating the disks on the same nodes should be fine.
7163 disk_template = self.instance.disk_template
7164 spindle_use = be_full[constants.BE_SPINDLE_USE]
7165 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7166 disk_template=disk_template,
7167 tags=list(self.instance.GetTags()),
7168 os=self.instance.os,
7170 vcpus=be_full[constants.BE_VCPUS],
7171 memory=be_full[constants.BE_MAXMEM],
7172 spindle_use=spindle_use,
7173 disks=[{constants.IDISK_SIZE: d.size,
7174 constants.IDISK_MODE: d.mode}
7175 for d in self.instance.disks],
7176 hypervisor=self.instance.hypervisor,
7177 node_whitelist=None)
7178 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7180 ial.Run(self.op.iallocator)
7182 assert req.RequiredNodes() == len(self.instance.all_nodes)
7185 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7186 " %s" % (self.op.iallocator, ial.info),
7189 self.op.nodes = ial.result
7190 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7191 self.op.instance_name, self.op.iallocator,
7192 utils.CommaJoin(ial.result))
7194 def CheckArguments(self):
7195 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7196 # Normalize and convert deprecated list of disk indices
7197 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7199 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7201 raise errors.OpPrereqError("Some disks have been specified more than"
7202 " once: %s" % utils.CommaJoin(duplicates),
7205 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7206 # when neither iallocator nor nodes are specified
7207 if self.op.iallocator or self.op.nodes:
7208 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7210 for (idx, params) in self.op.disks:
7211 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7212 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7214 raise errors.OpPrereqError("Parameters for disk %s try to change"
7215 " unmodifyable parameter(s): %s" %
7216 (idx, utils.CommaJoin(unsupported)),
7219 def ExpandNames(self):
7220 self._ExpandAndLockInstance()
7221 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7224 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7225 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7227 self.needed_locks[locking.LEVEL_NODE] = []
7228 if self.op.iallocator:
7229 # iallocator will select a new node in the same group
7230 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7231 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7233 self.needed_locks[locking.LEVEL_NODE_RES] = []
7235 def DeclareLocks(self, level):
7236 if level == locking.LEVEL_NODEGROUP:
7237 assert self.op.iallocator is not None
7238 assert not self.op.nodes
7239 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7240 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7241 # Lock the primary group used by the instance optimistically; this
7242 # requires going via the node before it's locked, requiring
7243 # verification later on
7244 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7245 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7247 elif level == locking.LEVEL_NODE:
7248 # If an allocator is used, then we lock all the nodes in the current
7249 # instance group, as we don't know yet which ones will be selected;
7250 # if we replace the nodes without using an allocator, locks are
7251 # already declared in ExpandNames; otherwise, we need to lock all the
7252 # instance nodes for disk re-creation
7253 if self.op.iallocator:
7254 assert not self.op.nodes
7255 assert not self.needed_locks[locking.LEVEL_NODE]
7256 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7258 # Lock member nodes of the group of the primary node
7259 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7260 self.needed_locks[locking.LEVEL_NODE].extend(
7261 self.cfg.GetNodeGroup(group_uuid).members)
7263 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7264 elif not self.op.nodes:
7265 self._LockInstancesNodes(primary_only=False)
7266 elif level == locking.LEVEL_NODE_RES:
7268 self.needed_locks[locking.LEVEL_NODE_RES] = \
7269 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7271 def BuildHooksEnv(self):
7274 This runs on master, primary and secondary nodes of the instance.
7277 return _BuildInstanceHookEnvByObject(self, self.instance)
7279 def BuildHooksNodes(self):
7280 """Build hooks nodes.
7283 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7286 def CheckPrereq(self):
7287 """Check prerequisites.
7289 This checks that the instance is in the cluster and is not running.
7292 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7293 assert instance is not None, \
7294 "Cannot retrieve locked instance %s" % self.op.instance_name
7296 if len(self.op.nodes) != len(instance.all_nodes):
7297 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7298 " %d replacement nodes were specified" %
7299 (instance.name, len(instance.all_nodes),
7300 len(self.op.nodes)),
7302 assert instance.disk_template != constants.DT_DRBD8 or \
7303 len(self.op.nodes) == 2
7304 assert instance.disk_template != constants.DT_PLAIN or \
7305 len(self.op.nodes) == 1
7306 primary_node = self.op.nodes[0]
7308 primary_node = instance.primary_node
7309 if not self.op.iallocator:
7310 _CheckNodeOnline(self, primary_node)
7312 if instance.disk_template == constants.DT_DISKLESS:
7313 raise errors.OpPrereqError("Instance '%s' has no disks" %
7314 self.op.instance_name, errors.ECODE_INVAL)
7316 # Verify if node group locks are still correct
7317 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7319 # Node group locks are acquired only for the primary node (and only
7320 # when the allocator is used)
7321 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7324 # if we replace nodes *and* the old primary is offline, we don't
7325 # check the instance state
7326 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7327 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7328 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7329 msg="cannot recreate disks")
7332 self.disks = dict(self.op.disks)
7334 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7336 maxidx = max(self.disks.keys())
7337 if maxidx >= len(instance.disks):
7338 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7341 if ((self.op.nodes or self.op.iallocator) and
7342 sorted(self.disks.keys()) != range(len(instance.disks))):
7343 raise errors.OpPrereqError("Can't recreate disks partially and"
7344 " change the nodes at the same time",
7347 self.instance = instance
7349 if self.op.iallocator:
7350 self._RunAllocator()
7351 # Release unneeded node and node resource locks
7352 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7353 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7354 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7356 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7358 def Exec(self, feedback_fn):
7359 """Recreate the disks.
7362 instance = self.instance
7364 assert (self.owned_locks(locking.LEVEL_NODE) ==
7365 self.owned_locks(locking.LEVEL_NODE_RES))
7368 mods = [] # keeps track of needed changes
7370 for idx, disk in enumerate(instance.disks):
7372 changes = self.disks[idx]
7374 # Disk should not be recreated
7378 # update secondaries for disks, if needed
7379 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7380 # need to update the nodes and minors
7381 assert len(self.op.nodes) == 2
7382 assert len(disk.logical_id) == 6 # otherwise disk internals
7384 (_, _, old_port, _, _, old_secret) = disk.logical_id
7385 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7386 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7387 new_minors[0], new_minors[1], old_secret)
7388 assert len(disk.logical_id) == len(new_id)
7392 mods.append((idx, new_id, changes))
7394 # now that we have passed all asserts above, we can apply the mods
7395 # in a single run (to avoid partial changes)
7396 for idx, new_id, changes in mods:
7397 disk = instance.disks[idx]
7398 if new_id is not None:
7399 assert disk.dev_type == constants.LD_DRBD8
7400 disk.logical_id = new_id
7402 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7403 mode=changes.get(constants.IDISK_MODE, None))
7405 # change primary node, if needed
7407 instance.primary_node = self.op.nodes[0]
7408 self.LogWarning("Changing the instance's nodes, you will have to"
7409 " remove any disks left on the older nodes manually")
7412 self.cfg.Update(instance, feedback_fn)
7414 # All touched nodes must be locked
7415 mylocks = self.owned_locks(locking.LEVEL_NODE)
7416 assert mylocks.issuperset(frozenset(instance.all_nodes))
7417 _CreateDisks(self, instance, to_skip=to_skip)
7420 class LUInstanceRename(LogicalUnit):
7421 """Rename an instance.
7424 HPATH = "instance-rename"
7425 HTYPE = constants.HTYPE_INSTANCE
7427 def CheckArguments(self):
7431 if self.op.ip_check and not self.op.name_check:
7432 # TODO: make the ip check more flexible and not depend on the name check
7433 raise errors.OpPrereqError("IP address check requires a name check",
7436 def BuildHooksEnv(self):
7439 This runs on master, primary and secondary nodes of the instance.
7442 env = _BuildInstanceHookEnvByObject(self, self.instance)
7443 env["INSTANCE_NEW_NAME"] = self.op.new_name
7446 def BuildHooksNodes(self):
7447 """Build hooks nodes.
7450 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7453 def CheckPrereq(self):
7454 """Check prerequisites.
7456 This checks that the instance is in the cluster and is not running.
7459 self.op.instance_name = _ExpandInstanceName(self.cfg,
7460 self.op.instance_name)
7461 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7462 assert instance is not None
7463 _CheckNodeOnline(self, instance.primary_node)
7464 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7465 msg="cannot rename")
7466 self.instance = instance
7468 new_name = self.op.new_name
7469 if self.op.name_check:
7470 hostname = _CheckHostnameSane(self, new_name)
7471 new_name = self.op.new_name = hostname.name
7472 if (self.op.ip_check and
7473 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7474 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7475 (hostname.ip, new_name),
7476 errors.ECODE_NOTUNIQUE)
7478 instance_list = self.cfg.GetInstanceList()
7479 if new_name in instance_list and new_name != instance.name:
7480 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7481 new_name, errors.ECODE_EXISTS)
7483 def Exec(self, feedback_fn):
7484 """Rename the instance.
7487 inst = self.instance
7488 old_name = inst.name
7490 rename_file_storage = False
7491 if (inst.disk_template in constants.DTS_FILEBASED and
7492 self.op.new_name != inst.name):
7493 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7494 rename_file_storage = True
7496 self.cfg.RenameInstance(inst.name, self.op.new_name)
7497 # Change the instance lock. This is definitely safe while we hold the BGL.
7498 # Otherwise the new lock would have to be added in acquired mode.
7500 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7501 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7502 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7504 # re-read the instance from the configuration after rename
7505 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7507 if rename_file_storage:
7508 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7509 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7510 old_file_storage_dir,
7511 new_file_storage_dir)
7512 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7513 " (but the instance has been renamed in Ganeti)" %
7514 (inst.primary_node, old_file_storage_dir,
7515 new_file_storage_dir))
7517 _StartInstanceDisks(self, inst, None)
7518 # update info on disks
7519 info = _GetInstanceInfoText(inst)
7520 for (idx, disk) in enumerate(inst.disks):
7521 for node in inst.all_nodes:
7522 self.cfg.SetDiskID(disk, node)
7523 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7525 self.LogWarning("Error setting info on node %s for disk %s: %s",
7526 node, idx, result.fail_msg)
7528 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7529 old_name, self.op.debug_level)
7530 msg = result.fail_msg
7532 msg = ("Could not run OS rename script for instance %s on node %s"
7533 " (but the instance has been renamed in Ganeti): %s" %
7534 (inst.name, inst.primary_node, msg))
7535 self.LogWarning(msg)
7537 _ShutdownInstanceDisks(self, inst)
7542 class LUInstanceRemove(LogicalUnit):
7543 """Remove an instance.
7546 HPATH = "instance-remove"
7547 HTYPE = constants.HTYPE_INSTANCE
7550 def ExpandNames(self):
7551 self._ExpandAndLockInstance()
7552 self.needed_locks[locking.LEVEL_NODE] = []
7553 self.needed_locks[locking.LEVEL_NODE_RES] = []
7554 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7556 def DeclareLocks(self, level):
7557 if level == locking.LEVEL_NODE:
7558 self._LockInstancesNodes()
7559 elif level == locking.LEVEL_NODE_RES:
7561 self.needed_locks[locking.LEVEL_NODE_RES] = \
7562 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7564 def BuildHooksEnv(self):
7567 This runs on master, primary and secondary nodes of the instance.
7570 env = _BuildInstanceHookEnvByObject(self, self.instance)
7571 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7574 def BuildHooksNodes(self):
7575 """Build hooks nodes.
7578 nl = [self.cfg.GetMasterNode()]
7579 nl_post = list(self.instance.all_nodes) + nl
7580 return (nl, nl_post)
7582 def CheckPrereq(self):
7583 """Check prerequisites.
7585 This checks that the instance is in the cluster.
7588 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7589 assert self.instance is not None, \
7590 "Cannot retrieve locked instance %s" % self.op.instance_name
7592 def Exec(self, feedback_fn):
7593 """Remove the instance.
7596 instance = self.instance
7597 logging.info("Shutting down instance %s on node %s",
7598 instance.name, instance.primary_node)
7600 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7601 self.op.shutdown_timeout,
7603 msg = result.fail_msg
7605 if self.op.ignore_failures:
7606 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7608 raise errors.OpExecError("Could not shutdown instance %s on"
7610 (instance.name, instance.primary_node, msg))
7612 assert (self.owned_locks(locking.LEVEL_NODE) ==
7613 self.owned_locks(locking.LEVEL_NODE_RES))
7614 assert not (set(instance.all_nodes) -
7615 self.owned_locks(locking.LEVEL_NODE)), \
7616 "Not owning correct locks"
7618 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7621 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7622 """Utility function to remove an instance.
7625 logging.info("Removing block devices for instance %s", instance.name)
7627 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7628 if not ignore_failures:
7629 raise errors.OpExecError("Can't remove instance's disks")
7630 feedback_fn("Warning: can't remove instance's disks")
7632 logging.info("Removing instance %s out of cluster config", instance.name)
7634 lu.cfg.RemoveInstance(instance.name)
7636 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7637 "Instance lock removal conflict"
7639 # Remove lock for the instance
7640 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7643 class LUInstanceQuery(NoHooksLU):
7644 """Logical unit for querying instances.
7647 # pylint: disable=W0142
7650 def CheckArguments(self):
7651 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7652 self.op.output_fields, self.op.use_locking)
7654 def ExpandNames(self):
7655 self.iq.ExpandNames(self)
7657 def DeclareLocks(self, level):
7658 self.iq.DeclareLocks(self, level)
7660 def Exec(self, feedback_fn):
7661 return self.iq.OldStyleQuery(self)
7664 def _ExpandNamesForMigration(lu):
7665 """Expands names for use with L{TLMigrateInstance}.
7667 @type lu: L{LogicalUnit}
7670 if lu.op.target_node is not None:
7671 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7673 lu.needed_locks[locking.LEVEL_NODE] = []
7674 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7676 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7677 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7679 # The node allocation lock is actually only needed for externally replicated
7680 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
7681 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
7684 def _DeclareLocksForMigration(lu, level):
7685 """Declares locks for L{TLMigrateInstance}.
7687 @type lu: L{LogicalUnit}
7688 @param level: Lock level
7691 if level == locking.LEVEL_NODE_ALLOC:
7692 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
7694 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7696 # Node locks are already declared here rather than at LEVEL_NODE as we need
7697 # the instance object anyway to declare the node allocation lock.
7698 if instance.disk_template in constants.DTS_EXT_MIRROR:
7699 if lu.op.target_node is None:
7700 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7701 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7703 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7705 del lu.recalculate_locks[locking.LEVEL_NODE]
7707 lu._LockInstancesNodes() # pylint: disable=W0212
7709 elif level == locking.LEVEL_NODE:
7710 # Node locks are declared together with the node allocation lock
7711 assert (lu.needed_locks[locking.LEVEL_NODE] or
7712 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
7714 elif level == locking.LEVEL_NODE_RES:
7716 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7717 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
7720 class LUInstanceFailover(LogicalUnit):
7721 """Failover an instance.
7724 HPATH = "instance-failover"
7725 HTYPE = constants.HTYPE_INSTANCE
7728 def CheckArguments(self):
7729 """Check the arguments.
7732 self.iallocator = getattr(self.op, "iallocator", None)
7733 self.target_node = getattr(self.op, "target_node", None)
7735 def ExpandNames(self):
7736 self._ExpandAndLockInstance()
7737 _ExpandNamesForMigration(self)
7740 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7741 self.op.ignore_consistency, True,
7742 self.op.shutdown_timeout, self.op.ignore_ipolicy)
7744 self.tasklets = [self._migrater]
7746 def DeclareLocks(self, level):
7747 _DeclareLocksForMigration(self, level)
7749 def BuildHooksEnv(self):
7752 This runs on master, primary and secondary nodes of the instance.
7755 instance = self._migrater.instance
7756 source_node = instance.primary_node
7757 target_node = self.op.target_node
7759 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7760 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7761 "OLD_PRIMARY": source_node,
7762 "NEW_PRIMARY": target_node,
7765 if instance.disk_template in constants.DTS_INT_MIRROR:
7766 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7767 env["NEW_SECONDARY"] = source_node
7769 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7771 env.update(_BuildInstanceHookEnvByObject(self, instance))
7775 def BuildHooksNodes(self):
7776 """Build hooks nodes.
7779 instance = self._migrater.instance
7780 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7781 return (nl, nl + [instance.primary_node])
7784 class LUInstanceMigrate(LogicalUnit):
7785 """Migrate an instance.
7787 This is migration without shutting down, compared to the failover,
7788 which is done with shutdown.
7791 HPATH = "instance-migrate"
7792 HTYPE = constants.HTYPE_INSTANCE
7795 def ExpandNames(self):
7796 self._ExpandAndLockInstance()
7797 _ExpandNamesForMigration(self)
7800 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7801 False, self.op.allow_failover, False,
7802 self.op.allow_runtime_changes,
7803 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7804 self.op.ignore_ipolicy)
7806 self.tasklets = [self._migrater]
7808 def DeclareLocks(self, level):
7809 _DeclareLocksForMigration(self, level)
7811 def BuildHooksEnv(self):
7814 This runs on master, primary and secondary nodes of the instance.
7817 instance = self._migrater.instance
7818 source_node = instance.primary_node
7819 target_node = self.op.target_node
7820 env = _BuildInstanceHookEnvByObject(self, instance)
7822 "MIGRATE_LIVE": self._migrater.live,
7823 "MIGRATE_CLEANUP": self.op.cleanup,
7824 "OLD_PRIMARY": source_node,
7825 "NEW_PRIMARY": target_node,
7826 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7829 if instance.disk_template in constants.DTS_INT_MIRROR:
7830 env["OLD_SECONDARY"] = target_node
7831 env["NEW_SECONDARY"] = source_node
7833 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7837 def BuildHooksNodes(self):
7838 """Build hooks nodes.
7841 instance = self._migrater.instance
7842 snodes = list(instance.secondary_nodes)
7843 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
7847 class LUInstanceMove(LogicalUnit):
7848 """Move an instance by data-copying.
7851 HPATH = "instance-move"
7852 HTYPE = constants.HTYPE_INSTANCE
7855 def ExpandNames(self):
7856 self._ExpandAndLockInstance()
7857 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7858 self.op.target_node = target_node
7859 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7860 self.needed_locks[locking.LEVEL_NODE_RES] = []
7861 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7863 def DeclareLocks(self, level):
7864 if level == locking.LEVEL_NODE:
7865 self._LockInstancesNodes(primary_only=True)
7866 elif level == locking.LEVEL_NODE_RES:
7868 self.needed_locks[locking.LEVEL_NODE_RES] = \
7869 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7871 def BuildHooksEnv(self):
7874 This runs on master, primary and secondary nodes of the instance.
7878 "TARGET_NODE": self.op.target_node,
7879 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7881 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7884 def BuildHooksNodes(self):
7885 """Build hooks nodes.
7889 self.cfg.GetMasterNode(),
7890 self.instance.primary_node,
7891 self.op.target_node,
7895 def CheckPrereq(self):
7896 """Check prerequisites.
7898 This checks that the instance is in the cluster.
7901 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7902 assert self.instance is not None, \
7903 "Cannot retrieve locked instance %s" % self.op.instance_name
7905 if instance.disk_template not in constants.DTS_COPYABLE:
7906 raise errors.OpPrereqError("Disk template %s not suitable for copying" %
7907 instance.disk_template, errors.ECODE_STATE)
7909 node = self.cfg.GetNodeInfo(self.op.target_node)
7910 assert node is not None, \
7911 "Cannot retrieve locked node %s" % self.op.target_node
7913 self.target_node = target_node = node.name
7915 if target_node == instance.primary_node:
7916 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7917 (instance.name, target_node),
7920 bep = self.cfg.GetClusterInfo().FillBE(instance)
7922 for idx, dsk in enumerate(instance.disks):
7923 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7924 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7925 " cannot copy" % idx, errors.ECODE_STATE)
7927 _CheckNodeOnline(self, target_node)
7928 _CheckNodeNotDrained(self, target_node)
7929 _CheckNodeVmCapable(self, target_node)
7930 cluster = self.cfg.GetClusterInfo()
7931 group_info = self.cfg.GetNodeGroup(node.group)
7932 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7933 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
7934 ignore=self.op.ignore_ipolicy)
7936 if instance.admin_state == constants.ADMINST_UP:
7937 # check memory requirements on the secondary node
7938 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7939 instance.name, bep[constants.BE_MAXMEM],
7940 instance.hypervisor)
7942 self.LogInfo("Not checking memory on the secondary node as"
7943 " instance will not be started")
7945 # check bridge existance
7946 _CheckInstanceBridgesExist(self, instance, node=target_node)
7948 def Exec(self, feedback_fn):
7949 """Move an instance.
7951 The move is done by shutting it down on its present node, copying
7952 the data over (slow) and starting it on the new node.
7955 instance = self.instance
7957 source_node = instance.primary_node
7958 target_node = self.target_node
7960 self.LogInfo("Shutting down instance %s on source node %s",
7961 instance.name, source_node)
7963 assert (self.owned_locks(locking.LEVEL_NODE) ==
7964 self.owned_locks(locking.LEVEL_NODE_RES))
7966 result = self.rpc.call_instance_shutdown(source_node, instance,
7967 self.op.shutdown_timeout,
7969 msg = result.fail_msg
7971 if self.op.ignore_consistency:
7972 self.LogWarning("Could not shutdown instance %s on node %s."
7973 " Proceeding anyway. Please make sure node"
7974 " %s is down. Error details: %s",
7975 instance.name, source_node, source_node, msg)
7977 raise errors.OpExecError("Could not shutdown instance %s on"
7979 (instance.name, source_node, msg))
7981 # create the target disks
7983 _CreateDisks(self, instance, target_node=target_node)
7984 except errors.OpExecError:
7985 self.LogWarning("Device creation failed")
7986 self.cfg.ReleaseDRBDMinors(instance.name)
7989 cluster_name = self.cfg.GetClusterInfo().cluster_name
7992 # activate, get path, copy the data over
7993 for idx, disk in enumerate(instance.disks):
7994 self.LogInfo("Copying data for disk %d", idx)
7995 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7996 instance.name, True, idx)
7998 self.LogWarning("Can't assemble newly created disk %d: %s",
7999 idx, result.fail_msg)
8000 errs.append(result.fail_msg)
8002 dev_path = result.payload
8003 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8004 target_node, dev_path,
8007 self.LogWarning("Can't copy data over for disk %d: %s",
8008 idx, result.fail_msg)
8009 errs.append(result.fail_msg)
8013 self.LogWarning("Some disks failed to copy, aborting")
8015 _RemoveDisks(self, instance, target_node=target_node)
8017 self.cfg.ReleaseDRBDMinors(instance.name)
8018 raise errors.OpExecError("Errors during disk copy: %s" %
8021 instance.primary_node = target_node
8022 self.cfg.Update(instance, feedback_fn)
8024 self.LogInfo("Removing the disks on the original node")
8025 _RemoveDisks(self, instance, target_node=source_node)
8027 # Only start the instance if it's marked as up
8028 if instance.admin_state == constants.ADMINST_UP:
8029 self.LogInfo("Starting instance %s on node %s",
8030 instance.name, target_node)
8032 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8033 ignore_secondaries=True)
8035 _ShutdownInstanceDisks(self, instance)
8036 raise errors.OpExecError("Can't activate the instance's disks")
8038 result = self.rpc.call_instance_start(target_node,
8039 (instance, None, None), False,
8041 msg = result.fail_msg
8043 _ShutdownInstanceDisks(self, instance)
8044 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8045 (instance.name, target_node, msg))
8048 class LUNodeMigrate(LogicalUnit):
8049 """Migrate all instances from a node.
8052 HPATH = "node-migrate"
8053 HTYPE = constants.HTYPE_NODE
8056 def CheckArguments(self):
8059 def ExpandNames(self):
8060 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8062 self.share_locks = _ShareAll()
8063 self.needed_locks = {
8064 locking.LEVEL_NODE: [self.op.node_name],
8067 def BuildHooksEnv(self):
8070 This runs on the master, the primary and all the secondaries.
8074 "NODE_NAME": self.op.node_name,
8075 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8078 def BuildHooksNodes(self):
8079 """Build hooks nodes.
8082 nl = [self.cfg.GetMasterNode()]
8085 def CheckPrereq(self):
8088 def Exec(self, feedback_fn):
8089 # Prepare jobs for migration instances
8090 allow_runtime_changes = self.op.allow_runtime_changes
8092 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8095 iallocator=self.op.iallocator,
8096 target_node=self.op.target_node,
8097 allow_runtime_changes=allow_runtime_changes,
8098 ignore_ipolicy=self.op.ignore_ipolicy)]
8099 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8101 # TODO: Run iallocator in this opcode and pass correct placement options to
8102 # OpInstanceMigrate. Since other jobs can modify the cluster between
8103 # running the iallocator and the actual migration, a good consistency model
8104 # will have to be found.
8106 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8107 frozenset([self.op.node_name]))
8109 return ResultWithJobs(jobs)
8112 class TLMigrateInstance(Tasklet):
8113 """Tasklet class for instance migration.
8116 @ivar live: whether the migration will be done live or non-live;
8117 this variable is initalized only after CheckPrereq has run
8118 @type cleanup: boolean
8119 @ivar cleanup: Wheater we cleanup from a failed migration
8120 @type iallocator: string
8121 @ivar iallocator: The iallocator used to determine target_node
8122 @type target_node: string
8123 @ivar target_node: If given, the target_node to reallocate the instance to
8124 @type failover: boolean
8125 @ivar failover: Whether operation results in failover or migration
8126 @type fallback: boolean
8127 @ivar fallback: Whether fallback to failover is allowed if migration not
8129 @type ignore_consistency: boolean
8130 @ivar ignore_consistency: Wheter we should ignore consistency between source
8132 @type shutdown_timeout: int
8133 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8134 @type ignore_ipolicy: bool
8135 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8140 _MIGRATION_POLL_INTERVAL = 1 # seconds
8141 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8143 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8144 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8146 """Initializes this class.
8149 Tasklet.__init__(self, lu)
8152 self.instance_name = instance_name
8153 self.cleanup = cleanup
8154 self.live = False # will be overridden later
8155 self.failover = failover
8156 self.fallback = fallback
8157 self.ignore_consistency = ignore_consistency
8158 self.shutdown_timeout = shutdown_timeout
8159 self.ignore_ipolicy = ignore_ipolicy
8160 self.allow_runtime_changes = allow_runtime_changes
8162 def CheckPrereq(self):
8163 """Check prerequisites.
8165 This checks that the instance is in the cluster.
8168 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8169 instance = self.cfg.GetInstanceInfo(instance_name)
8170 assert instance is not None
8171 self.instance = instance
8172 cluster = self.cfg.GetClusterInfo()
8174 if (not self.cleanup and
8175 not instance.admin_state == constants.ADMINST_UP and
8176 not self.failover and self.fallback):
8177 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8178 " switching to failover")
8179 self.failover = True
8181 if instance.disk_template not in constants.DTS_MIRRORED:
8186 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8187 " %s" % (instance.disk_template, text),
8190 if instance.disk_template in constants.DTS_EXT_MIRROR:
8191 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8193 if self.lu.op.iallocator:
8194 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8195 self._RunAllocator()
8197 # We set set self.target_node as it is required by
8199 self.target_node = self.lu.op.target_node
8201 # Check that the target node is correct in terms of instance policy
8202 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8203 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8204 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8206 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8207 ignore=self.ignore_ipolicy)
8209 # self.target_node is already populated, either directly or by the
8211 target_node = self.target_node
8212 if self.target_node == instance.primary_node:
8213 raise errors.OpPrereqError("Cannot migrate instance %s"
8214 " to its primary (%s)" %
8215 (instance.name, instance.primary_node),
8218 if len(self.lu.tasklets) == 1:
8219 # It is safe to release locks only when we're the only tasklet
8221 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8222 keep=[instance.primary_node, self.target_node])
8223 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8226 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8228 secondary_nodes = instance.secondary_nodes
8229 if not secondary_nodes:
8230 raise errors.ConfigurationError("No secondary node but using"
8231 " %s disk template" %
8232 instance.disk_template)
8233 target_node = secondary_nodes[0]
8234 if self.lu.op.iallocator or (self.lu.op.target_node and
8235 self.lu.op.target_node != target_node):
8237 text = "failed over"
8240 raise errors.OpPrereqError("Instances with disk template %s cannot"
8241 " be %s to arbitrary nodes"
8242 " (neither an iallocator nor a target"
8243 " node can be passed)" %
8244 (instance.disk_template, text),
8246 nodeinfo = self.cfg.GetNodeInfo(target_node)
8247 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8248 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8250 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8251 ignore=self.ignore_ipolicy)
8253 i_be = cluster.FillBE(instance)
8255 # check memory requirements on the secondary node
8256 if (not self.cleanup and
8257 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8258 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8259 "migrating instance %s" %
8261 i_be[constants.BE_MINMEM],
8262 instance.hypervisor)
8264 self.lu.LogInfo("Not checking memory on the secondary node as"
8265 " instance will not be started")
8267 # check if failover must be forced instead of migration
8268 if (not self.cleanup and not self.failover and
8269 i_be[constants.BE_ALWAYS_FAILOVER]):
8270 self.lu.LogInfo("Instance configured to always failover; fallback"
8272 self.failover = True
8274 # check bridge existance
8275 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8277 if not self.cleanup:
8278 _CheckNodeNotDrained(self.lu, target_node)
8279 if not self.failover:
8280 result = self.rpc.call_instance_migratable(instance.primary_node,
8282 if result.fail_msg and self.fallback:
8283 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8285 self.failover = True
8287 result.Raise("Can't migrate, please use failover",
8288 prereq=True, ecode=errors.ECODE_STATE)
8290 assert not (self.failover and self.cleanup)
8292 if not self.failover:
8293 if self.lu.op.live is not None and self.lu.op.mode is not None:
8294 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8295 " parameters are accepted",
8297 if self.lu.op.live is not None:
8299 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8301 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8302 # reset the 'live' parameter to None so that repeated
8303 # invocations of CheckPrereq do not raise an exception
8304 self.lu.op.live = None
8305 elif self.lu.op.mode is None:
8306 # read the default value from the hypervisor
8307 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8308 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8310 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8312 # Failover is never live
8315 if not (self.failover or self.cleanup):
8316 remote_info = self.rpc.call_instance_info(instance.primary_node,
8318 instance.hypervisor)
8319 remote_info.Raise("Error checking instance on node %s" %
8320 instance.primary_node)
8321 instance_running = bool(remote_info.payload)
8322 if instance_running:
8323 self.current_mem = int(remote_info.payload["memory"])
8325 def _RunAllocator(self):
8326 """Run the allocator based on input opcode.
8329 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8331 # FIXME: add a self.ignore_ipolicy option
8332 req = iallocator.IAReqRelocate(name=self.instance_name,
8333 relocate_from=[self.instance.primary_node])
8334 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8336 ial.Run(self.lu.op.iallocator)
8339 raise errors.OpPrereqError("Can't compute nodes using"
8340 " iallocator '%s': %s" %
8341 (self.lu.op.iallocator, ial.info),
8343 self.target_node = ial.result[0]
8344 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8345 self.instance_name, self.lu.op.iallocator,
8346 utils.CommaJoin(ial.result))
8348 def _WaitUntilSync(self):
8349 """Poll with custom rpc for disk sync.
8351 This uses our own step-based rpc call.
8354 self.feedback_fn("* wait until resync is done")
8358 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8360 (self.instance.disks,
8363 for node, nres in result.items():
8364 nres.Raise("Cannot resync disks on node %s" % node)
8365 node_done, node_percent = nres.payload
8366 all_done = all_done and node_done
8367 if node_percent is not None:
8368 min_percent = min(min_percent, node_percent)
8370 if min_percent < 100:
8371 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8374 def _EnsureSecondary(self, node):
8375 """Demote a node to secondary.
8378 self.feedback_fn("* switching node %s to secondary mode" % node)
8380 for dev in self.instance.disks:
8381 self.cfg.SetDiskID(dev, node)
8383 result = self.rpc.call_blockdev_close(node, self.instance.name,
8384 self.instance.disks)
8385 result.Raise("Cannot change disk to secondary on node %s" % node)
8387 def _GoStandalone(self):
8388 """Disconnect from the network.
8391 self.feedback_fn("* changing into standalone mode")
8392 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8393 self.instance.disks)
8394 for node, nres in result.items():
8395 nres.Raise("Cannot disconnect disks node %s" % node)
8397 def _GoReconnect(self, multimaster):
8398 """Reconnect to the network.
8404 msg = "single-master"
8405 self.feedback_fn("* changing disks into %s mode" % msg)
8406 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8407 (self.instance.disks, self.instance),
8408 self.instance.name, multimaster)
8409 for node, nres in result.items():
8410 nres.Raise("Cannot change disks config on node %s" % node)
8412 def _ExecCleanup(self):
8413 """Try to cleanup after a failed migration.
8415 The cleanup is done by:
8416 - check that the instance is running only on one node
8417 (and update the config if needed)
8418 - change disks on its secondary node to secondary
8419 - wait until disks are fully synchronized
8420 - disconnect from the network
8421 - change disks into single-master mode
8422 - wait again until disks are fully synchronized
8425 instance = self.instance
8426 target_node = self.target_node
8427 source_node = self.source_node
8429 # check running on only one node
8430 self.feedback_fn("* checking where the instance actually runs"
8431 " (if this hangs, the hypervisor might be in"
8433 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8434 for node, result in ins_l.items():
8435 result.Raise("Can't contact node %s" % node)
8437 runningon_source = instance.name in ins_l[source_node].payload
8438 runningon_target = instance.name in ins_l[target_node].payload
8440 if runningon_source and runningon_target:
8441 raise errors.OpExecError("Instance seems to be running on two nodes,"
8442 " or the hypervisor is confused; you will have"
8443 " to ensure manually that it runs only on one"
8444 " and restart this operation")
8446 if not (runningon_source or runningon_target):
8447 raise errors.OpExecError("Instance does not seem to be running at all;"
8448 " in this case it's safer to repair by"
8449 " running 'gnt-instance stop' to ensure disk"
8450 " shutdown, and then restarting it")
8452 if runningon_target:
8453 # the migration has actually succeeded, we need to update the config
8454 self.feedback_fn("* instance running on secondary node (%s),"
8455 " updating config" % target_node)
8456 instance.primary_node = target_node
8457 self.cfg.Update(instance, self.feedback_fn)
8458 demoted_node = source_node
8460 self.feedback_fn("* instance confirmed to be running on its"
8461 " primary node (%s)" % source_node)
8462 demoted_node = target_node
8464 if instance.disk_template in constants.DTS_INT_MIRROR:
8465 self._EnsureSecondary(demoted_node)
8467 self._WaitUntilSync()
8468 except errors.OpExecError:
8469 # we ignore here errors, since if the device is standalone, it
8470 # won't be able to sync
8472 self._GoStandalone()
8473 self._GoReconnect(False)
8474 self._WaitUntilSync()
8476 self.feedback_fn("* done")
8478 def _RevertDiskStatus(self):
8479 """Try to revert the disk status after a failed migration.
8482 target_node = self.target_node
8483 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8487 self._EnsureSecondary(target_node)
8488 self._GoStandalone()
8489 self._GoReconnect(False)
8490 self._WaitUntilSync()
8491 except errors.OpExecError, err:
8492 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8493 " please try to recover the instance manually;"
8494 " error '%s'" % str(err))
8496 def _AbortMigration(self):
8497 """Call the hypervisor code to abort a started migration.
8500 instance = self.instance
8501 target_node = self.target_node
8502 source_node = self.source_node
8503 migration_info = self.migration_info
8505 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8509 abort_msg = abort_result.fail_msg
8511 logging.error("Aborting migration failed on target node %s: %s",
8512 target_node, abort_msg)
8513 # Don't raise an exception here, as we stil have to try to revert the
8514 # disk status, even if this step failed.
8516 abort_result = self.rpc.call_instance_finalize_migration_src(
8517 source_node, instance, False, self.live)
8518 abort_msg = abort_result.fail_msg
8520 logging.error("Aborting migration failed on source node %s: %s",
8521 source_node, abort_msg)
8523 def _ExecMigration(self):
8524 """Migrate an instance.
8526 The migrate is done by:
8527 - change the disks into dual-master mode
8528 - wait until disks are fully synchronized again
8529 - migrate the instance
8530 - change disks on the new secondary node (the old primary) to secondary
8531 - wait until disks are fully synchronized
8532 - change disks into single-master mode
8535 instance = self.instance
8536 target_node = self.target_node
8537 source_node = self.source_node
8539 # Check for hypervisor version mismatch and warn the user.
8540 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8541 None, [self.instance.hypervisor], False)
8542 for ninfo in nodeinfo.values():
8543 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8545 (_, _, (src_info, )) = nodeinfo[source_node].payload
8546 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8548 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8549 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8550 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8551 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8552 if src_version != dst_version:
8553 self.feedback_fn("* warning: hypervisor version mismatch between"
8554 " source (%s) and target (%s) node" %
8555 (src_version, dst_version))
8557 self.feedback_fn("* checking disk consistency between source and target")
8558 for (idx, dev) in enumerate(instance.disks):
8559 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8560 raise errors.OpExecError("Disk %s is degraded or not fully"
8561 " synchronized on target node,"
8562 " aborting migration" % idx)
8564 if self.current_mem > self.tgt_free_mem:
8565 if not self.allow_runtime_changes:
8566 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8567 " free memory to fit instance %s on target"
8568 " node %s (have %dMB, need %dMB)" %
8569 (instance.name, target_node,
8570 self.tgt_free_mem, self.current_mem))
8571 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8572 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8575 rpcres.Raise("Cannot modify instance runtime memory")
8577 # First get the migration information from the remote node
8578 result = self.rpc.call_migration_info(source_node, instance)
8579 msg = result.fail_msg
8581 log_err = ("Failed fetching source migration information from %s: %s" %
8583 logging.error(log_err)
8584 raise errors.OpExecError(log_err)
8586 self.migration_info = migration_info = result.payload
8588 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8589 # Then switch the disks to master/master mode
8590 self._EnsureSecondary(target_node)
8591 self._GoStandalone()
8592 self._GoReconnect(True)
8593 self._WaitUntilSync()
8595 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8596 result = self.rpc.call_accept_instance(target_node,
8599 self.nodes_ip[target_node])
8601 msg = result.fail_msg
8603 logging.error("Instance pre-migration failed, trying to revert"
8604 " disk status: %s", msg)
8605 self.feedback_fn("Pre-migration failed, aborting")
8606 self._AbortMigration()
8607 self._RevertDiskStatus()
8608 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8609 (instance.name, msg))
8611 self.feedback_fn("* migrating instance to %s" % target_node)
8612 result = self.rpc.call_instance_migrate(source_node, instance,
8613 self.nodes_ip[target_node],
8615 msg = result.fail_msg
8617 logging.error("Instance migration failed, trying to revert"
8618 " disk status: %s", msg)
8619 self.feedback_fn("Migration failed, aborting")
8620 self._AbortMigration()
8621 self._RevertDiskStatus()
8622 raise errors.OpExecError("Could not migrate instance %s: %s" %
8623 (instance.name, msg))
8625 self.feedback_fn("* starting memory transfer")
8626 last_feedback = time.time()
8628 result = self.rpc.call_instance_get_migration_status(source_node,
8630 msg = result.fail_msg
8631 ms = result.payload # MigrationStatus instance
8632 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8633 logging.error("Instance migration failed, trying to revert"
8634 " disk status: %s", msg)
8635 self.feedback_fn("Migration failed, aborting")
8636 self._AbortMigration()
8637 self._RevertDiskStatus()
8639 msg = "hypervisor returned failure"
8640 raise errors.OpExecError("Could not migrate instance %s: %s" %
8641 (instance.name, msg))
8643 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8644 self.feedback_fn("* memory transfer complete")
8647 if (utils.TimeoutExpired(last_feedback,
8648 self._MIGRATION_FEEDBACK_INTERVAL) and
8649 ms.transferred_ram is not None):
8650 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8651 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8652 last_feedback = time.time()
8654 time.sleep(self._MIGRATION_POLL_INTERVAL)
8656 result = self.rpc.call_instance_finalize_migration_src(source_node,
8660 msg = result.fail_msg
8662 logging.error("Instance migration succeeded, but finalization failed"
8663 " on the source node: %s", msg)
8664 raise errors.OpExecError("Could not finalize instance migration: %s" %
8667 instance.primary_node = target_node
8669 # distribute new instance config to the other nodes
8670 self.cfg.Update(instance, self.feedback_fn)
8672 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8676 msg = result.fail_msg
8678 logging.error("Instance migration succeeded, but finalization failed"
8679 " on the target node: %s", msg)
8680 raise errors.OpExecError("Could not finalize instance migration: %s" %
8683 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8684 self._EnsureSecondary(source_node)
8685 self._WaitUntilSync()
8686 self._GoStandalone()
8687 self._GoReconnect(False)
8688 self._WaitUntilSync()
8690 # If the instance's disk template is `rbd' or `ext' and there was a
8691 # successful migration, unmap the device from the source node.
8692 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
8693 disks = _ExpandCheckDisks(instance, instance.disks)
8694 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8696 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8697 msg = result.fail_msg
8699 logging.error("Migration was successful, but couldn't unmap the"
8700 " block device %s on source node %s: %s",
8701 disk.iv_name, source_node, msg)
8702 logging.error("You need to unmap the device %s manually on %s",
8703 disk.iv_name, source_node)
8705 self.feedback_fn("* done")
8707 def _ExecFailover(self):
8708 """Failover an instance.
8710 The failover is done by shutting it down on its present node and
8711 starting it on the secondary.
8714 instance = self.instance
8715 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8717 source_node = instance.primary_node
8718 target_node = self.target_node
8720 if instance.admin_state == constants.ADMINST_UP:
8721 self.feedback_fn("* checking disk consistency between source and target")
8722 for (idx, dev) in enumerate(instance.disks):
8723 # for drbd, these are drbd over lvm
8724 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8726 if primary_node.offline:
8727 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8729 (primary_node.name, idx, target_node))
8730 elif not self.ignore_consistency:
8731 raise errors.OpExecError("Disk %s is degraded on target node,"
8732 " aborting failover" % idx)
8734 self.feedback_fn("* not checking disk consistency as instance is not"
8737 self.feedback_fn("* shutting down instance on source node")
8738 logging.info("Shutting down instance %s on node %s",
8739 instance.name, source_node)
8741 result = self.rpc.call_instance_shutdown(source_node, instance,
8742 self.shutdown_timeout,
8744 msg = result.fail_msg
8746 if self.ignore_consistency or primary_node.offline:
8747 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8748 " proceeding anyway; please make sure node"
8749 " %s is down; error details: %s",
8750 instance.name, source_node, source_node, msg)
8752 raise errors.OpExecError("Could not shutdown instance %s on"
8754 (instance.name, source_node, msg))
8756 self.feedback_fn("* deactivating the instance's disks on source node")
8757 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8758 raise errors.OpExecError("Can't shut down the instance's disks")
8760 instance.primary_node = target_node
8761 # distribute new instance config to the other nodes
8762 self.cfg.Update(instance, self.feedback_fn)
8764 # Only start the instance if it's marked as up
8765 if instance.admin_state == constants.ADMINST_UP:
8766 self.feedback_fn("* activating the instance's disks on target node %s" %
8768 logging.info("Starting instance %s on node %s",
8769 instance.name, target_node)
8771 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8772 ignore_secondaries=True)
8774 _ShutdownInstanceDisks(self.lu, instance)
8775 raise errors.OpExecError("Can't activate the instance's disks")
8777 self.feedback_fn("* starting the instance on the target node %s" %
8779 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8780 False, self.lu.op.reason)
8781 msg = result.fail_msg
8783 _ShutdownInstanceDisks(self.lu, instance)
8784 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8785 (instance.name, target_node, msg))
8787 def Exec(self, feedback_fn):
8788 """Perform the migration.
8791 self.feedback_fn = feedback_fn
8792 self.source_node = self.instance.primary_node
8794 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8795 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8796 self.target_node = self.instance.secondary_nodes[0]
8797 # Otherwise self.target_node has been populated either
8798 # directly, or through an iallocator.
8800 self.all_nodes = [self.source_node, self.target_node]
8801 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8802 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8805 feedback_fn("Failover instance %s" % self.instance.name)
8806 self._ExecFailover()
8808 feedback_fn("Migrating instance %s" % self.instance.name)
8811 return self._ExecCleanup()
8813 return self._ExecMigration()
8816 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8818 """Wrapper around L{_CreateBlockDevInner}.
8820 This method annotates the root device first.
8823 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8824 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
8825 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8826 force_open, excl_stor)
8829 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8830 info, force_open, excl_stor):
8831 """Create a tree of block devices on a given node.
8833 If this device type has to be created on secondaries, create it and
8836 If not, just recurse to children keeping the same 'force' value.
8838 @attention: The device has to be annotated already.
8840 @param lu: the lu on whose behalf we execute
8841 @param node: the node on which to create the device
8842 @type instance: L{objects.Instance}
8843 @param instance: the instance which owns the device
8844 @type device: L{objects.Disk}
8845 @param device: the device to create
8846 @type force_create: boolean
8847 @param force_create: whether to force creation of this device; this
8848 will be change to True whenever we find a device which has
8849 CreateOnSecondary() attribute
8850 @param info: the extra 'metadata' we should attach to the device
8851 (this will be represented as a LVM tag)
8852 @type force_open: boolean
8853 @param force_open: this parameter will be passes to the
8854 L{backend.BlockdevCreate} function where it specifies
8855 whether we run on primary or not, and it affects both
8856 the child assembly and the device own Open() execution
8857 @type excl_stor: boolean
8858 @param excl_stor: Whether exclusive_storage is active for the node
8860 @return: list of created devices
8862 created_devices = []
8864 if device.CreateOnSecondary():
8868 for child in device.children:
8869 devs = _CreateBlockDevInner(lu, node, instance, child, force_create,
8870 info, force_open, excl_stor)
8871 created_devices.extend(devs)
8873 if not force_create:
8874 return created_devices
8876 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
8878 # The device has been completely created, so there is no point in keeping
8879 # its subdevices in the list. We just add the device itself instead.
8880 created_devices = [(node, device)]
8881 return created_devices
8883 except errors.DeviceCreationError, e:
8884 e.created_devices.extend(created_devices)
8886 except errors.OpExecError, e:
8887 raise errors.DeviceCreationError(str(e), created_devices)
8890 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
8892 """Create a single block device on a given node.
8894 This will not recurse over children of the device, so they must be
8897 @param lu: the lu on whose behalf we execute
8898 @param node: the node on which to create the device
8899 @type instance: L{objects.Instance}
8900 @param instance: the instance which owns the device
8901 @type device: L{objects.Disk}
8902 @param device: the device to create
8903 @param info: the extra 'metadata' we should attach to the device
8904 (this will be represented as a LVM tag)
8905 @type force_open: boolean
8906 @param force_open: this parameter will be passes to the
8907 L{backend.BlockdevCreate} function where it specifies
8908 whether we run on primary or not, and it affects both
8909 the child assembly and the device own Open() execution
8910 @type excl_stor: boolean
8911 @param excl_stor: Whether exclusive_storage is active for the node
8914 lu.cfg.SetDiskID(device, node)
8915 result = lu.rpc.call_blockdev_create(node, device, device.size,
8916 instance.name, force_open, info,
8918 result.Raise("Can't create block device %s on"
8919 " node %s for instance %s" % (device, node, instance.name))
8920 if device.physical_id is None:
8921 device.physical_id = result.payload
8924 def _GenerateUniqueNames(lu, exts):
8925 """Generate a suitable LV name.
8927 This will generate a logical volume name for the given instance.
8932 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8933 results.append("%s%s" % (new_id, val))
8937 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8938 iv_name, p_minor, s_minor):
8939 """Generate a drbd8 device complete with its children.
8942 assert len(vgnames) == len(names) == 2
8943 port = lu.cfg.AllocatePort()
8944 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8946 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8947 logical_id=(vgnames[0], names[0]),
8949 dev_data.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8950 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8951 size=constants.DRBD_META_SIZE,
8952 logical_id=(vgnames[1], names[1]),
8954 dev_meta.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8955 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8956 logical_id=(primary, secondary, port,
8959 children=[dev_data, dev_meta],
8960 iv_name=iv_name, params={})
8961 drbd_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8965 _DISK_TEMPLATE_NAME_PREFIX = {
8966 constants.DT_PLAIN: "",
8967 constants.DT_RBD: ".rbd",
8968 constants.DT_EXT: ".ext",
8972 _DISK_TEMPLATE_DEVICE_TYPE = {
8973 constants.DT_PLAIN: constants.LD_LV,
8974 constants.DT_FILE: constants.LD_FILE,
8975 constants.DT_SHARED_FILE: constants.LD_FILE,
8976 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8977 constants.DT_RBD: constants.LD_RBD,
8978 constants.DT_EXT: constants.LD_EXT,
8982 def _GenerateDiskTemplate(
8983 lu, template_name, instance_name, primary_node, secondary_nodes,
8984 disk_info, file_storage_dir, file_driver, base_index,
8985 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8986 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8987 """Generate the entire disk layout for a given template type.
8990 vgname = lu.cfg.GetVGName()
8991 disk_count = len(disk_info)
8994 if template_name == constants.DT_DISKLESS:
8996 elif template_name == constants.DT_DRBD8:
8997 if len(secondary_nodes) != 1:
8998 raise errors.ProgrammerError("Wrong template configuration")
8999 remote_node = secondary_nodes[0]
9000 minors = lu.cfg.AllocateDRBDMinor(
9001 [primary_node, remote_node] * len(disk_info), instance_name)
9003 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9005 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9008 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9009 for i in range(disk_count)]):
9010 names.append(lv_prefix + "_data")
9011 names.append(lv_prefix + "_meta")
9012 for idx, disk in enumerate(disk_info):
9013 disk_index = idx + base_index
9014 data_vg = disk.get(constants.IDISK_VG, vgname)
9015 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9016 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9017 disk[constants.IDISK_SIZE],
9019 names[idx * 2:idx * 2 + 2],
9020 "disk/%d" % disk_index,
9021 minors[idx * 2], minors[idx * 2 + 1])
9022 disk_dev.mode = disk[constants.IDISK_MODE]
9023 disk_dev.name = disk.get(constants.IDISK_NAME, None)
9024 disks.append(disk_dev)
9027 raise errors.ProgrammerError("Wrong template configuration")
9029 if template_name == constants.DT_FILE:
9031 elif template_name == constants.DT_SHARED_FILE:
9032 _req_shr_file_storage()
9034 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9035 if name_prefix is None:
9038 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9039 (name_prefix, base_index + i)
9040 for i in range(disk_count)])
9042 if template_name == constants.DT_PLAIN:
9044 def logical_id_fn(idx, _, disk):
9045 vg = disk.get(constants.IDISK_VG, vgname)
9046 return (vg, names[idx])
9048 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9050 lambda _, disk_index, disk: (file_driver,
9051 "%s/disk%d" % (file_storage_dir,
9053 elif template_name == constants.DT_BLOCK:
9055 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9056 disk[constants.IDISK_ADOPT])
9057 elif template_name == constants.DT_RBD:
9058 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9059 elif template_name == constants.DT_EXT:
9060 def logical_id_fn(idx, _, disk):
9061 provider = disk.get(constants.IDISK_PROVIDER, None)
9062 if provider is None:
9063 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9064 " not found", constants.DT_EXT,
9065 constants.IDISK_PROVIDER)
9066 return (provider, names[idx])
9068 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9070 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9072 for idx, disk in enumerate(disk_info):
9074 # Only for the Ext template add disk_info to params
9075 if template_name == constants.DT_EXT:
9076 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9078 if key not in constants.IDISK_PARAMS:
9079 params[key] = disk[key]
9080 disk_index = idx + base_index
9081 size = disk[constants.IDISK_SIZE]
9082 feedback_fn("* disk %s, size %s" %
9083 (disk_index, utils.FormatUnit(size, "h")))
9084 disk_dev = objects.Disk(dev_type=dev_type, size=size,
9085 logical_id=logical_id_fn(idx, disk_index, disk),
9086 iv_name="disk/%d" % disk_index,
9087 mode=disk[constants.IDISK_MODE],
9089 disk_dev.name = disk.get(constants.IDISK_NAME, None)
9090 disk_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9091 disks.append(disk_dev)
9096 def _GetInstanceInfoText(instance):
9097 """Compute that text that should be added to the disk's metadata.
9100 return "originstname+%s" % instance.name
9103 def _CalcEta(time_taken, written, total_size):
9104 """Calculates the ETA based on size written and total size.
9106 @param time_taken: The time taken so far
9107 @param written: amount written so far
9108 @param total_size: The total size of data to be written
9109 @return: The remaining time in seconds
9112 avg_time = time_taken / float(written)
9113 return (total_size - written) * avg_time
9116 def _WipeDisks(lu, instance, disks=None):
9117 """Wipes instance disks.
9119 @type lu: L{LogicalUnit}
9120 @param lu: the logical unit on whose behalf we execute
9121 @type instance: L{objects.Instance}
9122 @param instance: the instance whose disks we should create
9123 @type disks: None or list of tuple of (number, L{objects.Disk}, number)
9124 @param disks: Disk details; tuple contains disk index, disk object and the
9128 node = instance.primary_node
9131 disks = [(idx, disk, 0)
9132 for (idx, disk) in enumerate(instance.disks)]
9134 for (_, device, _) in disks:
9135 lu.cfg.SetDiskID(device, node)
9137 logging.info("Pausing synchronization of disks of instance '%s'",
9139 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9140 (map(compat.snd, disks),
9143 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9145 for idx, success in enumerate(result.payload):
9147 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9148 " failed", idx, instance.name)
9151 for (idx, device, offset) in disks:
9152 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9153 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9155 int(min(constants.MAX_WIPE_CHUNK,
9156 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9160 start_time = time.time()
9165 info_text = (" (from %s to %s)" %
9166 (utils.FormatUnit(offset, "h"),
9167 utils.FormatUnit(size, "h")))
9169 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9171 logging.info("Wiping disk %d for instance %s on node %s using"
9172 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9174 while offset < size:
9175 wipe_size = min(wipe_chunk_size, size - offset)
9177 logging.debug("Wiping disk %d, offset %s, chunk %s",
9178 idx, offset, wipe_size)
9180 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9182 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9183 (idx, offset, wipe_size))
9187 if now - last_output >= 60:
9188 eta = _CalcEta(now - start_time, offset, size)
9189 lu.LogInfo(" - done: %.1f%% ETA: %s",
9190 offset / float(size) * 100, utils.FormatSeconds(eta))
9193 logging.info("Resuming synchronization of disks for instance '%s'",
9196 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9197 (map(compat.snd, disks),
9202 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9203 node, result.fail_msg)
9205 for idx, success in enumerate(result.payload):
9207 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9208 " failed", idx, instance.name)
9211 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9212 """Create all disks for an instance.
9214 This abstracts away some work from AddInstance.
9216 @type lu: L{LogicalUnit}
9217 @param lu: the logical unit on whose behalf we execute
9218 @type instance: L{objects.Instance}
9219 @param instance: the instance whose disks we should create
9221 @param to_skip: list of indices to skip
9222 @type target_node: string
9223 @param target_node: if passed, overrides the target node for creation
9225 @return: the success of the creation
9228 info = _GetInstanceInfoText(instance)
9229 if target_node is None:
9230 pnode = instance.primary_node
9231 all_nodes = instance.all_nodes
9236 if instance.disk_template in constants.DTS_FILEBASED:
9237 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9238 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9240 result.Raise("Failed to create directory '%s' on"
9241 " node %s" % (file_storage_dir, pnode))
9244 # Note: this needs to be kept in sync with adding of disks in
9245 # LUInstanceSetParams
9246 for idx, device in enumerate(instance.disks):
9247 if to_skip and idx in to_skip:
9249 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9251 for node in all_nodes:
9252 f_create = node == pnode
9254 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9255 disks_created.append((node, device))
9256 except errors.OpExecError:
9257 logging.warning("Creating disk %s for instance '%s' failed",
9259 except errors.DeviceCreationError, e:
9260 logging.warning("Creating disk %s for instance '%s' failed",
9262 disks_created.extend(e.created_devices)
9263 for (node, disk) in disks_created:
9264 lu.cfg.SetDiskID(disk, node)
9265 result = lu.rpc.call_blockdev_remove(node, disk)
9267 logging.warning("Failed to remove newly-created disk %s on node %s:"
9268 " %s", device, node, result.fail_msg)
9269 raise errors.OpExecError(e.message)
9272 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9273 """Remove all disks for an instance.
9275 This abstracts away some work from `AddInstance()` and
9276 `RemoveInstance()`. Note that in case some of the devices couldn't
9277 be removed, the removal will continue with the other ones.
9279 @type lu: L{LogicalUnit}
9280 @param lu: the logical unit on whose behalf we execute
9281 @type instance: L{objects.Instance}
9282 @param instance: the instance whose disks we should remove
9283 @type target_node: string
9284 @param target_node: used to override the node on which to remove the disks
9286 @return: the success of the removal
9289 logging.info("Removing block devices for instance %s", instance.name)
9292 ports_to_release = set()
9293 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9294 for (idx, device) in enumerate(anno_disks):
9296 edata = [(target_node, device)]
9298 edata = device.ComputeNodeTree(instance.primary_node)
9299 for node, disk in edata:
9300 lu.cfg.SetDiskID(disk, node)
9301 result = lu.rpc.call_blockdev_remove(node, disk)
9303 lu.LogWarning("Could not remove disk %s on node %s,"
9304 " continuing anyway: %s", idx, node, result.fail_msg)
9305 if not (result.offline and node != instance.primary_node):
9308 # if this is a DRBD disk, return its port to the pool
9309 if device.dev_type in constants.LDS_DRBD:
9310 ports_to_release.add(device.logical_id[2])
9312 if all_result or ignore_failures:
9313 for port in ports_to_release:
9314 lu.cfg.AddTcpUdpPort(port)
9316 if instance.disk_template in constants.DTS_FILEBASED:
9317 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9321 tgt = instance.primary_node
9322 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9324 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9325 file_storage_dir, instance.primary_node, result.fail_msg)
9331 def _ComputeDiskSizePerVG(disk_template, disks):
9332 """Compute disk size requirements in the volume group
9335 def _compute(disks, payload):
9336 """Universal algorithm.
9341 vgs[disk[constants.IDISK_VG]] = \
9342 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9346 # Required free disk space as a function of disk and swap space
9348 constants.DT_DISKLESS: {},
9349 constants.DT_PLAIN: _compute(disks, 0),
9350 # 128 MB are added for drbd metadata for each disk
9351 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9352 constants.DT_FILE: {},
9353 constants.DT_SHARED_FILE: {},
9356 if disk_template not in req_size_dict:
9357 raise errors.ProgrammerError("Disk template '%s' size requirement"
9358 " is unknown" % disk_template)
9360 return req_size_dict[disk_template]
9363 def _FilterVmNodes(lu, nodenames):
9364 """Filters out non-vm_capable nodes from a list.
9366 @type lu: L{LogicalUnit}
9367 @param lu: the logical unit for which we check
9368 @type nodenames: list
9369 @param nodenames: the list of nodes on which we should check
9371 @return: the list of vm-capable nodes
9374 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9375 return [name for name in nodenames if name not in vm_nodes]
9378 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9379 """Hypervisor parameter validation.
9381 This function abstract the hypervisor parameter validation to be
9382 used in both instance create and instance modify.
9384 @type lu: L{LogicalUnit}
9385 @param lu: the logical unit for which we check
9386 @type nodenames: list
9387 @param nodenames: the list of nodes on which we should check
9388 @type hvname: string
9389 @param hvname: the name of the hypervisor we should use
9390 @type hvparams: dict
9391 @param hvparams: the parameters which we need to check
9392 @raise errors.OpPrereqError: if the parameters are not valid
9395 nodenames = _FilterVmNodes(lu, nodenames)
9397 cluster = lu.cfg.GetClusterInfo()
9398 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9400 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9401 for node in nodenames:
9405 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9408 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9409 """OS parameters validation.
9411 @type lu: L{LogicalUnit}
9412 @param lu: the logical unit for which we check
9413 @type required: boolean
9414 @param required: whether the validation should fail if the OS is not
9416 @type nodenames: list
9417 @param nodenames: the list of nodes on which we should check
9418 @type osname: string
9419 @param osname: the name of the hypervisor we should use
9420 @type osparams: dict
9421 @param osparams: the parameters which we need to check
9422 @raise errors.OpPrereqError: if the parameters are not valid
9425 nodenames = _FilterVmNodes(lu, nodenames)
9426 result = lu.rpc.call_os_validate(nodenames, required, osname,
9427 [constants.OS_VALIDATE_PARAMETERS],
9429 for node, nres in result.items():
9430 # we don't check for offline cases since this should be run only
9431 # against the master node and/or an instance's nodes
9432 nres.Raise("OS Parameters validation failed on node %s" % node)
9433 if not nres.payload:
9434 lu.LogInfo("OS %s not found on node %s, validation skipped",
9438 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9439 """Wrapper around IAReqInstanceAlloc.
9441 @param op: The instance opcode
9442 @param disks: The computed disks
9443 @param nics: The computed nics
9444 @param beparams: The full filled beparams
9445 @param node_whitelist: List of nodes which should appear as online to the
9446 allocator (unless the node is already marked offline)
9448 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9451 spindle_use = beparams[constants.BE_SPINDLE_USE]
9452 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9453 disk_template=op.disk_template,
9456 vcpus=beparams[constants.BE_VCPUS],
9457 memory=beparams[constants.BE_MAXMEM],
9458 spindle_use=spindle_use,
9460 nics=[n.ToDict() for n in nics],
9461 hypervisor=op.hypervisor,
9462 node_whitelist=node_whitelist)
9465 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9466 """Computes the nics.
9468 @param op: The instance opcode
9469 @param cluster: Cluster configuration object
9470 @param default_ip: The default ip to assign
9471 @param cfg: An instance of the configuration object
9472 @param ec_id: Execution context ID
9474 @returns: The build up nics
9479 nic_mode_req = nic.get(constants.INIC_MODE, None)
9480 nic_mode = nic_mode_req
9481 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9482 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9484 net = nic.get(constants.INIC_NETWORK, None)
9485 link = nic.get(constants.NIC_LINK, None)
9486 ip = nic.get(constants.INIC_IP, None)
9488 if net is None or net.lower() == constants.VALUE_NONE:
9491 if nic_mode_req is not None or link is not None:
9492 raise errors.OpPrereqError("If network is given, no mode or link"
9493 " is allowed to be passed",
9496 # ip validity checks
9497 if ip is None or ip.lower() == constants.VALUE_NONE:
9499 elif ip.lower() == constants.VALUE_AUTO:
9500 if not op.name_check:
9501 raise errors.OpPrereqError("IP address set to auto but name checks"
9502 " have been skipped",
9506 # We defer pool operations until later, so that the iallocator has
9507 # filled in the instance's node(s) dimara
9508 if ip.lower() == constants.NIC_IP_POOL:
9510 raise errors.OpPrereqError("if ip=pool, parameter network"
9511 " must be passed too",
9514 elif not netutils.IPAddress.IsValid(ip):
9515 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9520 # TODO: check the ip address for uniqueness
9521 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9522 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9525 # MAC address verification
9526 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9527 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9528 mac = utils.NormalizeAndValidateMac(mac)
9531 # TODO: We need to factor this out
9532 cfg.ReserveMAC(mac, ec_id)
9533 except errors.ReservationError:
9534 raise errors.OpPrereqError("MAC address %s already in use"
9535 " in cluster" % mac,
9536 errors.ECODE_NOTUNIQUE)
9538 # Build nic parameters
9541 nicparams[constants.NIC_MODE] = nic_mode
9543 nicparams[constants.NIC_LINK] = link
9545 check_params = cluster.SimpleFillNIC(nicparams)
9546 objects.NIC.CheckParameterSyntax(check_params)
9547 net_uuid = cfg.LookupNetwork(net)
9548 name = nic.get(constants.INIC_NAME, None)
9549 if name is not None and name.lower() == constants.VALUE_NONE:
9551 nic_obj = objects.NIC(mac=mac, ip=nic_ip, name=name,
9552 network=net_uuid, nicparams=nicparams)
9553 nic_obj.uuid = cfg.GenerateUniqueID(ec_id)
9554 nics.append(nic_obj)
9559 def _ComputeDisks(op, default_vg):
9560 """Computes the instance disks.
9562 @param op: The instance opcode
9563 @param default_vg: The default_vg to assume
9565 @return: The computed disks
9569 for disk in op.disks:
9570 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9571 if mode not in constants.DISK_ACCESS_SET:
9572 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9573 mode, errors.ECODE_INVAL)
9574 size = disk.get(constants.IDISK_SIZE, None)
9576 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9579 except (TypeError, ValueError):
9580 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9583 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9584 if ext_provider and op.disk_template != constants.DT_EXT:
9585 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9586 " disk template, not %s" %
9587 (constants.IDISK_PROVIDER, constants.DT_EXT,
9588 op.disk_template), errors.ECODE_INVAL)
9590 data_vg = disk.get(constants.IDISK_VG, default_vg)
9591 name = disk.get(constants.IDISK_NAME, None)
9592 if name is not None and name.lower() == constants.VALUE_NONE:
9595 constants.IDISK_SIZE: size,
9596 constants.IDISK_MODE: mode,
9597 constants.IDISK_VG: data_vg,
9598 constants.IDISK_NAME: name,
9601 if constants.IDISK_METAVG in disk:
9602 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9603 if constants.IDISK_ADOPT in disk:
9604 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9606 # For extstorage, demand the `provider' option and add any
9607 # additional parameters (ext-params) to the dict
9608 if op.disk_template == constants.DT_EXT:
9610 new_disk[constants.IDISK_PROVIDER] = ext_provider
9612 if key not in constants.IDISK_PARAMS:
9613 new_disk[key] = disk[key]
9615 raise errors.OpPrereqError("Missing provider for template '%s'" %
9616 constants.DT_EXT, errors.ECODE_INVAL)
9618 disks.append(new_disk)
9623 def _ComputeFullBeParams(op, cluster):
9624 """Computes the full beparams.
9626 @param op: The instance opcode
9627 @param cluster: The cluster config object
9629 @return: The fully filled beparams
9632 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9633 for param, value in op.beparams.iteritems():
9634 if value == constants.VALUE_AUTO:
9635 op.beparams[param] = default_beparams[param]
9636 objects.UpgradeBeParams(op.beparams)
9637 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9638 return cluster.SimpleFillBE(op.beparams)
9641 def _CheckOpportunisticLocking(op):
9642 """Generate error if opportunistic locking is not possible.
9645 if op.opportunistic_locking and not op.iallocator:
9646 raise errors.OpPrereqError("Opportunistic locking is only available in"
9647 " combination with an instance allocator",
9651 class LUInstanceCreate(LogicalUnit):
9652 """Create an instance.
9655 HPATH = "instance-add"
9656 HTYPE = constants.HTYPE_INSTANCE
9659 def CheckArguments(self):
9663 # do not require name_check to ease forward/backward compatibility
9665 if self.op.no_install and self.op.start:
9666 self.LogInfo("No-installation mode selected, disabling startup")
9667 self.op.start = False
9668 # validate/normalize the instance name
9669 self.op.instance_name = \
9670 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9672 if self.op.ip_check and not self.op.name_check:
9673 # TODO: make the ip check more flexible and not depend on the name check
9674 raise errors.OpPrereqError("Cannot do IP address check without a name"
9675 " check", errors.ECODE_INVAL)
9677 # check nics' parameter names
9678 for nic in self.op.nics:
9679 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9680 # check that NIC's parameters names are unique and valid
9681 utils.ValidateDeviceNames("NIC", self.op.nics)
9683 # check that disk's names are unique and valid
9684 utils.ValidateDeviceNames("disk", self.op.disks)
9686 cluster = self.cfg.GetClusterInfo()
9687 if not self.op.disk_template in cluster.enabled_disk_templates:
9688 raise errors.OpPrereqError("Cannot create an instance with disk template"
9689 " '%s', because it is not enabled in the"
9690 " cluster. Enabled disk templates are: %s." %
9691 (self.op.disk_template,
9692 ",".join(cluster.enabled_disk_templates)))
9694 # check disks. parameter names and consistent adopt/no-adopt strategy
9695 has_adopt = has_no_adopt = False
9696 for disk in self.op.disks:
9697 if self.op.disk_template != constants.DT_EXT:
9698 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9699 if constants.IDISK_ADOPT in disk:
9703 if has_adopt and has_no_adopt:
9704 raise errors.OpPrereqError("Either all disks are adopted or none is",
9707 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9708 raise errors.OpPrereqError("Disk adoption is not supported for the"
9709 " '%s' disk template" %
9710 self.op.disk_template,
9712 if self.op.iallocator is not None:
9713 raise errors.OpPrereqError("Disk adoption not allowed with an"
9714 " iallocator script", errors.ECODE_INVAL)
9715 if self.op.mode == constants.INSTANCE_IMPORT:
9716 raise errors.OpPrereqError("Disk adoption not allowed for"
9717 " instance import", errors.ECODE_INVAL)
9719 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9720 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9721 " but no 'adopt' parameter given" %
9722 self.op.disk_template,
9725 self.adopt_disks = has_adopt
9727 # instance name verification
9728 if self.op.name_check:
9729 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9730 self.op.instance_name = self.hostname1.name
9731 # used in CheckPrereq for ip ping check
9732 self.check_ip = self.hostname1.ip
9734 self.check_ip = None
9736 # file storage checks
9737 if (self.op.file_driver and
9738 not self.op.file_driver in constants.FILE_DRIVER):
9739 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9740 self.op.file_driver, errors.ECODE_INVAL)
9742 if self.op.disk_template == constants.DT_FILE:
9743 opcodes.RequireFileStorage()
9744 elif self.op.disk_template == constants.DT_SHARED_FILE:
9745 opcodes.RequireSharedFileStorage()
9747 ### Node/iallocator related checks
9748 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9750 if self.op.pnode is not None:
9751 if self.op.disk_template in constants.DTS_INT_MIRROR:
9752 if self.op.snode is None:
9753 raise errors.OpPrereqError("The networked disk templates need"
9754 " a mirror node", errors.ECODE_INVAL)
9756 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9758 self.op.snode = None
9760 _CheckOpportunisticLocking(self.op)
9762 self._cds = _GetClusterDomainSecret()
9764 if self.op.mode == constants.INSTANCE_IMPORT:
9765 # On import force_variant must be True, because if we forced it at
9766 # initial install, our only chance when importing it back is that it
9768 self.op.force_variant = True
9770 if self.op.no_install:
9771 self.LogInfo("No-installation mode has no effect during import")
9773 elif self.op.mode == constants.INSTANCE_CREATE:
9774 if self.op.os_type is None:
9775 raise errors.OpPrereqError("No guest OS specified",
9777 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9778 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9779 " installation" % self.op.os_type,
9781 if self.op.disk_template is None:
9782 raise errors.OpPrereqError("No disk template specified",
9785 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9786 # Check handshake to ensure both clusters have the same domain secret
9787 src_handshake = self.op.source_handshake
9788 if not src_handshake:
9789 raise errors.OpPrereqError("Missing source handshake",
9792 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9795 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9798 # Load and check source CA
9799 self.source_x509_ca_pem = self.op.source_x509_ca
9800 if not self.source_x509_ca_pem:
9801 raise errors.OpPrereqError("Missing source X509 CA",
9805 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9807 except OpenSSL.crypto.Error, err:
9808 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9809 (err, ), errors.ECODE_INVAL)
9811 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9812 if errcode is not None:
9813 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9816 self.source_x509_ca = cert
9818 src_instance_name = self.op.source_instance_name
9819 if not src_instance_name:
9820 raise errors.OpPrereqError("Missing source instance name",
9823 self.source_instance_name = \
9824 netutils.GetHostname(name=src_instance_name).name
9827 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9828 self.op.mode, errors.ECODE_INVAL)
9830 def ExpandNames(self):
9831 """ExpandNames for CreateInstance.
9833 Figure out the right locks for instance creation.
9836 self.needed_locks = {}
9838 instance_name = self.op.instance_name
9839 # this is just a preventive check, but someone might still add this
9840 # instance in the meantime, and creation will fail at lock-add time
9841 if instance_name in self.cfg.GetInstanceList():
9842 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9843 instance_name, errors.ECODE_EXISTS)
9845 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9847 if self.op.iallocator:
9848 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9849 # specifying a group on instance creation and then selecting nodes from
9851 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9852 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9854 if self.op.opportunistic_locking:
9855 self.opportunistic_locks[locking.LEVEL_NODE] = True
9856 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
9858 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9859 nodelist = [self.op.pnode]
9860 if self.op.snode is not None:
9861 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9862 nodelist.append(self.op.snode)
9863 self.needed_locks[locking.LEVEL_NODE] = nodelist
9865 # in case of import lock the source node too
9866 if self.op.mode == constants.INSTANCE_IMPORT:
9867 src_node = self.op.src_node
9868 src_path = self.op.src_path
9870 if src_path is None:
9871 self.op.src_path = src_path = self.op.instance_name
9873 if src_node is None:
9874 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9875 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9876 self.op.src_node = None
9877 if os.path.isabs(src_path):
9878 raise errors.OpPrereqError("Importing an instance from a path"
9879 " requires a source node option",
9882 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9883 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9884 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9885 if not os.path.isabs(src_path):
9886 self.op.src_path = src_path = \
9887 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9889 self.needed_locks[locking.LEVEL_NODE_RES] = \
9890 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9892 def _RunAllocator(self):
9893 """Run the allocator based on input opcode.
9896 if self.op.opportunistic_locking:
9897 # Only consider nodes for which a lock is held
9898 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
9900 node_whitelist = None
9902 #TODO Export network to iallocator so that it chooses a pnode
9903 # in a nodegroup that has the desired network connected to
9904 req = _CreateInstanceAllocRequest(self.op, self.disks,
9905 self.nics, self.be_full,
9907 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9909 ial.Run(self.op.iallocator)
9912 # When opportunistic locks are used only a temporary failure is generated
9913 if self.op.opportunistic_locking:
9914 ecode = errors.ECODE_TEMP_NORES
9916 ecode = errors.ECODE_NORES
9918 raise errors.OpPrereqError("Can't compute nodes using"
9919 " iallocator '%s': %s" %
9920 (self.op.iallocator, ial.info),
9923 self.op.pnode = ial.result[0]
9924 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9925 self.op.instance_name, self.op.iallocator,
9926 utils.CommaJoin(ial.result))
9928 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9930 if req.RequiredNodes() == 2:
9931 self.op.snode = ial.result[1]
9933 def BuildHooksEnv(self):
9936 This runs on master, primary and secondary nodes of the instance.
9940 "ADD_MODE": self.op.mode,
9942 if self.op.mode == constants.INSTANCE_IMPORT:
9943 env["SRC_NODE"] = self.op.src_node
9944 env["SRC_PATH"] = self.op.src_path
9945 env["SRC_IMAGES"] = self.src_images
9947 env.update(_BuildInstanceHookEnv(
9948 name=self.op.instance_name,
9949 primary_node=self.op.pnode,
9950 secondary_nodes=self.secondaries,
9951 status=self.op.start,
9952 os_type=self.op.os_type,
9953 minmem=self.be_full[constants.BE_MINMEM],
9954 maxmem=self.be_full[constants.BE_MAXMEM],
9955 vcpus=self.be_full[constants.BE_VCPUS],
9956 nics=_NICListToTuple(self, self.nics),
9957 disk_template=self.op.disk_template,
9958 disks=[(d[constants.IDISK_NAME], d[constants.IDISK_SIZE],
9959 d[constants.IDISK_MODE]) for d in self.disks],
9962 hypervisor_name=self.op.hypervisor,
9968 def BuildHooksNodes(self):
9969 """Build hooks nodes.
9972 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9975 def _ReadExportInfo(self):
9976 """Reads the export information from disk.
9978 It will override the opcode source node and path with the actual
9979 information, if these two were not specified before.
9981 @return: the export information
9984 assert self.op.mode == constants.INSTANCE_IMPORT
9986 src_node = self.op.src_node
9987 src_path = self.op.src_path
9989 if src_node is None:
9990 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9991 exp_list = self.rpc.call_export_list(locked_nodes)
9993 for node in exp_list:
9994 if exp_list[node].fail_msg:
9996 if src_path in exp_list[node].payload:
9998 self.op.src_node = src_node = node
9999 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10003 raise errors.OpPrereqError("No export found for relative path %s" %
10004 src_path, errors.ECODE_INVAL)
10006 _CheckNodeOnline(self, src_node)
10007 result = self.rpc.call_export_info(src_node, src_path)
10008 result.Raise("No export or invalid export found in dir %s" % src_path)
10010 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10011 if not export_info.has_section(constants.INISECT_EXP):
10012 raise errors.ProgrammerError("Corrupted export config",
10013 errors.ECODE_ENVIRON)
10015 ei_version = export_info.get(constants.INISECT_EXP, "version")
10016 if (int(ei_version) != constants.EXPORT_VERSION):
10017 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10018 (ei_version, constants.EXPORT_VERSION),
10019 errors.ECODE_ENVIRON)
10022 def _ReadExportParams(self, einfo):
10023 """Use export parameters as defaults.
10025 In case the opcode doesn't specify (as in override) some instance
10026 parameters, then try to use them from the export information, if
10027 that declares them.
10030 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10032 if self.op.disk_template is None:
10033 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10034 self.op.disk_template = einfo.get(constants.INISECT_INS,
10036 if self.op.disk_template not in constants.DISK_TEMPLATES:
10037 raise errors.OpPrereqError("Disk template specified in configuration"
10038 " file is not one of the allowed values:"
10040 " ".join(constants.DISK_TEMPLATES),
10041 errors.ECODE_INVAL)
10043 raise errors.OpPrereqError("No disk template specified and the export"
10044 " is missing the disk_template information",
10045 errors.ECODE_INVAL)
10047 if not self.op.disks:
10049 # TODO: import the disk iv_name too
10050 for idx in range(constants.MAX_DISKS):
10051 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10052 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10053 disks.append({constants.IDISK_SIZE: disk_sz})
10054 self.op.disks = disks
10055 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10056 raise errors.OpPrereqError("No disk info specified and the export"
10057 " is missing the disk information",
10058 errors.ECODE_INVAL)
10060 if not self.op.nics:
10062 for idx in range(constants.MAX_NICS):
10063 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10065 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10066 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10071 self.op.nics = nics
10073 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10074 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10076 if (self.op.hypervisor is None and
10077 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10078 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10080 if einfo.has_section(constants.INISECT_HYP):
10081 # use the export parameters but do not override the ones
10082 # specified by the user
10083 for name, value in einfo.items(constants.INISECT_HYP):
10084 if name not in self.op.hvparams:
10085 self.op.hvparams[name] = value
10087 if einfo.has_section(constants.INISECT_BEP):
10088 # use the parameters, without overriding
10089 for name, value in einfo.items(constants.INISECT_BEP):
10090 if name not in self.op.beparams:
10091 self.op.beparams[name] = value
10092 # Compatibility for the old "memory" be param
10093 if name == constants.BE_MEMORY:
10094 if constants.BE_MAXMEM not in self.op.beparams:
10095 self.op.beparams[constants.BE_MAXMEM] = value
10096 if constants.BE_MINMEM not in self.op.beparams:
10097 self.op.beparams[constants.BE_MINMEM] = value
10099 # try to read the parameters old style, from the main section
10100 for name in constants.BES_PARAMETERS:
10101 if (name not in self.op.beparams and
10102 einfo.has_option(constants.INISECT_INS, name)):
10103 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10105 if einfo.has_section(constants.INISECT_OSP):
10106 # use the parameters, without overriding
10107 for name, value in einfo.items(constants.INISECT_OSP):
10108 if name not in self.op.osparams:
10109 self.op.osparams[name] = value
10111 def _RevertToDefaults(self, cluster):
10112 """Revert the instance parameters to the default values.
10116 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10117 for name in self.op.hvparams.keys():
10118 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10119 del self.op.hvparams[name]
10121 be_defs = cluster.SimpleFillBE({})
10122 for name in self.op.beparams.keys():
10123 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10124 del self.op.beparams[name]
10126 nic_defs = cluster.SimpleFillNIC({})
10127 for nic in self.op.nics:
10128 for name in constants.NICS_PARAMETERS:
10129 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10132 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10133 for name in self.op.osparams.keys():
10134 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10135 del self.op.osparams[name]
10137 def _CalculateFileStorageDir(self):
10138 """Calculate final instance file storage dir.
10141 # file storage dir calculation/check
10142 self.instance_file_storage_dir = None
10143 if self.op.disk_template in constants.DTS_FILEBASED:
10144 # build the full file storage dir path
10147 if self.op.disk_template == constants.DT_SHARED_FILE:
10148 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10150 get_fsd_fn = self.cfg.GetFileStorageDir
10152 cfg_storagedir = get_fsd_fn()
10153 if not cfg_storagedir:
10154 raise errors.OpPrereqError("Cluster file storage dir not defined",
10155 errors.ECODE_STATE)
10156 joinargs.append(cfg_storagedir)
10158 if self.op.file_storage_dir is not None:
10159 joinargs.append(self.op.file_storage_dir)
10161 joinargs.append(self.op.instance_name)
10163 # pylint: disable=W0142
10164 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10166 def CheckPrereq(self): # pylint: disable=R0914
10167 """Check prerequisites.
10170 self._CalculateFileStorageDir()
10172 if self.op.mode == constants.INSTANCE_IMPORT:
10173 export_info = self._ReadExportInfo()
10174 self._ReadExportParams(export_info)
10175 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10177 self._old_instance_name = None
10179 if (not self.cfg.GetVGName() and
10180 self.op.disk_template not in constants.DTS_NOT_LVM):
10181 raise errors.OpPrereqError("Cluster does not support lvm-based"
10182 " instances", errors.ECODE_STATE)
10184 if (self.op.hypervisor is None or
10185 self.op.hypervisor == constants.VALUE_AUTO):
10186 self.op.hypervisor = self.cfg.GetHypervisorType()
10188 cluster = self.cfg.GetClusterInfo()
10189 enabled_hvs = cluster.enabled_hypervisors
10190 if self.op.hypervisor not in enabled_hvs:
10191 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10193 (self.op.hypervisor, ",".join(enabled_hvs)),
10194 errors.ECODE_STATE)
10196 # Check tag validity
10197 for tag in self.op.tags:
10198 objects.TaggableObject.ValidateTag(tag)
10200 # check hypervisor parameter syntax (locally)
10201 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10202 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10204 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10205 hv_type.CheckParameterSyntax(filled_hvp)
10206 self.hv_full = filled_hvp
10207 # check that we don't specify global parameters on an instance
10208 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10209 "instance", "cluster")
10211 # fill and remember the beparams dict
10212 self.be_full = _ComputeFullBeParams(self.op, cluster)
10214 # build os parameters
10215 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10217 # now that hvp/bep are in final format, let's reset to defaults,
10219 if self.op.identify_defaults:
10220 self._RevertToDefaults(cluster)
10223 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10224 self.proc.GetECId())
10226 # disk checks/pre-build
10227 default_vg = self.cfg.GetVGName()
10228 self.disks = _ComputeDisks(self.op, default_vg)
10230 if self.op.mode == constants.INSTANCE_IMPORT:
10232 for idx in range(len(self.disks)):
10233 option = "disk%d_dump" % idx
10234 if export_info.has_option(constants.INISECT_INS, option):
10235 # FIXME: are the old os-es, disk sizes, etc. useful?
10236 export_name = export_info.get(constants.INISECT_INS, option)
10237 image = utils.PathJoin(self.op.src_path, export_name)
10238 disk_images.append(image)
10240 disk_images.append(False)
10242 self.src_images = disk_images
10244 if self.op.instance_name == self._old_instance_name:
10245 for idx, nic in enumerate(self.nics):
10246 if nic.mac == constants.VALUE_AUTO:
10247 nic_mac_ini = "nic%d_mac" % idx
10248 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10250 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10252 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10253 if self.op.ip_check:
10254 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10255 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10256 (self.check_ip, self.op.instance_name),
10257 errors.ECODE_NOTUNIQUE)
10259 #### mac address generation
10260 # By generating here the mac address both the allocator and the hooks get
10261 # the real final mac address rather than the 'auto' or 'generate' value.
10262 # There is a race condition between the generation and the instance object
10263 # creation, which means that we know the mac is valid now, but we're not
10264 # sure it will be when we actually add the instance. If things go bad
10265 # adding the instance will abort because of a duplicate mac, and the
10266 # creation job will fail.
10267 for nic in self.nics:
10268 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10269 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10273 if self.op.iallocator is not None:
10274 self._RunAllocator()
10276 # Release all unneeded node locks
10277 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10278 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10279 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10280 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10282 assert (self.owned_locks(locking.LEVEL_NODE) ==
10283 self.owned_locks(locking.LEVEL_NODE_RES)), \
10284 "Node locks differ from node resource locks"
10286 #### node related checks
10288 # check primary node
10289 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10290 assert self.pnode is not None, \
10291 "Cannot retrieve locked node %s" % self.op.pnode
10293 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10294 pnode.name, errors.ECODE_STATE)
10296 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10297 pnode.name, errors.ECODE_STATE)
10298 if not pnode.vm_capable:
10299 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10300 " '%s'" % pnode.name, errors.ECODE_STATE)
10302 self.secondaries = []
10304 # Fill in any IPs from IP pools. This must happen here, because we need to
10305 # know the nic's primary node, as specified by the iallocator
10306 for idx, nic in enumerate(self.nics):
10307 net_uuid = nic.network
10308 if net_uuid is not None:
10309 nobj = self.cfg.GetNetwork(net_uuid)
10310 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10311 if netparams is None:
10312 raise errors.OpPrereqError("No netparams found for network"
10313 " %s. Propably not connected to"
10314 " node's %s nodegroup" %
10315 (nobj.name, self.pnode.name),
10316 errors.ECODE_INVAL)
10317 self.LogInfo("NIC/%d inherits netparams %s" %
10318 (idx, netparams.values()))
10319 nic.nicparams = dict(netparams)
10320 if nic.ip is not None:
10321 if nic.ip.lower() == constants.NIC_IP_POOL:
10323 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10324 except errors.ReservationError:
10325 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10326 " from the address pool" % idx,
10327 errors.ECODE_STATE)
10328 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10331 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10332 except errors.ReservationError:
10333 raise errors.OpPrereqError("IP address %s already in use"
10334 " or does not belong to network %s" %
10335 (nic.ip, nobj.name),
10336 errors.ECODE_NOTUNIQUE)
10338 # net is None, ip None or given
10339 elif self.op.conflicts_check:
10340 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10342 # mirror node verification
10343 if self.op.disk_template in constants.DTS_INT_MIRROR:
10344 if self.op.snode == pnode.name:
10345 raise errors.OpPrereqError("The secondary node cannot be the"
10346 " primary node", errors.ECODE_INVAL)
10347 _CheckNodeOnline(self, self.op.snode)
10348 _CheckNodeNotDrained(self, self.op.snode)
10349 _CheckNodeVmCapable(self, self.op.snode)
10350 self.secondaries.append(self.op.snode)
10352 snode = self.cfg.GetNodeInfo(self.op.snode)
10353 if pnode.group != snode.group:
10354 self.LogWarning("The primary and secondary nodes are in two"
10355 " different node groups; the disk parameters"
10356 " from the first disk's node group will be"
10359 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10361 if self.op.disk_template in constants.DTS_INT_MIRROR:
10362 nodes.append(snode)
10363 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10364 if compat.any(map(has_es, nodes)):
10365 raise errors.OpPrereqError("Disk template %s not supported with"
10366 " exclusive storage" % self.op.disk_template,
10367 errors.ECODE_STATE)
10369 nodenames = [pnode.name] + self.secondaries
10371 if not self.adopt_disks:
10372 if self.op.disk_template == constants.DT_RBD:
10373 # _CheckRADOSFreeSpace() is just a placeholder.
10374 # Any function that checks prerequisites can be placed here.
10375 # Check if there is enough space on the RADOS cluster.
10376 _CheckRADOSFreeSpace()
10377 elif self.op.disk_template == constants.DT_EXT:
10378 # FIXME: Function that checks prereqs if needed
10381 # Check lv size requirements, if not adopting
10382 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10383 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10385 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10386 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10387 disk[constants.IDISK_ADOPT])
10388 for disk in self.disks])
10389 if len(all_lvs) != len(self.disks):
10390 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10391 errors.ECODE_INVAL)
10392 for lv_name in all_lvs:
10394 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10395 # to ReserveLV uses the same syntax
10396 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10397 except errors.ReservationError:
10398 raise errors.OpPrereqError("LV named %s used by another instance" %
10399 lv_name, errors.ECODE_NOTUNIQUE)
10401 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10402 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10404 node_lvs = self.rpc.call_lv_list([pnode.name],
10405 vg_names.payload.keys())[pnode.name]
10406 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10407 node_lvs = node_lvs.payload
10409 delta = all_lvs.difference(node_lvs.keys())
10411 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10412 utils.CommaJoin(delta),
10413 errors.ECODE_INVAL)
10414 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10416 raise errors.OpPrereqError("Online logical volumes found, cannot"
10417 " adopt: %s" % utils.CommaJoin(online_lvs),
10418 errors.ECODE_STATE)
10419 # update the size of disk based on what is found
10420 for dsk in self.disks:
10421 dsk[constants.IDISK_SIZE] = \
10422 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10423 dsk[constants.IDISK_ADOPT])][0]))
10425 elif self.op.disk_template == constants.DT_BLOCK:
10426 # Normalize and de-duplicate device paths
10427 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10428 for disk in self.disks])
10429 if len(all_disks) != len(self.disks):
10430 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10431 errors.ECODE_INVAL)
10432 baddisks = [d for d in all_disks
10433 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10435 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10436 " cannot be adopted" %
10437 (utils.CommaJoin(baddisks),
10438 constants.ADOPTABLE_BLOCKDEV_ROOT),
10439 errors.ECODE_INVAL)
10441 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10442 list(all_disks))[pnode.name]
10443 node_disks.Raise("Cannot get block device information from node %s" %
10445 node_disks = node_disks.payload
10446 delta = all_disks.difference(node_disks.keys())
10448 raise errors.OpPrereqError("Missing block device(s): %s" %
10449 utils.CommaJoin(delta),
10450 errors.ECODE_INVAL)
10451 for dsk in self.disks:
10452 dsk[constants.IDISK_SIZE] = \
10453 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10455 # Verify instance specs
10456 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10458 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10459 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10460 constants.ISPEC_DISK_COUNT: len(self.disks),
10461 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10462 for disk in self.disks],
10463 constants.ISPEC_NIC_COUNT: len(self.nics),
10464 constants.ISPEC_SPINDLE_USE: spindle_use,
10467 group_info = self.cfg.GetNodeGroup(pnode.group)
10468 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10469 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
10470 self.op.disk_template)
10471 if not self.op.ignore_ipolicy and res:
10472 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10473 (pnode.group, group_info.name, utils.CommaJoin(res)))
10474 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10476 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10478 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10479 # check OS parameters (remotely)
10480 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10482 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10484 #TODO: _CheckExtParams (remotely)
10485 # Check parameters for extstorage
10487 # memory check on primary node
10488 #TODO(dynmem): use MINMEM for checking
10490 _CheckNodeFreeMemory(self, self.pnode.name,
10491 "creating instance %s" % self.op.instance_name,
10492 self.be_full[constants.BE_MAXMEM],
10493 self.op.hypervisor)
10495 self.dry_run_result = list(nodenames)
10497 def Exec(self, feedback_fn):
10498 """Create and add the instance to the cluster.
10501 instance = self.op.instance_name
10502 pnode_name = self.pnode.name
10504 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10505 self.owned_locks(locking.LEVEL_NODE)), \
10506 "Node locks differ from node resource locks"
10507 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10509 ht_kind = self.op.hypervisor
10510 if ht_kind in constants.HTS_REQ_PORT:
10511 network_port = self.cfg.AllocatePort()
10513 network_port = None
10515 # This is ugly but we got a chicken-egg problem here
10516 # We can only take the group disk parameters, as the instance
10517 # has no disks yet (we are generating them right here).
10518 node = self.cfg.GetNodeInfo(pnode_name)
10519 nodegroup = self.cfg.GetNodeGroup(node.group)
10520 disks = _GenerateDiskTemplate(self,
10521 self.op.disk_template,
10522 instance, pnode_name,
10525 self.instance_file_storage_dir,
10526 self.op.file_driver,
10529 self.cfg.GetGroupDiskParams(nodegroup))
10531 iobj = objects.Instance(name=instance, os=self.op.os_type,
10532 primary_node=pnode_name,
10533 nics=self.nics, disks=disks,
10534 disk_template=self.op.disk_template,
10535 admin_state=constants.ADMINST_DOWN,
10536 network_port=network_port,
10537 beparams=self.op.beparams,
10538 hvparams=self.op.hvparams,
10539 hypervisor=self.op.hypervisor,
10540 osparams=self.op.osparams,
10544 for tag in self.op.tags:
10547 if self.adopt_disks:
10548 if self.op.disk_template == constants.DT_PLAIN:
10549 # rename LVs to the newly-generated names; we need to construct
10550 # 'fake' LV disks with the old data, plus the new unique_id
10551 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10553 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10554 rename_to.append(t_dsk.logical_id)
10555 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10556 self.cfg.SetDiskID(t_dsk, pnode_name)
10557 result = self.rpc.call_blockdev_rename(pnode_name,
10558 zip(tmp_disks, rename_to))
10559 result.Raise("Failed to rename adoped LVs")
10561 feedback_fn("* creating instance disks...")
10563 _CreateDisks(self, iobj)
10564 except errors.OpExecError:
10565 self.LogWarning("Device creation failed")
10566 self.cfg.ReleaseDRBDMinors(instance)
10569 feedback_fn("adding instance %s to cluster config" % instance)
10571 self.cfg.AddInstance(iobj, self.proc.GetECId())
10573 # Declare that we don't want to remove the instance lock anymore, as we've
10574 # added the instance to the config
10575 del self.remove_locks[locking.LEVEL_INSTANCE]
10577 if self.op.mode == constants.INSTANCE_IMPORT:
10578 # Release unused nodes
10579 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10581 # Release all nodes
10582 _ReleaseLocks(self, locking.LEVEL_NODE)
10585 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10586 feedback_fn("* wiping instance disks...")
10588 _WipeDisks(self, iobj)
10589 except errors.OpExecError, err:
10590 logging.exception("Wiping disks failed")
10591 self.LogWarning("Wiping instance disks failed (%s)", err)
10595 # Something is already wrong with the disks, don't do anything else
10597 elif self.op.wait_for_sync:
10598 disk_abort = not _WaitForSync(self, iobj)
10599 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10600 # make sure the disks are not degraded (still sync-ing is ok)
10601 feedback_fn("* checking mirrors status")
10602 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10607 _RemoveDisks(self, iobj)
10608 self.cfg.RemoveInstance(iobj.name)
10609 # Make sure the instance lock gets removed
10610 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10611 raise errors.OpExecError("There are some degraded disks for"
10614 # Release all node resource locks
10615 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10617 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10618 # we need to set the disks ID to the primary node, since the
10619 # preceding code might or might have not done it, depending on
10620 # disk template and other options
10621 for disk in iobj.disks:
10622 self.cfg.SetDiskID(disk, pnode_name)
10623 if self.op.mode == constants.INSTANCE_CREATE:
10624 if not self.op.no_install:
10625 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10626 not self.op.wait_for_sync)
10628 feedback_fn("* pausing disk sync to install instance OS")
10629 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10632 for idx, success in enumerate(result.payload):
10634 logging.warn("pause-sync of instance %s for disk %d failed",
10637 feedback_fn("* running the instance OS create scripts...")
10638 # FIXME: pass debug option from opcode to backend
10640 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10641 self.op.debug_level)
10643 feedback_fn("* resuming disk sync")
10644 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10647 for idx, success in enumerate(result.payload):
10649 logging.warn("resume-sync of instance %s for disk %d failed",
10652 os_add_result.Raise("Could not add os for instance %s"
10653 " on node %s" % (instance, pnode_name))
10656 if self.op.mode == constants.INSTANCE_IMPORT:
10657 feedback_fn("* running the instance OS import scripts...")
10661 for idx, image in enumerate(self.src_images):
10665 # FIXME: pass debug option from opcode to backend
10666 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10667 constants.IEIO_FILE, (image, ),
10668 constants.IEIO_SCRIPT,
10669 (iobj.disks[idx], idx),
10671 transfers.append(dt)
10674 masterd.instance.TransferInstanceData(self, feedback_fn,
10675 self.op.src_node, pnode_name,
10676 self.pnode.secondary_ip,
10678 if not compat.all(import_result):
10679 self.LogWarning("Some disks for instance %s on node %s were not"
10680 " imported successfully" % (instance, pnode_name))
10682 rename_from = self._old_instance_name
10684 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10685 feedback_fn("* preparing remote import...")
10686 # The source cluster will stop the instance before attempting to make
10687 # a connection. In some cases stopping an instance can take a long
10688 # time, hence the shutdown timeout is added to the connection
10690 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10691 self.op.source_shutdown_timeout)
10692 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10694 assert iobj.primary_node == self.pnode.name
10696 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10697 self.source_x509_ca,
10698 self._cds, timeouts)
10699 if not compat.all(disk_results):
10700 # TODO: Should the instance still be started, even if some disks
10701 # failed to import (valid for local imports, too)?
10702 self.LogWarning("Some disks for instance %s on node %s were not"
10703 " imported successfully" % (instance, pnode_name))
10705 rename_from = self.source_instance_name
10708 # also checked in the prereq part
10709 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10712 # Run rename script on newly imported instance
10713 assert iobj.name == instance
10714 feedback_fn("Running rename script for %s" % instance)
10715 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10717 self.op.debug_level)
10718 if result.fail_msg:
10719 self.LogWarning("Failed to run rename script for %s on node"
10720 " %s: %s" % (instance, pnode_name, result.fail_msg))
10722 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10725 iobj.admin_state = constants.ADMINST_UP
10726 self.cfg.Update(iobj, feedback_fn)
10727 logging.info("Starting instance %s on node %s", instance, pnode_name)
10728 feedback_fn("* starting instance...")
10729 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10730 False, self.op.reason)
10731 result.Raise("Could not start instance")
10733 return list(iobj.all_nodes)
10736 class LUInstanceMultiAlloc(NoHooksLU):
10737 """Allocates multiple instances at the same time.
10742 def CheckArguments(self):
10743 """Check arguments.
10747 for inst in self.op.instances:
10748 if inst.iallocator is not None:
10749 raise errors.OpPrereqError("iallocator are not allowed to be set on"
10750 " instance objects", errors.ECODE_INVAL)
10751 nodes.append(bool(inst.pnode))
10752 if inst.disk_template in constants.DTS_INT_MIRROR:
10753 nodes.append(bool(inst.snode))
10755 has_nodes = compat.any(nodes)
10756 if compat.all(nodes) ^ has_nodes:
10757 raise errors.OpPrereqError("There are instance objects providing"
10758 " pnode/snode while others do not",
10759 errors.ECODE_INVAL)
10761 if self.op.iallocator is None:
10762 default_iallocator = self.cfg.GetDefaultIAllocator()
10763 if default_iallocator and has_nodes:
10764 self.op.iallocator = default_iallocator
10766 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10767 " given and no cluster-wide default"
10768 " iallocator found; please specify either"
10769 " an iallocator or nodes on the instances"
10770 " or set a cluster-wide default iallocator",
10771 errors.ECODE_INVAL)
10773 _CheckOpportunisticLocking(self.op)
10775 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10777 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10778 utils.CommaJoin(dups), errors.ECODE_INVAL)
10780 def ExpandNames(self):
10781 """Calculate the locks.
10784 self.share_locks = _ShareAll()
10785 self.needed_locks = {
10786 # iallocator will select nodes and even if no iallocator is used,
10787 # collisions with LUInstanceCreate should be avoided
10788 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
10791 if self.op.iallocator:
10792 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10793 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10795 if self.op.opportunistic_locking:
10796 self.opportunistic_locks[locking.LEVEL_NODE] = True
10797 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10800 for inst in self.op.instances:
10801 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10802 nodeslist.append(inst.pnode)
10803 if inst.snode is not None:
10804 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10805 nodeslist.append(inst.snode)
10807 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10808 # Lock resources of instance's primary and secondary nodes (copy to
10809 # prevent accidential modification)
10810 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10812 def CheckPrereq(self):
10813 """Check prerequisite.
10816 cluster = self.cfg.GetClusterInfo()
10817 default_vg = self.cfg.GetVGName()
10818 ec_id = self.proc.GetECId()
10820 if self.op.opportunistic_locking:
10821 # Only consider nodes for which a lock is held
10822 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10824 node_whitelist = None
10826 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10827 _ComputeNics(op, cluster, None,
10829 _ComputeFullBeParams(op, cluster),
10831 for op in self.op.instances]
10833 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10834 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10836 ial.Run(self.op.iallocator)
10838 if not ial.success:
10839 raise errors.OpPrereqError("Can't compute nodes using"
10840 " iallocator '%s': %s" %
10841 (self.op.iallocator, ial.info),
10842 errors.ECODE_NORES)
10844 self.ia_result = ial.result
10846 if self.op.dry_run:
10847 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10848 constants.JOB_IDS_KEY: [],
10851 def _ConstructPartialResult(self):
10852 """Contructs the partial result.
10855 (allocatable, failed) = self.ia_result
10857 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10858 map(compat.fst, allocatable),
10859 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10862 def Exec(self, feedback_fn):
10863 """Executes the opcode.
10866 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10867 (allocatable, failed) = self.ia_result
10870 for (name, nodes) in allocatable:
10871 op = op2inst.pop(name)
10874 (op.pnode, op.snode) = nodes
10876 (op.pnode,) = nodes
10880 missing = set(op2inst.keys()) - set(failed)
10881 assert not missing, \
10882 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10884 return ResultWithJobs(jobs, **self._ConstructPartialResult())
10887 def _CheckRADOSFreeSpace():
10888 """Compute disk size requirements inside the RADOS cluster.
10891 # For the RADOS cluster we assume there is always enough space.
10895 class LUInstanceConsole(NoHooksLU):
10896 """Connect to an instance's console.
10898 This is somewhat special in that it returns the command line that
10899 you need to run on the master node in order to connect to the
10905 def ExpandNames(self):
10906 self.share_locks = _ShareAll()
10907 self._ExpandAndLockInstance()
10909 def CheckPrereq(self):
10910 """Check prerequisites.
10912 This checks that the instance is in the cluster.
10915 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10916 assert self.instance is not None, \
10917 "Cannot retrieve locked instance %s" % self.op.instance_name
10918 _CheckNodeOnline(self, self.instance.primary_node)
10920 def Exec(self, feedback_fn):
10921 """Connect to the console of an instance
10924 instance = self.instance
10925 node = instance.primary_node
10927 node_insts = self.rpc.call_instance_list([node],
10928 [instance.hypervisor])[node]
10929 node_insts.Raise("Can't get node information from %s" % node)
10931 if instance.name not in node_insts.payload:
10932 if instance.admin_state == constants.ADMINST_UP:
10933 state = constants.INSTST_ERRORDOWN
10934 elif instance.admin_state == constants.ADMINST_DOWN:
10935 state = constants.INSTST_ADMINDOWN
10937 state = constants.INSTST_ADMINOFFLINE
10938 raise errors.OpExecError("Instance %s is not running (state %s)" %
10939 (instance.name, state))
10941 logging.debug("Connecting to console of %s on %s", instance.name, node)
10943 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10946 def _GetInstanceConsole(cluster, instance):
10947 """Returns console information for an instance.
10949 @type cluster: L{objects.Cluster}
10950 @type instance: L{objects.Instance}
10954 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
10955 # beparams and hvparams are passed separately, to avoid editing the
10956 # instance and then saving the defaults in the instance itself.
10957 hvparams = cluster.FillHV(instance)
10958 beparams = cluster.FillBE(instance)
10959 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10961 assert console.instance == instance.name
10962 assert console.Validate()
10964 return console.ToDict()
10967 class LUInstanceReplaceDisks(LogicalUnit):
10968 """Replace the disks of an instance.
10971 HPATH = "mirrors-replace"
10972 HTYPE = constants.HTYPE_INSTANCE
10975 def CheckArguments(self):
10976 """Check arguments.
10979 remote_node = self.op.remote_node
10980 ialloc = self.op.iallocator
10981 if self.op.mode == constants.REPLACE_DISK_CHG:
10982 if remote_node is None and ialloc is None:
10983 raise errors.OpPrereqError("When changing the secondary either an"
10984 " iallocator script must be used or the"
10985 " new node given", errors.ECODE_INVAL)
10987 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10989 elif remote_node is not None or ialloc is not None:
10990 # Not replacing the secondary
10991 raise errors.OpPrereqError("The iallocator and new node options can"
10992 " only be used when changing the"
10993 " secondary node", errors.ECODE_INVAL)
10995 def ExpandNames(self):
10996 self._ExpandAndLockInstance()
10998 assert locking.LEVEL_NODE not in self.needed_locks
10999 assert locking.LEVEL_NODE_RES not in self.needed_locks
11000 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11002 assert self.op.iallocator is None or self.op.remote_node is None, \
11003 "Conflicting options"
11005 if self.op.remote_node is not None:
11006 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11008 # Warning: do not remove the locking of the new secondary here
11009 # unless DRBD8.AddChildren is changed to work in parallel;
11010 # currently it doesn't since parallel invocations of
11011 # FindUnusedMinor will conflict
11012 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11013 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11015 self.needed_locks[locking.LEVEL_NODE] = []
11016 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11018 if self.op.iallocator is not None:
11019 # iallocator will select a new node in the same group
11020 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11021 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11023 self.needed_locks[locking.LEVEL_NODE_RES] = []
11025 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11026 self.op.iallocator, self.op.remote_node,
11027 self.op.disks, self.op.early_release,
11028 self.op.ignore_ipolicy)
11030 self.tasklets = [self.replacer]
11032 def DeclareLocks(self, level):
11033 if level == locking.LEVEL_NODEGROUP:
11034 assert self.op.remote_node is None
11035 assert self.op.iallocator is not None
11036 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11038 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11039 # Lock all groups used by instance optimistically; this requires going
11040 # via the node before it's locked, requiring verification later on
11041 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11042 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11044 elif level == locking.LEVEL_NODE:
11045 if self.op.iallocator is not None:
11046 assert self.op.remote_node is None
11047 assert not self.needed_locks[locking.LEVEL_NODE]
11048 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11050 # Lock member nodes of all locked groups
11051 self.needed_locks[locking.LEVEL_NODE] = \
11053 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11054 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11056 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11058 self._LockInstancesNodes()
11060 elif level == locking.LEVEL_NODE_RES:
11062 self.needed_locks[locking.LEVEL_NODE_RES] = \
11063 self.needed_locks[locking.LEVEL_NODE]
11065 def BuildHooksEnv(self):
11066 """Build hooks env.
11068 This runs on the master, the primary and all the secondaries.
11071 instance = self.replacer.instance
11073 "MODE": self.op.mode,
11074 "NEW_SECONDARY": self.op.remote_node,
11075 "OLD_SECONDARY": instance.secondary_nodes[0],
11077 env.update(_BuildInstanceHookEnvByObject(self, instance))
11080 def BuildHooksNodes(self):
11081 """Build hooks nodes.
11084 instance = self.replacer.instance
11086 self.cfg.GetMasterNode(),
11087 instance.primary_node,
11089 if self.op.remote_node is not None:
11090 nl.append(self.op.remote_node)
11093 def CheckPrereq(self):
11094 """Check prerequisites.
11097 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11098 self.op.iallocator is None)
11100 # Verify if node group locks are still correct
11101 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11103 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11105 return LogicalUnit.CheckPrereq(self)
11108 class TLReplaceDisks(Tasklet):
11109 """Replaces disks for an instance.
11111 Note: Locking is not within the scope of this class.
11114 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11115 disks, early_release, ignore_ipolicy):
11116 """Initializes this class.
11119 Tasklet.__init__(self, lu)
11122 self.instance_name = instance_name
11124 self.iallocator_name = iallocator_name
11125 self.remote_node = remote_node
11127 self.early_release = early_release
11128 self.ignore_ipolicy = ignore_ipolicy
11131 self.instance = None
11132 self.new_node = None
11133 self.target_node = None
11134 self.other_node = None
11135 self.remote_node_info = None
11136 self.node_secondary_ip = None
11139 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11140 """Compute a new secondary node using an IAllocator.
11143 req = iallocator.IAReqRelocate(name=instance_name,
11144 relocate_from=list(relocate_from))
11145 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11147 ial.Run(iallocator_name)
11149 if not ial.success:
11150 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11151 " %s" % (iallocator_name, ial.info),
11152 errors.ECODE_NORES)
11154 remote_node_name = ial.result[0]
11156 lu.LogInfo("Selected new secondary for instance '%s': %s",
11157 instance_name, remote_node_name)
11159 return remote_node_name
11161 def _FindFaultyDisks(self, node_name):
11162 """Wrapper for L{_FindFaultyInstanceDisks}.
11165 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11168 def _CheckDisksActivated(self, instance):
11169 """Checks if the instance disks are activated.
11171 @param instance: The instance to check disks
11172 @return: True if they are activated, False otherwise
11175 nodes = instance.all_nodes
11177 for idx, dev in enumerate(instance.disks):
11179 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11180 self.cfg.SetDiskID(dev, node)
11182 result = _BlockdevFind(self, node, dev, instance)
11186 elif result.fail_msg or not result.payload:
11191 def CheckPrereq(self):
11192 """Check prerequisites.
11194 This checks that the instance is in the cluster.
11197 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11198 assert instance is not None, \
11199 "Cannot retrieve locked instance %s" % self.instance_name
11201 if instance.disk_template != constants.DT_DRBD8:
11202 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11203 " instances", errors.ECODE_INVAL)
11205 if len(instance.secondary_nodes) != 1:
11206 raise errors.OpPrereqError("The instance has a strange layout,"
11207 " expected one secondary but found %d" %
11208 len(instance.secondary_nodes),
11209 errors.ECODE_FAULT)
11211 instance = self.instance
11212 secondary_node = instance.secondary_nodes[0]
11214 if self.iallocator_name is None:
11215 remote_node = self.remote_node
11217 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11218 instance.name, instance.secondary_nodes)
11220 if remote_node is None:
11221 self.remote_node_info = None
11223 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11224 "Remote node '%s' is not locked" % remote_node
11226 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11227 assert self.remote_node_info is not None, \
11228 "Cannot retrieve locked node %s" % remote_node
11230 if remote_node == self.instance.primary_node:
11231 raise errors.OpPrereqError("The specified node is the primary node of"
11232 " the instance", errors.ECODE_INVAL)
11234 if remote_node == secondary_node:
11235 raise errors.OpPrereqError("The specified node is already the"
11236 " secondary node of the instance",
11237 errors.ECODE_INVAL)
11239 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11240 constants.REPLACE_DISK_CHG):
11241 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11242 errors.ECODE_INVAL)
11244 if self.mode == constants.REPLACE_DISK_AUTO:
11245 if not self._CheckDisksActivated(instance):
11246 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11247 " first" % self.instance_name,
11248 errors.ECODE_STATE)
11249 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11250 faulty_secondary = self._FindFaultyDisks(secondary_node)
11252 if faulty_primary and faulty_secondary:
11253 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11254 " one node and can not be repaired"
11255 " automatically" % self.instance_name,
11256 errors.ECODE_STATE)
11259 self.disks = faulty_primary
11260 self.target_node = instance.primary_node
11261 self.other_node = secondary_node
11262 check_nodes = [self.target_node, self.other_node]
11263 elif faulty_secondary:
11264 self.disks = faulty_secondary
11265 self.target_node = secondary_node
11266 self.other_node = instance.primary_node
11267 check_nodes = [self.target_node, self.other_node]
11273 # Non-automatic modes
11274 if self.mode == constants.REPLACE_DISK_PRI:
11275 self.target_node = instance.primary_node
11276 self.other_node = secondary_node
11277 check_nodes = [self.target_node, self.other_node]
11279 elif self.mode == constants.REPLACE_DISK_SEC:
11280 self.target_node = secondary_node
11281 self.other_node = instance.primary_node
11282 check_nodes = [self.target_node, self.other_node]
11284 elif self.mode == constants.REPLACE_DISK_CHG:
11285 self.new_node = remote_node
11286 self.other_node = instance.primary_node
11287 self.target_node = secondary_node
11288 check_nodes = [self.new_node, self.other_node]
11290 _CheckNodeNotDrained(self.lu, remote_node)
11291 _CheckNodeVmCapable(self.lu, remote_node)
11293 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11294 assert old_node_info is not None
11295 if old_node_info.offline and not self.early_release:
11296 # doesn't make sense to delay the release
11297 self.early_release = True
11298 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11299 " early-release mode", secondary_node)
11302 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11305 # If not specified all disks should be replaced
11307 self.disks = range(len(self.instance.disks))
11309 # TODO: This is ugly, but right now we can't distinguish between internal
11310 # submitted opcode and external one. We should fix that.
11311 if self.remote_node_info:
11312 # We change the node, lets verify it still meets instance policy
11313 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11314 cluster = self.cfg.GetClusterInfo()
11315 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11317 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11318 self.cfg, ignore=self.ignore_ipolicy)
11320 for node in check_nodes:
11321 _CheckNodeOnline(self.lu, node)
11323 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11326 if node_name is not None)
11328 # Release unneeded node and node resource locks
11329 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11330 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11331 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11333 # Release any owned node group
11334 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11336 # Check whether disks are valid
11337 for disk_idx in self.disks:
11338 instance.FindDisk(disk_idx)
11340 # Get secondary node IP addresses
11341 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11342 in self.cfg.GetMultiNodeInfo(touched_nodes))
11344 def Exec(self, feedback_fn):
11345 """Execute disk replacement.
11347 This dispatches the disk replacement to the appropriate handler.
11351 # Verify owned locks before starting operation
11352 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11353 assert set(owned_nodes) == set(self.node_secondary_ip), \
11354 ("Incorrect node locks, owning %s, expected %s" %
11355 (owned_nodes, self.node_secondary_ip.keys()))
11356 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11357 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11358 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11360 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11361 assert list(owned_instances) == [self.instance_name], \
11362 "Instance '%s' not locked" % self.instance_name
11364 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11365 "Should not own any node group lock at this point"
11368 feedback_fn("No disks need replacement for instance '%s'" %
11369 self.instance.name)
11372 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11373 (utils.CommaJoin(self.disks), self.instance.name))
11374 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11375 feedback_fn("Current seconary node: %s" %
11376 utils.CommaJoin(self.instance.secondary_nodes))
11378 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11380 # Activate the instance disks if we're replacing them on a down instance
11382 _StartInstanceDisks(self.lu, self.instance, True)
11385 # Should we replace the secondary node?
11386 if self.new_node is not None:
11387 fn = self._ExecDrbd8Secondary
11389 fn = self._ExecDrbd8DiskOnly
11391 result = fn(feedback_fn)
11393 # Deactivate the instance disks if we're replacing them on a
11396 _SafeShutdownInstanceDisks(self.lu, self.instance)
11398 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11401 # Verify owned locks
11402 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11403 nodes = frozenset(self.node_secondary_ip)
11404 assert ((self.early_release and not owned_nodes) or
11405 (not self.early_release and not (set(owned_nodes) - nodes))), \
11406 ("Not owning the correct locks, early_release=%s, owned=%r,"
11407 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11411 def _CheckVolumeGroup(self, nodes):
11412 self.lu.LogInfo("Checking volume groups")
11414 vgname = self.cfg.GetVGName()
11416 # Make sure volume group exists on all involved nodes
11417 results = self.rpc.call_vg_list(nodes)
11419 raise errors.OpExecError("Can't list volume groups on the nodes")
11422 res = results[node]
11423 res.Raise("Error checking node %s" % node)
11424 if vgname not in res.payload:
11425 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11428 def _CheckDisksExistence(self, nodes):
11429 # Check disk existence
11430 for idx, dev in enumerate(self.instance.disks):
11431 if idx not in self.disks:
11435 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11436 self.cfg.SetDiskID(dev, node)
11438 result = _BlockdevFind(self, node, dev, self.instance)
11440 msg = result.fail_msg
11441 if msg or not result.payload:
11443 msg = "disk not found"
11444 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11447 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11448 for idx, dev in enumerate(self.instance.disks):
11449 if idx not in self.disks:
11452 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11455 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11456 on_primary, ldisk=ldisk):
11457 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11458 " replace disks for instance %s" %
11459 (node_name, self.instance.name))
11461 def _CreateNewStorage(self, node_name):
11462 """Create new storage on the primary or secondary node.
11464 This is only used for same-node replaces, not for changing the
11465 secondary node, hence we don't want to modify the existing disk.
11470 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11471 for idx, dev in enumerate(disks):
11472 if idx not in self.disks:
11475 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11477 self.cfg.SetDiskID(dev, node_name)
11479 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11480 names = _GenerateUniqueNames(self.lu, lv_names)
11482 (data_disk, meta_disk) = dev.children
11483 vg_data = data_disk.logical_id[0]
11484 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11485 logical_id=(vg_data, names[0]),
11486 params=data_disk.params)
11487 vg_meta = meta_disk.logical_id[0]
11488 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11489 size=constants.DRBD_META_SIZE,
11490 logical_id=(vg_meta, names[1]),
11491 params=meta_disk.params)
11493 new_lvs = [lv_data, lv_meta]
11494 old_lvs = [child.Copy() for child in dev.children]
11495 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11496 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11498 # we pass force_create=True to force the LVM creation
11499 for new_lv in new_lvs:
11500 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11501 _GetInstanceInfoText(self.instance), False,
11506 def _CheckDevices(self, node_name, iv_names):
11507 for name, (dev, _, _) in iv_names.iteritems():
11508 self.cfg.SetDiskID(dev, node_name)
11510 result = _BlockdevFind(self, node_name, dev, self.instance)
11512 msg = result.fail_msg
11513 if msg or not result.payload:
11515 msg = "disk not found"
11516 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11519 if result.payload.is_degraded:
11520 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11522 def _RemoveOldStorage(self, node_name, iv_names):
11523 for name, (_, old_lvs, _) in iv_names.iteritems():
11524 self.lu.LogInfo("Remove logical volumes for %s", name)
11527 self.cfg.SetDiskID(lv, node_name)
11529 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11531 self.lu.LogWarning("Can't remove old LV: %s", msg,
11532 hint="remove unused LVs manually")
11534 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11535 """Replace a disk on the primary or secondary for DRBD 8.
11537 The algorithm for replace is quite complicated:
11539 1. for each disk to be replaced:
11541 1. create new LVs on the target node with unique names
11542 1. detach old LVs from the drbd device
11543 1. rename old LVs to name_replaced.<time_t>
11544 1. rename new LVs to old LVs
11545 1. attach the new LVs (with the old names now) to the drbd device
11547 1. wait for sync across all devices
11549 1. for each modified disk:
11551 1. remove old LVs (which have the name name_replaces.<time_t>)
11553 Failures are not very well handled.
11558 # Step: check device activation
11559 self.lu.LogStep(1, steps_total, "Check device existence")
11560 self._CheckDisksExistence([self.other_node, self.target_node])
11561 self._CheckVolumeGroup([self.target_node, self.other_node])
11563 # Step: check other node consistency
11564 self.lu.LogStep(2, steps_total, "Check peer consistency")
11565 self._CheckDisksConsistency(self.other_node,
11566 self.other_node == self.instance.primary_node,
11569 # Step: create new storage
11570 self.lu.LogStep(3, steps_total, "Allocate new storage")
11571 iv_names = self._CreateNewStorage(self.target_node)
11573 # Step: for each lv, detach+rename*2+attach
11574 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11575 for dev, old_lvs, new_lvs in iv_names.itervalues():
11576 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11578 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11580 result.Raise("Can't detach drbd from local storage on node"
11581 " %s for device %s" % (self.target_node, dev.iv_name))
11583 #cfg.Update(instance)
11585 # ok, we created the new LVs, so now we know we have the needed
11586 # storage; as such, we proceed on the target node to rename
11587 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11588 # using the assumption that logical_id == physical_id (which in
11589 # turn is the unique_id on that node)
11591 # FIXME(iustin): use a better name for the replaced LVs
11592 temp_suffix = int(time.time())
11593 ren_fn = lambda d, suff: (d.physical_id[0],
11594 d.physical_id[1] + "_replaced-%s" % suff)
11596 # Build the rename list based on what LVs exist on the node
11597 rename_old_to_new = []
11598 for to_ren in old_lvs:
11599 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11600 if not result.fail_msg and result.payload:
11602 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11604 self.lu.LogInfo("Renaming the old LVs on the target node")
11605 result = self.rpc.call_blockdev_rename(self.target_node,
11607 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11609 # Now we rename the new LVs to the old LVs
11610 self.lu.LogInfo("Renaming the new LVs on the target node")
11611 rename_new_to_old = [(new, old.physical_id)
11612 for old, new in zip(old_lvs, new_lvs)]
11613 result = self.rpc.call_blockdev_rename(self.target_node,
11615 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11617 # Intermediate steps of in memory modifications
11618 for old, new in zip(old_lvs, new_lvs):
11619 new.logical_id = old.logical_id
11620 self.cfg.SetDiskID(new, self.target_node)
11622 # We need to modify old_lvs so that removal later removes the
11623 # right LVs, not the newly added ones; note that old_lvs is a
11625 for disk in old_lvs:
11626 disk.logical_id = ren_fn(disk, temp_suffix)
11627 self.cfg.SetDiskID(disk, self.target_node)
11629 # Now that the new lvs have the old name, we can add them to the device
11630 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11631 result = self.rpc.call_blockdev_addchildren(self.target_node,
11632 (dev, self.instance), new_lvs)
11633 msg = result.fail_msg
11635 for new_lv in new_lvs:
11636 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11639 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11640 hint=("cleanup manually the unused logical"
11642 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11644 cstep = itertools.count(5)
11646 if self.early_release:
11647 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11648 self._RemoveOldStorage(self.target_node, iv_names)
11649 # TODO: Check if releasing locks early still makes sense
11650 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11652 # Release all resource locks except those used by the instance
11653 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11654 keep=self.node_secondary_ip.keys())
11656 # Release all node locks while waiting for sync
11657 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11659 # TODO: Can the instance lock be downgraded here? Take the optional disk
11660 # shutdown in the caller into consideration.
11663 # This can fail as the old devices are degraded and _WaitForSync
11664 # does a combined result over all disks, so we don't check its return value
11665 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11666 _WaitForSync(self.lu, self.instance)
11668 # Check all devices manually
11669 self._CheckDevices(self.instance.primary_node, iv_names)
11671 # Step: remove old storage
11672 if not self.early_release:
11673 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11674 self._RemoveOldStorage(self.target_node, iv_names)
11676 def _ExecDrbd8Secondary(self, feedback_fn):
11677 """Replace the secondary node for DRBD 8.
11679 The algorithm for replace is quite complicated:
11680 - for all disks of the instance:
11681 - create new LVs on the new node with same names
11682 - shutdown the drbd device on the old secondary
11683 - disconnect the drbd network on the primary
11684 - create the drbd device on the new secondary
11685 - network attach the drbd on the primary, using an artifice:
11686 the drbd code for Attach() will connect to the network if it
11687 finds a device which is connected to the good local disks but
11688 not network enabled
11689 - wait for sync across all devices
11690 - remove all disks from the old secondary
11692 Failures are not very well handled.
11697 pnode = self.instance.primary_node
11699 # Step: check device activation
11700 self.lu.LogStep(1, steps_total, "Check device existence")
11701 self._CheckDisksExistence([self.instance.primary_node])
11702 self._CheckVolumeGroup([self.instance.primary_node])
11704 # Step: check other node consistency
11705 self.lu.LogStep(2, steps_total, "Check peer consistency")
11706 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11708 # Step: create new storage
11709 self.lu.LogStep(3, steps_total, "Allocate new storage")
11710 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11711 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
11712 for idx, dev in enumerate(disks):
11713 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11714 (self.new_node, idx))
11715 # we pass force_create=True to force LVM creation
11716 for new_lv in dev.children:
11717 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11718 True, _GetInstanceInfoText(self.instance), False,
11721 # Step 4: dbrd minors and drbd setups changes
11722 # after this, we must manually remove the drbd minors on both the
11723 # error and the success paths
11724 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11725 minors = self.cfg.AllocateDRBDMinor([self.new_node
11726 for dev in self.instance.disks],
11727 self.instance.name)
11728 logging.debug("Allocated minors %r", minors)
11731 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11732 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11733 (self.new_node, idx))
11734 # create new devices on new_node; note that we create two IDs:
11735 # one without port, so the drbd will be activated without
11736 # networking information on the new node at this stage, and one
11737 # with network, for the latter activation in step 4
11738 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11739 if self.instance.primary_node == o_node1:
11742 assert self.instance.primary_node == o_node2, "Three-node instance?"
11745 new_alone_id = (self.instance.primary_node, self.new_node, None,
11746 p_minor, new_minor, o_secret)
11747 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11748 p_minor, new_minor, o_secret)
11750 iv_names[idx] = (dev, dev.children, new_net_id)
11751 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11753 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11754 logical_id=new_alone_id,
11755 children=dev.children,
11758 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11761 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11763 _GetInstanceInfoText(self.instance), False,
11765 except errors.GenericError:
11766 self.cfg.ReleaseDRBDMinors(self.instance.name)
11769 # We have new devices, shutdown the drbd on the old secondary
11770 for idx, dev in enumerate(self.instance.disks):
11771 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11772 self.cfg.SetDiskID(dev, self.target_node)
11773 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11774 (dev, self.instance)).fail_msg
11776 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11777 "node: %s" % (idx, msg),
11778 hint=("Please cleanup this device manually as"
11779 " soon as possible"))
11781 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11782 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11783 self.instance.disks)[pnode]
11785 msg = result.fail_msg
11787 # detaches didn't succeed (unlikely)
11788 self.cfg.ReleaseDRBDMinors(self.instance.name)
11789 raise errors.OpExecError("Can't detach the disks from the network on"
11790 " old node: %s" % (msg,))
11792 # if we managed to detach at least one, we update all the disks of
11793 # the instance to point to the new secondary
11794 self.lu.LogInfo("Updating instance configuration")
11795 for dev, _, new_logical_id in iv_names.itervalues():
11796 dev.logical_id = new_logical_id
11797 self.cfg.SetDiskID(dev, self.instance.primary_node)
11799 self.cfg.Update(self.instance, feedback_fn)
11801 # Release all node locks (the configuration has been updated)
11802 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11804 # and now perform the drbd attach
11805 self.lu.LogInfo("Attaching primary drbds to new secondary"
11806 " (standalone => connected)")
11807 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11809 self.node_secondary_ip,
11810 (self.instance.disks, self.instance),
11811 self.instance.name,
11813 for to_node, to_result in result.items():
11814 msg = to_result.fail_msg
11816 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11818 hint=("please do a gnt-instance info to see the"
11819 " status of disks"))
11821 cstep = itertools.count(5)
11823 if self.early_release:
11824 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11825 self._RemoveOldStorage(self.target_node, iv_names)
11826 # TODO: Check if releasing locks early still makes sense
11827 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11829 # Release all resource locks except those used by the instance
11830 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11831 keep=self.node_secondary_ip.keys())
11833 # TODO: Can the instance lock be downgraded here? Take the optional disk
11834 # shutdown in the caller into consideration.
11837 # This can fail as the old devices are degraded and _WaitForSync
11838 # does a combined result over all disks, so we don't check its return value
11839 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11840 _WaitForSync(self.lu, self.instance)
11842 # Check all devices manually
11843 self._CheckDevices(self.instance.primary_node, iv_names)
11845 # Step: remove old storage
11846 if not self.early_release:
11847 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11848 self._RemoveOldStorage(self.target_node, iv_names)
11851 class LURepairNodeStorage(NoHooksLU):
11852 """Repairs the volume group on a node.
11857 def CheckArguments(self):
11858 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11860 storage_type = self.op.storage_type
11862 if (constants.SO_FIX_CONSISTENCY not in
11863 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11864 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11865 " repaired" % storage_type,
11866 errors.ECODE_INVAL)
11868 def ExpandNames(self):
11869 self.needed_locks = {
11870 locking.LEVEL_NODE: [self.op.node_name],
11873 def _CheckFaultyDisks(self, instance, node_name):
11874 """Ensure faulty disks abort the opcode or at least warn."""
11876 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11878 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11879 " node '%s'" % (instance.name, node_name),
11880 errors.ECODE_STATE)
11881 except errors.OpPrereqError, err:
11882 if self.op.ignore_consistency:
11883 self.LogWarning(str(err.args[0]))
11887 def CheckPrereq(self):
11888 """Check prerequisites.
11891 # Check whether any instance on this node has faulty disks
11892 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11893 if inst.admin_state != constants.ADMINST_UP:
11895 check_nodes = set(inst.all_nodes)
11896 check_nodes.discard(self.op.node_name)
11897 for inst_node_name in check_nodes:
11898 self._CheckFaultyDisks(inst, inst_node_name)
11900 def Exec(self, feedback_fn):
11901 feedback_fn("Repairing storage unit '%s' on %s ..." %
11902 (self.op.name, self.op.node_name))
11904 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11905 result = self.rpc.call_storage_execute(self.op.node_name,
11906 self.op.storage_type, st_args,
11908 constants.SO_FIX_CONSISTENCY)
11909 result.Raise("Failed to repair storage unit '%s' on %s" %
11910 (self.op.name, self.op.node_name))
11913 class LUNodeEvacuate(NoHooksLU):
11914 """Evacuates instances off a list of nodes.
11919 _MODE2IALLOCATOR = {
11920 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11921 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11922 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11924 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11925 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11926 constants.IALLOCATOR_NEVAC_MODES)
11928 def CheckArguments(self):
11929 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11931 def ExpandNames(self):
11932 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11934 if self.op.remote_node is not None:
11935 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11936 assert self.op.remote_node
11938 if self.op.remote_node == self.op.node_name:
11939 raise errors.OpPrereqError("Can not use evacuated node as a new"
11940 " secondary node", errors.ECODE_INVAL)
11942 if self.op.mode != constants.NODE_EVAC_SEC:
11943 raise errors.OpPrereqError("Without the use of an iallocator only"
11944 " secondary instances can be evacuated",
11945 errors.ECODE_INVAL)
11948 self.share_locks = _ShareAll()
11949 self.needed_locks = {
11950 locking.LEVEL_INSTANCE: [],
11951 locking.LEVEL_NODEGROUP: [],
11952 locking.LEVEL_NODE: [],
11955 # Determine nodes (via group) optimistically, needs verification once locks
11956 # have been acquired
11957 self.lock_nodes = self._DetermineNodes()
11959 def _DetermineNodes(self):
11960 """Gets the list of nodes to operate on.
11963 if self.op.remote_node is None:
11964 # Iallocator will choose any node(s) in the same group
11965 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11967 group_nodes = frozenset([self.op.remote_node])
11969 # Determine nodes to be locked
11970 return set([self.op.node_name]) | group_nodes
11972 def _DetermineInstances(self):
11973 """Builds list of instances to operate on.
11976 assert self.op.mode in constants.NODE_EVAC_MODES
11978 if self.op.mode == constants.NODE_EVAC_PRI:
11979 # Primary instances only
11980 inst_fn = _GetNodePrimaryInstances
11981 assert self.op.remote_node is None, \
11982 "Evacuating primary instances requires iallocator"
11983 elif self.op.mode == constants.NODE_EVAC_SEC:
11984 # Secondary instances only
11985 inst_fn = _GetNodeSecondaryInstances
11988 assert self.op.mode == constants.NODE_EVAC_ALL
11989 inst_fn = _GetNodeInstances
11990 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11992 raise errors.OpPrereqError("Due to an issue with the iallocator"
11993 " interface it is not possible to evacuate"
11994 " all instances at once; specify explicitly"
11995 " whether to evacuate primary or secondary"
11997 errors.ECODE_INVAL)
11999 return inst_fn(self.cfg, self.op.node_name)
12001 def DeclareLocks(self, level):
12002 if level == locking.LEVEL_INSTANCE:
12003 # Lock instances optimistically, needs verification once node and group
12004 # locks have been acquired
12005 self.needed_locks[locking.LEVEL_INSTANCE] = \
12006 set(i.name for i in self._DetermineInstances())
12008 elif level == locking.LEVEL_NODEGROUP:
12009 # Lock node groups for all potential target nodes optimistically, needs
12010 # verification once nodes have been acquired
12011 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12012 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12014 elif level == locking.LEVEL_NODE:
12015 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12017 def CheckPrereq(self):
12019 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12020 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12021 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12023 need_nodes = self._DetermineNodes()
12025 if not owned_nodes.issuperset(need_nodes):
12026 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12027 " locks were acquired, current nodes are"
12028 " are '%s', used to be '%s'; retry the"
12030 (self.op.node_name,
12031 utils.CommaJoin(need_nodes),
12032 utils.CommaJoin(owned_nodes)),
12033 errors.ECODE_STATE)
12035 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12036 if owned_groups != wanted_groups:
12037 raise errors.OpExecError("Node groups changed since locks were acquired,"
12038 " current groups are '%s', used to be '%s';"
12039 " retry the operation" %
12040 (utils.CommaJoin(wanted_groups),
12041 utils.CommaJoin(owned_groups)))
12043 # Determine affected instances
12044 self.instances = self._DetermineInstances()
12045 self.instance_names = [i.name for i in self.instances]
12047 if set(self.instance_names) != owned_instances:
12048 raise errors.OpExecError("Instances on node '%s' changed since locks"
12049 " were acquired, current instances are '%s',"
12050 " used to be '%s'; retry the operation" %
12051 (self.op.node_name,
12052 utils.CommaJoin(self.instance_names),
12053 utils.CommaJoin(owned_instances)))
12055 if self.instance_names:
12056 self.LogInfo("Evacuating instances from node '%s': %s",
12058 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12060 self.LogInfo("No instances to evacuate from node '%s'",
12063 if self.op.remote_node is not None:
12064 for i in self.instances:
12065 if i.primary_node == self.op.remote_node:
12066 raise errors.OpPrereqError("Node %s is the primary node of"
12067 " instance %s, cannot use it as"
12069 (self.op.remote_node, i.name),
12070 errors.ECODE_INVAL)
12072 def Exec(self, feedback_fn):
12073 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12075 if not self.instance_names:
12076 # No instances to evacuate
12079 elif self.op.iallocator is not None:
12080 # TODO: Implement relocation to other group
12081 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12082 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12083 instances=list(self.instance_names))
12084 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12086 ial.Run(self.op.iallocator)
12088 if not ial.success:
12089 raise errors.OpPrereqError("Can't compute node evacuation using"
12090 " iallocator '%s': %s" %
12091 (self.op.iallocator, ial.info),
12092 errors.ECODE_NORES)
12094 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12096 elif self.op.remote_node is not None:
12097 assert self.op.mode == constants.NODE_EVAC_SEC
12099 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12100 remote_node=self.op.remote_node,
12102 mode=constants.REPLACE_DISK_CHG,
12103 early_release=self.op.early_release)]
12104 for instance_name in self.instance_names]
12107 raise errors.ProgrammerError("No iallocator or remote node")
12109 return ResultWithJobs(jobs)
12112 def _SetOpEarlyRelease(early_release, op):
12113 """Sets C{early_release} flag on opcodes if available.
12117 op.early_release = early_release
12118 except AttributeError:
12119 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12124 def _NodeEvacDest(use_nodes, group, nodes):
12125 """Returns group or nodes depending on caller's choice.
12129 return utils.CommaJoin(nodes)
12134 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12135 """Unpacks the result of change-group and node-evacuate iallocator requests.
12137 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12138 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12140 @type lu: L{LogicalUnit}
12141 @param lu: Logical unit instance
12142 @type alloc_result: tuple/list
12143 @param alloc_result: Result from iallocator
12144 @type early_release: bool
12145 @param early_release: Whether to release locks early if possible
12146 @type use_nodes: bool
12147 @param use_nodes: Whether to display node names instead of groups
12150 (moved, failed, jobs) = alloc_result
12153 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12154 for (name, reason) in failed)
12155 lu.LogWarning("Unable to evacuate instances %s", failreason)
12156 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12159 lu.LogInfo("Instances to be moved: %s",
12160 utils.CommaJoin("%s (to %s)" %
12161 (name, _NodeEvacDest(use_nodes, group, nodes))
12162 for (name, group, nodes) in moved))
12164 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12165 map(opcodes.OpCode.LoadOpCode, ops))
12169 def _DiskSizeInBytesToMebibytes(lu, size):
12170 """Converts a disk size in bytes to mebibytes.
12172 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12175 (mib, remainder) = divmod(size, 1024 * 1024)
12178 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12179 " to not overwrite existing data (%s bytes will not be"
12180 " wiped)", (1024 * 1024) - remainder)
12186 class LUInstanceGrowDisk(LogicalUnit):
12187 """Grow a disk of an instance.
12190 HPATH = "disk-grow"
12191 HTYPE = constants.HTYPE_INSTANCE
12194 def ExpandNames(self):
12195 self._ExpandAndLockInstance()
12196 self.needed_locks[locking.LEVEL_NODE] = []
12197 self.needed_locks[locking.LEVEL_NODE_RES] = []
12198 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12199 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12201 def DeclareLocks(self, level):
12202 if level == locking.LEVEL_NODE:
12203 self._LockInstancesNodes()
12204 elif level == locking.LEVEL_NODE_RES:
12206 self.needed_locks[locking.LEVEL_NODE_RES] = \
12207 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12209 def BuildHooksEnv(self):
12210 """Build hooks env.
12212 This runs on the master, the primary and all the secondaries.
12216 "DISK": self.op.disk,
12217 "AMOUNT": self.op.amount,
12218 "ABSOLUTE": self.op.absolute,
12220 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12223 def BuildHooksNodes(self):
12224 """Build hooks nodes.
12227 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12230 def CheckPrereq(self):
12231 """Check prerequisites.
12233 This checks that the instance is in the cluster.
12236 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12237 assert instance is not None, \
12238 "Cannot retrieve locked instance %s" % self.op.instance_name
12239 nodenames = list(instance.all_nodes)
12240 for node in nodenames:
12241 _CheckNodeOnline(self, node)
12243 self.instance = instance
12245 if instance.disk_template not in constants.DTS_GROWABLE:
12246 raise errors.OpPrereqError("Instance's disk layout does not support"
12247 " growing", errors.ECODE_INVAL)
12249 self.disk = instance.FindDisk(self.op.disk)
12251 if self.op.absolute:
12252 self.target = self.op.amount
12253 self.delta = self.target - self.disk.size
12255 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12256 "current disk size (%s)" %
12257 (utils.FormatUnit(self.target, "h"),
12258 utils.FormatUnit(self.disk.size, "h")),
12259 errors.ECODE_STATE)
12261 self.delta = self.op.amount
12262 self.target = self.disk.size + self.delta
12264 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12265 utils.FormatUnit(self.delta, "h"),
12266 errors.ECODE_INVAL)
12268 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12270 def _CheckDiskSpace(self, nodenames, req_vgspace):
12271 template = self.instance.disk_template
12272 if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
12273 # TODO: check the free disk space for file, when that feature will be
12275 nodes = map(self.cfg.GetNodeInfo, nodenames)
12276 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12279 # With exclusive storage we need to something smarter than just looking
12280 # at free space; for now, let's simply abort the operation.
12281 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12282 " is enabled", errors.ECODE_STATE)
12283 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12285 def Exec(self, feedback_fn):
12286 """Execute disk grow.
12289 instance = self.instance
12292 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12293 assert (self.owned_locks(locking.LEVEL_NODE) ==
12294 self.owned_locks(locking.LEVEL_NODE_RES))
12296 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12298 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12300 raise errors.OpExecError("Cannot activate block device to grow")
12302 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12303 (self.op.disk, instance.name,
12304 utils.FormatUnit(self.delta, "h"),
12305 utils.FormatUnit(self.target, "h")))
12307 # First run all grow ops in dry-run mode
12308 for node in instance.all_nodes:
12309 self.cfg.SetDiskID(disk, node)
12310 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12312 result.Raise("Dry-run grow request failed to node %s" % node)
12315 # Get disk size from primary node for wiping
12316 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12317 result.Raise("Failed to retrieve disk size from node '%s'" %
12318 instance.primary_node)
12320 (disk_size_in_bytes, ) = result.payload
12322 if disk_size_in_bytes is None:
12323 raise errors.OpExecError("Failed to retrieve disk size from primary"
12324 " node '%s'" % instance.primary_node)
12326 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12328 assert old_disk_size >= disk.size, \
12329 ("Retrieved disk size too small (got %s, should be at least %s)" %
12330 (old_disk_size, disk.size))
12332 old_disk_size = None
12334 # We know that (as far as we can test) operations across different
12335 # nodes will succeed, time to run it for real on the backing storage
12336 for node in instance.all_nodes:
12337 self.cfg.SetDiskID(disk, node)
12338 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12340 result.Raise("Grow request failed to node %s" % node)
12342 # And now execute it for logical storage, on the primary node
12343 node = instance.primary_node
12344 self.cfg.SetDiskID(disk, node)
12345 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12347 result.Raise("Grow request failed to node %s" % node)
12349 disk.RecordGrow(self.delta)
12350 self.cfg.Update(instance, feedback_fn)
12352 # Changes have been recorded, release node lock
12353 _ReleaseLocks(self, locking.LEVEL_NODE)
12355 # Downgrade lock while waiting for sync
12356 self.glm.downgrade(locking.LEVEL_INSTANCE)
12358 assert wipe_disks ^ (old_disk_size is None)
12361 assert instance.disks[self.op.disk] == disk
12363 # Wipe newly added disk space
12364 _WipeDisks(self, instance,
12365 disks=[(self.op.disk, disk, old_disk_size)])
12367 if self.op.wait_for_sync:
12368 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12370 self.LogWarning("Disk syncing has not returned a good status; check"
12372 if instance.admin_state != constants.ADMINST_UP:
12373 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12374 elif instance.admin_state != constants.ADMINST_UP:
12375 self.LogWarning("Not shutting down the disk even if the instance is"
12376 " not supposed to be running because no wait for"
12377 " sync mode was requested")
12379 assert self.owned_locks(locking.LEVEL_NODE_RES)
12380 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12383 class LUInstanceQueryData(NoHooksLU):
12384 """Query runtime instance data.
12389 def ExpandNames(self):
12390 self.needed_locks = {}
12392 # Use locking if requested or when non-static information is wanted
12393 if not (self.op.static or self.op.use_locking):
12394 self.LogWarning("Non-static data requested, locks need to be acquired")
12395 self.op.use_locking = True
12397 if self.op.instances or not self.op.use_locking:
12398 # Expand instance names right here
12399 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12401 # Will use acquired locks
12402 self.wanted_names = None
12404 if self.op.use_locking:
12405 self.share_locks = _ShareAll()
12407 if self.wanted_names is None:
12408 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12410 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12412 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12413 self.needed_locks[locking.LEVEL_NODE] = []
12414 self.needed_locks[locking.LEVEL_NETWORK] = []
12415 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12417 def DeclareLocks(self, level):
12418 if self.op.use_locking:
12419 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12420 if level == locking.LEVEL_NODEGROUP:
12422 # Lock all groups used by instances optimistically; this requires going
12423 # via the node before it's locked, requiring verification later on
12424 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12425 frozenset(group_uuid
12426 for instance_name in owned_instances
12428 self.cfg.GetInstanceNodeGroups(instance_name))
12430 elif level == locking.LEVEL_NODE:
12431 self._LockInstancesNodes()
12433 elif level == locking.LEVEL_NETWORK:
12434 self.needed_locks[locking.LEVEL_NETWORK] = \
12436 for instance_name in owned_instances
12438 self.cfg.GetInstanceNetworks(instance_name))
12440 def CheckPrereq(self):
12441 """Check prerequisites.
12443 This only checks the optional instance list against the existing names.
12446 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12447 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12448 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12449 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
12451 if self.wanted_names is None:
12452 assert self.op.use_locking, "Locking was not used"
12453 self.wanted_names = owned_instances
12455 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12457 if self.op.use_locking:
12458 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12461 assert not (owned_instances or owned_groups or
12462 owned_nodes or owned_networks)
12464 self.wanted_instances = instances.values()
12466 def _ComputeBlockdevStatus(self, node, instance, dev):
12467 """Returns the status of a block device
12470 if self.op.static or not node:
12473 self.cfg.SetDiskID(dev, node)
12475 result = self.rpc.call_blockdev_find(node, dev)
12479 result.Raise("Can't compute disk status for %s" % instance.name)
12481 status = result.payload
12485 return (status.dev_path, status.major, status.minor,
12486 status.sync_percent, status.estimated_time,
12487 status.is_degraded, status.ldisk_status)
12489 def _ComputeDiskStatus(self, instance, snode, dev):
12490 """Compute block device status.
12493 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12495 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12497 def _ComputeDiskStatusInner(self, instance, snode, dev):
12498 """Compute block device status.
12500 @attention: The device has to be annotated already.
12503 if dev.dev_type in constants.LDS_DRBD:
12504 # we change the snode then (otherwise we use the one passed in)
12505 if dev.logical_id[0] == instance.primary_node:
12506 snode = dev.logical_id[1]
12508 snode = dev.logical_id[0]
12510 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12512 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12515 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12522 "iv_name": dev.iv_name,
12523 "dev_type": dev.dev_type,
12524 "logical_id": dev.logical_id,
12525 "physical_id": dev.physical_id,
12526 "pstatus": dev_pstatus,
12527 "sstatus": dev_sstatus,
12528 "children": dev_children,
12535 def Exec(self, feedback_fn):
12536 """Gather and return data"""
12539 cluster = self.cfg.GetClusterInfo()
12541 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12542 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12544 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12545 for node in nodes.values()))
12547 group2name_fn = lambda uuid: groups[uuid].name
12548 for instance in self.wanted_instances:
12549 pnode = nodes[instance.primary_node]
12551 if self.op.static or pnode.offline:
12552 remote_state = None
12554 self.LogWarning("Primary node %s is marked offline, returning static"
12555 " information only for instance %s" %
12556 (pnode.name, instance.name))
12558 remote_info = self.rpc.call_instance_info(instance.primary_node,
12560 instance.hypervisor)
12561 remote_info.Raise("Error checking node %s" % instance.primary_node)
12562 remote_info = remote_info.payload
12563 if remote_info and "state" in remote_info:
12564 remote_state = "up"
12566 if instance.admin_state == constants.ADMINST_UP:
12567 remote_state = "down"
12569 remote_state = instance.admin_state
12571 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12574 snodes_group_uuids = [nodes[snode_name].group
12575 for snode_name in instance.secondary_nodes]
12577 result[instance.name] = {
12578 "name": instance.name,
12579 "config_state": instance.admin_state,
12580 "run_state": remote_state,
12581 "pnode": instance.primary_node,
12582 "pnode_group_uuid": pnode.group,
12583 "pnode_group_name": group2name_fn(pnode.group),
12584 "snodes": instance.secondary_nodes,
12585 "snodes_group_uuids": snodes_group_uuids,
12586 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12588 # this happens to be the same format used for hooks
12589 "nics": _NICListToTuple(self, instance.nics),
12590 "disk_template": instance.disk_template,
12592 "hypervisor": instance.hypervisor,
12593 "network_port": instance.network_port,
12594 "hv_instance": instance.hvparams,
12595 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12596 "be_instance": instance.beparams,
12597 "be_actual": cluster.FillBE(instance),
12598 "os_instance": instance.osparams,
12599 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12600 "serial_no": instance.serial_no,
12601 "mtime": instance.mtime,
12602 "ctime": instance.ctime,
12603 "uuid": instance.uuid,
12609 def PrepareContainerMods(mods, private_fn):
12610 """Prepares a list of container modifications by adding a private data field.
12612 @type mods: list of tuples; (operation, index, parameters)
12613 @param mods: List of modifications
12614 @type private_fn: callable or None
12615 @param private_fn: Callable for constructing a private data field for a
12620 if private_fn is None:
12625 return [(op, idx, params, fn()) for (op, idx, params) in mods]
12628 def GetItemFromContainer(identifier, kind, container):
12629 """Return the item refered by the identifier.
12631 @type identifier: string
12632 @param identifier: Item index or name or UUID
12634 @param kind: One-word item description
12635 @type container: list
12636 @param container: Container to get the item from
12641 idx = int(identifier)
12644 absidx = len(container) - 1
12646 raise IndexError("Not accepting negative indices other than -1")
12647 elif idx > len(container):
12648 raise IndexError("Got %s index %s, but there are only %s" %
12649 (kind, idx, len(container)))
12652 return (absidx, container[idx])
12656 for idx, item in enumerate(container):
12657 if item.uuid == identifier or item.name == identifier:
12660 raise errors.OpPrereqError("Cannot find %s with identifier %s" %
12661 (kind, identifier), errors.ECODE_NOENT)
12664 #: Type description for changes as returned by L{ApplyContainerMods}'s
12666 _TApplyContModsCbChanges = \
12667 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12668 ht.TNonEmptyString,
12673 def ApplyContainerMods(kind, container, chgdesc, mods,
12674 create_fn, modify_fn, remove_fn):
12675 """Applies descriptions in C{mods} to C{container}.
12678 @param kind: One-word item description
12679 @type container: list
12680 @param container: Container to modify
12681 @type chgdesc: None or list
12682 @param chgdesc: List of applied changes
12684 @param mods: Modifications as returned by L{PrepareContainerMods}
12685 @type create_fn: callable
12686 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12687 receives absolute item index, parameters and private data object as added
12688 by L{PrepareContainerMods}, returns tuple containing new item and changes
12690 @type modify_fn: callable
12691 @param modify_fn: Callback for modifying an existing item
12692 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12693 and private data object as added by L{PrepareContainerMods}, returns
12695 @type remove_fn: callable
12696 @param remove_fn: Callback on removing item; receives absolute item index,
12697 item and private data object as added by L{PrepareContainerMods}
12700 for (op, identifier, params, private) in mods:
12703 if op == constants.DDM_ADD:
12704 # Calculate where item will be added
12705 # When adding an item, identifier can only be an index
12707 idx = int(identifier)
12709 raise errors.OpPrereqError("Only possitive integer or -1 is accepted as"
12710 " identifier for %s" % constants.DDM_ADD,
12711 errors.ECODE_INVAL)
12713 addidx = len(container)
12716 raise IndexError("Not accepting negative indices other than -1")
12717 elif idx > len(container):
12718 raise IndexError("Got %s index %s, but there are only %s" %
12719 (kind, idx, len(container)))
12722 if create_fn is None:
12725 (item, changes) = create_fn(addidx, params, private)
12728 container.append(item)
12731 assert idx <= len(container)
12732 # list.insert does so before the specified index
12733 container.insert(idx, item)
12735 # Retrieve existing item
12736 (absidx, item) = GetItemFromContainer(identifier, kind, container)
12738 if op == constants.DDM_REMOVE:
12741 if remove_fn is not None:
12742 remove_fn(absidx, item, private)
12744 changes = [("%s/%s" % (kind, absidx), "remove")]
12746 assert container[absidx] == item
12747 del container[absidx]
12748 elif op == constants.DDM_MODIFY:
12749 if modify_fn is not None:
12750 changes = modify_fn(absidx, item, params, private)
12752 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12754 assert _TApplyContModsCbChanges(changes)
12756 if not (chgdesc is None or changes is None):
12757 chgdesc.extend(changes)
12760 def _UpdateIvNames(base_index, disks):
12761 """Updates the C{iv_name} attribute of disks.
12763 @type disks: list of L{objects.Disk}
12766 for (idx, disk) in enumerate(disks):
12767 disk.iv_name = "disk/%s" % (base_index + idx, )
12770 class _InstNicModPrivate:
12771 """Data structure for network interface modifications.
12773 Used by L{LUInstanceSetParams}.
12776 def __init__(self):
12781 class LUInstanceSetParams(LogicalUnit):
12782 """Modifies an instances's parameters.
12785 HPATH = "instance-modify"
12786 HTYPE = constants.HTYPE_INSTANCE
12790 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12791 assert ht.TList(mods)
12792 assert not mods or len(mods[0]) in (2, 3)
12794 if mods and len(mods[0]) == 2:
12798 for op, params in mods:
12799 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12800 result.append((op, -1, params))
12804 raise errors.OpPrereqError("Only one %s add or remove operation is"
12805 " supported at a time" % kind,
12806 errors.ECODE_INVAL)
12808 result.append((constants.DDM_MODIFY, op, params))
12810 assert verify_fn(result)
12817 def _CheckMods(kind, mods, key_types, item_fn):
12818 """Ensures requested disk/NIC modifications are valid.
12821 for (op, _, params) in mods:
12822 assert ht.TDict(params)
12824 # If 'key_types' is an empty dict, we assume we have an
12825 # 'ext' template and thus do not ForceDictType
12827 utils.ForceDictType(params, key_types)
12829 if op == constants.DDM_REMOVE:
12831 raise errors.OpPrereqError("No settings should be passed when"
12832 " removing a %s" % kind,
12833 errors.ECODE_INVAL)
12834 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12835 item_fn(op, params)
12837 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12840 def _VerifyDiskModification(op, params):
12841 """Verifies a disk modification.
12844 if op == constants.DDM_ADD:
12845 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12846 if mode not in constants.DISK_ACCESS_SET:
12847 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12848 errors.ECODE_INVAL)
12850 size = params.get(constants.IDISK_SIZE, None)
12852 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12853 constants.IDISK_SIZE, errors.ECODE_INVAL)
12857 except (TypeError, ValueError), err:
12858 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12859 errors.ECODE_INVAL)
12861 params[constants.IDISK_SIZE] = size
12862 name = params.get(constants.IDISK_NAME, None)
12863 if name is not None and name.lower() == constants.VALUE_NONE:
12864 params[constants.IDISK_NAME] = None
12866 elif op == constants.DDM_MODIFY:
12867 if constants.IDISK_SIZE in params:
12868 raise errors.OpPrereqError("Disk size change not possible, use"
12869 " grow-disk", errors.ECODE_INVAL)
12870 if len(params) > 2:
12871 raise errors.OpPrereqError("Disk modification doesn't support"
12872 " additional arbitrary parameters",
12873 errors.ECODE_INVAL)
12874 name = params.get(constants.IDISK_NAME, None)
12875 if name is not None and name.lower() == constants.VALUE_NONE:
12876 params[constants.IDISK_NAME] = None
12879 def _VerifyNicModification(op, params):
12880 """Verifies a network interface modification.
12883 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12884 ip = params.get(constants.INIC_IP, None)
12885 name = params.get(constants.INIC_NAME, None)
12886 req_net = params.get(constants.INIC_NETWORK, None)
12887 link = params.get(constants.NIC_LINK, None)
12888 mode = params.get(constants.NIC_MODE, None)
12889 if name is not None and name.lower() == constants.VALUE_NONE:
12890 params[constants.INIC_NAME] = None
12891 if req_net is not None:
12892 if req_net.lower() == constants.VALUE_NONE:
12893 params[constants.INIC_NETWORK] = None
12895 elif link is not None or mode is not None:
12896 raise errors.OpPrereqError("If network is given"
12897 " mode or link should not",
12898 errors.ECODE_INVAL)
12900 if op == constants.DDM_ADD:
12901 macaddr = params.get(constants.INIC_MAC, None)
12902 if macaddr is None:
12903 params[constants.INIC_MAC] = constants.VALUE_AUTO
12906 if ip.lower() == constants.VALUE_NONE:
12907 params[constants.INIC_IP] = None
12909 if ip.lower() == constants.NIC_IP_POOL:
12910 if op == constants.DDM_ADD and req_net is None:
12911 raise errors.OpPrereqError("If ip=pool, parameter network"
12913 errors.ECODE_INVAL)
12915 if not netutils.IPAddress.IsValid(ip):
12916 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12917 errors.ECODE_INVAL)
12919 if constants.INIC_MAC in params:
12920 macaddr = params[constants.INIC_MAC]
12921 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12922 macaddr = utils.NormalizeAndValidateMac(macaddr)
12924 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12925 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12926 " modifying an existing NIC",
12927 errors.ECODE_INVAL)
12929 def CheckArguments(self):
12930 if not (self.op.nics or self.op.disks or self.op.disk_template or
12931 self.op.hvparams or self.op.beparams or self.op.os_name or
12932 self.op.offline is not None or self.op.runtime_mem or
12934 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12936 if self.op.hvparams:
12937 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
12938 "hypervisor", "instance", "cluster")
12940 self.op.disks = self._UpgradeDiskNicMods(
12941 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12942 self.op.nics = self._UpgradeDiskNicMods(
12943 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12945 if self.op.disks and self.op.disk_template is not None:
12946 raise errors.OpPrereqError("Disk template conversion and other disk"
12947 " changes not supported at the same time",
12948 errors.ECODE_INVAL)
12950 if (self.op.disk_template and
12951 self.op.disk_template in constants.DTS_INT_MIRROR and
12952 self.op.remote_node is None):
12953 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12954 " one requires specifying a secondary node",
12955 errors.ECODE_INVAL)
12957 # Check NIC modifications
12958 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12959 self._VerifyNicModification)
12962 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
12964 def ExpandNames(self):
12965 self._ExpandAndLockInstance()
12966 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12967 # Can't even acquire node locks in shared mode as upcoming changes in
12968 # Ganeti 2.6 will start to modify the node object on disk conversion
12969 self.needed_locks[locking.LEVEL_NODE] = []
12970 self.needed_locks[locking.LEVEL_NODE_RES] = []
12971 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12972 # Look node group to look up the ipolicy
12973 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12975 def DeclareLocks(self, level):
12976 if level == locking.LEVEL_NODEGROUP:
12977 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12978 # Acquire locks for the instance's nodegroups optimistically. Needs
12979 # to be verified in CheckPrereq
12980 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12981 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12982 elif level == locking.LEVEL_NODE:
12983 self._LockInstancesNodes()
12984 if self.op.disk_template and self.op.remote_node:
12985 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12986 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12987 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12989 self.needed_locks[locking.LEVEL_NODE_RES] = \
12990 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12992 def BuildHooksEnv(self):
12993 """Build hooks env.
12995 This runs on the master, primary and secondaries.
12999 if constants.BE_MINMEM in self.be_new:
13000 args["minmem"] = self.be_new[constants.BE_MINMEM]
13001 if constants.BE_MAXMEM in self.be_new:
13002 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13003 if constants.BE_VCPUS in self.be_new:
13004 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13005 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13006 # information at all.
13008 if self._new_nics is not None:
13011 for nic in self._new_nics:
13012 n = copy.deepcopy(nic)
13013 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13014 n.nicparams = nicparams
13015 nics.append(_NICToTuple(self, n))
13017 args["nics"] = nics
13019 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13020 if self.op.disk_template:
13021 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13022 if self.op.runtime_mem:
13023 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13027 def BuildHooksNodes(self):
13028 """Build hooks nodes.
13031 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13034 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13035 old_params, cluster, pnode):
13037 update_params_dict = dict([(key, params[key])
13038 for key in constants.NICS_PARAMETERS
13041 req_link = update_params_dict.get(constants.NIC_LINK, None)
13042 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13044 new_net_uuid = None
13045 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13046 if new_net_uuid_or_name:
13047 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13048 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
13051 old_net_obj = self.cfg.GetNetwork(old_net_uuid)
13054 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
13056 raise errors.OpPrereqError("No netparams found for the network"
13057 " %s, probably not connected" %
13058 new_net_obj.name, errors.ECODE_INVAL)
13059 new_params = dict(netparams)
13061 new_params = _GetUpdatedParams(old_params, update_params_dict)
13063 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13065 new_filled_params = cluster.SimpleFillNIC(new_params)
13066 objects.NIC.CheckParameterSyntax(new_filled_params)
13068 new_mode = new_filled_params[constants.NIC_MODE]
13069 if new_mode == constants.NIC_MODE_BRIDGED:
13070 bridge = new_filled_params[constants.NIC_LINK]
13071 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13073 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13075 self.warn.append(msg)
13077 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13079 elif new_mode == constants.NIC_MODE_ROUTED:
13080 ip = params.get(constants.INIC_IP, old_ip)
13082 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13083 " on a routed NIC", errors.ECODE_INVAL)
13085 elif new_mode == constants.NIC_MODE_OVS:
13086 # TODO: check OVS link
13087 self.LogInfo("OVS links are currently not checked for correctness")
13089 if constants.INIC_MAC in params:
13090 mac = params[constants.INIC_MAC]
13092 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13093 errors.ECODE_INVAL)
13094 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13095 # otherwise generate the MAC address
13096 params[constants.INIC_MAC] = \
13097 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13099 # or validate/reserve the current one
13101 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13102 except errors.ReservationError:
13103 raise errors.OpPrereqError("MAC address '%s' already in use"
13104 " in cluster" % mac,
13105 errors.ECODE_NOTUNIQUE)
13106 elif new_net_uuid != old_net_uuid:
13108 def get_net_prefix(net_uuid):
13111 nobj = self.cfg.GetNetwork(net_uuid)
13112 mac_prefix = nobj.mac_prefix
13116 new_prefix = get_net_prefix(new_net_uuid)
13117 old_prefix = get_net_prefix(old_net_uuid)
13118 if old_prefix != new_prefix:
13119 params[constants.INIC_MAC] = \
13120 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13122 # if there is a change in (ip, network) tuple
13123 new_ip = params.get(constants.INIC_IP, old_ip)
13124 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13126 # if IP is pool then require a network and generate one IP
13127 if new_ip.lower() == constants.NIC_IP_POOL:
13130 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13131 except errors.ReservationError:
13132 raise errors.OpPrereqError("Unable to get a free IP"
13133 " from the address pool",
13134 errors.ECODE_STATE)
13135 self.LogInfo("Chose IP %s from network %s",
13138 params[constants.INIC_IP] = new_ip
13140 raise errors.OpPrereqError("ip=pool, but no network found",
13141 errors.ECODE_INVAL)
13142 # Reserve new IP if in the new network if any
13145 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13146 self.LogInfo("Reserving IP %s in network %s",
13147 new_ip, new_net_obj.name)
13148 except errors.ReservationError:
13149 raise errors.OpPrereqError("IP %s not available in network %s" %
13150 (new_ip, new_net_obj.name),
13151 errors.ECODE_NOTUNIQUE)
13152 # new network is None so check if new IP is a conflicting IP
13153 elif self.op.conflicts_check:
13154 _CheckForConflictingIp(self, new_ip, pnode)
13156 # release old IP if old network is not None
13157 if old_ip and old_net_uuid:
13159 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13160 except errors.AddressPoolError:
13161 logging.warning("Release IP %s not contained in network %s",
13162 old_ip, old_net_obj.name)
13164 # there are no changes in (ip, network) tuple and old network is not None
13165 elif (old_net_uuid is not None and
13166 (req_link is not None or req_mode is not None)):
13167 raise errors.OpPrereqError("Not allowed to change link or mode of"
13168 " a NIC that is connected to a network",
13169 errors.ECODE_INVAL)
13171 private.params = new_params
13172 private.filled = new_filled_params
13174 def _PreCheckDiskTemplate(self, pnode_info):
13175 """CheckPrereq checks related to a new disk template."""
13176 # Arguments are passed to avoid configuration lookups
13177 instance = self.instance
13178 pnode = instance.primary_node
13179 cluster = self.cluster
13180 if instance.disk_template == self.op.disk_template:
13181 raise errors.OpPrereqError("Instance already has disk template %s" %
13182 instance.disk_template, errors.ECODE_INVAL)
13184 if (instance.disk_template,
13185 self.op.disk_template) not in self._DISK_CONVERSIONS:
13186 raise errors.OpPrereqError("Unsupported disk template conversion from"
13187 " %s to %s" % (instance.disk_template,
13188 self.op.disk_template),
13189 errors.ECODE_INVAL)
13190 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13191 msg="cannot change disk template")
13192 if self.op.disk_template in constants.DTS_INT_MIRROR:
13193 if self.op.remote_node == pnode:
13194 raise errors.OpPrereqError("Given new secondary node %s is the same"
13195 " as the primary node of the instance" %
13196 self.op.remote_node, errors.ECODE_STATE)
13197 _CheckNodeOnline(self, self.op.remote_node)
13198 _CheckNodeNotDrained(self, self.op.remote_node)
13199 # FIXME: here we assume that the old instance type is DT_PLAIN
13200 assert instance.disk_template == constants.DT_PLAIN
13201 disks = [{constants.IDISK_SIZE: d.size,
13202 constants.IDISK_VG: d.logical_id[0]}
13203 for d in instance.disks]
13204 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13205 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13207 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13208 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13209 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13211 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
13212 ignore=self.op.ignore_ipolicy)
13213 if pnode_info.group != snode_info.group:
13214 self.LogWarning("The primary and secondary nodes are in two"
13215 " different node groups; the disk parameters"
13216 " from the first disk's node group will be"
13219 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
13220 # Make sure none of the nodes require exclusive storage
13221 nodes = [pnode_info]
13222 if self.op.disk_template in constants.DTS_INT_MIRROR:
13224 nodes.append(snode_info)
13225 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13226 if compat.any(map(has_es, nodes)):
13227 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13228 " storage is enabled" % (instance.disk_template,
13229 self.op.disk_template))
13230 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13232 def CheckPrereq(self):
13233 """Check prerequisites.
13235 This only checks the instance list against the existing names.
13238 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13239 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13241 cluster = self.cluster = self.cfg.GetClusterInfo()
13242 assert self.instance is not None, \
13243 "Cannot retrieve locked instance %s" % self.op.instance_name
13245 pnode = instance.primary_node
13249 if (self.op.pnode is not None and self.op.pnode != pnode and
13250 not self.op.force):
13251 # verify that the instance is not up
13252 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13253 instance.hypervisor)
13254 if instance_info.fail_msg:
13255 self.warn.append("Can't get instance runtime information: %s" %
13256 instance_info.fail_msg)
13257 elif instance_info.payload:
13258 raise errors.OpPrereqError("Instance is still running on %s" % pnode,
13259 errors.ECODE_STATE)
13261 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13262 nodelist = list(instance.all_nodes)
13263 pnode_info = self.cfg.GetNodeInfo(pnode)
13264 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13266 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13267 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13268 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13270 # dictionary with instance information after the modification
13273 # Check disk modifications. This is done here and not in CheckArguments
13274 # (as with NICs), because we need to know the instance's disk template
13275 if instance.disk_template == constants.DT_EXT:
13276 self._CheckMods("disk", self.op.disks, {},
13277 self._VerifyDiskModification)
13279 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13280 self._VerifyDiskModification)
13282 # Prepare disk/NIC modifications
13283 self.diskmod = PrepareContainerMods(self.op.disks, None)
13284 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13286 # Check the validity of the `provider' parameter
13287 if instance.disk_template in constants.DT_EXT:
13288 for mod in self.diskmod:
13289 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13290 if mod[0] == constants.DDM_ADD:
13291 if ext_provider is None:
13292 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13293 " '%s' missing, during disk add" %
13295 constants.IDISK_PROVIDER),
13296 errors.ECODE_NOENT)
13297 elif mod[0] == constants.DDM_MODIFY:
13299 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13301 constants.IDISK_PROVIDER,
13302 errors.ECODE_INVAL)
13304 for mod in self.diskmod:
13305 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13306 if ext_provider is not None:
13307 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13308 " instances of type '%s'" %
13309 (constants.IDISK_PROVIDER,
13311 errors.ECODE_INVAL)
13314 if self.op.os_name and not self.op.force:
13315 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13316 self.op.force_variant)
13317 instance_os = self.op.os_name
13319 instance_os = instance.os
13321 assert not (self.op.disk_template and self.op.disks), \
13322 "Can't modify disk template and apply disk changes at the same time"
13324 if self.op.disk_template:
13325 self._PreCheckDiskTemplate(pnode_info)
13327 # hvparams processing
13328 if self.op.hvparams:
13329 hv_type = instance.hypervisor
13330 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13331 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13332 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13335 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13336 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13337 self.hv_proposed = self.hv_new = hv_new # the new actual values
13338 self.hv_inst = i_hvdict # the new dict (without defaults)
13340 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13342 self.hv_new = self.hv_inst = {}
13344 # beparams processing
13345 if self.op.beparams:
13346 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13348 objects.UpgradeBeParams(i_bedict)
13349 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13350 be_new = cluster.SimpleFillBE(i_bedict)
13351 self.be_proposed = self.be_new = be_new # the new actual values
13352 self.be_inst = i_bedict # the new dict (without defaults)
13354 self.be_new = self.be_inst = {}
13355 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13356 be_old = cluster.FillBE(instance)
13358 # CPU param validation -- checking every time a parameter is
13359 # changed to cover all cases where either CPU mask or vcpus have
13361 if (constants.BE_VCPUS in self.be_proposed and
13362 constants.HV_CPU_MASK in self.hv_proposed):
13364 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13365 # Verify mask is consistent with number of vCPUs. Can skip this
13366 # test if only 1 entry in the CPU mask, which means same mask
13367 # is applied to all vCPUs.
13368 if (len(cpu_list) > 1 and
13369 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13370 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13372 (self.be_proposed[constants.BE_VCPUS],
13373 self.hv_proposed[constants.HV_CPU_MASK]),
13374 errors.ECODE_INVAL)
13376 # Only perform this test if a new CPU mask is given
13377 if constants.HV_CPU_MASK in self.hv_new:
13378 # Calculate the largest CPU number requested
13379 max_requested_cpu = max(map(max, cpu_list))
13380 # Check that all of the instance's nodes have enough physical CPUs to
13381 # satisfy the requested CPU mask
13382 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13383 max_requested_cpu + 1, instance.hypervisor)
13385 # osparams processing
13386 if self.op.osparams:
13387 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13388 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13389 self.os_inst = i_osdict # the new dict (without defaults)
13393 #TODO(dynmem): do the appropriate check involving MINMEM
13394 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13395 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13396 mem_check_list = [pnode]
13397 if be_new[constants.BE_AUTO_BALANCE]:
13398 # either we changed auto_balance to yes or it was from before
13399 mem_check_list.extend(instance.secondary_nodes)
13400 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13401 instance.hypervisor)
13402 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13403 [instance.hypervisor], False)
13404 pninfo = nodeinfo[pnode]
13405 msg = pninfo.fail_msg
13407 # Assume the primary node is unreachable and go ahead
13408 self.warn.append("Can't get info from primary node %s: %s" %
13411 (_, _, (pnhvinfo, )) = pninfo.payload
13412 if not isinstance(pnhvinfo.get("memory_free", None), int):
13413 self.warn.append("Node data from primary node %s doesn't contain"
13414 " free memory information" % pnode)
13415 elif instance_info.fail_msg:
13416 self.warn.append("Can't get instance runtime information: %s" %
13417 instance_info.fail_msg)
13419 if instance_info.payload:
13420 current_mem = int(instance_info.payload["memory"])
13422 # Assume instance not running
13423 # (there is a slight race condition here, but it's not very
13424 # probable, and we have no other way to check)
13425 # TODO: Describe race condition
13427 #TODO(dynmem): do the appropriate check involving MINMEM
13428 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13429 pnhvinfo["memory_free"])
13431 raise errors.OpPrereqError("This change will prevent the instance"
13432 " from starting, due to %d MB of memory"
13433 " missing on its primary node" %
13434 miss_mem, errors.ECODE_NORES)
13436 if be_new[constants.BE_AUTO_BALANCE]:
13437 for node, nres in nodeinfo.items():
13438 if node not in instance.secondary_nodes:
13440 nres.Raise("Can't get info from secondary node %s" % node,
13441 prereq=True, ecode=errors.ECODE_STATE)
13442 (_, _, (nhvinfo, )) = nres.payload
13443 if not isinstance(nhvinfo.get("memory_free", None), int):
13444 raise errors.OpPrereqError("Secondary node %s didn't return free"
13445 " memory information" % node,
13446 errors.ECODE_STATE)
13447 #TODO(dynmem): do the appropriate check involving MINMEM
13448 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13449 raise errors.OpPrereqError("This change will prevent the instance"
13450 " from failover to its secondary node"
13451 " %s, due to not enough memory" % node,
13452 errors.ECODE_STATE)
13454 if self.op.runtime_mem:
13455 remote_info = self.rpc.call_instance_info(instance.primary_node,
13457 instance.hypervisor)
13458 remote_info.Raise("Error checking node %s" % instance.primary_node)
13459 if not remote_info.payload: # not running already
13460 raise errors.OpPrereqError("Instance %s is not running" %
13461 instance.name, errors.ECODE_STATE)
13463 current_memory = remote_info.payload["memory"]
13464 if (not self.op.force and
13465 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13466 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13467 raise errors.OpPrereqError("Instance %s must have memory between %d"
13468 " and %d MB of memory unless --force is"
13471 self.be_proposed[constants.BE_MINMEM],
13472 self.be_proposed[constants.BE_MAXMEM]),
13473 errors.ECODE_INVAL)
13475 delta = self.op.runtime_mem - current_memory
13477 _CheckNodeFreeMemory(self, instance.primary_node,
13478 "ballooning memory for instance %s" %
13479 instance.name, delta, instance.hypervisor)
13481 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13482 raise errors.OpPrereqError("Disk operations not supported for"
13483 " diskless instances", errors.ECODE_INVAL)
13485 def _PrepareNicCreate(_, params, private):
13486 self._PrepareNicModification(params, private, None, None,
13487 {}, cluster, pnode)
13488 return (None, None)
13490 def _PrepareNicMod(_, nic, params, private):
13491 self._PrepareNicModification(params, private, nic.ip, nic.network,
13492 nic.nicparams, cluster, pnode)
13495 def _PrepareNicRemove(_, params, __):
13497 net = params.network
13498 if net is not None and ip is not None:
13499 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13501 # Verify NIC changes (operating on copy)
13502 nics = instance.nics[:]
13503 ApplyContainerMods("NIC", nics, None, self.nicmod,
13504 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13505 if len(nics) > constants.MAX_NICS:
13506 raise errors.OpPrereqError("Instance has too many network interfaces"
13507 " (%d), cannot add more" % constants.MAX_NICS,
13508 errors.ECODE_STATE)
13510 def _PrepareDiskMod(_, disk, params, __):
13511 disk.name = params.get(constants.IDISK_NAME, None)
13513 # Verify disk changes (operating on a copy)
13514 disks = copy.deepcopy(instance.disks)
13515 ApplyContainerMods("disk", disks, None, self.diskmod, None, _PrepareDiskMod,
13517 utils.ValidateDeviceNames("disk", disks)
13518 if len(disks) > constants.MAX_DISKS:
13519 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13520 " more" % constants.MAX_DISKS,
13521 errors.ECODE_STATE)
13522 disk_sizes = [disk.size for disk in instance.disks]
13523 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13524 self.diskmod if op == constants.DDM_ADD)
13525 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13526 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13528 if self.op.offline is not None and self.op.offline:
13529 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13530 msg="can't change to offline")
13532 # Pre-compute NIC changes (necessary to use result in hooks)
13533 self._nic_chgdesc = []
13535 # Operate on copies as this is still in prereq
13536 nics = [nic.Copy() for nic in instance.nics]
13537 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13538 self._CreateNewNic, self._ApplyNicMods, None)
13539 # Verify that NIC names are unique and valid
13540 utils.ValidateDeviceNames("NIC", nics)
13541 self._new_nics = nics
13542 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13544 self._new_nics = None
13545 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13547 if not self.op.ignore_ipolicy:
13548 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13551 # Fill ispec with backend parameters
13552 ispec[constants.ISPEC_SPINDLE_USE] = \
13553 self.be_new.get(constants.BE_SPINDLE_USE, None)
13554 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13557 # Copy ispec to verify parameters with min/max values separately
13558 if self.op.disk_template:
13559 new_disk_template = self.op.disk_template
13561 new_disk_template = instance.disk_template
13562 ispec_max = ispec.copy()
13563 ispec_max[constants.ISPEC_MEM_SIZE] = \
13564 self.be_new.get(constants.BE_MAXMEM, None)
13565 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
13567 ispec_min = ispec.copy()
13568 ispec_min[constants.ISPEC_MEM_SIZE] = \
13569 self.be_new.get(constants.BE_MINMEM, None)
13570 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
13573 if (res_max or res_min):
13574 # FIXME: Improve error message by including information about whether
13575 # the upper or lower limit of the parameter fails the ipolicy.
13576 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13577 (group_info, group_info.name,
13578 utils.CommaJoin(set(res_max + res_min))))
13579 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13581 def _ConvertPlainToDrbd(self, feedback_fn):
13582 """Converts an instance from plain to drbd.
13585 feedback_fn("Converting template to drbd")
13586 instance = self.instance
13587 pnode = instance.primary_node
13588 snode = self.op.remote_node
13590 assert instance.disk_template == constants.DT_PLAIN
13592 # create a fake disk info for _GenerateDiskTemplate
13593 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13594 constants.IDISK_VG: d.logical_id[0],
13595 constants.IDISK_NAME: d.name}
13596 for d in instance.disks]
13597 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13598 instance.name, pnode, [snode],
13599 disk_info, None, None, 0, feedback_fn,
13601 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13603 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13604 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13605 info = _GetInstanceInfoText(instance)
13606 feedback_fn("Creating additional volumes...")
13607 # first, create the missing data and meta devices
13608 for disk in anno_disks:
13609 # unfortunately this is... not too nice
13610 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13611 info, True, p_excl_stor)
13612 for child in disk.children:
13613 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13615 # at this stage, all new LVs have been created, we can rename the
13617 feedback_fn("Renaming original volumes...")
13618 rename_list = [(o, n.children[0].logical_id)
13619 for (o, n) in zip(instance.disks, new_disks)]
13620 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13621 result.Raise("Failed to rename original LVs")
13623 feedback_fn("Initializing DRBD devices...")
13624 # all child devices are in place, we can now create the DRBD devices
13626 for disk in anno_disks:
13627 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13628 f_create = node == pnode
13629 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13631 except errors.GenericError, e:
13632 feedback_fn("Initializing of DRBD devices failed;"
13633 " renaming back original volumes...")
13634 for disk in new_disks:
13635 self.cfg.SetDiskID(disk, pnode)
13636 rename_back_list = [(n.children[0], o.logical_id)
13637 for (n, o) in zip(new_disks, instance.disks)]
13638 result = self.rpc.call_blockdev_rename(pnode, rename_back_list)
13639 result.Raise("Failed to rename LVs back after error %s" % str(e))
13642 # at this point, the instance has been modified
13643 instance.disk_template = constants.DT_DRBD8
13644 instance.disks = new_disks
13645 self.cfg.Update(instance, feedback_fn)
13647 # Release node locks while waiting for sync
13648 _ReleaseLocks(self, locking.LEVEL_NODE)
13650 # disks are created, waiting for sync
13651 disk_abort = not _WaitForSync(self, instance,
13652 oneshot=not self.op.wait_for_sync)
13654 raise errors.OpExecError("There are some degraded disks for"
13655 " this instance, please cleanup manually")
13657 # Node resource locks will be released by caller
13659 def _ConvertDrbdToPlain(self, feedback_fn):
13660 """Converts an instance from drbd to plain.
13663 instance = self.instance
13665 assert len(instance.secondary_nodes) == 1
13666 assert instance.disk_template == constants.DT_DRBD8
13668 pnode = instance.primary_node
13669 snode = instance.secondary_nodes[0]
13670 feedback_fn("Converting template to plain")
13672 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13673 new_disks = [d.children[0] for d in instance.disks]
13675 # copy over size, mode and name
13676 for parent, child in zip(old_disks, new_disks):
13677 child.size = parent.size
13678 child.mode = parent.mode
13679 child.name = parent.name
13681 # this is a DRBD disk, return its port to the pool
13682 # NOTE: this must be done right before the call to cfg.Update!
13683 for disk in old_disks:
13684 tcp_port = disk.logical_id[2]
13685 self.cfg.AddTcpUdpPort(tcp_port)
13687 # update instance structure
13688 instance.disks = new_disks
13689 instance.disk_template = constants.DT_PLAIN
13690 _UpdateIvNames(0, instance.disks)
13691 self.cfg.Update(instance, feedback_fn)
13693 # Release locks in case removing disks takes a while
13694 _ReleaseLocks(self, locking.LEVEL_NODE)
13696 feedback_fn("Removing volumes on the secondary node...")
13697 for disk in old_disks:
13698 self.cfg.SetDiskID(disk, snode)
13699 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13701 self.LogWarning("Could not remove block device %s on node %s,"
13702 " continuing anyway: %s", disk.iv_name, snode, msg)
13704 feedback_fn("Removing unneeded volumes on the primary node...")
13705 for idx, disk in enumerate(old_disks):
13706 meta = disk.children[1]
13707 self.cfg.SetDiskID(meta, pnode)
13708 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13710 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13711 " continuing anyway: %s", idx, pnode, msg)
13713 def _CreateNewDisk(self, idx, params, _):
13714 """Creates a new disk.
13717 instance = self.instance
13720 if instance.disk_template in constants.DTS_FILEBASED:
13721 (file_driver, file_path) = instance.disks[0].logical_id
13722 file_path = os.path.dirname(file_path)
13724 file_driver = file_path = None
13727 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13728 instance.primary_node, instance.secondary_nodes,
13729 [params], file_path, file_driver, idx,
13730 self.Log, self.diskparams)[0]
13732 info = _GetInstanceInfoText(instance)
13734 logging.info("Creating volume %s for instance %s",
13735 disk.iv_name, instance.name)
13736 # Note: this needs to be kept in sync with _CreateDisks
13738 for node in instance.all_nodes:
13739 f_create = (node == instance.primary_node)
13741 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13742 except errors.OpExecError, err:
13743 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13744 disk.iv_name, disk, node, err)
13746 if self.cluster.prealloc_wipe_disks:
13748 _WipeDisks(self, instance,
13749 disks=[(idx, disk, 0)])
13752 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13756 def _ModifyDisk(idx, disk, params, _):
13757 """Modifies a disk.
13761 mode = params.get(constants.IDISK_MODE, None)
13764 changes.append(("disk.mode/%d" % idx, disk.mode))
13766 name = params.get(constants.IDISK_NAME, None)
13768 changes.append(("disk.name/%d" % idx, disk.name))
13772 def _RemoveDisk(self, idx, root, _):
13776 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13777 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13778 self.cfg.SetDiskID(disk, node)
13779 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13781 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13782 " continuing anyway", idx, node, msg)
13784 # if this is a DRBD disk, return its port to the pool
13785 if root.dev_type in constants.LDS_DRBD:
13786 self.cfg.AddTcpUdpPort(root.logical_id[2])
13788 def _CreateNewNic(self, idx, params, private):
13789 """Creates data structure for a new network interface.
13792 mac = params[constants.INIC_MAC]
13793 ip = params.get(constants.INIC_IP, None)
13794 net = params.get(constants.INIC_NETWORK, None)
13795 name = params.get(constants.INIC_NAME, None)
13796 net_uuid = self.cfg.LookupNetwork(net)
13797 #TODO: not private.filled?? can a nic have no nicparams??
13798 nicparams = private.filled
13799 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, name=name,
13800 nicparams=nicparams)
13801 nobj.uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13805 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13806 (mac, ip, private.filled[constants.NIC_MODE],
13807 private.filled[constants.NIC_LINK],
13811 def _ApplyNicMods(self, idx, nic, params, private):
13812 """Modifies a network interface.
13817 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NAME]:
13819 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13820 setattr(nic, key, params[key])
13822 new_net = params.get(constants.INIC_NETWORK, nic.network)
13823 new_net_uuid = self.cfg.LookupNetwork(new_net)
13824 if new_net_uuid != nic.network:
13825 changes.append(("nic.network/%d" % idx, new_net))
13826 nic.network = new_net_uuid
13829 nic.nicparams = private.filled
13831 for (key, val) in nic.nicparams.items():
13832 changes.append(("nic.%s/%d" % (key, idx), val))
13836 def Exec(self, feedback_fn):
13837 """Modifies an instance.
13839 All parameters take effect only at the next restart of the instance.
13842 # Process here the warnings from CheckPrereq, as we don't have a
13843 # feedback_fn there.
13844 # TODO: Replace with self.LogWarning
13845 for warn in self.warn:
13846 feedback_fn("WARNING: %s" % warn)
13848 assert ((self.op.disk_template is None) ^
13849 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13850 "Not owning any node resource locks"
13853 instance = self.instance
13857 instance.primary_node = self.op.pnode
13860 if self.op.runtime_mem:
13861 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13863 self.op.runtime_mem)
13864 rpcres.Raise("Cannot modify instance runtime memory")
13865 result.append(("runtime_memory", self.op.runtime_mem))
13867 # Apply disk changes
13868 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13869 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13870 _UpdateIvNames(0, instance.disks)
13872 if self.op.disk_template:
13874 check_nodes = set(instance.all_nodes)
13875 if self.op.remote_node:
13876 check_nodes.add(self.op.remote_node)
13877 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13878 owned = self.owned_locks(level)
13879 assert not (check_nodes - owned), \
13880 ("Not owning the correct locks, owning %r, expected at least %r" %
13881 (owned, check_nodes))
13883 r_shut = _ShutdownInstanceDisks(self, instance)
13885 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13886 " proceed with disk template conversion")
13887 mode = (instance.disk_template, self.op.disk_template)
13889 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13891 self.cfg.ReleaseDRBDMinors(instance.name)
13893 result.append(("disk_template", self.op.disk_template))
13895 assert instance.disk_template == self.op.disk_template, \
13896 ("Expected disk template '%s', found '%s'" %
13897 (self.op.disk_template, instance.disk_template))
13899 # Release node and resource locks if there are any (they might already have
13900 # been released during disk conversion)
13901 _ReleaseLocks(self, locking.LEVEL_NODE)
13902 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13904 # Apply NIC changes
13905 if self._new_nics is not None:
13906 instance.nics = self._new_nics
13907 result.extend(self._nic_chgdesc)
13910 if self.op.hvparams:
13911 instance.hvparams = self.hv_inst
13912 for key, val in self.op.hvparams.iteritems():
13913 result.append(("hv/%s" % key, val))
13916 if self.op.beparams:
13917 instance.beparams = self.be_inst
13918 for key, val in self.op.beparams.iteritems():
13919 result.append(("be/%s" % key, val))
13922 if self.op.os_name:
13923 instance.os = self.op.os_name
13926 if self.op.osparams:
13927 instance.osparams = self.os_inst
13928 for key, val in self.op.osparams.iteritems():
13929 result.append(("os/%s" % key, val))
13931 if self.op.offline is None:
13934 elif self.op.offline:
13935 # Mark instance as offline
13936 self.cfg.MarkInstanceOffline(instance.name)
13937 result.append(("admin_state", constants.ADMINST_OFFLINE))
13939 # Mark instance as online, but stopped
13940 self.cfg.MarkInstanceDown(instance.name)
13941 result.append(("admin_state", constants.ADMINST_DOWN))
13943 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13945 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13946 self.owned_locks(locking.LEVEL_NODE)), \
13947 "All node locks should have been released by now"
13951 _DISK_CONVERSIONS = {
13952 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13953 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13957 class LUInstanceChangeGroup(LogicalUnit):
13958 HPATH = "instance-change-group"
13959 HTYPE = constants.HTYPE_INSTANCE
13962 def ExpandNames(self):
13963 self.share_locks = _ShareAll()
13965 self.needed_locks = {
13966 locking.LEVEL_NODEGROUP: [],
13967 locking.LEVEL_NODE: [],
13968 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
13971 self._ExpandAndLockInstance()
13973 if self.op.target_groups:
13974 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13975 self.op.target_groups)
13977 self.req_target_uuids = None
13979 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13981 def DeclareLocks(self, level):
13982 if level == locking.LEVEL_NODEGROUP:
13983 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13985 if self.req_target_uuids:
13986 lock_groups = set(self.req_target_uuids)
13988 # Lock all groups used by instance optimistically; this requires going
13989 # via the node before it's locked, requiring verification later on
13990 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13991 lock_groups.update(instance_groups)
13993 # No target groups, need to lock all of them
13994 lock_groups = locking.ALL_SET
13996 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13998 elif level == locking.LEVEL_NODE:
13999 if self.req_target_uuids:
14000 # Lock all nodes used by instances
14001 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14002 self._LockInstancesNodes()
14004 # Lock all nodes in all potential target groups
14005 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14006 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14007 member_nodes = [node_name
14008 for group in lock_groups
14009 for node_name in self.cfg.GetNodeGroup(group).members]
14010 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14012 # Lock all nodes as all groups are potential targets
14013 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14015 def CheckPrereq(self):
14016 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14017 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14018 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14020 assert (self.req_target_uuids is None or
14021 owned_groups.issuperset(self.req_target_uuids))
14022 assert owned_instances == set([self.op.instance_name])
14024 # Get instance information
14025 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14027 # Check if node groups for locked instance are still correct
14028 assert owned_nodes.issuperset(self.instance.all_nodes), \
14029 ("Instance %s's nodes changed while we kept the lock" %
14030 self.op.instance_name)
14032 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14035 if self.req_target_uuids:
14036 # User requested specific target groups
14037 self.target_uuids = frozenset(self.req_target_uuids)
14039 # All groups except those used by the instance are potential targets
14040 self.target_uuids = owned_groups - inst_groups
14042 conflicting_groups = self.target_uuids & inst_groups
14043 if conflicting_groups:
14044 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14045 " used by the instance '%s'" %
14046 (utils.CommaJoin(conflicting_groups),
14047 self.op.instance_name),
14048 errors.ECODE_INVAL)
14050 if not self.target_uuids:
14051 raise errors.OpPrereqError("There are no possible target groups",
14052 errors.ECODE_INVAL)
14054 def BuildHooksEnv(self):
14055 """Build hooks env.
14058 assert self.target_uuids
14061 "TARGET_GROUPS": " ".join(self.target_uuids),
14064 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14068 def BuildHooksNodes(self):
14069 """Build hooks nodes.
14072 mn = self.cfg.GetMasterNode()
14073 return ([mn], [mn])
14075 def Exec(self, feedback_fn):
14076 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14078 assert instances == [self.op.instance_name], "Instance not locked"
14080 req = iallocator.IAReqGroupChange(instances=instances,
14081 target_groups=list(self.target_uuids))
14082 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14084 ial.Run(self.op.iallocator)
14086 if not ial.success:
14087 raise errors.OpPrereqError("Can't compute solution for changing group of"
14088 " instance '%s' using iallocator '%s': %s" %
14089 (self.op.instance_name, self.op.iallocator,
14090 ial.info), errors.ECODE_NORES)
14092 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14094 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14095 " instance '%s'", len(jobs), self.op.instance_name)
14097 return ResultWithJobs(jobs)
14100 class LUBackupQuery(NoHooksLU):
14101 """Query the exports list
14106 def CheckArguments(self):
14107 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14108 ["node", "export"], self.op.use_locking)
14110 def ExpandNames(self):
14111 self.expq.ExpandNames(self)
14113 def DeclareLocks(self, level):
14114 self.expq.DeclareLocks(self, level)
14116 def Exec(self, feedback_fn):
14119 for (node, expname) in self.expq.OldStyleQuery(self):
14120 if expname is None:
14121 result[node] = False
14123 result.setdefault(node, []).append(expname)
14128 class _ExportQuery(_QueryBase):
14129 FIELDS = query.EXPORT_FIELDS
14131 #: The node name is not a unique key for this query
14132 SORT_FIELD = "node"
14134 def ExpandNames(self, lu):
14135 lu.needed_locks = {}
14137 # The following variables interact with _QueryBase._GetNames
14139 self.wanted = _GetWantedNodes(lu, self.names)
14141 self.wanted = locking.ALL_SET
14143 self.do_locking = self.use_locking
14145 if self.do_locking:
14146 lu.share_locks = _ShareAll()
14147 lu.needed_locks = {
14148 locking.LEVEL_NODE: self.wanted,
14152 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14154 def DeclareLocks(self, lu, level):
14157 def _GetQueryData(self, lu):
14158 """Computes the list of nodes and their attributes.
14161 # Locking is not used
14163 assert not (compat.any(lu.glm.is_owned(level)
14164 for level in locking.LEVELS
14165 if level != locking.LEVEL_CLUSTER) or
14166 self.do_locking or self.use_locking)
14168 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14172 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14174 result.append((node, None))
14176 result.extend((node, expname) for expname in nres.payload)
14181 class LUBackupPrepare(NoHooksLU):
14182 """Prepares an instance for an export and returns useful information.
14187 def ExpandNames(self):
14188 self._ExpandAndLockInstance()
14190 def CheckPrereq(self):
14191 """Check prerequisites.
14194 instance_name = self.op.instance_name
14196 self.instance = self.cfg.GetInstanceInfo(instance_name)
14197 assert self.instance is not None, \
14198 "Cannot retrieve locked instance %s" % self.op.instance_name
14199 _CheckNodeOnline(self, self.instance.primary_node)
14201 self._cds = _GetClusterDomainSecret()
14203 def Exec(self, feedback_fn):
14204 """Prepares an instance for an export.
14207 instance = self.instance
14209 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14210 salt = utils.GenerateSecret(8)
14212 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14213 result = self.rpc.call_x509_cert_create(instance.primary_node,
14214 constants.RIE_CERT_VALIDITY)
14215 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14217 (name, cert_pem) = result.payload
14219 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14223 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14224 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14226 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14232 class LUBackupExport(LogicalUnit):
14233 """Export an instance to an image in the cluster.
14236 HPATH = "instance-export"
14237 HTYPE = constants.HTYPE_INSTANCE
14240 def CheckArguments(self):
14241 """Check the arguments.
14244 self.x509_key_name = self.op.x509_key_name
14245 self.dest_x509_ca_pem = self.op.destination_x509_ca
14247 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14248 if not self.x509_key_name:
14249 raise errors.OpPrereqError("Missing X509 key name for encryption",
14250 errors.ECODE_INVAL)
14252 if not self.dest_x509_ca_pem:
14253 raise errors.OpPrereqError("Missing destination X509 CA",
14254 errors.ECODE_INVAL)
14256 def ExpandNames(self):
14257 self._ExpandAndLockInstance()
14259 # Lock all nodes for local exports
14260 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14261 # FIXME: lock only instance primary and destination node
14263 # Sad but true, for now we have do lock all nodes, as we don't know where
14264 # the previous export might be, and in this LU we search for it and
14265 # remove it from its current node. In the future we could fix this by:
14266 # - making a tasklet to search (share-lock all), then create the
14267 # new one, then one to remove, after
14268 # - removing the removal operation altogether
14269 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14271 # Allocations should be stopped while this LU runs with node locks, but
14272 # it doesn't have to be exclusive
14273 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14274 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14276 def DeclareLocks(self, level):
14277 """Last minute lock declaration."""
14278 # All nodes are locked anyway, so nothing to do here.
14280 def BuildHooksEnv(self):
14281 """Build hooks env.
14283 This will run on the master, primary node and target node.
14287 "EXPORT_MODE": self.op.mode,
14288 "EXPORT_NODE": self.op.target_node,
14289 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14290 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14291 # TODO: Generic function for boolean env variables
14292 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14295 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14299 def BuildHooksNodes(self):
14300 """Build hooks nodes.
14303 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14305 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14306 nl.append(self.op.target_node)
14310 def CheckPrereq(self):
14311 """Check prerequisites.
14313 This checks that the instance and node names are valid.
14316 instance_name = self.op.instance_name
14318 self.instance = self.cfg.GetInstanceInfo(instance_name)
14319 assert self.instance is not None, \
14320 "Cannot retrieve locked instance %s" % self.op.instance_name
14321 _CheckNodeOnline(self, self.instance.primary_node)
14323 if (self.op.remove_instance and
14324 self.instance.admin_state == constants.ADMINST_UP and
14325 not self.op.shutdown):
14326 raise errors.OpPrereqError("Can not remove instance without shutting it"
14327 " down before", errors.ECODE_STATE)
14329 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14330 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14331 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14332 assert self.dst_node is not None
14334 _CheckNodeOnline(self, self.dst_node.name)
14335 _CheckNodeNotDrained(self, self.dst_node.name)
14338 self.dest_disk_info = None
14339 self.dest_x509_ca = None
14341 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14342 self.dst_node = None
14344 if len(self.op.target_node) != len(self.instance.disks):
14345 raise errors.OpPrereqError(("Received destination information for %s"
14346 " disks, but instance %s has %s disks") %
14347 (len(self.op.target_node), instance_name,
14348 len(self.instance.disks)),
14349 errors.ECODE_INVAL)
14351 cds = _GetClusterDomainSecret()
14353 # Check X509 key name
14355 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14356 except (TypeError, ValueError), err:
14357 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14358 errors.ECODE_INVAL)
14360 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14361 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14362 errors.ECODE_INVAL)
14364 # Load and verify CA
14366 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14367 except OpenSSL.crypto.Error, err:
14368 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14369 (err, ), errors.ECODE_INVAL)
14371 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14372 if errcode is not None:
14373 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14374 (msg, ), errors.ECODE_INVAL)
14376 self.dest_x509_ca = cert
14378 # Verify target information
14380 for idx, disk_data in enumerate(self.op.target_node):
14382 (host, port, magic) = \
14383 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14384 except errors.GenericError, err:
14385 raise errors.OpPrereqError("Target info for disk %s: %s" %
14386 (idx, err), errors.ECODE_INVAL)
14388 disk_info.append((host, port, magic))
14390 assert len(disk_info) == len(self.op.target_node)
14391 self.dest_disk_info = disk_info
14394 raise errors.ProgrammerError("Unhandled export mode %r" %
14397 # instance disk type verification
14398 # TODO: Implement export support for file-based disks
14399 for disk in self.instance.disks:
14400 if disk.dev_type == constants.LD_FILE:
14401 raise errors.OpPrereqError("Export not supported for instances with"
14402 " file-based disks", errors.ECODE_INVAL)
14404 def _CleanupExports(self, feedback_fn):
14405 """Removes exports of current instance from all other nodes.
14407 If an instance in a cluster with nodes A..D was exported to node C, its
14408 exports will be removed from the nodes A, B and D.
14411 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14413 nodelist = self.cfg.GetNodeList()
14414 nodelist.remove(self.dst_node.name)
14416 # on one-node clusters nodelist will be empty after the removal
14417 # if we proceed the backup would be removed because OpBackupQuery
14418 # substitutes an empty list with the full cluster node list.
14419 iname = self.instance.name
14421 feedback_fn("Removing old exports for instance %s" % iname)
14422 exportlist = self.rpc.call_export_list(nodelist)
14423 for node in exportlist:
14424 if exportlist[node].fail_msg:
14426 if iname in exportlist[node].payload:
14427 msg = self.rpc.call_export_remove(node, iname).fail_msg
14429 self.LogWarning("Could not remove older export for instance %s"
14430 " on node %s: %s", iname, node, msg)
14432 def Exec(self, feedback_fn):
14433 """Export an instance to an image in the cluster.
14436 assert self.op.mode in constants.EXPORT_MODES
14438 instance = self.instance
14439 src_node = instance.primary_node
14441 if self.op.shutdown:
14442 # shutdown the instance, but not the disks
14443 feedback_fn("Shutting down instance %s" % instance.name)
14444 result = self.rpc.call_instance_shutdown(src_node, instance,
14445 self.op.shutdown_timeout,
14447 # TODO: Maybe ignore failures if ignore_remove_failures is set
14448 result.Raise("Could not shutdown instance %s on"
14449 " node %s" % (instance.name, src_node))
14451 # set the disks ID correctly since call_instance_start needs the
14452 # correct drbd minor to create the symlinks
14453 for disk in instance.disks:
14454 self.cfg.SetDiskID(disk, src_node)
14456 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14459 # Activate the instance disks if we'exporting a stopped instance
14460 feedback_fn("Activating disks for %s" % instance.name)
14461 _StartInstanceDisks(self, instance, None)
14464 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14467 helper.CreateSnapshots()
14469 if (self.op.shutdown and
14470 instance.admin_state == constants.ADMINST_UP and
14471 not self.op.remove_instance):
14472 assert not activate_disks
14473 feedback_fn("Starting instance %s" % instance.name)
14474 result = self.rpc.call_instance_start(src_node,
14475 (instance, None, None), False,
14477 msg = result.fail_msg
14479 feedback_fn("Failed to start instance: %s" % msg)
14480 _ShutdownInstanceDisks(self, instance)
14481 raise errors.OpExecError("Could not start instance: %s" % msg)
14483 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14484 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14485 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14486 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14487 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14489 (key_name, _, _) = self.x509_key_name
14492 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14495 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14496 key_name, dest_ca_pem,
14501 # Check for backwards compatibility
14502 assert len(dresults) == len(instance.disks)
14503 assert compat.all(isinstance(i, bool) for i in dresults), \
14504 "Not all results are boolean: %r" % dresults
14508 feedback_fn("Deactivating disks for %s" % instance.name)
14509 _ShutdownInstanceDisks(self, instance)
14511 if not (compat.all(dresults) and fin_resu):
14514 failures.append("export finalization")
14515 if not compat.all(dresults):
14516 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14518 failures.append("disk export: disk(s) %s" % fdsk)
14520 raise errors.OpExecError("Export failed, errors in %s" %
14521 utils.CommaJoin(failures))
14523 # At this point, the export was successful, we can cleanup/finish
14525 # Remove instance if requested
14526 if self.op.remove_instance:
14527 feedback_fn("Removing instance %s" % instance.name)
14528 _RemoveInstance(self, feedback_fn, instance,
14529 self.op.ignore_remove_failures)
14531 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14532 self._CleanupExports(feedback_fn)
14534 return fin_resu, dresults
14537 class LUBackupRemove(NoHooksLU):
14538 """Remove exports related to the named instance.
14543 def ExpandNames(self):
14544 self.needed_locks = {
14545 # We need all nodes to be locked in order for RemoveExport to work, but
14546 # we don't need to lock the instance itself, as nothing will happen to it
14547 # (and we can remove exports also for a removed instance)
14548 locking.LEVEL_NODE: locking.ALL_SET,
14550 # Removing backups is quick, so blocking allocations is justified
14551 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14554 # Allocations should be stopped while this LU runs with node locks, but it
14555 # doesn't have to be exclusive
14556 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14558 def Exec(self, feedback_fn):
14559 """Remove any export.
14562 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14563 # If the instance was not found we'll try with the name that was passed in.
14564 # This will only work if it was an FQDN, though.
14566 if not instance_name:
14568 instance_name = self.op.instance_name
14570 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14571 exportlist = self.rpc.call_export_list(locked_nodes)
14573 for node in exportlist:
14574 msg = exportlist[node].fail_msg
14576 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14578 if instance_name in exportlist[node].payload:
14580 result = self.rpc.call_export_remove(node, instance_name)
14581 msg = result.fail_msg
14583 logging.error("Could not remove export for instance %s"
14584 " on node %s: %s", instance_name, node, msg)
14586 if fqdn_warn and not found:
14587 feedback_fn("Export not found. If trying to remove an export belonging"
14588 " to a deleted instance please use its Fully Qualified"
14592 class LUGroupAdd(LogicalUnit):
14593 """Logical unit for creating node groups.
14596 HPATH = "group-add"
14597 HTYPE = constants.HTYPE_GROUP
14600 def ExpandNames(self):
14601 # We need the new group's UUID here so that we can create and acquire the
14602 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14603 # that it should not check whether the UUID exists in the configuration.
14604 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14605 self.needed_locks = {}
14606 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14608 def CheckPrereq(self):
14609 """Check prerequisites.
14611 This checks that the given group name is not an existing node group
14616 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14617 except errors.OpPrereqError:
14620 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14621 " node group (UUID: %s)" %
14622 (self.op.group_name, existing_uuid),
14623 errors.ECODE_EXISTS)
14625 if self.op.ndparams:
14626 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14628 if self.op.hv_state:
14629 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14631 self.new_hv_state = None
14633 if self.op.disk_state:
14634 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14636 self.new_disk_state = None
14638 if self.op.diskparams:
14639 for templ in constants.DISK_TEMPLATES:
14640 if templ in self.op.diskparams:
14641 utils.ForceDictType(self.op.diskparams[templ],
14642 constants.DISK_DT_TYPES)
14643 self.new_diskparams = self.op.diskparams
14645 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14646 except errors.OpPrereqError, err:
14647 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14648 errors.ECODE_INVAL)
14650 self.new_diskparams = {}
14652 if self.op.ipolicy:
14653 cluster = self.cfg.GetClusterInfo()
14654 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14656 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14657 except errors.ConfigurationError, err:
14658 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14659 errors.ECODE_INVAL)
14661 def BuildHooksEnv(self):
14662 """Build hooks env.
14666 "GROUP_NAME": self.op.group_name,
14669 def BuildHooksNodes(self):
14670 """Build hooks nodes.
14673 mn = self.cfg.GetMasterNode()
14674 return ([mn], [mn])
14676 def Exec(self, feedback_fn):
14677 """Add the node group to the cluster.
14680 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14681 uuid=self.group_uuid,
14682 alloc_policy=self.op.alloc_policy,
14683 ndparams=self.op.ndparams,
14684 diskparams=self.new_diskparams,
14685 ipolicy=self.op.ipolicy,
14686 hv_state_static=self.new_hv_state,
14687 disk_state_static=self.new_disk_state)
14689 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14690 del self.remove_locks[locking.LEVEL_NODEGROUP]
14693 class LUGroupAssignNodes(NoHooksLU):
14694 """Logical unit for assigning nodes to groups.
14699 def ExpandNames(self):
14700 # These raise errors.OpPrereqError on their own:
14701 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14702 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14704 # We want to lock all the affected nodes and groups. We have readily
14705 # available the list of nodes, and the *destination* group. To gather the
14706 # list of "source" groups, we need to fetch node information later on.
14707 self.needed_locks = {
14708 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14709 locking.LEVEL_NODE: self.op.nodes,
14712 def DeclareLocks(self, level):
14713 if level == locking.LEVEL_NODEGROUP:
14714 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14716 # Try to get all affected nodes' groups without having the group or node
14717 # lock yet. Needs verification later in the code flow.
14718 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14720 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14722 def CheckPrereq(self):
14723 """Check prerequisites.
14726 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14727 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14728 frozenset(self.op.nodes))
14730 expected_locks = (set([self.group_uuid]) |
14731 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14732 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14733 if actual_locks != expected_locks:
14734 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14735 " current groups are '%s', used to be '%s'" %
14736 (utils.CommaJoin(expected_locks),
14737 utils.CommaJoin(actual_locks)))
14739 self.node_data = self.cfg.GetAllNodesInfo()
14740 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14741 instance_data = self.cfg.GetAllInstancesInfo()
14743 if self.group is None:
14744 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14745 (self.op.group_name, self.group_uuid))
14747 (new_splits, previous_splits) = \
14748 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14749 for node in self.op.nodes],
14750 self.node_data, instance_data)
14753 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14755 if not self.op.force:
14756 raise errors.OpExecError("The following instances get split by this"
14757 " change and --force was not given: %s" %
14760 self.LogWarning("This operation will split the following instances: %s",
14763 if previous_splits:
14764 self.LogWarning("In addition, these already-split instances continue"
14765 " to be split across groups: %s",
14766 utils.CommaJoin(utils.NiceSort(previous_splits)))
14768 def Exec(self, feedback_fn):
14769 """Assign nodes to a new group.
14772 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14774 self.cfg.AssignGroupNodes(mods)
14777 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14778 """Check for split instances after a node assignment.
14780 This method considers a series of node assignments as an atomic operation,
14781 and returns information about split instances after applying the set of
14784 In particular, it returns information about newly split instances, and
14785 instances that were already split, and remain so after the change.
14787 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14790 @type changes: list of (node_name, new_group_uuid) pairs.
14791 @param changes: list of node assignments to consider.
14792 @param node_data: a dict with data for all nodes
14793 @param instance_data: a dict with all instances to consider
14794 @rtype: a two-tuple
14795 @return: a list of instances that were previously okay and result split as a
14796 consequence of this change, and a list of instances that were previously
14797 split and this change does not fix.
14800 changed_nodes = dict((node, group) for node, group in changes
14801 if node_data[node].group != group)
14803 all_split_instances = set()
14804 previously_split_instances = set()
14806 def InstanceNodes(instance):
14807 return [instance.primary_node] + list(instance.secondary_nodes)
14809 for inst in instance_data.values():
14810 if inst.disk_template not in constants.DTS_INT_MIRROR:
14813 instance_nodes = InstanceNodes(inst)
14815 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14816 previously_split_instances.add(inst.name)
14818 if len(set(changed_nodes.get(node, node_data[node].group)
14819 for node in instance_nodes)) > 1:
14820 all_split_instances.add(inst.name)
14822 return (list(all_split_instances - previously_split_instances),
14823 list(previously_split_instances & all_split_instances))
14826 class _GroupQuery(_QueryBase):
14827 FIELDS = query.GROUP_FIELDS
14829 def ExpandNames(self, lu):
14830 lu.needed_locks = {}
14832 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14833 self._cluster = lu.cfg.GetClusterInfo()
14834 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14837 self.wanted = [name_to_uuid[name]
14838 for name in utils.NiceSort(name_to_uuid.keys())]
14840 # Accept names to be either names or UUIDs.
14843 all_uuid = frozenset(self._all_groups.keys())
14845 for name in self.names:
14846 if name in all_uuid:
14847 self.wanted.append(name)
14848 elif name in name_to_uuid:
14849 self.wanted.append(name_to_uuid[name])
14851 missing.append(name)
14854 raise errors.OpPrereqError("Some groups do not exist: %s" %
14855 utils.CommaJoin(missing),
14856 errors.ECODE_NOENT)
14858 def DeclareLocks(self, lu, level):
14861 def _GetQueryData(self, lu):
14862 """Computes the list of node groups and their attributes.
14865 do_nodes = query.GQ_NODE in self.requested_data
14866 do_instances = query.GQ_INST in self.requested_data
14868 group_to_nodes = None
14869 group_to_instances = None
14871 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14872 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14873 # latter GetAllInstancesInfo() is not enough, for we have to go through
14874 # instance->node. Hence, we will need to process nodes even if we only need
14875 # instance information.
14876 if do_nodes or do_instances:
14877 all_nodes = lu.cfg.GetAllNodesInfo()
14878 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14881 for node in all_nodes.values():
14882 if node.group in group_to_nodes:
14883 group_to_nodes[node.group].append(node.name)
14884 node_to_group[node.name] = node.group
14887 all_instances = lu.cfg.GetAllInstancesInfo()
14888 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14890 for instance in all_instances.values():
14891 node = instance.primary_node
14892 if node in node_to_group:
14893 group_to_instances[node_to_group[node]].append(instance.name)
14896 # Do not pass on node information if it was not requested.
14897 group_to_nodes = None
14899 return query.GroupQueryData(self._cluster,
14900 [self._all_groups[uuid]
14901 for uuid in self.wanted],
14902 group_to_nodes, group_to_instances,
14903 query.GQ_DISKPARAMS in self.requested_data)
14906 class LUGroupQuery(NoHooksLU):
14907 """Logical unit for querying node groups.
14912 def CheckArguments(self):
14913 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14914 self.op.output_fields, False)
14916 def ExpandNames(self):
14917 self.gq.ExpandNames(self)
14919 def DeclareLocks(self, level):
14920 self.gq.DeclareLocks(self, level)
14922 def Exec(self, feedback_fn):
14923 return self.gq.OldStyleQuery(self)
14926 class LUGroupSetParams(LogicalUnit):
14927 """Modifies the parameters of a node group.
14930 HPATH = "group-modify"
14931 HTYPE = constants.HTYPE_GROUP
14934 def CheckArguments(self):
14937 self.op.diskparams,
14938 self.op.alloc_policy,
14940 self.op.disk_state,
14944 if all_changes.count(None) == len(all_changes):
14945 raise errors.OpPrereqError("Please pass at least one modification",
14946 errors.ECODE_INVAL)
14948 def ExpandNames(self):
14949 # This raises errors.OpPrereqError on its own:
14950 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14952 self.needed_locks = {
14953 locking.LEVEL_INSTANCE: [],
14954 locking.LEVEL_NODEGROUP: [self.group_uuid],
14957 self.share_locks[locking.LEVEL_INSTANCE] = 1
14959 def DeclareLocks(self, level):
14960 if level == locking.LEVEL_INSTANCE:
14961 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14963 # Lock instances optimistically, needs verification once group lock has
14965 self.needed_locks[locking.LEVEL_INSTANCE] = \
14966 self.cfg.GetNodeGroupInstances(self.group_uuid)
14969 def _UpdateAndVerifyDiskParams(old, new):
14970 """Updates and verifies disk parameters.
14973 new_params = _GetUpdatedParams(old, new)
14974 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14977 def CheckPrereq(self):
14978 """Check prerequisites.
14981 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14983 # Check if locked instances are still correct
14984 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14986 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14987 cluster = self.cfg.GetClusterInfo()
14989 if self.group is None:
14990 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14991 (self.op.group_name, self.group_uuid))
14993 if self.op.ndparams:
14994 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14995 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14996 self.new_ndparams = new_ndparams
14998 if self.op.diskparams:
14999 diskparams = self.group.diskparams
15000 uavdp = self._UpdateAndVerifyDiskParams
15001 # For each disktemplate subdict update and verify the values
15002 new_diskparams = dict((dt,
15003 uavdp(diskparams.get(dt, {}),
15004 self.op.diskparams[dt]))
15005 for dt in constants.DISK_TEMPLATES
15006 if dt in self.op.diskparams)
15007 # As we've all subdicts of diskparams ready, lets merge the actual
15008 # dict with all updated subdicts
15009 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15011 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15012 except errors.OpPrereqError, err:
15013 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
15014 errors.ECODE_INVAL)
15016 if self.op.hv_state:
15017 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15018 self.group.hv_state_static)
15020 if self.op.disk_state:
15021 self.new_disk_state = \
15022 _MergeAndVerifyDiskState(self.op.disk_state,
15023 self.group.disk_state_static)
15025 if self.op.ipolicy:
15026 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15030 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15031 inst_filter = lambda inst: inst.name in owned_instances
15032 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15033 gmi = ganeti.masterd.instance
15035 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15037 new_ipolicy, instances, self.cfg)
15040 self.LogWarning("After the ipolicy change the following instances"
15041 " violate them: %s",
15042 utils.CommaJoin(violations))
15044 def BuildHooksEnv(self):
15045 """Build hooks env.
15049 "GROUP_NAME": self.op.group_name,
15050 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15053 def BuildHooksNodes(self):
15054 """Build hooks nodes.
15057 mn = self.cfg.GetMasterNode()
15058 return ([mn], [mn])
15060 def Exec(self, feedback_fn):
15061 """Modifies the node group.
15066 if self.op.ndparams:
15067 self.group.ndparams = self.new_ndparams
15068 result.append(("ndparams", str(self.group.ndparams)))
15070 if self.op.diskparams:
15071 self.group.diskparams = self.new_diskparams
15072 result.append(("diskparams", str(self.group.diskparams)))
15074 if self.op.alloc_policy:
15075 self.group.alloc_policy = self.op.alloc_policy
15077 if self.op.hv_state:
15078 self.group.hv_state_static = self.new_hv_state
15080 if self.op.disk_state:
15081 self.group.disk_state_static = self.new_disk_state
15083 if self.op.ipolicy:
15084 self.group.ipolicy = self.new_ipolicy
15086 self.cfg.Update(self.group, feedback_fn)
15090 class LUGroupRemove(LogicalUnit):
15091 HPATH = "group-remove"
15092 HTYPE = constants.HTYPE_GROUP
15095 def ExpandNames(self):
15096 # This will raises errors.OpPrereqError on its own:
15097 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15098 self.needed_locks = {
15099 locking.LEVEL_NODEGROUP: [self.group_uuid],
15102 def CheckPrereq(self):
15103 """Check prerequisites.
15105 This checks that the given group name exists as a node group, that is
15106 empty (i.e., contains no nodes), and that is not the last group of the
15110 # Verify that the group is empty.
15111 group_nodes = [node.name
15112 for node in self.cfg.GetAllNodesInfo().values()
15113 if node.group == self.group_uuid]
15116 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15118 (self.op.group_name,
15119 utils.CommaJoin(utils.NiceSort(group_nodes))),
15120 errors.ECODE_STATE)
15122 # Verify the cluster would not be left group-less.
15123 if len(self.cfg.GetNodeGroupList()) == 1:
15124 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15125 " removed" % self.op.group_name,
15126 errors.ECODE_STATE)
15128 def BuildHooksEnv(self):
15129 """Build hooks env.
15133 "GROUP_NAME": self.op.group_name,
15136 def BuildHooksNodes(self):
15137 """Build hooks nodes.
15140 mn = self.cfg.GetMasterNode()
15141 return ([mn], [mn])
15143 def Exec(self, feedback_fn):
15144 """Remove the node group.
15148 self.cfg.RemoveNodeGroup(self.group_uuid)
15149 except errors.ConfigurationError:
15150 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15151 (self.op.group_name, self.group_uuid))
15153 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15156 class LUGroupRename(LogicalUnit):
15157 HPATH = "group-rename"
15158 HTYPE = constants.HTYPE_GROUP
15161 def ExpandNames(self):
15162 # This raises errors.OpPrereqError on its own:
15163 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15165 self.needed_locks = {
15166 locking.LEVEL_NODEGROUP: [self.group_uuid],
15169 def CheckPrereq(self):
15170 """Check prerequisites.
15172 Ensures requested new name is not yet used.
15176 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15177 except errors.OpPrereqError:
15180 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15181 " node group (UUID: %s)" %
15182 (self.op.new_name, new_name_uuid),
15183 errors.ECODE_EXISTS)
15185 def BuildHooksEnv(self):
15186 """Build hooks env.
15190 "OLD_NAME": self.op.group_name,
15191 "NEW_NAME": self.op.new_name,
15194 def BuildHooksNodes(self):
15195 """Build hooks nodes.
15198 mn = self.cfg.GetMasterNode()
15200 all_nodes = self.cfg.GetAllNodesInfo()
15201 all_nodes.pop(mn, None)
15204 run_nodes.extend(node.name for node in all_nodes.values()
15205 if node.group == self.group_uuid)
15207 return (run_nodes, run_nodes)
15209 def Exec(self, feedback_fn):
15210 """Rename the node group.
15213 group = self.cfg.GetNodeGroup(self.group_uuid)
15216 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15217 (self.op.group_name, self.group_uuid))
15219 group.name = self.op.new_name
15220 self.cfg.Update(group, feedback_fn)
15222 return self.op.new_name
15225 class LUGroupEvacuate(LogicalUnit):
15226 HPATH = "group-evacuate"
15227 HTYPE = constants.HTYPE_GROUP
15230 def ExpandNames(self):
15231 # This raises errors.OpPrereqError on its own:
15232 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15234 if self.op.target_groups:
15235 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15236 self.op.target_groups)
15238 self.req_target_uuids = []
15240 if self.group_uuid in self.req_target_uuids:
15241 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15242 " as a target group (targets are %s)" %
15244 utils.CommaJoin(self.req_target_uuids)),
15245 errors.ECODE_INVAL)
15247 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15249 self.share_locks = _ShareAll()
15250 self.needed_locks = {
15251 locking.LEVEL_INSTANCE: [],
15252 locking.LEVEL_NODEGROUP: [],
15253 locking.LEVEL_NODE: [],
15256 def DeclareLocks(self, level):
15257 if level == locking.LEVEL_INSTANCE:
15258 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15260 # Lock instances optimistically, needs verification once node and group
15261 # locks have been acquired
15262 self.needed_locks[locking.LEVEL_INSTANCE] = \
15263 self.cfg.GetNodeGroupInstances(self.group_uuid)
15265 elif level == locking.LEVEL_NODEGROUP:
15266 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15268 if self.req_target_uuids:
15269 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15271 # Lock all groups used by instances optimistically; this requires going
15272 # via the node before it's locked, requiring verification later on
15273 lock_groups.update(group_uuid
15274 for instance_name in
15275 self.owned_locks(locking.LEVEL_INSTANCE)
15277 self.cfg.GetInstanceNodeGroups(instance_name))
15279 # No target groups, need to lock all of them
15280 lock_groups = locking.ALL_SET
15282 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15284 elif level == locking.LEVEL_NODE:
15285 # This will only lock the nodes in the group to be evacuated which
15286 # contain actual instances
15287 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15288 self._LockInstancesNodes()
15290 # Lock all nodes in group to be evacuated and target groups
15291 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15292 assert self.group_uuid in owned_groups
15293 member_nodes = [node_name
15294 for group in owned_groups
15295 for node_name in self.cfg.GetNodeGroup(group).members]
15296 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15298 def CheckPrereq(self):
15299 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15300 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15301 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15303 assert owned_groups.issuperset(self.req_target_uuids)
15304 assert self.group_uuid in owned_groups
15306 # Check if locked instances are still correct
15307 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15309 # Get instance information
15310 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15312 # Check if node groups for locked instances are still correct
15313 _CheckInstancesNodeGroups(self.cfg, self.instances,
15314 owned_groups, owned_nodes, self.group_uuid)
15316 if self.req_target_uuids:
15317 # User requested specific target groups
15318 self.target_uuids = self.req_target_uuids
15320 # All groups except the one to be evacuated are potential targets
15321 self.target_uuids = [group_uuid for group_uuid in owned_groups
15322 if group_uuid != self.group_uuid]
15324 if not self.target_uuids:
15325 raise errors.OpPrereqError("There are no possible target groups",
15326 errors.ECODE_INVAL)
15328 def BuildHooksEnv(self):
15329 """Build hooks env.
15333 "GROUP_NAME": self.op.group_name,
15334 "TARGET_GROUPS": " ".join(self.target_uuids),
15337 def BuildHooksNodes(self):
15338 """Build hooks nodes.
15341 mn = self.cfg.GetMasterNode()
15343 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15345 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15347 return (run_nodes, run_nodes)
15349 def Exec(self, feedback_fn):
15350 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15352 assert self.group_uuid not in self.target_uuids
15354 req = iallocator.IAReqGroupChange(instances=instances,
15355 target_groups=self.target_uuids)
15356 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15358 ial.Run(self.op.iallocator)
15360 if not ial.success:
15361 raise errors.OpPrereqError("Can't compute group evacuation using"
15362 " iallocator '%s': %s" %
15363 (self.op.iallocator, ial.info),
15364 errors.ECODE_NORES)
15366 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15368 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15369 len(jobs), self.op.group_name)
15371 return ResultWithJobs(jobs)
15374 class LUTestDelay(NoHooksLU):
15375 """Sleep for a specified amount of time.
15377 This LU sleeps on the master and/or nodes for a specified amount of
15383 def ExpandNames(self):
15384 """Expand names and set required locks.
15386 This expands the node list, if any.
15389 self.needed_locks = {}
15390 if self.op.on_nodes:
15391 # _GetWantedNodes can be used here, but is not always appropriate to use
15392 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15393 # more information.
15394 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15395 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15397 def _TestDelay(self):
15398 """Do the actual sleep.
15401 if self.op.on_master:
15402 if not utils.TestDelay(self.op.duration):
15403 raise errors.OpExecError("Error during master delay test")
15404 if self.op.on_nodes:
15405 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15406 for node, node_result in result.items():
15407 node_result.Raise("Failure during rpc call to node %s" % node)
15409 def Exec(self, feedback_fn):
15410 """Execute the test delay opcode, with the wanted repetitions.
15413 if self.op.repeat == 0:
15416 top_value = self.op.repeat - 1
15417 for i in range(self.op.repeat):
15418 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15422 class LURestrictedCommand(NoHooksLU):
15423 """Logical unit for executing restricted commands.
15428 def ExpandNames(self):
15430 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15432 self.needed_locks = {
15433 locking.LEVEL_NODE: self.op.nodes,
15435 self.share_locks = {
15436 locking.LEVEL_NODE: not self.op.use_locking,
15439 def CheckPrereq(self):
15440 """Check prerequisites.
15444 def Exec(self, feedback_fn):
15445 """Execute restricted command and return output.
15448 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15450 # Check if correct locks are held
15451 assert set(self.op.nodes).issubset(owned_nodes)
15453 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15457 for node_name in self.op.nodes:
15458 nres = rpcres[node_name]
15460 msg = ("Command '%s' on node '%s' failed: %s" %
15461 (self.op.command, node_name, nres.fail_msg))
15462 result.append((False, msg))
15464 result.append((True, nres.payload))
15469 class LUTestJqueue(NoHooksLU):
15470 """Utility LU to test some aspects of the job queue.
15475 # Must be lower than default timeout for WaitForJobChange to see whether it
15476 # notices changed jobs
15477 _CLIENT_CONNECT_TIMEOUT = 20.0
15478 _CLIENT_CONFIRM_TIMEOUT = 60.0
15481 def _NotifyUsingSocket(cls, cb, errcls):
15482 """Opens a Unix socket and waits for another program to connect.
15485 @param cb: Callback to send socket name to client
15486 @type errcls: class
15487 @param errcls: Exception class to use for errors
15490 # Using a temporary directory as there's no easy way to create temporary
15491 # sockets without writing a custom loop around tempfile.mktemp and
15493 tmpdir = tempfile.mkdtemp()
15495 tmpsock = utils.PathJoin(tmpdir, "sock")
15497 logging.debug("Creating temporary socket at %s", tmpsock)
15498 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15503 # Send details to client
15506 # Wait for client to connect before continuing
15507 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15509 (conn, _) = sock.accept()
15510 except socket.error, err:
15511 raise errcls("Client didn't connect in time (%s)" % err)
15515 # Remove as soon as client is connected
15516 shutil.rmtree(tmpdir)
15518 # Wait for client to close
15521 # pylint: disable=E1101
15522 # Instance of '_socketobject' has no ... member
15523 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15525 except socket.error, err:
15526 raise errcls("Client failed to confirm notification (%s)" % err)
15530 def _SendNotification(self, test, arg, sockname):
15531 """Sends a notification to the client.
15534 @param test: Test name
15535 @param arg: Test argument (depends on test)
15536 @type sockname: string
15537 @param sockname: Socket path
15540 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15542 def _Notify(self, prereq, test, arg):
15543 """Notifies the client of a test.
15546 @param prereq: Whether this is a prereq-phase test
15548 @param test: Test name
15549 @param arg: Test argument (depends on test)
15553 errcls = errors.OpPrereqError
15555 errcls = errors.OpExecError
15557 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15561 def CheckArguments(self):
15562 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15563 self.expandnames_calls = 0
15565 def ExpandNames(self):
15566 checkargs_calls = getattr(self, "checkargs_calls", 0)
15567 if checkargs_calls < 1:
15568 raise errors.ProgrammerError("CheckArguments was not called")
15570 self.expandnames_calls += 1
15572 if self.op.notify_waitlock:
15573 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15575 self.LogInfo("Expanding names")
15577 # Get lock on master node (just to get a lock, not for a particular reason)
15578 self.needed_locks = {
15579 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15582 def Exec(self, feedback_fn):
15583 if self.expandnames_calls < 1:
15584 raise errors.ProgrammerError("ExpandNames was not called")
15586 if self.op.notify_exec:
15587 self._Notify(False, constants.JQT_EXEC, None)
15589 self.LogInfo("Executing")
15591 if self.op.log_messages:
15592 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15593 for idx, msg in enumerate(self.op.log_messages):
15594 self.LogInfo("Sending log message %s", idx + 1)
15595 feedback_fn(constants.JQT_MSGPREFIX + msg)
15596 # Report how many test messages have been sent
15597 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15600 raise errors.OpExecError("Opcode failure was requested")
15605 class LUTestAllocator(NoHooksLU):
15606 """Run allocator tests.
15608 This LU runs the allocator tests
15611 def CheckPrereq(self):
15612 """Check prerequisites.
15614 This checks the opcode parameters depending on the director and mode test.
15617 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15618 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15619 for attr in ["memory", "disks", "disk_template",
15620 "os", "tags", "nics", "vcpus"]:
15621 if not hasattr(self.op, attr):
15622 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15623 attr, errors.ECODE_INVAL)
15624 iname = self.cfg.ExpandInstanceName(self.op.name)
15625 if iname is not None:
15626 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15627 iname, errors.ECODE_EXISTS)
15628 if not isinstance(self.op.nics, list):
15629 raise errors.OpPrereqError("Invalid parameter 'nics'",
15630 errors.ECODE_INVAL)
15631 if not isinstance(self.op.disks, list):
15632 raise errors.OpPrereqError("Invalid parameter 'disks'",
15633 errors.ECODE_INVAL)
15634 for row in self.op.disks:
15635 if (not isinstance(row, dict) or
15636 constants.IDISK_SIZE not in row or
15637 not isinstance(row[constants.IDISK_SIZE], int) or
15638 constants.IDISK_MODE not in row or
15639 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15640 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15641 " parameter", errors.ECODE_INVAL)
15642 if self.op.hypervisor is None:
15643 self.op.hypervisor = self.cfg.GetHypervisorType()
15644 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15645 fname = _ExpandInstanceName(self.cfg, self.op.name)
15646 self.op.name = fname
15647 self.relocate_from = \
15648 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15649 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15650 constants.IALLOCATOR_MODE_NODE_EVAC):
15651 if not self.op.instances:
15652 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15653 self.op.instances = _GetWantedInstances(self, self.op.instances)
15655 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15656 self.op.mode, errors.ECODE_INVAL)
15658 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15659 if self.op.iallocator is None:
15660 raise errors.OpPrereqError("Missing allocator name",
15661 errors.ECODE_INVAL)
15662 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15663 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15664 self.op.direction, errors.ECODE_INVAL)
15666 def Exec(self, feedback_fn):
15667 """Run the allocator test.
15670 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15671 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15672 memory=self.op.memory,
15673 disks=self.op.disks,
15674 disk_template=self.op.disk_template,
15678 vcpus=self.op.vcpus,
15679 spindle_use=self.op.spindle_use,
15680 hypervisor=self.op.hypervisor,
15681 node_whitelist=None)
15682 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15683 req = iallocator.IAReqRelocate(name=self.op.name,
15684 relocate_from=list(self.relocate_from))
15685 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15686 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15687 target_groups=self.op.target_groups)
15688 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15689 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15690 evac_mode=self.op.evac_mode)
15691 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15692 disk_template = self.op.disk_template
15693 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15694 memory=self.op.memory,
15695 disks=self.op.disks,
15696 disk_template=disk_template,
15700 vcpus=self.op.vcpus,
15701 spindle_use=self.op.spindle_use,
15702 hypervisor=self.op.hypervisor)
15703 for idx in range(self.op.count)]
15704 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15706 raise errors.ProgrammerError("Uncatched mode %s in"
15707 " LUTestAllocator.Exec", self.op.mode)
15709 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15710 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15711 result = ial.in_text
15713 ial.Run(self.op.iallocator, validate=False)
15714 result = ial.out_text
15718 class LUNetworkAdd(LogicalUnit):
15719 """Logical unit for creating networks.
15722 HPATH = "network-add"
15723 HTYPE = constants.HTYPE_NETWORK
15726 def BuildHooksNodes(self):
15727 """Build hooks nodes.
15730 mn = self.cfg.GetMasterNode()
15731 return ([mn], [mn])
15733 def CheckArguments(self):
15734 if self.op.mac_prefix:
15735 self.op.mac_prefix = \
15736 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15738 def ExpandNames(self):
15739 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15741 if self.op.conflicts_check:
15742 self.share_locks[locking.LEVEL_NODE] = 1
15743 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
15744 self.needed_locks = {
15745 locking.LEVEL_NODE: locking.ALL_SET,
15746 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
15749 self.needed_locks = {}
15751 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15753 def CheckPrereq(self):
15754 if self.op.network is None:
15755 raise errors.OpPrereqError("Network must be given",
15756 errors.ECODE_INVAL)
15759 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
15760 except errors.OpPrereqError:
15763 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
15764 " network (UUID: %s)" %
15765 (self.op.network_name, existing_uuid),
15766 errors.ECODE_EXISTS)
15768 # Check tag validity
15769 for tag in self.op.tags:
15770 objects.TaggableObject.ValidateTag(tag)
15772 def BuildHooksEnv(self):
15773 """Build hooks env.
15777 "name": self.op.network_name,
15778 "subnet": self.op.network,
15779 "gateway": self.op.gateway,
15780 "network6": self.op.network6,
15781 "gateway6": self.op.gateway6,
15782 "mac_prefix": self.op.mac_prefix,
15783 "tags": self.op.tags,
15785 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15787 def Exec(self, feedback_fn):
15788 """Add the ip pool to the cluster.
15791 nobj = objects.Network(name=self.op.network_name,
15792 network=self.op.network,
15793 gateway=self.op.gateway,
15794 network6=self.op.network6,
15795 gateway6=self.op.gateway6,
15796 mac_prefix=self.op.mac_prefix,
15797 uuid=self.network_uuid)
15798 # Initialize the associated address pool
15800 pool = network.AddressPool.InitializeNetwork(nobj)
15801 except errors.AddressPoolError, err:
15802 raise errors.OpExecError("Cannot create IP address pool for network"
15803 " '%s': %s" % (self.op.network_name, err))
15805 # Check if we need to reserve the nodes and the cluster master IP
15806 # These may not be allocated to any instances in routed mode, as
15807 # they wouldn't function anyway.
15808 if self.op.conflicts_check:
15809 for node in self.cfg.GetAllNodesInfo().values():
15810 for ip in [node.primary_ip, node.secondary_ip]:
15812 if pool.Contains(ip):
15814 self.LogInfo("Reserved IP address of node '%s' (%s)",
15816 except errors.AddressPoolError, err:
15817 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
15818 ip, node.name, err)
15820 master_ip = self.cfg.GetClusterInfo().master_ip
15822 if pool.Contains(master_ip):
15823 pool.Reserve(master_ip)
15824 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15825 except errors.AddressPoolError, err:
15826 self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
15829 if self.op.add_reserved_ips:
15830 for ip in self.op.add_reserved_ips:
15832 pool.Reserve(ip, external=True)
15833 except errors.AddressPoolError, err:
15834 raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
15838 for tag in self.op.tags:
15841 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15842 del self.remove_locks[locking.LEVEL_NETWORK]
15845 class LUNetworkRemove(LogicalUnit):
15846 HPATH = "network-remove"
15847 HTYPE = constants.HTYPE_NETWORK
15850 def ExpandNames(self):
15851 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15853 self.share_locks[locking.LEVEL_NODEGROUP] = 1
15854 self.needed_locks = {
15855 locking.LEVEL_NETWORK: [self.network_uuid],
15856 locking.LEVEL_NODEGROUP: locking.ALL_SET,
15859 def CheckPrereq(self):
15860 """Check prerequisites.
15862 This checks that the given network name exists as a network, that is
15863 empty (i.e., contains no nodes), and that is not the last group of the
15867 # Verify that the network is not conncted.
15868 node_groups = [group.name
15869 for group in self.cfg.GetAllNodeGroupsInfo().values()
15870 if self.network_uuid in group.networks]
15873 self.LogWarning("Network '%s' is connected to the following"
15874 " node groups: %s" %
15875 (self.op.network_name,
15876 utils.CommaJoin(utils.NiceSort(node_groups))))
15877 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
15879 def BuildHooksEnv(self):
15880 """Build hooks env.
15884 "NETWORK_NAME": self.op.network_name,
15887 def BuildHooksNodes(self):
15888 """Build hooks nodes.
15891 mn = self.cfg.GetMasterNode()
15892 return ([mn], [mn])
15894 def Exec(self, feedback_fn):
15895 """Remove the network.
15899 self.cfg.RemoveNetwork(self.network_uuid)
15900 except errors.ConfigurationError:
15901 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15902 (self.op.network_name, self.network_uuid))
15905 class LUNetworkSetParams(LogicalUnit):
15906 """Modifies the parameters of a network.
15909 HPATH = "network-modify"
15910 HTYPE = constants.HTYPE_NETWORK
15913 def CheckArguments(self):
15914 if (self.op.gateway and
15915 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15916 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15917 " at once", errors.ECODE_INVAL)
15919 def ExpandNames(self):
15920 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15922 self.needed_locks = {
15923 locking.LEVEL_NETWORK: [self.network_uuid],
15926 def CheckPrereq(self):
15927 """Check prerequisites.
15930 self.network = self.cfg.GetNetwork(self.network_uuid)
15931 self.gateway = self.network.gateway
15932 self.mac_prefix = self.network.mac_prefix
15933 self.network6 = self.network.network6
15934 self.gateway6 = self.network.gateway6
15935 self.tags = self.network.tags
15937 self.pool = network.AddressPool(self.network)
15939 if self.op.gateway:
15940 if self.op.gateway == constants.VALUE_NONE:
15941 self.gateway = None
15943 self.gateway = self.op.gateway
15944 if self.pool.IsReserved(self.gateway):
15945 raise errors.OpPrereqError("Gateway IP address '%s' is already"
15946 " reserved" % self.gateway,
15947 errors.ECODE_STATE)
15949 if self.op.mac_prefix:
15950 if self.op.mac_prefix == constants.VALUE_NONE:
15951 self.mac_prefix = None
15953 self.mac_prefix = \
15954 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15956 if self.op.gateway6:
15957 if self.op.gateway6 == constants.VALUE_NONE:
15958 self.gateway6 = None
15960 self.gateway6 = self.op.gateway6
15962 if self.op.network6:
15963 if self.op.network6 == constants.VALUE_NONE:
15964 self.network6 = None
15966 self.network6 = self.op.network6
15968 def BuildHooksEnv(self):
15969 """Build hooks env.
15973 "name": self.op.network_name,
15974 "subnet": self.network.network,
15975 "gateway": self.gateway,
15976 "network6": self.network6,
15977 "gateway6": self.gateway6,
15978 "mac_prefix": self.mac_prefix,
15981 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15983 def BuildHooksNodes(self):
15984 """Build hooks nodes.
15987 mn = self.cfg.GetMasterNode()
15988 return ([mn], [mn])
15990 def Exec(self, feedback_fn):
15991 """Modifies the network.
15994 #TODO: reserve/release via temporary reservation manager
15995 # extend cfg.ReserveIp/ReleaseIp with the external flag
15996 if self.op.gateway:
15997 if self.gateway == self.network.gateway:
15998 self.LogWarning("Gateway is already %s", self.gateway)
16001 self.pool.Reserve(self.gateway, external=True)
16002 if self.network.gateway:
16003 self.pool.Release(self.network.gateway, external=True)
16004 self.network.gateway = self.gateway
16006 if self.op.add_reserved_ips:
16007 for ip in self.op.add_reserved_ips:
16009 if self.pool.IsReserved(ip):
16010 self.LogWarning("IP address %s is already reserved", ip)
16012 self.pool.Reserve(ip, external=True)
16013 except errors.AddressPoolError, err:
16014 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16016 if self.op.remove_reserved_ips:
16017 for ip in self.op.remove_reserved_ips:
16018 if ip == self.network.gateway:
16019 self.LogWarning("Cannot unreserve Gateway's IP")
16022 if not self.pool.IsReserved(ip):
16023 self.LogWarning("IP address %s is already unreserved", ip)
16025 self.pool.Release(ip, external=True)
16026 except errors.AddressPoolError, err:
16027 self.LogWarning("Cannot release IP address %s: %s", ip, err)
16029 if self.op.mac_prefix:
16030 self.network.mac_prefix = self.mac_prefix
16032 if self.op.network6:
16033 self.network.network6 = self.network6
16035 if self.op.gateway6:
16036 self.network.gateway6 = self.gateway6
16038 self.pool.Validate()
16040 self.cfg.Update(self.network, feedback_fn)
16043 class _NetworkQuery(_QueryBase):
16044 FIELDS = query.NETWORK_FIELDS
16046 def ExpandNames(self, lu):
16047 lu.needed_locks = {}
16048 lu.share_locks = _ShareAll()
16050 self.do_locking = self.use_locking
16052 all_networks = lu.cfg.GetAllNetworksInfo()
16053 name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
16059 for name in self.names:
16060 if name in name_to_uuid:
16061 self.wanted.append(name_to_uuid[name])
16063 missing.append(name)
16066 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16067 errors.ECODE_NOENT)
16069 self.wanted = locking.ALL_SET
16071 if self.do_locking:
16072 lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
16073 if query.NETQ_INST in self.requested_data:
16074 lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
16075 if query.NETQ_GROUP in self.requested_data:
16076 lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
16078 def DeclareLocks(self, lu, level):
16081 def _GetQueryData(self, lu):
16082 """Computes the list of networks and their attributes.
16085 all_networks = lu.cfg.GetAllNetworksInfo()
16087 network_uuids = self._GetNames(lu, all_networks.keys(),
16088 locking.LEVEL_NETWORK)
16090 do_instances = query.NETQ_INST in self.requested_data
16091 do_groups = query.NETQ_GROUP in self.requested_data
16093 network_to_instances = None
16094 network_to_groups = None
16096 # For NETQ_GROUP, we need to map network->[groups]
16098 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16099 network_to_groups = dict((uuid, []) for uuid in network_uuids)
16100 for _, group in all_groups.iteritems():
16101 for net_uuid in network_uuids:
16102 netparams = group.networks.get(net_uuid, None)
16104 info = (group.name, netparams[constants.NIC_MODE],
16105 netparams[constants.NIC_LINK])
16107 network_to_groups[net_uuid].append(info)
16110 all_instances = lu.cfg.GetAllInstancesInfo()
16111 network_to_instances = dict((uuid, []) for uuid in network_uuids)
16112 for instance in all_instances.values():
16113 for nic in instance.nics:
16114 if nic.network in network_uuids:
16115 network_to_instances[nic.network].append(instance.name)
16118 if query.NETQ_STATS in self.requested_data:
16121 self._GetStats(network.AddressPool(all_networks[uuid])))
16122 for uuid in network_uuids)
16126 return query.NetworkQueryData([all_networks[uuid]
16127 for uuid in network_uuids],
16129 network_to_instances,
16133 def _GetStats(pool):
16134 """Returns statistics for a network address pool.
16138 "free_count": pool.GetFreeCount(),
16139 "reserved_count": pool.GetReservedCount(),
16140 "map": pool.GetMap(),
16141 "external_reservations":
16142 utils.CommaJoin(pool.GetExternalReservations()),
16146 class LUNetworkQuery(NoHooksLU):
16147 """Logical unit for querying networks.
16152 def CheckArguments(self):
16153 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16154 self.op.output_fields, self.op.use_locking)
16156 def ExpandNames(self):
16157 self.nq.ExpandNames(self)
16159 def Exec(self, feedback_fn):
16160 return self.nq.OldStyleQuery(self)
16163 class LUNetworkConnect(LogicalUnit):
16164 """Connect a network to a nodegroup
16167 HPATH = "network-connect"
16168 HTYPE = constants.HTYPE_NETWORK
16171 def ExpandNames(self):
16172 self.network_name = self.op.network_name
16173 self.group_name = self.op.group_name
16174 self.network_mode = self.op.network_mode
16175 self.network_link = self.op.network_link
16177 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16178 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16180 self.needed_locks = {
16181 locking.LEVEL_INSTANCE: [],
16182 locking.LEVEL_NODEGROUP: [self.group_uuid],
16184 self.share_locks[locking.LEVEL_INSTANCE] = 1
16186 if self.op.conflicts_check:
16187 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16188 self.share_locks[locking.LEVEL_NETWORK] = 1
16190 def DeclareLocks(self, level):
16191 if level == locking.LEVEL_INSTANCE:
16192 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16194 # Lock instances optimistically, needs verification once group lock has
16196 if self.op.conflicts_check:
16197 self.needed_locks[locking.LEVEL_INSTANCE] = \
16198 self.cfg.GetNodeGroupInstances(self.group_uuid)
16200 def BuildHooksEnv(self):
16202 "GROUP_NAME": self.group_name,
16203 "GROUP_NETWORK_MODE": self.network_mode,
16204 "GROUP_NETWORK_LINK": self.network_link,
16208 def BuildHooksNodes(self):
16209 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16210 return (nodes, nodes)
16212 def CheckPrereq(self):
16213 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16215 assert self.group_uuid in owned_groups
16217 # Check if locked instances are still correct
16218 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16219 if self.op.conflicts_check:
16220 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16223 constants.NIC_MODE: self.network_mode,
16224 constants.NIC_LINK: self.network_link,
16226 objects.NIC.CheckParameterSyntax(self.netparams)
16228 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16229 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16230 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16231 self.connected = False
16232 if self.network_uuid in self.group.networks:
16233 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16234 (self.network_name, self.group.name))
16235 self.connected = True
16237 # check only if not already connected
16238 elif self.op.conflicts_check:
16239 pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))
16241 _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
16242 "connect to", owned_instances)
16244 def Exec(self, feedback_fn):
16245 # Connect the network and update the group only if not already connected
16246 if not self.connected:
16247 self.group.networks[self.network_uuid] = self.netparams
16248 self.cfg.Update(self.group, feedback_fn)
16251 def _NetworkConflictCheck(lu, check_fn, action, instances):
16252 """Checks for network interface conflicts with a network.
16254 @type lu: L{LogicalUnit}
16255 @type check_fn: callable receiving one parameter (L{objects.NIC}) and
16257 @param check_fn: Function checking for conflict
16258 @type action: string
16259 @param action: Part of error message (see code)
16260 @raise errors.OpPrereqError: If conflicting IP addresses are found.
16265 for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
16266 instconflicts = [(idx, nic.ip)
16267 for (idx, nic) in enumerate(instance.nics)
16271 conflicts.append((instance.name, instconflicts))
16274 lu.LogWarning("IP addresses from network '%s', which is about to %s"
16275 " node group '%s', are in use: %s" %
16276 (lu.network_name, action, lu.group.name,
16277 utils.CommaJoin(("%s: %s" %
16278 (name, _FmtNetworkConflict(details)))
16279 for (name, details) in conflicts)))
16281 raise errors.OpPrereqError("Conflicting IP addresses found; "
16282 " remove/modify the corresponding network"
16283 " interfaces", errors.ECODE_STATE)
16286 def _FmtNetworkConflict(details):
16287 """Utility for L{_NetworkConflictCheck}.
16290 return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
16291 for (idx, ipaddr) in details)
16294 class LUNetworkDisconnect(LogicalUnit):
16295 """Disconnect a network to a nodegroup
16298 HPATH = "network-disconnect"
16299 HTYPE = constants.HTYPE_NETWORK
16302 def ExpandNames(self):
16303 self.network_name = self.op.network_name
16304 self.group_name = self.op.group_name
16306 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16307 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16309 self.needed_locks = {
16310 locking.LEVEL_INSTANCE: [],
16311 locking.LEVEL_NODEGROUP: [self.group_uuid],
16313 self.share_locks[locking.LEVEL_INSTANCE] = 1
16315 def DeclareLocks(self, level):
16316 if level == locking.LEVEL_INSTANCE:
16317 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16319 # Lock instances optimistically, needs verification once group lock has
16321 self.needed_locks[locking.LEVEL_INSTANCE] = \
16322 self.cfg.GetNodeGroupInstances(self.group_uuid)
16324 def BuildHooksEnv(self):
16326 "GROUP_NAME": self.group_name,
16330 def BuildHooksNodes(self):
16331 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16332 return (nodes, nodes)
16334 def CheckPrereq(self):
16335 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16337 assert self.group_uuid in owned_groups
16339 # Check if locked instances are still correct
16340 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16341 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16343 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16344 self.connected = True
16345 if self.network_uuid not in self.group.networks:
16346 self.LogWarning("Network '%s' is not mapped to group '%s'",
16347 self.network_name, self.group.name)
16348 self.connected = False
16350 # We need this check only if network is not already connected
16352 _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
16353 "disconnect from", owned_instances)
16355 def Exec(self, feedback_fn):
16356 # Disconnect the network and update the group only if network is connected
16358 del self.group.networks[self.network_uuid]
16359 self.cfg.Update(self.group, feedback_fn)
16362 #: Query type implementations
16364 constants.QR_CLUSTER: _ClusterQuery,
16365 constants.QR_INSTANCE: _InstanceQuery,
16366 constants.QR_NODE: _NodeQuery,
16367 constants.QR_GROUP: _GroupQuery,
16368 constants.QR_NETWORK: _NetworkQuery,
16369 constants.QR_OS: _OsQuery,
16370 constants.QR_EXTSTORAGE: _ExtStorageQuery,
16371 constants.QR_EXPORT: _ExportQuery,
16374 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16377 def _GetQueryImplementation(name):
16378 """Returns the implemtnation for a query type.
16380 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16384 return _QUERY_IMPL[name]
16386 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16387 errors.ECODE_INVAL)
16390 def _CheckForConflictingIp(lu, ip, node):
16391 """In case of conflicting IP address raise error.
16394 @param ip: IP address
16396 @param node: node name
16399 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16400 if conf_net is not None:
16401 raise errors.OpPrereqError(("The requested IP address (%s) belongs to"
16402 " network %s, but the target NIC does not." %
16404 errors.ECODE_STATE)
16406 return (None, None)