# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar

# C0302: since we have waaaay too many lines in this module

import copy
import itertools
import re
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

from ganeti.cmdlib.base import ResultWithJobs, LogicalUnit, NoHooksLU, \

from ganeti.cmdlib.common import _ExpandInstanceName, _ExpandItemName, \

import ganeti.masterd.instance  # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)
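
# Illustrative note (added commentary, not part of the original code): the
# result is simply every lock level mapped to 1 ("shared"); an LU can do
#   self.share_locks = _ShareAll()
# to declare that all the locks it acquires are taken in shared mode.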


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _IsExclusiveStorageEnabledNode(cfg, node):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The effective value of exclusive_storage

  """
  return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]


def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
  return _IsExclusiveStorageEnabledNode(cfg, ni)


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
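
# Illustrative note (added commentary, not from the original code): per the
# docstring above, callers use this when a lock-name list must be reused
# without sharing the underlying list object; the L{locking.ALL_SET} sentinel
# is special-cased and returned unchanged.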


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
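
# Illustrative usage (added commentary, not from the original code): an LU's
# CheckPrereq would typically do something like
#   self.op.nodes = _GetWantedNodes(self, self.op.nodes)
# where passing None means "all nodes of the cluster".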


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
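
# Illustrative example (added commentary, not from the original code):
# updating {"a": 1, "b": 2} with {"a": constants.VALUE_DEFAULT, "c": 3}
# (with use_default=True) drops "a" and yields {"b": 2, "c": 3}; the input
# dictionaries are never modified because a deep copy is made first.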


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if (not value or value == [constants.VALUE_DEFAULT] or
        value == constants.VALUE_DEFAULT):
      raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                 " on the cluster" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_PARAMETERS:
      # FIXME: we assume all such values are float
      try:
        ipolicy[key] = float(value)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid value for attribute"
                                   " '%s': '%s', error: %s" %
                                   (key, value, err), errors.ECODE_INVAL)
    elif key == constants.ISPECS_MINMAX:
      for minmax in value:
        for k in minmax.keys():
          utils.ForceDictType(minmax[k], constants.ISPECS_PARAMETER_TYPES)
    elif key == constants.ISPECS_STD:
      if group_policy:
        msg = "%s cannot appear in group instance specs" % key
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=False, use_default=False)
      utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
    else:
      # FIXME: we assume all others are lists; this should be redone
      ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  invalid_hvs = set(op_input) - constants.HYPER_TYPES
  if invalid_hvs:
    raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                               " %s" % utils.CommaJoin(invalid_hvs),
  if obj_input is None:
  type_check = constants.HVSTS_PARAMETER_TYPES
  return _UpdateAndVerifySubDict(obj_input, op_input, type_check)


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  invalid_dst = set(op_input) - constants.DS_VALID_TYPES
  if invalid_dst:
    raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                               utils.CommaJoin(invalid_dst),
  type_check = constants.DSS_PARAMETER_TYPES
  if obj_input is None:
  return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                            type_check))
              for key, value in op_input.items())


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
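
# Illustrative usage (added commentary, not from the original code): inside an
# LU's Exec one might drop everything except the locks still needed, e.g.
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
# or release all locks at a level with
#   _ReleaseLocks(self, locking.LEVEL_NODE)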


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err:  # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
  """Make sure that none of the given parameters is global.

  If a global parameter is found, an L{errors.OpPrereqError} exception is
  raised. This is used to avoid setting global parameters for individual nodes.

  @type params: dictionary
  @param params: Parameters to check
  @type glob_pars: dictionary
  @param glob_pars: Forbidden parameters
  @param kind: Kind of parameters (e.g. "node")
  @type bad_levels: string
  @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
  @type good_levels: string
  @param good_levels: Level(s) at which the parameters are allowed (e.g.

  """
  used_globals = glob_pars.intersection(params)
  if used_globals:
    msg = ("The following %s parameters are global and cannot"
           " be customized at %s level, please modify them at"
           (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
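
# Illustrative usage (added commentary, not from the original code): LUs call
# this guard from CheckPrereq, e.g.
#   _CheckNodeOnline(self, self.op.node_name)
# so that an offline node aborts the operation before any work is done.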


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _CheckNodePVs(nresult, exclusive_storage):
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
  if pvlist_dict is None:
    return (["Can't get PV list from node"], None)
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
  errlist = []
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  for pv in pvlist:
    if ":" in pv.name:
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
                     (pv.name, pv.vg_name))
  es_pvinfo = None
  if exclusive_storage:
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
    errlist.extend(errmsgs)
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
    if shared_pvs:
      for (pvname, lvlist) in shared_pvs:
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
                       (pvname, utils.CommaJoin(lvlist)))
  return (errlist, es_pvinfo)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"


def _ComputeMinMaxSpec(name, qualifier, ispecs, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
  @param ispecs: dictionary containing min and max values
  @param value: actual value that we want to use
  @return: None or an error string

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ispecs[constants.ISPECS_MAX].get(name, value)
  min_v = ispecs[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    fqn = "%s/%s" % (name, qualifier)
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
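
# Worked example (added commentary, not from the original code): with
#   ispecs = {constants.ISPECS_MIN: {"disk-size": 1024},
#             constants.ISPECS_MAX: {"disk-size": 4096}}
# a call _ComputeMinMaxSpec("disk-size", "disk/0", ispecs, 8192) returns
# "disk-size/disk/0 value 8192 is not in range [1024, 4096]", while a value
# of 2048 (or constants.VALUE_AUTO) returns None.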


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 disk_template,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @param ipolicy: The ipolicy
  @param mem_size: The memory size
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @type disk_template: string
  @param disk_template: The disk template of the instance
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]
  if disk_template != constants.DT_DISKLESS:
    # This check doesn't make sense for diskless instances
    test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))

  ret = []
  allowed_dts = ipolicy[constants.IPOLICY_DTS]
  if disk_template not in allowed_dts:
    ret.append("Disk template %s is not allowed (allowed templates: %s)" %
               (disk_template, utils.CommaJoin(allowed_dts)))

  min_errs = None
  for minmax in ipolicy[constants.ISPECS_MINMAX]:
    errs = filter(None,
                  (_compute_fn(name, qualifier, minmax, value)
                   for (name, qualifier, value) in test_settings))
    if min_errs is None or len(errs) < len(min_errs):
      min_errs = errs
  assert min_errs is not None
  return ret + min_errs
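
# Illustrative note (added commentary, not from the original code): every
# (name, qualifier, value) triple is checked against each min/max set in the
# policy and the smallest list of violations is kept, so the instance only has
# to fit one of the allowed min/max specs; a disallowed disk template is
# always reported on top of that.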


def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  be_full = cfg.GetClusterInfo().FillBE(instance)
  mem_size = be_full[constants.BE_MAXMEM]
  cpu_count = be_full[constants.BE_VCPUS]
  spindle_use = be_full[constants.BE_SPINDLE_USE]
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)
  disk_template = instance.disk_template

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use, disk_template)


def _ComputeIPolicyInstanceSpecViolation(
    ipolicy, instance_spec, disk_template,
    _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use, disk_template)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group, cfg,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance, cfg)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @return: A list of instances which violates the new ipolicy but

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
          _ComputeViolatingInstances(old_ipolicy, instances, cfg))


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         mac_prefix, tags):
  """Builds network related env variables for hooks

  This builds the hook environment from individual variables.

  @param name: the name of the network
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @param tags: the tags of the network

  """
  env = {}

  env["NETWORK_NAME"] = name
  env["NETWORK_SUBNET"] = subnet
  env["NETWORK_GATEWAY"] = gateway
  env["NETWORK_SUBNET6"] = network6
  env["NETWORK_GATEWAY6"] = gateway6
  env["NETWORK_MAC_PREFIX"] = mac_prefix
  env["NETWORK_TAGS"] = " ".join(tags)

  return env


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @param status: the desired status of the instance
  @param minmem: the minimum memory size of the instance
  @param maxmem: the maximum memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (name, uuid, ip, mac, mode, link, net, netinfo)
      representing the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: list of tuples (name, size, mode)
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @param tags: list of instance tags as strings
  @return: the hook environment for this instance

  """
  env = {
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.9) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }

  nic_count = len(nics)
  for idx, (name, _, ip, mac, mode, link, net, netinfo) in enumerate(nics):
    env["INSTANCE_NIC%d_NAME" % idx] = name
    env["INSTANCE_NIC%d_IP" % idx] = ip
    env["INSTANCE_NIC%d_MAC" % idx] = mac
    env["INSTANCE_NIC%d_MODE" % idx] = mode
    env["INSTANCE_NIC%d_LINK" % idx] = link
    if netinfo:
      nobj = objects.Network.FromDict(netinfo)
      env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
    else:
      # FIXME: broken network reference: the instance NIC specifies a
      # network, but the relevant network entry was not in the config. This
      # should be made impossible.
      env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
    if mode == constants.NIC_MODE_BRIDGED:
      env["INSTANCE_NIC%d_BRIDGE" % idx] = link

  env["INSTANCE_NIC_COUNT"] = nic_count

  disk_count = len(disks)
  for idx, (name, size, mode) in enumerate(disks):
    env["INSTANCE_DISK%d_NAME" % idx] = name
    env["INSTANCE_DISK%d_SIZE" % idx] = size
    env["INSTANCE_DISK%d_MODE" % idx] = mode

  env["INSTANCE_DISK_COUNT"] = disk_count

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
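
# Illustrative note (added commentary, not from the original code): for an
# instance with one NIC and one disk the resulting environment contains keys
# such as INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT=1,
# INSTANCE_NIC0_MAC, INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE, plus one
# INSTANCE_BE_*/INSTANCE_HV_* entry per backend/hypervisor parameter.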


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  netinfo = None
  if nic.network:
    nobj = lu.cfg.GetNetwork(nic.network)
    netinfo = objects.Network.ToDict(nobj)
  return (nic.name, nic.uuid, nic.ip, nic.mac, mode, link, nic.network, netinfo)


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
  @type override: dict
  @param override: dictionary with key/values that will override
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.name, disk.size, disk.mode)
              for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)  # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
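
# Worked example (added commentary, not from the original code): with a
# candidate_pool_size of 10 and GetMasterCandidateStats reporting mc_now == 3
# and mc_should == 3, the node being added bumps mc_should to min(3 + 1, 10)
# == 4, so 3 < 4 and the function returns True (the node should promote
# itself to master candidate).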


def _ComputeViolatingInstances(ipolicy, instances, cfg):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: L{objects.Instance}
  @param instances: List of instances to verify
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
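
# Illustrative usage (added commentary, not from the original code): an LU
# that accepts either an iallocator or an explicit target node would call
# something like
#   _CheckIAllocatorOrNode(self, "iallocator", "target_node")
# (hypothetical slot names) early on; afterwards the opcode's iallocator slot
# is either set, possibly to the cluster default, or a node was given.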


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
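
# Illustrative example (added commentary, not from the original code):
# _CheckHostnameSane(lu, "web1") accepts a resolver answer of
# "web1.example.com" (the given name is a prefix of the resolved name) but
# raises OpPrereqError if "web1" resolves to, say, "node7.example.com".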


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
      self.LogWarning("Error disabling the master IP address: %s",

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err:  # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  fnamemsg = "While verifying %s: %s" % (filename, msg)
  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    if etxt in self.op.ignore_errors:  # pylint: disable=E1101
      ltype = self.ETYPE_WARNING
    # first complete the msg
    # then format the whole message
    if self.op.error_codes:  # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)  # Mix-in. pylint: disable=E1101
    # do not mark the operation as failed for WARN cases only
    if ltype == self.ETYPE_ERROR:
      self.bad = True

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    if (bool(cond)
        or self.op.debug_simulate_errors):  # pylint: disable=E1101
      self._Error(*args, **kwargs)


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs):  # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisorClass(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in pathutils.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        utils.CommaJoin(dangling_instances.get(node.name,
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances
    @ivar pv_min: size in MiB of the smallest PVs
    @ivar pv_max: size in MiB of the biggest PVs

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.os_fail = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],

      # This opcode is run by watcher every five minutes and acquires all nodes
      # for a group. It doesn't run for a long time, so it's better to acquire
      # the node allocation lock as well.
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        for nname in inst.all_nodes:
          if self.all_node_info[nname].group != self.group_uuid:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf  # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
1940 def _VerifyNodeTime(self, ninfo, nresult,
1941 nvinfo_starttime, nvinfo_endtime):
1942 """Check the node time.
1944 @type ninfo: L{objects.Node}
1945 @param ninfo: the node to check
1946 @param nresult: the remote results for the node
1947 @param nvinfo_starttime: the start time of the RPC call
1948 @param nvinfo_endtime: the end time of the RPC call
1952 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1954 ntime = nresult.get(constants.NV_TIME, None)
1956 ntime_merged = utils.MergeTime(ntime)
1957 except (ValueError, TypeError):
1958 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1961 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1962 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1963 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1964 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1968 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1969 "Node time diverges by at least %s from master node time",
1972 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
1973 """Check the node LVM results and update info for cross-node checks.
1975 @type ninfo: L{objects.Node}
1976 @param ninfo: the node to check
1977 @param nresult: the remote results for the node
1978 @param vg_name: the configured VG name
1979 @type nimg: L{NodeImage}
1980 @param nimg: node image
1987 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1989 # checks vg existence and size > 20G
1990 vglist = nresult.get(constants.NV_VGLIST, None)
1992 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1994 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1995 constants.MIN_VG_SIZE)
1996 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1999 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2001 self._Error(constants.CV_ENODELVM, node, em)
2002 if pvminmax is not None:
2003 (nimg.pv_min, nimg.pv_max) = pvminmax
2005 def _VerifyGroupLVM(self, node_image, vg_name):
2006 """Check cross-node consistency in LVM.
2008 @type node_image: dict
2009 @param node_image: info about nodes, mapping from node to names to
2010 L{NodeImage} objects
2011 @param vg_name: the configured VG name
2017 # Only exclusive storage needs this kind of check
2018 if not self._exclusive_storage:
2021 # exclusive_storage wants all PVs to have the same size (approximately),
2022 # if the smallest and the biggest ones are okay, everything is fine.
2023 # pv_min is None iff pv_max is None
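# Illustrative sketch only (hypothetical sizes; the exact tolerance lives in
# utils.LvmExclusiveTestBadPvSizes): pv_min=10240 MB on one node and
# pv_max=10260 MB on another would likely pass, whereas 10240 MB vs. 20480 MB
# would be reported as CV_EGROUPDIFFERENTPVSIZE below.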
2024 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2027 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2028 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2029 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2030 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2031 "PV sizes differ too much in the group; smallest (%s MB) is"
2032 " on %s, biggest (%s MB) is on %s",
2033 pvmin, minnode, pvmax, maxnode)
2035 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2036 """Check the node bridges.
2038 @type ninfo: L{objects.Node}
2039 @param ninfo: the node to check
2040 @param nresult: the remote results for the node
2041 @param bridges: the expected list of bridges
2048 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2050 missing = nresult.get(constants.NV_BRIDGES, None)
2051 test = not isinstance(missing, list)
2052 _ErrorIf(test, constants.CV_ENODENET, node,
2053 "did not return valid bridge information")
2055 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2056 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2058 def _VerifyNodeUserScripts(self, ninfo, nresult):
2059 """Check the results of user scripts presence and executability on the node
2061 @type ninfo: L{objects.Node}
2062 @param ninfo: the node to check
2063 @param nresult: the remote results for the node
2068 test = not constants.NV_USERSCRIPTS in nresult
2069 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2070 "did not return user scripts information")
2072 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2074 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2075 "user scripts not present or not executable: %s" %
2076 utils.CommaJoin(sorted(broken_scripts)))
2078 def _VerifyNodeNetwork(self, ninfo, nresult):
2079 """Check the node network connectivity results.
2081 @type ninfo: L{objects.Node}
2082 @param ninfo: the node to check
2083 @param nresult: the remote results for the node
2087 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2089 test = constants.NV_NODELIST not in nresult
2090 _ErrorIf(test, constants.CV_ENODESSH, node,
2091 "node hasn't returned node ssh connectivity data")
2093 if nresult[constants.NV_NODELIST]:
2094 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2095 _ErrorIf(True, constants.CV_ENODESSH, node,
2096 "ssh communication with node '%s': %s", a_node, a_msg)
2098 test = constants.NV_NODENETTEST not in nresult
2099 _ErrorIf(test, constants.CV_ENODENET, node,
2100 "node hasn't returned node tcp connectivity data")
2102 if nresult[constants.NV_NODENETTEST]:
2103 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2105 _ErrorIf(True, constants.CV_ENODENET, node,
2106 "tcp communication with node '%s': %s",
2107 anode, nresult[constants.NV_NODENETTEST][anode])
2109 test = constants.NV_MASTERIP not in nresult
2110 _ErrorIf(test, constants.CV_ENODENET, node,
2111 "node hasn't returned node master IP reachability data")
2113 if not nresult[constants.NV_MASTERIP]:
2114 if node == self.master_node:
2115 msg = "the master node cannot reach the master IP (not configured?)"
2117 msg = "cannot reach the master IP"
2118 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2120 def _VerifyInstance(self, instance, inst_config, node_image,
2122 """Verify an instance.
2124 This function checks to see if the required block devices are
2125 available on the instance's node, and that the nodes are in the correct state.
2129 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2130 pnode = inst_config.primary_node
2131 pnode_img = node_image[pnode]
2132 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2134 node_vol_should = {}
2135 inst_config.MapLVsByNode(node_vol_should)
2137 cluster = self.cfg.GetClusterInfo()
2138 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2140 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
2141 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2142 code=self.ETYPE_WARNING)
2144 for node in node_vol_should:
2145 n_img = node_image[node]
2146 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2147 # ignore missing volumes on offline or broken nodes
2149 for volume in node_vol_should[node]:
2150 test = volume not in n_img.volumes
2151 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2152 "volume %s missing on node %s", volume, node)
2154 if inst_config.admin_state == constants.ADMINST_UP:
2155 test = instance not in pnode_img.instances and not pnode_img.offline
2156 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2157 "instance not running on its primary node %s",
2159 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2160 "instance is marked as running and lives on offline node %s",
2163 diskdata = [(nname, success, status, idx)
2164 for (nname, disks) in diskstatus.items()
2165 for idx, (success, status) in enumerate(disks)]
2167 for nname, success, bdev_status, idx in diskdata:
2168 # the 'ghost node' construction in Exec() ensures that we have a
2170 snode = node_image[nname]
2171 bad_snode = snode.ghost or snode.offline
2172 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2173 not success and not bad_snode,
2174 constants.CV_EINSTANCEFAULTYDISK, instance,
2175 "couldn't retrieve status for disk/%s on %s: %s",
2176 idx, nname, bdev_status)
2177 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2178 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2179 constants.CV_EINSTANCEFAULTYDISK, instance,
2180 "disk/%s on %s is faulty", idx, nname)
2182 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2183 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2184 " primary node failed", instance)
2186 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2187 constants.CV_EINSTANCELAYOUT,
2188 instance, "instance has multiple secondary nodes: %s",
2189 utils.CommaJoin(inst_config.secondary_nodes),
2190 code=self.ETYPE_WARNING)
2192 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2193 # Disk template not compatible with exclusive_storage: no instance
2194 # node should have the flag set
2195 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2196 inst_config.all_nodes)
2197 es_nodes = [n for (n, es) in es_flags.items()
2199 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2200 "instance has template %s, which is not supported on nodes"
2201 " that have exclusive storage set: %s",
2202 inst_config.disk_template, utils.CommaJoin(es_nodes))
2204 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2205 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2206 instance_groups = {}
2208 for node in instance_nodes:
2209 instance_groups.setdefault(self.all_node_info[node].group,
2213 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2214 # Sort so that we always list the primary node first.
2215 for group, nodes in sorted(instance_groups.items(),
2216 key=lambda (_, nodes): pnode in nodes,
2219 self._ErrorIf(len(instance_groups) > 1,
2220 constants.CV_EINSTANCESPLITGROUPS,
2221 instance, "instance has primary and secondary nodes in"
2222 " different groups: %s", utils.CommaJoin(pretty_list),
2223 code=self.ETYPE_WARNING)
2225 inst_nodes_offline = []
2226 for snode in inst_config.secondary_nodes:
2227 s_img = node_image[snode]
2228 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2229 snode, "instance %s, connection to secondary node failed",
2233 inst_nodes_offline.append(snode)
2235 # warn that the instance lives on offline nodes
2236 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2237 "instance has offline secondary node(s) %s",
2238 utils.CommaJoin(inst_nodes_offline))
2239 # ... or ghost/non-vm_capable nodes
2240 for node in inst_config.all_nodes:
2241 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2242 instance, "instance lives on ghost node %s", node)
2243 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2244 instance, "instance lives on non-vm_capable node %s", node)
2246 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2247 """Verify if there are any unknown volumes in the cluster.
2249 The .os, .swap and backup volumes are ignored. All other volumes are
2250 reported as unknown.
2252 @type reserved: L{ganeti.utils.FieldSet}
2253 @param reserved: a FieldSet of reserved volume names
2256 for node, n_img in node_image.items():
2257 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2258 self.all_node_info[node].group != self.group_uuid):
2259 # skip non-healthy nodes
2261 for volume in n_img.volumes:
2262 test = ((node not in node_vol_should or
2263 volume not in node_vol_should[node]) and
2264 not reserved.Matches(volume))
2265 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2266 "volume %s is unknown", volume)
2268 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2269 """Verify N+1 Memory Resilience.
2271 Check that if one single node dies we can still start all the
2272 instances it was primary for.
2275 cluster_info = self.cfg.GetClusterInfo()
2276 for node, n_img in node_image.items():
2277 # This code checks that every node which is now listed as
2278 # secondary has enough memory to host all instances it is
2279 # supposed to should a single other node in the cluster fail.
2280 # FIXME: not ready for failover to an arbitrary node
2281 # FIXME: does not support file-backed instances
2282 # WARNING: we currently take into account down instances as well
2283 # as up ones, considering that even if they're down someone
2284 # might want to start them even in the event of a node failure.
2285 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2286 # we're skipping nodes marked offline and nodes in other groups from
2287 # the N+1 warning, since most likely we don't have good memory
2288 # information from them; we already list instances living on such
2289 # nodes, and that's enough warning
2291 #TODO(dynmem): also consider ballooning out other instances
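# Rough numeric sketch (hypothetical values): if this node is secondary for
# instances whose primary is prinode and their BE_MINMEM values sum to
# 6144 MiB while the node reports mfree=4096 MiB, the check below raises
# CV_ENODEN1 for this node.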
2292 for prinode, instances in n_img.sbp.items():
2294 for instance in instances:
2295 bep = cluster_info.FillBE(instance_cfg[instance])
2296 if bep[constants.BE_AUTO_BALANCE]:
2297 needed_mem += bep[constants.BE_MINMEM]
2298 test = n_img.mfree < needed_mem
2299 self._ErrorIf(test, constants.CV_ENODEN1, node,
2300 "not enough memory to accomodate instance failovers"
2301 " should node %s fail (%dMiB needed, %dMiB available)",
2302 prinode, needed_mem, n_img.mfree)
2305 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2306 (files_all, files_opt, files_mc, files_vm)):
2307 """Verifies file checksums collected from all nodes.
2309 @param errorif: Callback for reporting errors
2310 @param nodeinfo: List of L{objects.Node} objects
2311 @param master_node: Name of master node
2312 @param all_nvinfo: RPC results
2315 # Define functions determining which nodes to consider for a file
2318 (files_mc, lambda node: (node.master_candidate or
2319 node.name == master_node)),
2320 (files_vm, lambda node: node.vm_capable),
2323 # Build mapping from filename to list of nodes which should have the file
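# Purely illustrative: nodefiles might map a cluster-wide file to
# frozenset(["node1.example.com", "node2.example.com"]) and a file meant only
# for master candidates to just the candidates' (plus the master's) names.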
2325 for (files, fn) in files2nodefn:
2327 filenodes = nodeinfo
2329 filenodes = filter(fn, nodeinfo)
2330 nodefiles.update((filename,
2331 frozenset(map(operator.attrgetter("name"), filenodes)))
2332 for filename in files)
2334 assert set(nodefiles) == (files_all | files_mc | files_vm)
2336 fileinfo = dict((filename, {}) for filename in nodefiles)
2337 ignore_nodes = set()
2339 for node in nodeinfo:
2341 ignore_nodes.add(node.name)
2344 nresult = all_nvinfo[node.name]
2346 if nresult.fail_msg or not nresult.payload:
2349 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2350 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2351 for (key, value) in fingerprints.items())
2354 test = not (node_files and isinstance(node_files, dict))
2355 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2356 "Node did not return file checksum data")
2358 ignore_nodes.add(node.name)
2361 # Build per-checksum mapping from filename to nodes having it
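# Illustrative example: after this loop fileinfo[filename] could look like
# {"<checksum-A>": set(["node1"]), "<checksum-B>": set(["node2", "node3"])},
# i.e. two different variants of the same file exist in the cluster.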
2362 for (filename, checksum) in node_files.items():
2363 assert filename in nodefiles
2364 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2366 for (filename, checksums) in fileinfo.items():
2367 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2369 # Nodes having the file
2370 with_file = frozenset(node_name
2371 for nodes in fileinfo[filename].values()
2372 for node_name in nodes) - ignore_nodes
2374 expected_nodes = nodefiles[filename] - ignore_nodes
2376 # Nodes missing file
2377 missing_file = expected_nodes - with_file
2379 if filename in files_opt:
2381 errorif(missing_file and missing_file != expected_nodes,
2382 constants.CV_ECLUSTERFILECHECK, None,
2383 "File %s is optional, but it must exist on all or no"
2384 " nodes (not found on %s)",
2385 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2387 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2388 "File %s is missing from node(s) %s", filename,
2389 utils.CommaJoin(utils.NiceSort(missing_file)))
2391 # Warn if a node has a file it shouldn't
2392 unexpected = with_file - expected_nodes
2394 constants.CV_ECLUSTERFILECHECK, None,
2395 "File %s should not exist on node(s) %s",
2396 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2398 # See if there are multiple versions of the file
2399 test = len(checksums) > 1
2401 variants = ["variant %s on %s" %
2402 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2403 for (idx, (checksum, nodes)) in
2404 enumerate(sorted(checksums.items()))]
2408 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2409 "File %s found with %s different checksums (%s)",
2410 filename, len(checksums), "; ".join(variants))
2412 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2414 """Verifies and the node DRBD status.
2416 @type ninfo: L{objects.Node}
2417 @param ninfo: the node to check
2418 @param nresult: the remote results for the node
2419 @param instanceinfo: the dict of instances
2420 @param drbd_helper: the configured DRBD usermode helper
2421 @param drbd_map: the DRBD map as returned by
2422 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2426 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2429 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2430 test = (helper_result is None)
2431 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2432 "no drbd usermode helper returned")
2434 status, payload = helper_result
2436 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2437 "drbd usermode helper check unsuccessful: %s", payload)
2438 test = status and (payload != drbd_helper)
2439 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2440 "wrong drbd usermode helper: %s", payload)
2442 # compute the DRBD minors
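# Hypothetical example: drbd_map[node] == {0: "inst1", 1: "inst2"} would yield
# node_drbd == {0: ("inst1", True), 1: ("inst2", False)} below, the boolean
# recording whether the owning instance is expected to be running.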
2444 for minor, instance in drbd_map[node].items():
2445 test = instance not in instanceinfo
2446 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2447 "ghost instance '%s' in temporary DRBD map", instance)
2448 # ghost instance should not be running, but otherwise we
2449 # don't give double warnings (both ghost instance and
2450 # unallocated minor in use)
2452 node_drbd[minor] = (instance, False)
2454 instance = instanceinfo[instance]
2455 node_drbd[minor] = (instance.name,
2456 instance.admin_state == constants.ADMINST_UP)
2458 # and now check them
2459 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2460 test = not isinstance(used_minors, (tuple, list))
2461 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2462 "cannot parse drbd status file: %s", str(used_minors))
2464 # we cannot check drbd status
2467 for minor, (iname, must_exist) in node_drbd.items():
2468 test = minor not in used_minors and must_exist
2469 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2470 "drbd minor %d of instance %s is not active", minor, iname)
2471 for minor in used_minors:
2472 test = minor not in node_drbd
2473 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2474 "unallocated drbd minor %d is in use", minor)
2476 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2477 """Builds the node OS structures.
2479 @type ninfo: L{objects.Node}
2480 @param ninfo: the node to check
2481 @param nresult: the remote results for the node
2482 @param nimg: the node image object
2486 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2488 remote_os = nresult.get(constants.NV_OSLIST, None)
2489 test = (not isinstance(remote_os, list) or
2490 not compat.all(isinstance(v, list) and len(v) == 7
2491 for v in remote_os))
2493 _ErrorIf(test, constants.CV_ENODEOS, node,
2494 "node hasn't returned valid OS data")
2503 for (name, os_path, status, diagnose,
2504 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2506 if name not in os_dict:
2509 # parameters is a list of lists instead of list of tuples due to
2510 # JSON lacking a real tuple type, fix it:
2511 parameters = [tuple(v) for v in parameters]
2512 os_dict[name].append((os_path, status, diagnose,
2513 set(variants), set(parameters), set(api_ver)))
2515 nimg.oslist = os_dict
2517 def _VerifyNodeOS(self, ninfo, nimg, base):
2518 """Verifies the node OS list.
2520 @type ninfo: L{objects.Node}
2521 @param ninfo: the node to check
2522 @param nimg: the node image object
2523 @param base: the 'template' node we match against (e.g. from the master)
2527 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2529 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2531 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2532 for os_name, os_data in nimg.oslist.items():
2533 assert os_data, "Empty OS status for OS %s?!" % os_name
2534 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2535 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2536 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2537 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2538 "OS '%s' has multiple entries (first one shadows the rest): %s",
2539 os_name, utils.CommaJoin([v[0] for v in os_data]))
2540 # comparisons with the 'base' image
2541 test = os_name not in base.oslist
2542 _ErrorIf(test, constants.CV_ENODEOS, node,
2543 "Extra OS %s not present on reference node (%s)",
2547 assert base.oslist[os_name], "Base node has empty OS status?"
2548 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2550 # base OS is invalid, skipping
2552 for kind, a, b in [("API version", f_api, b_api),
2553 ("variants list", f_var, b_var),
2554 ("parameters", beautify_params(f_param),
2555 beautify_params(b_param))]:
2556 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2557 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2558 kind, os_name, base.name,
2559 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2561 # check any missing OSes
2562 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2563 _ErrorIf(missing, constants.CV_ENODEOS, node,
2564 "OSes present on reference node %s but missing on this node: %s",
2565 base.name, utils.CommaJoin(missing))
2567 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2568 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2573 @type is_master: bool
2574 @param is_master: Whether node is the master node
2580 (constants.ENABLE_FILE_STORAGE or
2581 constants.ENABLE_SHARED_FILE_STORAGE)):
2583 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2585 # This should never happen
2586 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2587 "Node did not return forbidden file storage paths")
2589 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2590 "Found forbidden file storage paths: %s",
2591 utils.CommaJoin(fspaths))
2593 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2594 constants.CV_ENODEFILESTORAGEPATHS, node,
2595 "Node should not have returned forbidden file storage"
2598 def _VerifyOob(self, ninfo, nresult):
2599 """Verifies out of band functionality of a node.
2601 @type ninfo: L{objects.Node}
2602 @param ninfo: the node to check
2603 @param nresult: the remote results for the node
2607 # We just have to verify the paths on master and/or master candidates
2608 # as the oob helper is invoked on the master
2609 if ((ninfo.master_candidate or ninfo.master_capable) and
2610 constants.NV_OOB_PATHS in nresult):
2611 for path_result in nresult[constants.NV_OOB_PATHS]:
2612 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2614 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2615 """Verifies and updates the node volume data.
2617 This function will update a L{NodeImage}'s internal structures
2618 with data from the remote call.
2620 @type ninfo: L{objects.Node}
2621 @param ninfo: the node to check
2622 @param nresult: the remote results for the node
2623 @param nimg: the node image object
2624 @param vg_name: the configured VG name
2628 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2630 nimg.lvm_fail = True
2631 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2634 elif isinstance(lvdata, basestring):
2635 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2636 utils.SafeEncode(lvdata))
2637 elif not isinstance(lvdata, dict):
2638 _ErrorIf(True, constants.CV_ENODELVM, node,
2639 "rpc call to node failed (lvlist)")
2641 nimg.volumes = lvdata
2642 nimg.lvm_fail = False
2644 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2645 """Verifies and updates the node instance list.
2647 If the listing was successful, then updates this node's instance
2648 list. Otherwise, it marks the RPC call as failed for the instance
2651 @type ninfo: L{objects.Node}
2652 @param ninfo: the node to check
2653 @param nresult: the remote results for the node
2654 @param nimg: the node image object
2657 idata = nresult.get(constants.NV_INSTANCELIST, None)
2658 test = not isinstance(idata, list)
2659 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2660 "rpc call to node failed (instancelist): %s",
2661 utils.SafeEncode(str(idata)))
2663 nimg.hyp_fail = True
2665 nimg.instances = idata
2667 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2668 """Verifies and computes a node information map
2670 @type ninfo: L{objects.Node}
2671 @param ninfo: the node to check
2672 @param nresult: the remote results for the node
2673 @param nimg: the node image object
2674 @param vg_name: the configured VG name
2678 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2680 # try to read free memory (from the hypervisor)
2681 hv_info = nresult.get(constants.NV_HVINFO, None)
2682 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2683 _ErrorIf(test, constants.CV_ENODEHV, node,
2684 "rpc call to node failed (hvinfo)")
2687 nimg.mfree = int(hv_info["memory_free"])
2688 except (ValueError, TypeError):
2689 _ErrorIf(True, constants.CV_ENODERPC, node,
2690 "node returned invalid nodeinfo, check hypervisor")
2692 # FIXME: devise a free space model for file based instances as well
2693 if vg_name is not None:
2694 test = (constants.NV_VGLIST not in nresult or
2695 vg_name not in nresult[constants.NV_VGLIST])
2696 _ErrorIf(test, constants.CV_ENODELVM, node,
2697 "node didn't return data for the volume group '%s'"
2698 " - it is either missing or broken", vg_name)
2701 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2702 except (ValueError, TypeError):
2703 _ErrorIf(True, constants.CV_ENODERPC, node,
2704 "node returned invalid LVM info, check LVM status")
2706 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2707 """Gets per-disk status information for all instances.
2709 @type nodelist: list of strings
2710 @param nodelist: Node names
2711 @type node_image: dict of (name, L{objects.Node})
2712 @param node_image: Node objects
2713 @type instanceinfo: dict of (name, L{objects.Instance})
2714 @param instanceinfo: Instance objects
2715 @rtype: {instance: {node: [(success, payload)]}}
2716 @return: a dictionary of per-instance dictionaries with nodes as
2717 keys and disk information as values; the disk information is a
2718 list of tuples (success, payload)
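Illustrative shape only (hypothetical names): a two-disk instance reporting
from its primary node could map to
{"inst1": {"node1": [(True, status_disk0), (True, status_disk1)]}}.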
2721 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2724 node_disks_devonly = {}
2725 diskless_instances = set()
2726 diskless = constants.DT_DISKLESS
2728 for nname in nodelist:
2729 node_instances = list(itertools.chain(node_image[nname].pinst,
2730 node_image[nname].sinst))
2731 diskless_instances.update(inst for inst in node_instances
2732 if instanceinfo[inst].disk_template == diskless)
2733 disks = [(inst, disk)
2734 for inst in node_instances
2735 for disk in instanceinfo[inst].disks]
2738 # No need to collect data
2741 node_disks[nname] = disks
2743 # _AnnotateDiskParams makes already copies of the disks
2745 for (inst, dev) in disks:
2746 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2747 self.cfg.SetDiskID(anno_disk, nname)
2748 devonly.append(anno_disk)
2750 node_disks_devonly[nname] = devonly
2752 assert len(node_disks) == len(node_disks_devonly)
2754 # Collect data from all nodes with disks
2755 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2758 assert len(result) == len(node_disks)
2762 for (nname, nres) in result.items():
2763 disks = node_disks[nname]
2766 # No data from this node
2767 data = len(disks) * [(False, "node offline")]
2770 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2771 "while getting disk information: %s", msg)
2773 # No data from this node
2774 data = len(disks) * [(False, msg)]
2777 for idx, i in enumerate(nres.payload):
2778 if isinstance(i, (tuple, list)) and len(i) == 2:
2781 logging.warning("Invalid result from node %s, entry %d: %s",
2783 data.append((False, "Invalid result from the remote node"))
2785 for ((inst, _), status) in zip(disks, data):
2786 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2788 # Add empty entries for diskless instances.
2789 for inst in diskless_instances:
2790 assert inst not in instdisk
2793 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2794 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2795 compat.all(isinstance(s, (tuple, list)) and
2796 len(s) == 2 for s in statuses)
2797 for inst, nnames in instdisk.items()
2798 for nname, statuses in nnames.items())
2800 instdisk_keys = set(instdisk)
2801 instanceinfo_keys = set(instanceinfo)
2802 assert instdisk_keys == instanceinfo_keys, \
2803 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
2804 (instdisk_keys, instanceinfo_keys))
2809 def _SshNodeSelector(group_uuid, all_nodes):
2810 """Create endless iterators for all potential SSH check hosts.
2813 nodes = [node for node in all_nodes
2814 if (node.group != group_uuid and
2816 keyfunc = operator.attrgetter("group")
2818 return map(itertools.cycle,
2819 [sorted(map(operator.attrgetter("name"), names))
2820 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2824 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2825 """Choose which nodes should talk to which other nodes.
2827 We will make nodes contact all nodes in their group, and one node from
2830 @warning: This algorithm has a known issue if one node group is much
2831 smaller than others (e.g. just one node). In such a case all other
2832 nodes will talk to the single node.
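For example (illustrative), if some other group consists of a single node
C{x}, the per-group cycle always yields C{x}, so every node in the group
being verified ends up checking C{x}.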
2835 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2836 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2838 return (online_nodes,
2839 dict((name, sorted([i.next() for i in sel]))
2840 for name in online_nodes))
2842 def BuildHooksEnv(self):
2845 Cluster-Verify hooks run only in the post phase; if they fail, their
2846 output is logged in the verify output and the verification fails.
2850 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
2853 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2854 for node in self.my_node_info.values())
2858 def BuildHooksNodes(self):
2859 """Build hooks nodes.
2862 return ([], self.my_node_names)
2864 def Exec(self, feedback_fn):
2865 """Verify integrity of the node group, performing various test on nodes.
2868 # This method has too many local variables. pylint: disable=R0914
2869 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2871 if not self.my_node_names:
2873 feedback_fn("* Empty node group, skipping verification")
2877 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2878 verbose = self.op.verbose
2879 self._feedback_fn = feedback_fn
2881 vg_name = self.cfg.GetVGName()
2882 drbd_helper = self.cfg.GetDRBDHelper()
2883 cluster = self.cfg.GetClusterInfo()
2884 hypervisors = cluster.enabled_hypervisors
2885 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2887 i_non_redundant = [] # Non redundant instances
2888 i_non_a_balanced = [] # Non auto-balanced instances
2889 i_offline = 0 # Count of offline instances
2890 n_offline = 0 # Count of offline nodes
2891 n_drained = 0 # Count of nodes being drained
2892 node_vol_should = {}
2894 # FIXME: verify OS list
2897 filemap = _ComputeAncillaryFiles(cluster, False)
2899 # do local checksums
2900 master_node = self.master_node = self.cfg.GetMasterNode()
2901 master_ip = self.cfg.GetMasterIP()
2903 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2906 if self.cfg.GetUseExternalMipScript():
2907 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
2909 node_verify_param = {
2910 constants.NV_FILELIST:
2911 map(vcluster.MakeVirtualPath,
2912 utils.UniqueSequence(filename
2913 for files in filemap
2914 for filename in files)),
2915 constants.NV_NODELIST:
2916 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2917 self.all_node_info.values()),
2918 constants.NV_HYPERVISOR: hypervisors,
2919 constants.NV_HVPARAMS:
2920 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2921 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2922 for node in node_data_list
2923 if not node.offline],
2924 constants.NV_INSTANCELIST: hypervisors,
2925 constants.NV_VERSION: None,
2926 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2927 constants.NV_NODESETUP: None,
2928 constants.NV_TIME: None,
2929 constants.NV_MASTERIP: (master_node, master_ip),
2930 constants.NV_OSLIST: None,
2931 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2932 constants.NV_USERSCRIPTS: user_scripts,
2935 if vg_name is not None:
2936 node_verify_param[constants.NV_VGLIST] = None
2937 node_verify_param[constants.NV_LVLIST] = vg_name
2938 node_verify_param[constants.NV_PVLIST] = [vg_name]
2941 node_verify_param[constants.NV_DRBDLIST] = None
2942 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2944 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
2945 # Load file storage paths only from master node
2946 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
2949 # FIXME: this needs to be changed per node-group, not cluster-wide
2951 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2952 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2953 bridges.add(default_nicpp[constants.NIC_LINK])
2954 for instance in self.my_inst_info.values():
2955 for nic in instance.nics:
2956 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2957 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2958 bridges.add(full_nic[constants.NIC_LINK])
2961 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2963 # Build our expected cluster state
2964 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2966 vm_capable=node.vm_capable))
2967 for node in node_data_list)
2971 for node in self.all_node_info.values():
2972 path = _SupportsOob(self.cfg, node)
2973 if path and path not in oob_paths:
2974 oob_paths.append(path)
2977 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2979 for instance in self.my_inst_names:
2980 inst_config = self.my_inst_info[instance]
2981 if inst_config.admin_state == constants.ADMINST_OFFLINE:
2984 for nname in inst_config.all_nodes:
2985 if nname not in node_image:
2986 gnode = self.NodeImage(name=nname)
2987 gnode.ghost = (nname not in self.all_node_info)
2988 node_image[nname] = gnode
2990 inst_config.MapLVsByNode(node_vol_should)
2992 pnode = inst_config.primary_node
2993 node_image[pnode].pinst.append(instance)
2995 for snode in inst_config.secondary_nodes:
2996 nimg = node_image[snode]
2997 nimg.sinst.append(instance)
2998 if pnode not in nimg.sbp:
2999 nimg.sbp[pnode] = []
3000 nimg.sbp[pnode].append(instance)
3002 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3003 # The value of exclusive_storage should be the same across the group, so if
3004 # it's True for at least one node, we act as if it were set for all the nodes
3005 self._exclusive_storage = compat.any(es_flags.values())
3006 if self._exclusive_storage:
3007 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3009 # At this point, we have the in-memory data structures complete,
3010 # except for the runtime information, which we'll gather next
3012 # Due to the way our RPC system works, exact response times cannot be
3013 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3014 # time before and after executing the request, we can at least have a time window.
3016 nvinfo_starttime = time.time()
3017 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3019 self.cfg.GetClusterName())
3020 nvinfo_endtime = time.time()
3022 if self.extra_lv_nodes and vg_name is not None:
3024 self.rpc.call_node_verify(self.extra_lv_nodes,
3025 {constants.NV_LVLIST: vg_name},
3026 self.cfg.GetClusterName())
3028 extra_lv_nvinfo = {}
3030 all_drbd_map = self.cfg.ComputeDRBDMap()
3032 feedback_fn("* Gathering disk information (%s nodes)" %
3033 len(self.my_node_names))
3034 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3037 feedback_fn("* Verifying configuration file consistency")
3039 # If not all nodes are being checked, we need to make sure the master node
3040 # and a non-checked vm_capable node are in the list.
3041 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3043 vf_nvinfo = all_nvinfo.copy()
3044 vf_node_info = list(self.my_node_info.values())
3045 additional_nodes = []
3046 if master_node not in self.my_node_info:
3047 additional_nodes.append(master_node)
3048 vf_node_info.append(self.all_node_info[master_node])
3049 # Add the first vm_capable node we find which is not included,
3050 # excluding the master node (which we already have)
3051 for node in absent_nodes:
3052 nodeinfo = self.all_node_info[node]
3053 if (nodeinfo.vm_capable and not nodeinfo.offline and
3054 node != master_node):
3055 additional_nodes.append(node)
3056 vf_node_info.append(self.all_node_info[node])
3058 key = constants.NV_FILELIST
3059 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3060 {key: node_verify_param[key]},
3061 self.cfg.GetClusterName()))
3063 vf_nvinfo = all_nvinfo
3064 vf_node_info = self.my_node_info.values()
3066 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3068 feedback_fn("* Verifying node status")
3072 for node_i in node_data_list:
3074 nimg = node_image[node]
3078 feedback_fn("* Skipping offline node %s" % (node,))
3082 if node == master_node:
3084 elif node_i.master_candidate:
3085 ntype = "master candidate"
3086 elif node_i.drained:
3092 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3094 msg = all_nvinfo[node].fail_msg
3095 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3098 nimg.rpc_fail = True
3101 nresult = all_nvinfo[node].payload
3103 nimg.call_ok = self._VerifyNode(node_i, nresult)
3104 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3105 self._VerifyNodeNetwork(node_i, nresult)
3106 self._VerifyNodeUserScripts(node_i, nresult)
3107 self._VerifyOob(node_i, nresult)
3108 self._VerifyFileStoragePaths(node_i, nresult,
3109 node == master_node)
3112 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3113 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3116 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3117 self._UpdateNodeInstances(node_i, nresult, nimg)
3118 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3119 self._UpdateNodeOS(node_i, nresult, nimg)
3121 if not nimg.os_fail:
3122 if refos_img is None:
3124 self._VerifyNodeOS(node_i, nimg, refos_img)
3125 self._VerifyNodeBridges(node_i, nresult, bridges)
3127 # Check whether all running instances are primary for the node. (This
3128 # can no longer be done from _VerifyInstance below, since some of the
3129 # wrong instances could be from other node groups.)
3130 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3132 for inst in non_primary_inst:
3133 test = inst in self.all_inst_info
3134 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3135 "instance should not run on node %s", node_i.name)
3136 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3137 "node is running unknown instance %s", inst)
3139 self._VerifyGroupLVM(node_image, vg_name)
3141 for node, result in extra_lv_nvinfo.items():
3142 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3143 node_image[node], vg_name)
3145 feedback_fn("* Verifying instance status")
3146 for instance in self.my_inst_names:
3148 feedback_fn("* Verifying instance %s" % instance)
3149 inst_config = self.my_inst_info[instance]
3150 self._VerifyInstance(instance, inst_config, node_image,
3153 # If the instance is non-redundant we cannot survive losing its primary
3154 # node, so we are not N+1 compliant.
3155 if inst_config.disk_template not in constants.DTS_MIRRORED:
3156 i_non_redundant.append(instance)
3158 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3159 i_non_a_balanced.append(instance)
3161 feedback_fn("* Verifying orphan volumes")
3162 reserved = utils.FieldSet(*cluster.reserved_lvs)
3164 # We will get spurious "unknown volume" warnings if any node of this group
3165 # is secondary for an instance whose primary is in another group. To avoid
3166 # them, we find these instances and add their volumes to node_vol_should.
3167 for inst in self.all_inst_info.values():
3168 for secondary in inst.secondary_nodes:
3169 if (secondary in self.my_node_info
3170 and inst.name not in self.my_inst_info):
3171 inst.MapLVsByNode(node_vol_should)
3174 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3176 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3177 feedback_fn("* Verifying N+1 Memory redundancy")
3178 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3180 feedback_fn("* Other Notes")
3182 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3183 % len(i_non_redundant))
3185 if i_non_a_balanced:
3186 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3187 % len(i_non_a_balanced))
3190 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3193 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3196 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3200 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3201 """Analyze the post-hooks' result
3203 This method analyses the hook result, handles it, and sends some
3204 nicely-formatted feedback back to the user.
3206 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3207 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3208 @param hooks_results: the results of the multi-node hooks rpc call
3209 @param feedback_fn: function used to send feedback back to the caller
3210 @param lu_result: previous Exec result
3211 @return: the new Exec result, based on the previous result
3215 # We only really run POST phase hooks, only for non-empty groups,
3216 # and are only interested in their results
3217 if not self.my_node_names:
3220 elif phase == constants.HOOKS_PHASE_POST:
3221 # Used to change hooks' output to proper indentation
3222 feedback_fn("* Hooks Results")
3223 assert hooks_results, "invalid result from hooks"
3225 for node_name in hooks_results:
3226 res = hooks_results[node_name]
3228 test = msg and not res.offline
3229 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3230 "Communication failure in hooks execution: %s", msg)
3231 if res.offline or msg:
3232 # No need to investigate payload if node is offline or gave
3235 for script, hkr, output in res.payload:
3236 test = hkr == constants.HKR_FAIL
3237 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3238 "Script %s failed, output:", script)
3240 output = self._HOOKS_INDENT_RE.sub(" ", output)
3241 feedback_fn("%s" % output)
3247 class LUClusterVerifyDisks(NoHooksLU):
3248 """Verifies the cluster disks status.
3253 def ExpandNames(self):
3254 self.share_locks = _ShareAll()
3255 self.needed_locks = {
3256 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3259 def Exec(self, feedback_fn):
3260 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3262 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3263 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3264 for group in group_names])
3267 class LUGroupVerifyDisks(NoHooksLU):
3268 """Verifies the status of all disks in a node group.
3273 def ExpandNames(self):
3274 # Raises errors.OpPrereqError on its own if group can't be found
3275 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3277 self.share_locks = _ShareAll()
3278 self.needed_locks = {
3279 locking.LEVEL_INSTANCE: [],
3280 locking.LEVEL_NODEGROUP: [],
3281 locking.LEVEL_NODE: [],
3283 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3284 # starts one instance of this opcode for every group, which means all
3285 # nodes will be locked for a short amount of time, so it's better to
3286 # acquire the node allocation lock as well.
3287 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3290 def DeclareLocks(self, level):
3291 if level == locking.LEVEL_INSTANCE:
3292 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3294 # Lock instances optimistically, needs verification once node and group
3295 # locks have been acquired
3296 self.needed_locks[locking.LEVEL_INSTANCE] = \
3297 self.cfg.GetNodeGroupInstances(self.group_uuid)
3299 elif level == locking.LEVEL_NODEGROUP:
3300 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3302 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3303 set([self.group_uuid] +
3304 # Lock all groups used by instances optimistically; this requires
3305 # going via the node before it's locked, requiring verification
3308 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3309 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3311 elif level == locking.LEVEL_NODE:
3312 # This will only lock the nodes in the group to be verified which contain actual instances
3314 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3315 self._LockInstancesNodes()
3317 # Lock all nodes in group to be verified
3318 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3319 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3320 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3322 def CheckPrereq(self):
3323 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3324 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3325 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3327 assert self.group_uuid in owned_groups
3329 # Check if locked instances are still correct
3330 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3332 # Get instance information
3333 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3335 # Check if node groups for locked instances are still correct
3336 _CheckInstancesNodeGroups(self.cfg, self.instances,
3337 owned_groups, owned_nodes, self.group_uuid)
3339 def Exec(self, feedback_fn):
3340 """Verify integrity of cluster disks.
3342 @rtype: tuple of three items
3343 @return: a tuple of (dict of node-to-node_error, list of instances
3344 which need activate-disks, dict of instance: (node, volume) for
3349 res_instances = set()
3352 nv_dict = _MapInstanceDisksToNodes(
3353 [inst for inst in self.instances.values()
3354 if inst.admin_state == constants.ADMINST_UP])
3357 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3358 set(self.cfg.GetVmCapableNodeList()))
3360 node_lvs = self.rpc.call_lv_list(nodes, [])
3362 for (node, node_res) in node_lvs.items():
3363 if node_res.offline:
3366 msg = node_res.fail_msg
3368 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3369 res_nodes[node] = msg
3372 for lv_name, (_, _, lv_online) in node_res.payload.items():
3373 inst = nv_dict.pop((node, lv_name), None)
3374 if not (lv_online or inst is None):
3375 res_instances.add(inst)
3377 # any leftover items in nv_dict are missing LVs, let's arrange the data
3379 for key, inst in nv_dict.iteritems():
3380 res_missing.setdefault(inst, []).append(list(key))
3382 return (res_nodes, list(res_instances), res_missing)
3385 class LUClusterRepairDiskSizes(NoHooksLU):
3386 """Verifies the cluster disks sizes.
3391 def ExpandNames(self):
3392 if self.op.instances:
3393 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3394 # Not getting the node allocation lock as only a specific set of
3395 # instances (and their nodes) is going to be acquired
3396 self.needed_locks = {
3397 locking.LEVEL_NODE_RES: [],
3398 locking.LEVEL_INSTANCE: self.wanted_names,
3400 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3402 self.wanted_names = None
3403 self.needed_locks = {
3404 locking.LEVEL_NODE_RES: locking.ALL_SET,
3405 locking.LEVEL_INSTANCE: locking.ALL_SET,
3407 # This opcode acquires the node locks for all instances
3408 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3411 self.share_locks = {
3412 locking.LEVEL_NODE_RES: 1,
3413 locking.LEVEL_INSTANCE: 0,
3414 locking.LEVEL_NODE_ALLOC: 1,
3417 def DeclareLocks(self, level):
3418 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3419 self._LockInstancesNodes(primary_only=True, level=level)
3421 def CheckPrereq(self):
3422 """Check prerequisites.
3424 This only checks the optional instance list against the existing names.
3427 if self.wanted_names is None:
3428 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3430 self.wanted_instances = \
3431 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3433 def _EnsureChildSizes(self, disk):
3434 """Ensure children of the disk have the needed disk size.
3436 This is valid mainly for DRBD8 and fixes an issue where the
3437 children have smaller disk size.
3439 @param disk: an L{ganeti.objects.Disk} object
3442 if disk.dev_type == constants.LD_DRBD8:
3443 assert disk.children, "Empty children for DRBD8?"
3444 fchild = disk.children[0]
3445 mismatch = fchild.size < disk.size
3447 self.LogInfo("Child disk has size %d, parent %d, fixing",
3448 fchild.size, disk.size)
3449 fchild.size = disk.size
3451 # and we recurse on this child only, not on the metadev
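# Hypothetical example: a 10240 MiB DRBD8 disk whose data child reports
# 10236 MiB has its recorded child size corrected to 10240 MiB by the
# assignment above before recursing.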
3452 return self._EnsureChildSizes(fchild) or mismatch
3456 def Exec(self, feedback_fn):
3457 """Verify the size of cluster disks.
3460 # TODO: check child disks too
3461 # TODO: check differences in size between primary/secondary nodes
3463 for instance in self.wanted_instances:
3464 pnode = instance.primary_node
3465 if pnode not in per_node_disks:
3466 per_node_disks[pnode] = []
3467 for idx, disk in enumerate(instance.disks):
3468 per_node_disks[pnode].append((instance, idx, disk))
3470 assert not (frozenset(per_node_disks.keys()) -
3471 self.owned_locks(locking.LEVEL_NODE_RES)), \
3472 "Not owning correct locks"
3473 assert not self.owned_locks(locking.LEVEL_NODE)
3476 for node, dskl in per_node_disks.items():
3477 newl = [v[2].Copy() for v in dskl]
3479 self.cfg.SetDiskID(dsk, node)
3480 result = self.rpc.call_blockdev_getsize(node, newl)
3482 self.LogWarning("Failure in blockdev_getsize call to node"
3483 " %s, ignoring", node)
3485 if len(result.payload) != len(dskl):
3486 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3487 " result.payload=%s", node, len(dskl), result.payload)
3488 self.LogWarning("Invalid result from node %s, ignoring node results",
3491 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3493 self.LogWarning("Disk %d of instance %s did not return size"
3494 " information, ignoring", idx, instance.name)
3496 if not isinstance(size, (int, long)):
3497 self.LogWarning("Disk %d of instance %s did not return valid"
3498 " size information, ignoring", idx, instance.name)
3501 if size != disk.size:
3502 self.LogInfo("Disk %d of instance %s has mismatched size,"
3503 " correcting: recorded %d, actual %d", idx,
3504 instance.name, disk.size, size)
3506 self.cfg.Update(instance, feedback_fn)
3507 changed.append((instance.name, idx, size))
3508 if self._EnsureChildSizes(disk):
3509 self.cfg.Update(instance, feedback_fn)
3510 changed.append((instance.name, idx, disk.size))
3514 class LUClusterRename(LogicalUnit):
3515 """Rename the cluster.
3518 HPATH = "cluster-rename"
3519 HTYPE = constants.HTYPE_CLUSTER
3521 def BuildHooksEnv(self):
3526 "OP_TARGET": self.cfg.GetClusterName(),
3527 "NEW_NAME": self.op.name,
3530 def BuildHooksNodes(self):
3531 """Build hooks nodes.
3534 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3536 def CheckPrereq(self):
3537 """Verify that the passed name is a valid one.
3540 hostname = netutils.GetHostname(name=self.op.name,
3541 family=self.cfg.GetPrimaryIPFamily())
3543 new_name = hostname.name
3544 self.ip = new_ip = hostname.ip
3545 old_name = self.cfg.GetClusterName()
3546 old_ip = self.cfg.GetMasterIP()
3547 if new_name == old_name and new_ip == old_ip:
3548 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3549 " cluster has changed",
3551 if new_ip != old_ip:
3552 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3553 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3554 " reachable on the network" %
3555 new_ip, errors.ECODE_NOTUNIQUE)
3557 self.op.name = new_name
3559 def Exec(self, feedback_fn):
3560 """Rename the cluster.
3563 clustername = self.op.name
3566 # shutdown the master IP
3567 master_params = self.cfg.GetMasterNetworkParameters()
3568 ems = self.cfg.GetUseExternalMipScript()
3569 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3571 result.Raise("Could not disable the master role")
3574 cluster = self.cfg.GetClusterInfo()
3575 cluster.cluster_name = clustername
3576 cluster.master_ip = new_ip
3577 self.cfg.Update(cluster, feedback_fn)
3579 # update the known hosts file
3580 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3581 node_list = self.cfg.GetOnlineNodeList()
3583 node_list.remove(master_params.name)
3586 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3588 master_params.ip = new_ip
3589 result = self.rpc.call_node_activate_master_ip(master_params.name,
3591 msg = result.fail_msg
3593 self.LogWarning("Could not re-enable the master role on"
3594 " the master, please restart manually: %s", msg)
3599 def _ValidateNetmask(cfg, netmask):
3600 """Checks if a netmask is valid.
3602 @type cfg: L{config.ConfigWriter}
3603 @param cfg: The cluster configuration
3605 @param netmask: the netmask to be verified
3606 @raise errors.OpPrereqError: if the validation fails
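For example (illustrative): on an IPv4 cluster a netmask of 24 would
validate, while 64 (only meaningful for IPv6) would be rejected.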
3609 ip_family = cfg.GetPrimaryIPFamily()
3611 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3612 except errors.ProgrammerError:
3613 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3614 ip_family, errors.ECODE_INVAL)
3615 if not ipcls.ValidateNetmask(netmask):
3616 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3617 (netmask), errors.ECODE_INVAL)
3620 class LUClusterSetParams(LogicalUnit):
3621 """Change the parameters of the cluster.
3624 HPATH = "cluster-modify"
3625 HTYPE = constants.HTYPE_CLUSTER
3628 def CheckArguments(self):
3632 if self.op.uid_pool:
3633 uidpool.CheckUidPool(self.op.uid_pool)
3635 if self.op.add_uids:
3636 uidpool.CheckUidPool(self.op.add_uids)
3638 if self.op.remove_uids:
3639 uidpool.CheckUidPool(self.op.remove_uids)
3641 if self.op.master_netmask is not None:
3642 _ValidateNetmask(self.cfg, self.op.master_netmask)
3644 if self.op.diskparams:
3645 for dt_params in self.op.diskparams.values():
3646 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3648 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3649 except errors.OpPrereqError, err:
3650 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3653 def ExpandNames(self):
3654 # FIXME: in the future maybe other cluster params won't require checking on
3655 # all nodes to be modified.
3656 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
3657 # resource locks the right thing, shouldn't it be the BGL instead?
3658 self.needed_locks = {
3659 locking.LEVEL_NODE: locking.ALL_SET,
3660 locking.LEVEL_INSTANCE: locking.ALL_SET,
3661 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3662 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3664 self.share_locks = _ShareAll()
3666 def BuildHooksEnv(self):
3671 "OP_TARGET": self.cfg.GetClusterName(),
3672 "NEW_VG_NAME": self.op.vg_name,
3675 def BuildHooksNodes(self):
3676 """Build hooks nodes.
3679 mn = self.cfg.GetMasterNode()
3682 def CheckPrereq(self):
3683 """Check prerequisites.
3685 This checks whether the given params don't conflict and
3686 if the given volume group is valid.
3689 if self.op.vg_name is not None and not self.op.vg_name:
3690 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3691 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3692 " instances exist", errors.ECODE_INVAL)
3694 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3695 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3696 raise errors.OpPrereqError("Cannot disable drbd helper while"
3697 " drbd-based instances exist",
3700 node_list = self.owned_locks(locking.LEVEL_NODE)
3702 vm_capable_nodes = [node.name
3703 for node in self.cfg.GetAllNodesInfo().values()
3704 if node.name in node_list and node.vm_capable]
3706 # if vg_name is not None, check the given volume group on all nodes
3708 vglist = self.rpc.call_vg_list(vm_capable_nodes)
3709 for node in vm_capable_nodes:
3710 msg = vglist[node].fail_msg
3712 # ignoring down node
3713 self.LogWarning("Error while gathering data on node %s"
3714 " (ignoring node): %s", node, msg)
3716 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3718 constants.MIN_VG_SIZE)
3720 raise errors.OpPrereqError("Error on node '%s': %s" %
3721 (node, vgstatus), errors.ECODE_ENVIRON)
3723 if self.op.drbd_helper:
3724 # check the given drbd helper on all nodes
3725 helpers = self.rpc.call_drbd_helper(node_list)
3726 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3728 self.LogInfo("Not checking drbd helper on offline node %s", node)
3730 msg = helpers[node].fail_msg
3732 raise errors.OpPrereqError("Error checking drbd helper on node"
3733 " '%s': %s" % (node, msg),
3734 errors.ECODE_ENVIRON)
3735 node_helper = helpers[node].payload
3736 if node_helper != self.op.drbd_helper:
3737 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3738 (node, node_helper), errors.ECODE_ENVIRON)
3740 self.cluster = cluster = self.cfg.GetClusterInfo()
3741 # validate params changes
3742 if self.op.beparams:
3743 objects.UpgradeBeParams(self.op.beparams)
3744 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3745 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3747 if self.op.ndparams:
3748 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3749 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3751 # TODO: we need a more general way to handle resetting
3752 # cluster-level parameters to default values
3753 if self.new_ndparams["oob_program"] == "":
3754 self.new_ndparams["oob_program"] = \
3755 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3757 if self.op.hv_state:
3758 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3759 self.cluster.hv_state_static)
3760 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3761 for hv, values in new_hv_state.items())
3763 if self.op.disk_state:
3764 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3765 self.cluster.disk_state_static)
3766 self.new_disk_state = \
3767 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3768 for name, values in svalues.items()))
3769 for storage, svalues in new_disk_state.items())
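# If the instance policy is being changed, compute per node group which
# instances would violate the new policy, so they can be warned about below.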
3772 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3775 all_instances = self.cfg.GetAllInstancesInfo().values()
3777 for group in self.cfg.GetAllNodeGroupsInfo().values():
3778 instances = frozenset([inst for inst in all_instances
3779 if compat.any(node in group.members
3780 for node in inst.all_nodes)])
3781 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3782 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3783 new = _ComputeNewInstanceViolations(ipol,
3784 new_ipolicy, instances, self.cfg)
3786 violations.update(new)
3789 self.LogWarning("After the ipolicy change the following instances"
3790 " violate them: %s",
3791 utils.CommaJoin(utils.NiceSort(violations)))
3793 if self.op.nicparams:
3794 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3795 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3796 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3799 # check all instances for consistency
3800 for instance in self.cfg.GetAllInstancesInfo().values():
3801 for nic_idx, nic in enumerate(instance.nics):
3802 params_copy = copy.deepcopy(nic.nicparams)
3803 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3805 # check parameter syntax
3807 objects.NIC.CheckParameterSyntax(params_filled)
3808 except errors.ConfigurationError, err:
3809 nic_errors.append("Instance %s, nic/%d: %s" %
3810 (instance.name, nic_idx, err))
3812 # if we're moving instances to routed, check that they have an ip
3813 target_mode = params_filled[constants.NIC_MODE]
3814 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3815 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3816 " address" % (instance.name, nic_idx))
3818 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3819 "\n".join(nic_errors), errors.ECODE_INVAL)
3821 # hypervisor list/parameters
3822 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3823 if self.op.hvparams:
3824 for hv_name, hv_dict in self.op.hvparams.items():
3825 if hv_name not in self.new_hvparams:
3826 self.new_hvparams[hv_name] = hv_dict
3828 self.new_hvparams[hv_name].update(hv_dict)
3830 # disk template parameters
3831 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3832 if self.op.diskparams:
3833 for dt_name, dt_params in self.op.diskparams.items():
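# merge the given per-template parameters on top of the existing
# cluster-level values instead of replacing them wholesale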
3834 if dt_name not in self.new_diskparams:
3835 self.new_diskparams[dt_name] = dt_params
3837 self.new_diskparams[dt_name].update(dt_params)
3839 # os hypervisor parameters
3840 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3842 for os_name, hvs in self.op.os_hvp.items():
3843 if os_name not in self.new_os_hvp:
3844 self.new_os_hvp[os_name] = hvs
3846 for hv_name, hv_dict in hvs.items():
3848 # Delete if it exists
3849 self.new_os_hvp[os_name].pop(hv_name, None)
3850 elif hv_name not in self.new_os_hvp[os_name]:
3851 self.new_os_hvp[os_name][hv_name] = hv_dict
3853 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3856 self.new_osp = objects.FillDict(cluster.osparams, {})
3857 if self.op.osparams:
3858 for os_name, osp in self.op.osparams.items():
3859 if os_name not in self.new_osp:
3860 self.new_osp[os_name] = {}
3862 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3865 if not self.new_osp[os_name]:
3866 # we removed all parameters
3867 del self.new_osp[os_name]
3869 # check the parameter validity (remote check)
3870 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3871 os_name, self.new_osp[os_name])
3873 # changes to the hypervisor list
3874 if self.op.enabled_hypervisors is not None:
3875 self.hv_list = self.op.enabled_hypervisors
3876 for hv in self.hv_list:
3877 # if the hypervisor doesn't already exist in the cluster
3878 # hvparams, we initialize it to empty, and then (in both
3879 # cases) we make sure to fill the defaults, as we might not
3880 # have a complete defaults list if the hypervisor wasn't
3881 # enabled before
3882 if hv not in new_hvp:
3883 new_hvp[hv] = {}
3884 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3885 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3887 self.hv_list = cluster.enabled_hypervisors
3889 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3890 # either the enabled list has changed, or the parameters have, validate
3891 for hv_name, hv_params in self.new_hvparams.items():
3892 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3893 (self.op.enabled_hypervisors and
3894 hv_name in self.op.enabled_hypervisors)):
3895 # either this is a new hypervisor, or its parameters have changed
3896 hv_class = hypervisor.GetHypervisorClass(hv_name)
3897 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3898 hv_class.CheckParameterSyntax(hv_params)
3899 _CheckHVParams(self, node_list, hv_name, hv_params)
3901 self._CheckDiskTemplateConsistency()
3904 # no need to check any newly-enabled hypervisors, since the
3905 # defaults have already been checked in the above code-block
3906 for os_name, os_hvp in self.new_os_hvp.items():
3907 for hv_name, hv_params in os_hvp.items():
3908 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3909 # we need to fill in the new os_hvp on top of the actual hv_p
3910 cluster_defaults = self.new_hvparams.get(hv_name, {})
3911 new_osp = objects.FillDict(cluster_defaults, hv_params)
3912 hv_class = hypervisor.GetHypervisorClass(hv_name)
3913 hv_class.CheckParameterSyntax(new_osp)
3914 _CheckHVParams(self, node_list, hv_name, new_osp)
3916 if self.op.default_iallocator:
3917 alloc_script = utils.FindFile(self.op.default_iallocator,
3918 constants.IALLOCATOR_SEARCH_PATH,
3920 if alloc_script is None:
3921 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3922 " specified" % self.op.default_iallocator,
3925 def _CheckDiskTemplateConsistency(self):
3926 """Check whether the disk templates that are going to be disabled
3927 are still in use by some instances.
3930 if self.op.enabled_disk_templates:
3931 cluster = self.cfg.GetClusterInfo()
3932 instances = self.cfg.GetAllInstancesInfo()
3934 disk_templates_to_remove = set(cluster.enabled_disk_templates) \
3935 - set(self.op.enabled_disk_templates)
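# e.g. removing "drbd" from the enabled templates fails below if any
# instance still uses the drbd disk template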
3936 for instance in instances.itervalues():
3937 if instance.disk_template in disk_templates_to_remove:
3938 raise errors.OpPrereqError("Cannot disable disk template '%s',"
3939 " because instance '%s' is using it." %
3940 (instance.disk_template, instance.name))
3942 def Exec(self, feedback_fn):
3943 """Change the parameters of the cluster.
3946 if self.op.vg_name is not None:
3947 new_volume = self.op.vg_name
3950 if new_volume != self.cfg.GetVGName():
3951 self.cfg.SetVGName(new_volume)
3953 feedback_fn("Cluster LVM configuration already in desired"
3954 " state, not changing")
3955 if self.op.drbd_helper is not None:
3956 new_helper = self.op.drbd_helper
3959 if new_helper != self.cfg.GetDRBDHelper():
3960 self.cfg.SetDRBDHelper(new_helper)
3962 feedback_fn("Cluster DRBD helper already in desired state,"
3964 if self.op.hvparams:
3965 self.cluster.hvparams = self.new_hvparams
3967 self.cluster.os_hvp = self.new_os_hvp
3968 if self.op.enabled_hypervisors is not None:
3969 self.cluster.hvparams = self.new_hvparams
3970 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3971 if self.op.enabled_disk_templates:
3972 self.cluster.enabled_disk_templates = \
3973 list(set(self.op.enabled_disk_templates))
3974 if self.op.beparams:
3975 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3976 if self.op.nicparams:
3977 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3979 self.cluster.ipolicy = self.new_ipolicy
3980 if self.op.osparams:
3981 self.cluster.osparams = self.new_osp
3982 if self.op.ndparams:
3983 self.cluster.ndparams = self.new_ndparams
3984 if self.op.diskparams:
3985 self.cluster.diskparams = self.new_diskparams
3986 if self.op.hv_state:
3987 self.cluster.hv_state_static = self.new_hv_state
3988 if self.op.disk_state:
3989 self.cluster.disk_state_static = self.new_disk_state
3991 if self.op.candidate_pool_size is not None:
3992 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3993 # we need to update the pool size here, otherwise the save will fail
3994 _AdjustCandidatePool(self, [])
3996 if self.op.maintain_node_health is not None:
3997 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3998 feedback_fn("Note: CONFD was disabled at build time, node health"
3999 " maintenance is not useful (still enabling it)")
4000 self.cluster.maintain_node_health = self.op.maintain_node_health
4002 if self.op.prealloc_wipe_disks is not None:
4003 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4005 if self.op.add_uids is not None:
4006 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4008 if self.op.remove_uids is not None:
4009 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4011 if self.op.uid_pool is not None:
4012 self.cluster.uid_pool = self.op.uid_pool
4014 if self.op.default_iallocator is not None:
4015 self.cluster.default_iallocator = self.op.default_iallocator
4017 if self.op.reserved_lvs is not None:
4018 self.cluster.reserved_lvs = self.op.reserved_lvs
4020 if self.op.use_external_mip_script is not None:
4021 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4023 def helper_os(aname, mods, desc):
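# apply the DDM_ADD/DDM_REMOVE modifications in 'mods' to the OS status
# list named by 'aname' (i.e. hidden_os or blacklisted_os)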
4025 lst = getattr(self.cluster, aname)
4026 for key, val in mods:
4027 if key == constants.DDM_ADD:
4029 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4032 elif key == constants.DDM_REMOVE:
4036 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4038 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4040 if self.op.hidden_os:
4041 helper_os("hidden_os", self.op.hidden_os, "hidden")
4043 if self.op.blacklisted_os:
4044 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4046 if self.op.master_netdev:
4047 master_params = self.cfg.GetMasterNetworkParameters()
4048 ems = self.cfg.GetUseExternalMipScript()
4049 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4050 self.cluster.master_netdev)
4051 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4052 master_params, ems)
4053 result.Raise("Could not disable the master ip")
4054 feedback_fn("Changing master_netdev from %s to %s" %
4055 (master_params.netdev, self.op.master_netdev))
4056 self.cluster.master_netdev = self.op.master_netdev
4058 if self.op.master_netmask:
4059 master_params = self.cfg.GetMasterNetworkParameters()
4060 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4061 result = self.rpc.call_node_change_master_netmask(master_params.name,
4062 master_params.netmask,
4063 self.op.master_netmask,
4064 master_params.ip,
4065 master_params.netdev)
4067 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4070 self.cluster.master_netmask = self.op.master_netmask
4072 self.cfg.Update(self.cluster, feedback_fn)
4074 if self.op.master_netdev:
4075 master_params = self.cfg.GetMasterNetworkParameters()
4076 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4077 self.op.master_netdev)
4078 ems = self.cfg.GetUseExternalMipScript()
4079 result = self.rpc.call_node_activate_master_ip(master_params.name,
4080 master_params, ems)
4081 if result.fail_msg:
4082 self.LogWarning("Could not re-enable the master ip on"
4083 " the master, please restart manually: %s",
4084 result.fail_msg)
4087 def _UploadHelper(lu, nodes, fname):
4088 """Helper for uploading a file and showing warnings.
4091 if os.path.exists(fname):
4092 result = lu.rpc.call_upload_file(nodes, fname)
4093 for to_node, to_result in result.items():
4094 msg = to_result.fail_msg
4096 msg = ("Copy of file %s to node %s failed: %s" %
4097 (fname, to_node, msg))
4101 def _ComputeAncillaryFiles(cluster, redist):
4102 """Compute files external to Ganeti which need to be consistent.
4104 @type redist: boolean
4105 @param redist: Whether to include files which need to be redistributed
4108 # Compute files for all nodes
4110 pathutils.SSH_KNOWN_HOSTS_FILE,
4111 pathutils.CONFD_HMAC_KEY,
4112 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4113 pathutils.SPICE_CERT_FILE,
4114 pathutils.SPICE_CACERT_FILE,
4115 pathutils.RAPI_USERS_FILE,
4119 # we need to ship at least the RAPI certificate
4120 files_all.add(pathutils.RAPI_CERT_FILE)
4122 files_all.update(pathutils.ALL_CERT_FILES)
4123 files_all.update(ssconf.SimpleStore().GetFileList())
4125 if cluster.modify_etc_hosts:
4126 files_all.add(pathutils.ETC_HOSTS)
4128 if cluster.use_external_mip_script:
4129 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4131 # Files which are optional, these must:
4132 # - be present in one other category as well
4133 # - either exist or not exist on all nodes of that category (mc, vm all)
4135 pathutils.RAPI_USERS_FILE,
4138 # Files which should only be on master candidates
4142 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4146 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4147 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4148 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4150 # Files which should only be on VM-capable nodes
4153 for hv_name in cluster.enabled_hypervisors
4155 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4159 for hv_name in cluster.enabled_hypervisors
4161 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4163 # Filenames in each category must be unique
4164 all_files_set = files_all | files_mc | files_vm
4165 assert (len(all_files_set) ==
4166 sum(map(len, [files_all, files_mc, files_vm]))), \
4167 "Found file listed in more than one file list"
4169 # Optional files must be present in one other category
4170 assert all_files_set.issuperset(files_opt), \
4171 "Optional file not in a different required list"
4173 # This one file should never ever be re-distributed via RPC
4174 assert not (redist and
4175 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4177 return (files_all, files_opt, files_mc, files_vm)
4180 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4181 """Distribute additional files which are part of the cluster configuration.
4183 ConfigWriter takes care of distributing the config and ssconf files, but
4184 there are more files which should be distributed to all nodes. This function
4185 makes sure those are copied.
4187 @param lu: calling logical unit
4188 @param additional_nodes: list of nodes not in the config to distribute to
4189 @type additional_vm: boolean
4190 @param additional_vm: whether the additional nodes are vm-capable or not
4193 # Gather target nodes
4194 cluster = lu.cfg.GetClusterInfo()
4195 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4197 online_nodes = lu.cfg.GetOnlineNodeList()
4198 online_set = frozenset(online_nodes)
4199 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4201 if additional_nodes is not None:
4202 online_nodes.extend(additional_nodes)
4204 vm_nodes.extend(additional_nodes)
4206 # Never distribute to master node
4207 for nodelist in [online_nodes, vm_nodes]:
4208 if master_info.name in nodelist:
4209 nodelist.remove(master_info.name)
4212 (files_all, _, files_mc, files_vm) = \
4213 _ComputeAncillaryFiles(cluster, True)
4215 # Never re-distribute configuration file from here
4216 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4217 pathutils.CLUSTER_CONF_FILE in files_vm)
4218 assert not files_mc, "Master candidates not handled in this function"
4221 (online_nodes, files_all),
4222 (vm_nodes, files_vm),
4226 for (node_list, files) in filemap:
4228 _UploadHelper(lu, node_list, fname)
4231 class LUClusterRedistConf(NoHooksLU):
4232 """Force the redistribution of cluster configuration.
4234 This is a very simple LU.
4239 def ExpandNames(self):
4240 self.needed_locks = {
4241 locking.LEVEL_NODE: locking.ALL_SET,
4242 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4244 self.share_locks = _ShareAll()
4246 def Exec(self, feedback_fn):
4247 """Redistribute the configuration.
4250 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4251 _RedistributeAncillaryFiles(self)
4254 class LUClusterActivateMasterIp(NoHooksLU):
4255 """Activate the master IP on the master node.
4258 def Exec(self, feedback_fn):
4259 """Activate the master IP.
4262 master_params = self.cfg.GetMasterNetworkParameters()
4263 ems = self.cfg.GetUseExternalMipScript()
4264 result = self.rpc.call_node_activate_master_ip(master_params.name,
4265 master_params, ems)
4266 result.Raise("Could not activate the master IP")
4269 class LUClusterDeactivateMasterIp(NoHooksLU):
4270 """Deactivate the master IP on the master node.
4273 def Exec(self, feedback_fn):
4274 """Deactivate the master IP.
4277 master_params = self.cfg.GetMasterNetworkParameters()
4278 ems = self.cfg.GetUseExternalMipScript()
4279 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4280 master_params, ems)
4281 result.Raise("Could not deactivate the master IP")
4284 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4285 """Sleep and poll for an instance's disk to sync.
4288 if not instance.disks or disks is not None and not disks:
4291 disks = _ExpandCheckDisks(instance, disks)
4294 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4296 node = instance.primary_node
4299 lu.cfg.SetDiskID(dev, node)
4301 # TODO: Convert to utils.Retry
4304 degr_retries = 10 # in seconds, as we sleep 1 second each time
4308 cumul_degraded = False
4309 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4310 msg = rstats.fail_msg
4312 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4315 raise errors.RemoteError("Can't contact node %s for mirror data,"
4316 " aborting." % node)
4319 rstats = rstats.payload
4321 for i, mstat in enumerate(rstats):
4323 lu.LogWarning("Can't compute data for node %s/%s",
4324 node, disks[i].iv_name)
4327 cumul_degraded = (cumul_degraded or
4328 (mstat.is_degraded and mstat.sync_percent is None))
4329 if mstat.sync_percent is not None:
4331 if mstat.estimated_time is not None:
4332 rem_time = ("%s remaining (estimated)" %
4333 utils.FormatSeconds(mstat.estimated_time))
4334 max_time = mstat.estimated_time
4336 rem_time = "no time estimate"
4337 lu.LogInfo("- device %s: %5.2f%% done, %s",
4338 disks[i].iv_name, mstat.sync_percent, rem_time)
4340 # if we're done but degraded, let's do a few small retries, to
4341 # make sure we see a stable and not transient situation; therefore
4342 # we force restart of the loop
4343 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4344 logging.info("Degraded disks found, %d retries left", degr_retries)
4352 time.sleep(min(60, max_time))
4355 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4357 return not cumul_degraded
4360 def _BlockdevFind(lu, node, dev, instance):
4361 """Wrapper around call_blockdev_find to annotate diskparams.
4363 @param lu: A reference to the lu object
4364 @param node: The node to call out
4365 @param dev: The device to find
4366 @param instance: The instance object the device belongs to
4367 @returns The result of the rpc call
4370 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4371 return lu.rpc.call_blockdev_find(node, disk)
4374 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4375 """Wrapper around L{_CheckDiskConsistencyInner}.
4378 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4379 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4383 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4385 """Check that mirrors are not degraded.
4387 @attention: The device has to be annotated already.
4389 The ldisk parameter, if True, will change the test from the
4390 is_degraded attribute (which represents overall non-ok status for
4391 the device(s)) to the ldisk (representing the local storage status).
4394 lu.cfg.SetDiskID(dev, node)
4398 if on_primary or dev.AssembleOnSecondary():
4399 rstats = lu.rpc.call_blockdev_find(node, dev)
4400 msg = rstats.fail_msg
4402 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4404 elif not rstats.payload:
4405 lu.LogWarning("Can't find disk on node %s", node)
4409 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4411 result = result and not rstats.payload.is_degraded
4414 for child in dev.children:
4415 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4421 class LUOobCommand(NoHooksLU):
4422 """Logical unit for OOB handling.
4426 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
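# commands in _SKIP_MASTER are not run on the master node by default, as
# that would take down the node coordinating the operation (see CheckPrereq)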
4428 def ExpandNames(self):
4429 """Gather locks we need.
4432 if self.op.node_names:
4433 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4434 lock_names = self.op.node_names
4436 lock_names = locking.ALL_SET
4438 self.needed_locks = {
4439 locking.LEVEL_NODE: lock_names,
4442 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4444 if not self.op.node_names:
4445 # Acquire node allocation lock only if all nodes are affected
4446 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4448 def CheckPrereq(self):
4449 """Check prerequisites.
4452 - the node exists in the configuration
4455 Any errors are signaled by raising errors.OpPrereqError.
4459 self.master_node = self.cfg.GetMasterNode()
4461 assert self.op.power_delay >= 0.0
4463 if self.op.node_names:
4464 if (self.op.command in self._SKIP_MASTER and
4465 self.master_node in self.op.node_names):
4466 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4467 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4469 if master_oob_handler:
4470 additional_text = ("run '%s %s %s' if you want to operate on the"
4471 " master regardless") % (master_oob_handler,
4475 additional_text = "it does not support out-of-band operations"
4477 raise errors.OpPrereqError(("Operating on the master node %s is not"
4478 " allowed for %s; %s") %
4479 (self.master_node, self.op.command,
4480 additional_text), errors.ECODE_INVAL)
4482 self.op.node_names = self.cfg.GetNodeList()
4483 if self.op.command in self._SKIP_MASTER:
4484 self.op.node_names.remove(self.master_node)
4486 if self.op.command in self._SKIP_MASTER:
4487 assert self.master_node not in self.op.node_names
4489 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4491 raise errors.OpPrereqError("Node %s not found" % node_name,
4494 self.nodes.append(node)
4496 if (not self.op.ignore_status and
4497 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4498 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4499 " not marked offline") % node_name,
4502 def Exec(self, feedback_fn):
4503 """Execute OOB and return result if we expect any.
4506 master_node = self.master_node
4509 for idx, node in enumerate(utils.NiceSort(self.nodes,
4510 key=lambda node: node.name)):
4511 node_entry = [(constants.RS_NORMAL, node.name)]
4512 ret.append(node_entry)
4514 oob_program = _SupportsOob(self.cfg, node)
4517 node_entry.append((constants.RS_UNAVAIL, None))
4520 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4521 self.op.command, oob_program, node.name)
4522 result = self.rpc.call_run_oob(master_node, oob_program,
4523 self.op.command, node.name,
4527 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4528 node.name, result.fail_msg)
4529 node_entry.append((constants.RS_NODATA, None))
4532 self._CheckPayload(result)
4533 except errors.OpExecError, err:
4534 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4536 node_entry.append((constants.RS_NODATA, None))
4538 if self.op.command == constants.OOB_HEALTH:
4539 # For health we should log important events
4540 for item, status in result.payload:
4541 if status in [constants.OOB_STATUS_WARNING,
4542 constants.OOB_STATUS_CRITICAL]:
4543 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4544 item, node.name, status)
4546 if self.op.command == constants.OOB_POWER_ON:
4548 elif self.op.command == constants.OOB_POWER_OFF:
4549 node.powered = False
4550 elif self.op.command == constants.OOB_POWER_STATUS:
4551 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4552 if powered != node.powered:
4553 logging.warning(("Recorded power state (%s) of node '%s' does not"
4554 " match actual power state (%s)"), node.powered,
4557 # For configuration changing commands we should update the node
4558 if self.op.command in (constants.OOB_POWER_ON,
4559 constants.OOB_POWER_OFF):
4560 self.cfg.Update(node, feedback_fn)
4562 node_entry.append((constants.RS_NORMAL, result.payload))
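# when powering on several nodes, wait self.op.power_delay seconds between
# consecutive nodes instead of switching them all on at the same instant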
4564 if (self.op.command == constants.OOB_POWER_ON and
4565 idx < len(self.nodes) - 1):
4566 time.sleep(self.op.power_delay)
4570 def _CheckPayload(self, result):
4571 """Checks if the payload is valid.
4573 @param result: RPC result
4574 @raises errors.OpExecError: If payload is not valid
4578 if self.op.command == constants.OOB_HEALTH:
4579 if not isinstance(result.payload, list):
4580 errs.append("command 'health' is expected to return a list but got %s" %
4581 type(result.payload))
4583 for item, status in result.payload:
4584 if status not in constants.OOB_STATUSES:
4585 errs.append("health item '%s' has invalid status '%s'" %
4588 if self.op.command == constants.OOB_POWER_STATUS:
4589 if not isinstance(result.payload, dict):
4590 errs.append("power-status is expected to return a dict but got %s" %
4591 type(result.payload))
4593 if self.op.command in [
4594 constants.OOB_POWER_ON,
4595 constants.OOB_POWER_OFF,
4596 constants.OOB_POWER_CYCLE,
4598 if result.payload is not None:
4599 errs.append("%s is expected to not return payload but got '%s'" %
4600 (self.op.command, result.payload))
4603 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4604 utils.CommaJoin(errs))
4607 class _OsQuery(_QueryBase):
4608 FIELDS = query.OS_FIELDS
4610 def ExpandNames(self, lu):
4611 # Lock all nodes in shared mode
4612 # Temporary removal of locks, should be reverted later
4613 # TODO: reintroduce locks when they are lighter-weight
4614 lu.needed_locks = {}
4615 #self.share_locks[locking.LEVEL_NODE] = 1
4616 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4618 # The following variables interact with _QueryBase._GetNames
4620 self.wanted = self.names
4622 self.wanted = locking.ALL_SET
4624 self.do_locking = self.use_locking
4626 def DeclareLocks(self, lu, level):
4630 def _DiagnoseByOS(rlist):
4631 """Remaps a per-node return list into an a per-os per-node dictionary
4633 @param rlist: a map with node names as keys and OS objects as values
4636 @return: a dictionary with osnames as keys and as value another
4637 map, with nodes as keys and tuples of (path, status, diagnose,
4638 variants, parameters, api_versions) as values, eg::
4640 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4641 (/srv/..., False, "invalid api")],
4642 "node2": [(/srv/..., True, "", [], [])]}
4647 # we build here the list of nodes that didn't fail the RPC (at RPC
4648 # level), so that nodes with a non-responding node daemon don't
4649 # make all OSes invalid
4650 good_nodes = [node_name for node_name in rlist
4651 if not rlist[node_name].fail_msg]
4652 for node_name, nr in rlist.items():
4653 if nr.fail_msg or not nr.payload:
4655 for (name, path, status, diagnose, variants,
4656 params, api_versions) in nr.payload:
4657 if name not in all_os:
4658 # build a list of nodes for this os containing empty lists
4659 # for each node in node_list
4661 for nname in good_nodes:
4662 all_os[name][nname] = []
4663 # convert params from [name, help] to (name, help)
4664 params = [tuple(v) for v in params]
4665 all_os[name][node_name].append((path, status, diagnose,
4666 variants, params, api_versions))
4667 return all_os
4669 def _GetQueryData(self, lu):
4670 """Computes the list of nodes and their attributes.
4673 # Locking is not used
4674 assert not (compat.any(lu.glm.is_owned(level)
4675 for level in locking.LEVELS
4676 if level != locking.LEVEL_CLUSTER) or
4677 self.do_locking or self.use_locking)
4679 valid_nodes = [node.name
4680 for node in lu.cfg.GetAllNodesInfo().values()
4681 if not node.offline and node.vm_capable]
4682 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4683 cluster = lu.cfg.GetClusterInfo()
4687 for (os_name, os_data) in pol.items():
4688 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4689 hidden=(os_name in cluster.hidden_os),
4690 blacklisted=(os_name in cluster.blacklisted_os))
4694 api_versions = set()
4696 for idx, osl in enumerate(os_data.values()):
4697 info.valid = bool(info.valid and osl and osl[0][1])
4701 (node_variants, node_params, node_api) = osl[0][3:6]
4704 variants.update(node_variants)
4705 parameters.update(node_params)
4706 api_versions.update(node_api)
4708 # Filter out inconsistent values
4709 variants.intersection_update(node_variants)
4710 parameters.intersection_update(node_params)
4711 api_versions.intersection_update(node_api)
4713 info.variants = list(variants)
4714 info.parameters = list(parameters)
4715 info.api_versions = list(api_versions)
4717 data[os_name] = info
4719 # Prepare data in requested order
4720 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4724 class LUOsDiagnose(NoHooksLU):
4725 """Logical unit for OS diagnose/query.
4731 def _BuildFilter(fields, names):
4732 """Builds a filter for querying OSes.
4735 name_filter = qlang.MakeSimpleFilter("name", names)
4737 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4738 # respective field is not requested
4739 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4740 for fname in ["hidden", "blacklisted"]
4741 if fname not in fields]
4742 if "valid" not in fields:
4743 status_filter.append([qlang.OP_TRUE, "valid"])
4746 status_filter.insert(0, qlang.OP_AND)
4748 status_filter = None
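# e.g. for fields=["name"] and no name filter this yields
# [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#  [OP_NOT, [OP_TRUE, "blacklisted"]], [OP_TRUE, "valid"]]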
4750 if name_filter and status_filter:
4751 return [qlang.OP_AND, name_filter, status_filter]
4755 return status_filter
4757 def CheckArguments(self):
4758 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4759 self.op.output_fields, False)
4761 def ExpandNames(self):
4762 self.oq.ExpandNames(self)
4764 def Exec(self, feedback_fn):
4765 return self.oq.OldStyleQuery(self)
4768 class _ExtStorageQuery(_QueryBase):
4769 FIELDS = query.EXTSTORAGE_FIELDS
4771 def ExpandNames(self, lu):
4772 # Lock all nodes in shared mode
4773 # Temporary removal of locks, should be reverted later
4774 # TODO: reintroduce locks when they are lighter-weight
4775 lu.needed_locks = {}
4776 #self.share_locks[locking.LEVEL_NODE] = 1
4777 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4779 # The following variables interact with _QueryBase._GetNames
4781 self.wanted = self.names
4783 self.wanted = locking.ALL_SET
4785 self.do_locking = self.use_locking
4787 def DeclareLocks(self, lu, level):
4791 def _DiagnoseByProvider(rlist):
4792 """Remaps a per-node return list into an a per-provider per-node dictionary
4794 @param rlist: a map with node names as keys and ExtStorage objects as values
4797 @return: a dictionary with extstorage providers as keys and as
4798 value another map, with nodes as keys and tuples of
4799 (path, status, diagnose, parameters) as values, eg::
4801 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
4802 "node2": [(/srv/..., False, "missing file")]
4803 "node3": [(/srv/..., True, "", [])]
4808 # we build here the list of nodes that didn't fail the RPC (at RPC
4809 # level), so that nodes with a non-responding node daemon don't
4810 # make all providers invalid
4811 good_nodes = [node_name for node_name in rlist
4812 if not rlist[node_name].fail_msg]
4813 for node_name, nr in rlist.items():
4814 if nr.fail_msg or not nr.payload:
4816 for (name, path, status, diagnose, params) in nr.payload:
4817 if name not in all_es:
4818 # build a list of nodes for this provider containing empty lists
4819 # for each node in node_list
4821 for nname in good_nodes:
4822 all_es[name][nname] = []
4823 # convert params from [name, help] to (name, help)
4824 params = [tuple(v) for v in params]
4825 all_es[name][node_name].append((path, status, diagnose, params))
4826 return all_es
4828 def _GetQueryData(self, lu):
4829 """Computes the list of nodes and their attributes.
4832 # Locking is not used
4833 assert not (compat.any(lu.glm.is_owned(level)
4834 for level in locking.LEVELS
4835 if level != locking.LEVEL_CLUSTER) or
4836 self.do_locking or self.use_locking)
4838 valid_nodes = [node.name
4839 for node in lu.cfg.GetAllNodesInfo().values()
4840 if not node.offline and node.vm_capable]
4841 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
4845 nodegroup_list = lu.cfg.GetNodeGroupList()
4847 for (es_name, es_data) in pol.items():
4848 # For every provider compute the nodegroup validity.
4849 # To do this we need to check the validity of each node in es_data
4850 # and then construct the corresponding nodegroup dict:
4851 # { nodegroup1: status
4852 # nodegroup2: status
4855 for nodegroup in nodegroup_list:
4856 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
4858 nodegroup_nodes = ndgrp.members
4859 nodegroup_name = ndgrp.name
4862 for node in nodegroup_nodes:
4863 if node in valid_nodes:
4864 if es_data[node] != []:
4865 node_status = es_data[node][0][1]
4866 node_statuses.append(node_status)
4868 node_statuses.append(False)
4870 if False in node_statuses:
4871 ndgrp_data[nodegroup_name] = False
4873 ndgrp_data[nodegroup_name] = True
4875 # Compute the provider's parameters
4877 for idx, esl in enumerate(es_data.values()):
4878 valid = bool(esl and esl[0][1])
4882 node_params = esl[0][3]
4885 parameters.update(node_params)
4887 # Filter out inconsistent values
4888 parameters.intersection_update(node_params)
4890 params = list(parameters)
4892 # Now fill all the info for this provider
4893 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
4894 nodegroup_status=ndgrp_data,
4897 data[es_name] = info
4899 # Prepare data in requested order
4900 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4904 class LUExtStorageDiagnose(NoHooksLU):
4905 """Logical unit for ExtStorage diagnose/query.
4910 def CheckArguments(self):
4911 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
4912 self.op.output_fields, False)
4914 def ExpandNames(self):
4915 self.eq.ExpandNames(self)
4917 def Exec(self, feedback_fn):
4918 return self.eq.OldStyleQuery(self)
4921 class LUNodeRemove(LogicalUnit):
4922 """Logical unit for removing a node.
4925 HPATH = "node-remove"
4926 HTYPE = constants.HTYPE_NODE
4928 def BuildHooksEnv(self):
4933 "OP_TARGET": self.op.node_name,
4934 "NODE_NAME": self.op.node_name,
4937 def BuildHooksNodes(self):
4938 """Build hooks nodes.
4940 This doesn't run on the target node in the pre phase as a failed
4941 node would then be impossible to remove.
4944 all_nodes = self.cfg.GetNodeList()
4946 all_nodes.remove(self.op.node_name)
4949 return (all_nodes, all_nodes)
4951 def CheckPrereq(self):
4952 """Check prerequisites.
4955 - the node exists in the configuration
4956 - it does not have primary or secondary instances
4957 - it's not the master
4959 Any errors are signaled by raising errors.OpPrereqError.
4962 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4963 node = self.cfg.GetNodeInfo(self.op.node_name)
4964 assert node is not None
4966 masternode = self.cfg.GetMasterNode()
4967 if node.name == masternode:
4968 raise errors.OpPrereqError("Node is the master node, failover to another"
4969 " node is required", errors.ECODE_INVAL)
4971 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4972 if node.name in instance.all_nodes:
4973 raise errors.OpPrereqError("Instance %s is still running on the node,"
4974 " please remove first" % instance_name,
4976 self.op.node_name = node.name
4979 def Exec(self, feedback_fn):
4980 """Removes the node from the cluster.
4984 logging.info("Stopping the node daemon and removing configs from node %s",
4987 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4989 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4992 # Promote nodes to master candidate as needed
4993 _AdjustCandidatePool(self, exceptions=[node.name])
4994 self.context.RemoveNode(node.name)
4996 # Run post hooks on the node before it's removed
4997 _RunPostHook(self, node.name)
4999 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5000 msg = result.fail_msg
5002 self.LogWarning("Errors encountered on the remote node while leaving"
5003 " the cluster: %s", msg)
5005 # Remove node from our /etc/hosts
5006 if self.cfg.GetClusterInfo().modify_etc_hosts:
5007 master_node = self.cfg.GetMasterNode()
5008 result = self.rpc.call_etc_hosts_modify(master_node,
5009 constants.ETC_HOSTS_REMOVE,
5011 result.Raise("Can't update hosts file with new host data")
5012 _RedistributeAncillaryFiles(self)
5015 class _NodeQuery(_QueryBase):
5016 FIELDS = query.NODE_FIELDS
5018 def ExpandNames(self, lu):
5019 lu.needed_locks = {}
5020 lu.share_locks = _ShareAll()
5023 self.wanted = _GetWantedNodes(lu, self.names)
5025 self.wanted = locking.ALL_SET
5027 self.do_locking = (self.use_locking and
5028 query.NQ_LIVE in self.requested_data)
5031 # If any non-static field is requested we need to lock the nodes
5032 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5033 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5035 def DeclareLocks(self, lu, level):
5038 def _GetQueryData(self, lu):
5039 """Computes the list of nodes and their attributes.
5042 all_info = lu.cfg.GetAllNodesInfo()
5044 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5046 # Gather data as requested
5047 if query.NQ_LIVE in self.requested_data:
5048 # filter out non-vm_capable nodes
5049 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5051 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5052 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5053 [lu.cfg.GetHypervisorType()], es_flags)
5054 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5055 for (name, nresult) in node_data.items()
5056 if not nresult.fail_msg and nresult.payload)
5060 if query.NQ_INST in self.requested_data:
5061 node_to_primary = dict([(name, set()) for name in nodenames])
5062 node_to_secondary = dict([(name, set()) for name in nodenames])
5064 inst_data = lu.cfg.GetAllInstancesInfo()
5066 for inst in inst_data.values():
5067 if inst.primary_node in node_to_primary:
5068 node_to_primary[inst.primary_node].add(inst.name)
5069 for secnode in inst.secondary_nodes:
5070 if secnode in node_to_secondary:
5071 node_to_secondary[secnode].add(inst.name)
5073 node_to_primary = None
5074 node_to_secondary = None
5076 if query.NQ_OOB in self.requested_data:
5077 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5078 for name, node in all_info.iteritems())
5082 if query.NQ_GROUP in self.requested_data:
5083 groups = lu.cfg.GetAllNodeGroupsInfo()
5087 return query.NodeQueryData([all_info[name] for name in nodenames],
5088 live_data, lu.cfg.GetMasterNode(),
5089 node_to_primary, node_to_secondary, groups,
5090 oob_support, lu.cfg.GetClusterInfo())
5093 class LUNodeQuery(NoHooksLU):
5094 """Logical unit for querying nodes.
5097 # pylint: disable=W0142
5100 def CheckArguments(self):
5101 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5102 self.op.output_fields, self.op.use_locking)
5104 def ExpandNames(self):
5105 self.nq.ExpandNames(self)
5107 def DeclareLocks(self, level):
5108 self.nq.DeclareLocks(self, level)
5110 def Exec(self, feedback_fn):
5111 return self.nq.OldStyleQuery(self)
5114 class LUNodeQueryvols(NoHooksLU):
5115 """Logical unit for getting volumes on node(s).
5119 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5120 _FIELDS_STATIC = utils.FieldSet("node")
5122 def CheckArguments(self):
5123 _CheckOutputFields(static=self._FIELDS_STATIC,
5124 dynamic=self._FIELDS_DYNAMIC,
5125 selected=self.op.output_fields)
5127 def ExpandNames(self):
5128 self.share_locks = _ShareAll()
5131 self.needed_locks = {
5132 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5135 self.needed_locks = {
5136 locking.LEVEL_NODE: locking.ALL_SET,
5137 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5140 def Exec(self, feedback_fn):
5141 """Computes the list of nodes and their attributes.
5144 nodenames = self.owned_locks(locking.LEVEL_NODE)
5145 volumes = self.rpc.call_node_volumes(nodenames)
5147 ilist = self.cfg.GetAllInstancesInfo()
5148 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5151 for node in nodenames:
5152 nresult = volumes[node]
5155 msg = nresult.fail_msg
5157 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5160 node_vols = sorted(nresult.payload,
5161 key=operator.itemgetter("dev"))
5163 for vol in node_vols:
5165 for field in self.op.output_fields:
5168 elif field == "phys":
5172 elif field == "name":
5174 elif field == "size":
5175 val = int(float(vol["size"]))
5176 elif field == "instance":
5177 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
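# "-" means the volume is not used by any instance disk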
5179 raise errors.ParameterError(field)
5180 node_output.append(str(val))
5182 output.append(node_output)
5187 class LUNodeQueryStorage(NoHooksLU):
5188 """Logical unit for getting information on storage units on node(s).
5191 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5194 def CheckArguments(self):
5195 _CheckOutputFields(static=self._FIELDS_STATIC,
5196 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5197 selected=self.op.output_fields)
5199 def ExpandNames(self):
5200 self.share_locks = _ShareAll()
5203 self.needed_locks = {
5204 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5207 self.needed_locks = {
5208 locking.LEVEL_NODE: locking.ALL_SET,
5209 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5212 def Exec(self, feedback_fn):
5213 """Computes the list of nodes and their attributes.
5216 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5218 # Always get name to sort by
5219 if constants.SF_NAME in self.op.output_fields:
5220 fields = self.op.output_fields[:]
5222 fields = [constants.SF_NAME] + self.op.output_fields
5224 # Never ask for node or type as it's only known to the LU
5225 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5226 while extra in fields:
5227 fields.remove(extra)
5229 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5230 name_idx = field_idx[constants.SF_NAME]
5232 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5233 data = self.rpc.call_storage_list(self.nodes,
5234 self.op.storage_type, st_args,
5235 self.op.name, fields)
5239 for node in utils.NiceSort(self.nodes):
5240 nresult = data[node]
5244 msg = nresult.fail_msg
5246 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5249 rows = dict([(row[name_idx], row) for row in nresult.payload])
5251 for name in utils.NiceSort(rows.keys()):
5256 for field in self.op.output_fields:
5257 if field == constants.SF_NODE:
5259 elif field == constants.SF_TYPE:
5260 val = self.op.storage_type
5261 elif field in field_idx:
5262 val = row[field_idx[field]]
5264 raise errors.ParameterError(field)
5273 class _InstanceQuery(_QueryBase):
5274 FIELDS = query.INSTANCE_FIELDS
5276 def ExpandNames(self, lu):
5277 lu.needed_locks = {}
5278 lu.share_locks = _ShareAll()
5281 self.wanted = _GetWantedInstances(lu, self.names)
5283 self.wanted = locking.ALL_SET
5285 self.do_locking = (self.use_locking and
5286 query.IQ_LIVE in self.requested_data)
5288 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5289 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5290 lu.needed_locks[locking.LEVEL_NODE] = []
5291 lu.needed_locks[locking.LEVEL_NETWORK] = []
5292 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5294 self.do_grouplocks = (self.do_locking and
5295 query.IQ_NODES in self.requested_data)
5297 def DeclareLocks(self, lu, level):
5299 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5300 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5302 # Lock all groups used by instances optimistically; this requires going
5303 # via the node before it's locked, requiring verification later on
5304 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5306 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5307 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5308 elif level == locking.LEVEL_NODE:
5309 lu._LockInstancesNodes() # pylint: disable=W0212
5311 elif level == locking.LEVEL_NETWORK:
5312 lu.needed_locks[locking.LEVEL_NETWORK] = \
5314 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5315 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
5318 def _CheckGroupLocks(lu):
5319 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5320 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5322 # Check if node groups for locked instances are still correct
5323 for instance_name in owned_instances:
5324 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5326 def _GetQueryData(self, lu):
5327 """Computes the list of instances and their attributes.
5330 if self.do_grouplocks:
5331 self._CheckGroupLocks(lu)
5333 cluster = lu.cfg.GetClusterInfo()
5334 all_info = lu.cfg.GetAllInstancesInfo()
5336 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5338 instance_list = [all_info[name] for name in instance_names]
5339 nodes = frozenset(itertools.chain(*(inst.all_nodes
5340 for inst in instance_list)))
5341 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5344 wrongnode_inst = set()
5346 # Gather data as requested
5347 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5349 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5351 result = node_data[name]
5353 # offline nodes will be in both lists
5354 assert result.fail_msg
5355 offline_nodes.append(name)
5357 bad_nodes.append(name)
5358 elif result.payload:
5359 for inst in result.payload:
5360 if inst in all_info:
5361 if all_info[inst].primary_node == name:
5362 live_data.update(result.payload)
5364 wrongnode_inst.add(inst)
5366 # orphan instance; we don't list it here as we don't
5367 # handle this case yet in the output of instance listing
5368 logging.warning("Orphan instance '%s' found on node %s",
5370 # else no instance is alive
5374 if query.IQ_DISKUSAGE in self.requested_data:
5375 gmi = ganeti.masterd.instance
5376 disk_usage = dict((inst.name,
5377 gmi.ComputeDiskSize(inst.disk_template,
5378 [{constants.IDISK_SIZE: disk.size}
5379 for disk in inst.disks]))
5380 for inst in instance_list)
5384 if query.IQ_CONSOLE in self.requested_data:
5386 for inst in instance_list:
5387 if inst.name in live_data:
5388 # Instance is running
5389 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5391 consinfo[inst.name] = None
5392 assert set(consinfo.keys()) == set(instance_names)
5396 if query.IQ_NODES in self.requested_data:
5397 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5399 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5400 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5401 for uuid in set(map(operator.attrgetter("group"),
5407 if query.IQ_NETWORKS in self.requested_data:
5408 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5409 for i in instance_list))
5410 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5414 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5415 disk_usage, offline_nodes, bad_nodes,
5416 live_data, wrongnode_inst, consinfo,
5417 nodes, groups, networks)
5420 class LUQuery(NoHooksLU):
5421 """Query for resources/items of a certain kind.
5424 # pylint: disable=W0142
5427 def CheckArguments(self):
5428 qcls = _GetQueryImplementation(self.op.what)
5430 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5432 def ExpandNames(self):
5433 self.impl.ExpandNames(self)
5435 def DeclareLocks(self, level):
5436 self.impl.DeclareLocks(self, level)
5438 def Exec(self, feedback_fn):
5439 return self.impl.NewStyleQuery(self)
5442 class LUQueryFields(NoHooksLU):
5443 """Query for resources/items of a certain kind.
5446 # pylint: disable=W0142
5449 def CheckArguments(self):
5450 self.qcls = _GetQueryImplementation(self.op.what)
5452 def ExpandNames(self):
5453 self.needed_locks = {}
5455 def Exec(self, feedback_fn):
5456 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5459 class LUNodeModifyStorage(NoHooksLU):
5460 """Logical unit for modifying a storage volume on a node.
5465 def CheckArguments(self):
5466 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5468 storage_type = self.op.storage_type
5471 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5473 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5474 " modified" % storage_type,
5477 diff = set(self.op.changes.keys()) - modifiable
5479 raise errors.OpPrereqError("The following fields can not be modified for"
5480 " storage units of type '%s': %r" %
5481 (storage_type, list(diff)),
5484 def ExpandNames(self):
5485 self.needed_locks = {
5486 locking.LEVEL_NODE: self.op.node_name,
5489 def Exec(self, feedback_fn):
5490 """Computes the list of nodes and their attributes.
5493 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5494 result = self.rpc.call_storage_modify(self.op.node_name,
5495 self.op.storage_type, st_args,
5496 self.op.name, self.op.changes)
5497 result.Raise("Failed to modify storage unit '%s' on %s" %
5498 (self.op.name, self.op.node_name))
5501 class LUNodeAdd(LogicalUnit):
5502 """Logical unit for adding node to the cluster.
5506 HTYPE = constants.HTYPE_NODE
5507 _NFLAGS = ["master_capable", "vm_capable"]
5509 def CheckArguments(self):
5510 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5511 # validate/normalize the node name
5512 self.hostname = netutils.GetHostname(name=self.op.node_name,
5513 family=self.primary_ip_family)
5514 self.op.node_name = self.hostname.name
5516 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5517 raise errors.OpPrereqError("Cannot readd the master node",
5520 if self.op.readd and self.op.group:
5521 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5522 " being readded", errors.ECODE_INVAL)
5524 def BuildHooksEnv(self):
5527 This will run on all nodes before, and on all nodes + the new node after.
5531 "OP_TARGET": self.op.node_name,
5532 "NODE_NAME": self.op.node_name,
5533 "NODE_PIP": self.op.primary_ip,
5534 "NODE_SIP": self.op.secondary_ip,
5535 "MASTER_CAPABLE": str(self.op.master_capable),
5536 "VM_CAPABLE": str(self.op.vm_capable),
5539 def BuildHooksNodes(self):
5540 """Build hooks nodes.
5543 # Exclude added node
5544 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5545 post_nodes = pre_nodes + [self.op.node_name, ]
5547 return (pre_nodes, post_nodes)
5549 def CheckPrereq(self):
5550 """Check prerequisites.
5553 - the new node is not already in the config
5555 - its parameters (single/dual homed) match the cluster
5557 Any errors are signaled by raising errors.OpPrereqError.
5561 hostname = self.hostname
5562 node = hostname.name
5563 primary_ip = self.op.primary_ip = hostname.ip
5564 if self.op.secondary_ip is None:
5565 if self.primary_ip_family == netutils.IP6Address.family:
5566 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5567 " IPv4 address must be given as secondary",
5569 self.op.secondary_ip = primary_ip
5571 secondary_ip = self.op.secondary_ip
5572 if not netutils.IP4Address.IsValid(secondary_ip):
5573 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5574 " address" % secondary_ip, errors.ECODE_INVAL)
5576 node_list = cfg.GetNodeList()
5577 if not self.op.readd and node in node_list:
5578 raise errors.OpPrereqError("Node %s is already in the configuration" %
5579 node, errors.ECODE_EXISTS)
5580 elif self.op.readd and node not in node_list:
5581 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5584 self.changed_primary_ip = False
5586 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5587 if self.op.readd and node == existing_node_name:
5588 if existing_node.secondary_ip != secondary_ip:
5589 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5590 " address configuration as before",
5592 if existing_node.primary_ip != primary_ip:
5593 self.changed_primary_ip = True
5597 if (existing_node.primary_ip == primary_ip or
5598 existing_node.secondary_ip == primary_ip or
5599 existing_node.primary_ip == secondary_ip or
5600 existing_node.secondary_ip == secondary_ip):
5601 raise errors.OpPrereqError("New node ip address(es) conflict with"
5602 " existing node %s" % existing_node.name,
5603 errors.ECODE_NOTUNIQUE)
5605 # After this 'if' block, None is no longer a valid value for the
5606 # _capable op attributes
5608 old_node = self.cfg.GetNodeInfo(node)
5609 assert old_node is not None, "Can't retrieve locked node %s" % node
5610 for attr in self._NFLAGS:
5611 if getattr(self.op, attr) is None:
5612 setattr(self.op, attr, getattr(old_node, attr))
5614 for attr in self._NFLAGS:
5615 if getattr(self.op, attr) is None:
5616 setattr(self.op, attr, True)
5618 if self.op.readd and not self.op.vm_capable:
5619 pri, sec = cfg.GetNodeInstances(node)
5621 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5622 " flag set to false, but it already holds"
5623 " instances" % node,
5626 # check that the type of the node (single versus dual homed) is the
5627 # same as for the master
5628 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5629 master_singlehomed = myself.secondary_ip == myself.primary_ip
5630 newbie_singlehomed = secondary_ip == primary_ip
5631 if master_singlehomed != newbie_singlehomed:
5632 if master_singlehomed:
5633 raise errors.OpPrereqError("The master has no secondary ip but the"
5634 " new node has one",
5637 raise errors.OpPrereqError("The master has a secondary ip but the"
5638 " new node doesn't have one",
5641 # checks reachability
5642 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5643 raise errors.OpPrereqError("Node not reachable by ping",
5644 errors.ECODE_ENVIRON)
5646 if not newbie_singlehomed:
5647 # check reachability from my secondary ip to newbie's secondary ip
5648 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5649 source=myself.secondary_ip):
5650 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5651 " based ping to node daemon port",
5652 errors.ECODE_ENVIRON)
5659 if self.op.master_capable:
5660 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5662 self.master_candidate = False
5665 self.new_node = old_node
5667 node_group = cfg.LookupNodeGroup(self.op.group)
5668 self.new_node = objects.Node(name=node,
5669 primary_ip=primary_ip,
5670 secondary_ip=secondary_ip,
5671 master_candidate=self.master_candidate,
5672 offline=False, drained=False,
5673 group=node_group, ndparams={})
5675 if self.op.ndparams:
5676 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5677 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
5678 "node", "cluster or group")
5680 if self.op.hv_state:
5681 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5683 if self.op.disk_state:
5684 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5686 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5687 # it a property on the base class.
5688 rpcrunner = rpc.DnsOnlyRunner()
5689 result = rpcrunner.call_version([node])[node]
5690 result.Raise("Can't get version information from node %s" % node)
5691 if constants.PROTOCOL_VERSION == result.payload:
5692 logging.info("Communication to node %s fine, sw version %s match",
5693 node, result.payload)
5695 raise errors.OpPrereqError("Version mismatch master version %s,"
5696 " node version %s" %
5697 (constants.PROTOCOL_VERSION, result.payload),
5698 errors.ECODE_ENVIRON)
5700 vg_name = cfg.GetVGName()
5701 if vg_name is not None:
5702 vparams = {constants.NV_PVLIST: [vg_name]}
5703 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
5704 cname = self.cfg.GetClusterName()
5705 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
5706 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
5708 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
5709 "; ".join(errmsgs), errors.ECODE_ENVIRON)
5711 def Exec(self, feedback_fn):
5712 """Adds the new node to the cluster.
5715 new_node = self.new_node
5716 node = new_node.name
5718 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5721 # We are adding a new node, so we assume it's powered
5722 new_node.powered = True
5724 # for re-adds, reset the offline/drained/master-candidate flags;
5725 # we need to reset here, otherwise offline would prevent RPC calls
5726 # later in the procedure; this also means that if the re-add
5727 # fails, we are left with a non-offlined, broken node
5729 new_node.drained = new_node.offline = False # pylint: disable=W0201
5730 self.LogInfo("Readding a node, the offline/drained flags were reset")
5731 # if we demote the node, we do cleanup later in the procedure
5732 new_node.master_candidate = self.master_candidate
5733 if self.changed_primary_ip:
5734 new_node.primary_ip = self.op.primary_ip
5736 # copy the master/vm_capable flags
5737 for attr in self._NFLAGS:
5738 setattr(new_node, attr, getattr(self.op, attr))
5740 # notify the user about any possible mc promotion
5741 if new_node.master_candidate:
5742 self.LogInfo("Node will be a master candidate")
5744 if self.op.ndparams:
5745 new_node.ndparams = self.op.ndparams
5747 new_node.ndparams = {}
5749 if self.op.hv_state:
5750 new_node.hv_state_static = self.new_hv_state
5752 if self.op.disk_state:
5753 new_node.disk_state_static = self.new_disk_state
5755 # Add node to our /etc/hosts, and add key to known_hosts
5756 if self.cfg.GetClusterInfo().modify_etc_hosts:
5757 master_node = self.cfg.GetMasterNode()
5758 result = self.rpc.call_etc_hosts_modify(master_node,
5759 constants.ETC_HOSTS_ADD,
5762 result.Raise("Can't update hosts file with new host data")
5764 if new_node.secondary_ip != new_node.primary_ip:
5765 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5768 node_verify_list = [self.cfg.GetMasterNode()]
5769 node_verify_param = {
5770 constants.NV_NODELIST: ([node], {}),
5771 # TODO: do a node-net-test as well?
5774 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5775 self.cfg.GetClusterName())
5776 for verifier in node_verify_list:
5777 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5778 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5780 for failed in nl_payload:
5781 feedback_fn("ssh/hostname verification failed"
5782 " (checking from %s): %s" %
5783 (verifier, nl_payload[failed]))
5784 raise errors.OpExecError("ssh/hostname verification failed")
5787 _RedistributeAncillaryFiles(self)
5788 self.context.ReaddNode(new_node)
5789 # make sure we redistribute the config
5790 self.cfg.Update(new_node, feedback_fn)
5791 # and make sure the new node will not have old files around
5792 if not new_node.master_candidate:
5793 result = self.rpc.call_node_demote_from_mc(new_node.name)
5794 msg = result.fail_msg
5796 self.LogWarning("Node failed to demote itself from master"
5797 " candidate status: %s" % msg)
5799 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5800 additional_vm=self.op.vm_capable)
5801 self.context.AddNode(new_node, self.proc.GetECId())
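# Illustrative CLI usage (sketch only; option spellings assumed, not taken
# from this module): this LU backs "gnt-node add", e.g.
#   gnt-node add -s 192.0.2.10 node4.example.com      # dual-homed cluster
#   gnt-node add --readd node4.example.com            # re-add a known node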
5804 class LUNodeSetParams(LogicalUnit):
5805 """Modifies the parameters of a node.
5807 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5808 to the node role (as _ROLE_*)
5809 @cvar _R2F: a dictionary from node role to tuples of flags
5810 @cvar _FLAGS: a list of attribute names corresponding to the flags
5813 HPATH = "node-modify"
5814 HTYPE = constants.HTYPE_NODE
5816 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5818 (True, False, False): _ROLE_CANDIDATE,
5819 (False, True, False): _ROLE_DRAINED,
5820 (False, False, True): _ROLE_OFFLINE,
5821 (False, False, False): _ROLE_REGULAR,
5823 _R2F = dict((v, k) for k, v in _F2R.items())
5824 _FLAGS = ["master_candidate", "drained", "offline"]
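# Illustrative sketch of the mapping above (comment only): the tuple of node
# flags (master_candidate, drained, offline) resolves to exactly one role,
# e.g. _F2R[(True, False, False)] is _ROLE_CANDIDATE and _R2F[_ROLE_OFFLINE]
# is (False, False, True); _FLAGS lists the node attributes in the same
# order as the tuple elements.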
5826 def CheckArguments(self):
5827 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5828 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5829 self.op.master_capable, self.op.vm_capable,
5830 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5832 if all_mods.count(None) == len(all_mods):
5833 raise errors.OpPrereqError("Please pass at least one modification",
5835 if all_mods.count(True) > 1:
5836 raise errors.OpPrereqError("Can't set the node into more than one"
5837 " state at the same time",
5840 # Boolean value that tells us whether we might be demoting from MC
5841 self.might_demote = (self.op.master_candidate is False or
5842 self.op.offline is True or
5843 self.op.drained is True or
5844 self.op.master_capable is False)
5846 if self.op.secondary_ip:
5847 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5848 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5849 " address" % self.op.secondary_ip,
5852 self.lock_all = self.op.auto_promote and self.might_demote
5853 self.lock_instances = self.op.secondary_ip is not None
5855 def _InstanceFilter(self, instance):
5856 """Filter for getting affected instances.
5859 return (instance.disk_template in constants.DTS_INT_MIRROR and
5860 self.op.node_name in instance.all_nodes)
5862 def ExpandNames(self):
5864 self.needed_locks = {
5865 locking.LEVEL_NODE: locking.ALL_SET,
5867 # Block allocations when all nodes are locked
5868 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5871 self.needed_locks = {
5872 locking.LEVEL_NODE: self.op.node_name,
5875 # Since modifying a node can have severe effects on currently running
5876 # operations the resource lock is at least acquired in shared mode
5877 self.needed_locks[locking.LEVEL_NODE_RES] = \
5878 self.needed_locks[locking.LEVEL_NODE]
5880 # Get all locks except nodes in shared mode; they are not used for anything
5881 # but read-only access
5882 self.share_locks = _ShareAll()
5883 self.share_locks[locking.LEVEL_NODE] = 0
5884 self.share_locks[locking.LEVEL_NODE_RES] = 0
5885 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
5887 if self.lock_instances:
5888 self.needed_locks[locking.LEVEL_INSTANCE] = \
5889 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5891 def BuildHooksEnv(self):
5894 This runs on the master node.
5898 "OP_TARGET": self.op.node_name,
5899 "MASTER_CANDIDATE": str(self.op.master_candidate),
5900 "OFFLINE": str(self.op.offline),
5901 "DRAINED": str(self.op.drained),
5902 "MASTER_CAPABLE": str(self.op.master_capable),
5903 "VM_CAPABLE": str(self.op.vm_capable),
5906 def BuildHooksNodes(self):
5907 """Build hooks nodes.
5910 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5913 def CheckPrereq(self):
5914 """Check prerequisites.
5916 This only checks the instance list against the existing names.
5919 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5921 if self.lock_instances:
5922 affected_instances = \
5923 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5925 # Verify instance locks
5926 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5927 wanted_instances = frozenset(affected_instances.keys())
5928 if wanted_instances - owned_instances:
5929 raise errors.OpPrereqError("Instances affected by changing node %s's"
5930 " secondary IP address have changed since"
5931 " locks were acquired, wanted '%s', have"
5932 " '%s'; retry the operation" %
5934 utils.CommaJoin(wanted_instances),
5935 utils.CommaJoin(owned_instances)),
5938 affected_instances = None
5940 if (self.op.master_candidate is not None or
5941 self.op.drained is not None or
5942 self.op.offline is not None):
5943 # we can't change the master's node flags
5944 if self.op.node_name == self.cfg.GetMasterNode():
5945 raise errors.OpPrereqError("The master role can be changed"
5946 " only via master-failover",
5949 if self.op.master_candidate and not node.master_capable:
5950 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5951 " it a master candidate" % node.name,
5954 if self.op.vm_capable is False:
5955 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5957 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5958 " the vm_capable flag" % node.name,
5961 if node.master_candidate and self.might_demote and not self.lock_all:
5962 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5963 # check if after removing the current node, we're missing master candidates
5965 (mc_remaining, mc_should, _) = \
5966 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5967 if mc_remaining < mc_should:
5968 raise errors.OpPrereqError("Not enough master candidates, please"
5969 " pass auto promote option to allow"
5970 " promotion (--auto-promote or RAPI"
5971 " auto_promote=True)", errors.ECODE_STATE)
5973 self.old_flags = old_flags = (node.master_candidate,
5974 node.drained, node.offline)
5975 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5976 self.old_role = old_role = self._F2R[old_flags]
5978 # Check for ineffective changes
5979 for attr in self._FLAGS:
5980 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5981 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5982 setattr(self.op, attr, None)
5984 # Past this point, any flag change to False means a transition
5985 # away from the respective state, as only real changes are kept
5987 # TODO: We might query the real power state if it supports OOB
5988 if _SupportsOob(self.cfg, node):
5989 if self.op.offline is False and not (node.powered or
5990 self.op.powered is True):
5991 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5992 " offline status can be reset") %
5993 self.op.node_name, errors.ECODE_STATE)
5994 elif self.op.powered is not None:
5995 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5996 " as it does not support out-of-band"
5997 " handling") % self.op.node_name,
6000 # If we're being de-offlined/un-drained (or made master-capable), we'll promote ourselves to master candidate if needed
6001 if (self.op.drained is False or self.op.offline is False or
6002 (self.op.master_capable and not node.master_capable)):
6003 if _DecideSelfPromotion(self):
6004 self.op.master_candidate = True
6005 self.LogInfo("Auto-promoting node to master candidate")
6007 # If we're no longer master capable, we'll demote ourselves from MC
6008 if self.op.master_capable is False and node.master_candidate:
6009 self.LogInfo("Demoting from master candidate")
6010 self.op.master_candidate = False
6013 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6014 if self.op.master_candidate:
6015 new_role = self._ROLE_CANDIDATE
6016 elif self.op.drained:
6017 new_role = self._ROLE_DRAINED
6018 elif self.op.offline:
6019 new_role = self._ROLE_OFFLINE
6020 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6021 # False is still in new flags, which means we're un-setting (the current) flags
6023 new_role = self._ROLE_REGULAR
6024 else: # no new flags, nothing, keep old role
6027 self.new_role = new_role
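# Worked example (sketch): on a regular node, passing offline=True selects
# _ROLE_OFFLINE; on a master candidate, passing master_candidate=False with
# no other flag set falls through to the explicit-False branch and yields
# _ROLE_REGULAR.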
6029 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6030 # Trying to transition out of offline status
6031 result = self.rpc.call_version([node.name])[node.name]
6033 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6034 " to report its version: %s" %
6035 (node.name, result.fail_msg),
6038 self.LogWarning("Transitioning node from offline to online state"
6039 " without using re-add. Please make sure the node"
6042 # When changing the secondary ip, verify if this is a single-homed to
6043 # multi-homed transition or vice versa, and apply the relevant restrictions
6045 if self.op.secondary_ip:
6046 # Ok even without locking, because this can't be changed by any LU
6047 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6048 master_singlehomed = master.secondary_ip == master.primary_ip
6049 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6050 if self.op.force and node.name == master.name:
6051 self.LogWarning("Transitioning from single-homed to multi-homed"
6052 " cluster; all nodes will require a secondary IP"
6055 raise errors.OpPrereqError("Changing the secondary ip on a"
6056 " single-homed cluster requires the"
6057 " --force option to be passed, and the"
6058 " target node to be the master",
6060 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6061 if self.op.force and node.name == master.name:
6062 self.LogWarning("Transitioning from multi-homed to single-homed"
6063 " cluster; secondary IP addresses will have to be"
6066 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6067 " same as the primary IP on a multi-homed"
6068 " cluster, unless the --force option is"
6069 " passed, and the target node is the"
6070 " master", errors.ECODE_INVAL)
6072 assert not (frozenset(affected_instances) -
6073 self.owned_locks(locking.LEVEL_INSTANCE))
6076 if affected_instances:
6077 msg = ("Cannot change secondary IP address: offline node has"
6078 " instances (%s) configured to use it" %
6079 utils.CommaJoin(affected_instances.keys()))
6080 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6082 # On online nodes, check that no instances are running, and that
6083 # the node has the new ip and we can reach it.
6084 for instance in affected_instances.values():
6085 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6086 msg="cannot change secondary ip")
6088 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6089 if master.name != node.name:
6090 # check reachability from master secondary ip to new secondary ip
6091 if not netutils.TcpPing(self.op.secondary_ip,
6092 constants.DEFAULT_NODED_PORT,
6093 source=master.secondary_ip):
6094 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6095 " based ping to node daemon port",
6096 errors.ECODE_ENVIRON)
6098 if self.op.ndparams:
6099 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6100 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6101 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6102 "node", "cluster or group")
6103 self.new_ndparams = new_ndparams
6105 if self.op.hv_state:
6106 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6107 self.node.hv_state_static)
6109 if self.op.disk_state:
6110 self.new_disk_state = \
6111 _MergeAndVerifyDiskState(self.op.disk_state,
6112 self.node.disk_state_static)
6114 def Exec(self, feedback_fn):
6119 old_role = self.old_role
6120 new_role = self.new_role
6124 if self.op.ndparams:
6125 node.ndparams = self.new_ndparams
6127 if self.op.powered is not None:
6128 node.powered = self.op.powered
6130 if self.op.hv_state:
6131 node.hv_state_static = self.new_hv_state
6133 if self.op.disk_state:
6134 node.disk_state_static = self.new_disk_state
6136 for attr in ["master_capable", "vm_capable"]:
6137 val = getattr(self.op, attr)
6139 setattr(node, attr, val)
6140 result.append((attr, str(val)))
6142 if new_role != old_role:
6143 # Tell the node to demote itself, if no longer MC and not offline
6144 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6145 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6147 self.LogWarning("Node failed to demote itself: %s", msg)
6149 new_flags = self._R2F[new_role]
6150 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6152 result.append((desc, str(nf)))
6153 (node.master_candidate, node.drained, node.offline) = new_flags
6155 # when we locked all nodes, adjust the candidate pool before updating this node
6157 _AdjustCandidatePool(self, [node.name])
6159 if self.op.secondary_ip:
6160 node.secondary_ip = self.op.secondary_ip
6161 result.append(("secondary_ip", self.op.secondary_ip))
6163 # this will trigger configuration file update, if needed
6164 self.cfg.Update(node, feedback_fn)
6166 # this will trigger job queue propagation or cleanup if the mc flag changed
6168 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6169 self.context.ReaddNode(node)
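# Illustrative result shape (sketch, values invented): the list built above
# pairs each changed attribute with its new value as a string, e.g.
#   [("master_candidate", "False"), ("offline", "True"),
#    ("secondary_ip", "192.0.2.20")]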
6174 class LUNodePowercycle(NoHooksLU):
6175 """Powercycles a node.
6180 def CheckArguments(self):
6181 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6182 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6183 raise errors.OpPrereqError("The node is the master and the force"
6184 " parameter was not set",
6187 def ExpandNames(self):
6188 """Locking for PowercycleNode.
6190 This is a last-resort option and shouldn't block on other
6191 jobs. Therefore, we grab no locks.
6194 self.needed_locks = {}
6196 def Exec(self, feedback_fn):
6200 result = self.rpc.call_node_powercycle(self.op.node_name,
6201 self.cfg.GetHypervisorType())
6202 result.Raise("Failed to schedule the reboot")
6203 return result.payload
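# Illustrative CLI usage (sketch; option spelling assumed): this LU backs
# "gnt-node powercycle", e.g. "gnt-node powercycle node3.example.com";
# powercycling the master additionally requires the force flag checked in
# CheckArguments above.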
6206 class LUClusterQuery(NoHooksLU):
6207 """Query cluster configuration.
6212 def ExpandNames(self):
6213 self.needed_locks = {}
6215 def Exec(self, feedback_fn):
6216 """Return cluster config.
6219 cluster = self.cfg.GetClusterInfo()
6222 # Filter just for enabled hypervisors
6223 for os_name, hv_dict in cluster.os_hvp.items():
6224 os_hvp[os_name] = {}
6225 for hv_name, hv_params in hv_dict.items():
6226 if hv_name in cluster.enabled_hypervisors:
6227 os_hvp[os_name][hv_name] = hv_params
6229 # Convert ip_family to ip_version
6230 primary_ip_version = constants.IP4_VERSION
6231 if cluster.primary_ip_family == netutils.IP6Address.family:
6232 primary_ip_version = constants.IP6_VERSION
6235 "software_version": constants.RELEASE_VERSION,
6236 "protocol_version": constants.PROTOCOL_VERSION,
6237 "config_version": constants.CONFIG_VERSION,
6238 "os_api_version": max(constants.OS_API_VERSIONS),
6239 "export_version": constants.EXPORT_VERSION,
6240 "architecture": runtime.GetArchInfo(),
6241 "name": cluster.cluster_name,
6242 "master": cluster.master_node,
6243 "default_hypervisor": cluster.primary_hypervisor,
6244 "enabled_hypervisors": cluster.enabled_hypervisors,
6245 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6246 for hypervisor_name in cluster.enabled_hypervisors]),
6248 "beparams": cluster.beparams,
6249 "osparams": cluster.osparams,
6250 "ipolicy": cluster.ipolicy,
6251 "nicparams": cluster.nicparams,
6252 "ndparams": cluster.ndparams,
6253 "diskparams": cluster.diskparams,
6254 "candidate_pool_size": cluster.candidate_pool_size,
6255 "master_netdev": cluster.master_netdev,
6256 "master_netmask": cluster.master_netmask,
6257 "use_external_mip_script": cluster.use_external_mip_script,
6258 "volume_group_name": cluster.volume_group_name,
6259 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6260 "file_storage_dir": cluster.file_storage_dir,
6261 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6262 "maintain_node_health": cluster.maintain_node_health,
6263 "ctime": cluster.ctime,
6264 "mtime": cluster.mtime,
6265 "uuid": cluster.uuid,
6266 "tags": list(cluster.GetTags()),
6267 "uid_pool": cluster.uid_pool,
6268 "default_iallocator": cluster.default_iallocator,
6269 "reserved_lvs": cluster.reserved_lvs,
6270 "primary_ip_version": primary_ip_version,
6271 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6272 "hidden_os": cluster.hidden_os,
6273 "blacklisted_os": cluster.blacklisted_os,
6279 class LUClusterConfigQuery(NoHooksLU):
6280 """Return configuration values.
6285 def CheckArguments(self):
6286 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6288 def ExpandNames(self):
6289 self.cq.ExpandNames(self)
6291 def DeclareLocks(self, level):
6292 self.cq.DeclareLocks(self, level)
6294 def Exec(self, feedback_fn):
6295 result = self.cq.OldStyleQuery(self)
6297 assert len(result) == 1
6302 class _ClusterQuery(_QueryBase):
6303 FIELDS = query.CLUSTER_FIELDS
6305 #: Do not sort (there is only one item)
6308 def ExpandNames(self, lu):
6309 lu.needed_locks = {}
6311 # The following variables interact with _QueryBase._GetNames
6312 self.wanted = locking.ALL_SET
6313 self.do_locking = self.use_locking
6316 raise errors.OpPrereqError("Can not use locking for cluster queries",
6319 def DeclareLocks(self, lu, level):
6322 def _GetQueryData(self, lu):
6323 """Computes the list of nodes and their attributes.
6326 # Locking is not used
6327 assert not (compat.any(lu.glm.is_owned(level)
6328 for level in locking.LEVELS
6329 if level != locking.LEVEL_CLUSTER) or
6330 self.do_locking or self.use_locking)
6332 if query.CQ_CONFIG in self.requested_data:
6333 cluster = lu.cfg.GetClusterInfo()
6335 cluster = NotImplemented
6337 if query.CQ_QUEUE_DRAINED in self.requested_data:
6338 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6340 drain_flag = NotImplemented
6342 if query.CQ_WATCHER_PAUSE in self.requested_data:
6343 master_name = lu.cfg.GetMasterNode()
6345 result = lu.rpc.call_get_watcher_pause(master_name)
6346 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6349 watcher_pause = result.payload
6351 watcher_pause = NotImplemented
6353 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6356 class LUInstanceActivateDisks(NoHooksLU):
6357 """Bring up an instance's disks.
6362 def ExpandNames(self):
6363 self._ExpandAndLockInstance()
6364 self.needed_locks[locking.LEVEL_NODE] = []
6365 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6367 def DeclareLocks(self, level):
6368 if level == locking.LEVEL_NODE:
6369 self._LockInstancesNodes()
6371 def CheckPrereq(self):
6372 """Check prerequisites.
6374 This checks that the instance is in the cluster.
6377 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6378 assert self.instance is not None, \
6379 "Cannot retrieve locked instance %s" % self.op.instance_name
6380 _CheckNodeOnline(self, self.instance.primary_node)
6382 def Exec(self, feedback_fn):
6383 """Activate the disks.
6386 disks_ok, disks_info = \
6387 _AssembleInstanceDisks(self, self.instance,
6388 ignore_size=self.op.ignore_size)
6390 raise errors.OpExecError("Cannot activate block devices")
6392 if self.op.wait_for_sync:
6393 if not _WaitForSync(self, self.instance):
6394 raise errors.OpExecError("Some disks of the instance are degraded!")
6399 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6401 """Prepare the block devices for an instance.
6403 This sets up the block devices on all nodes.
6405 @type lu: L{LogicalUnit}
6406 @param lu: the logical unit on whose behalf we execute
6407 @type instance: L{objects.Instance}
6408 @param instance: the instance for whose disks we assemble
6409 @type disks: list of L{objects.Disk} or None
6410 @param disks: which disks to assemble (or all, if None)
6411 @type ignore_secondaries: boolean
6412 @param ignore_secondaries: if true, errors on secondary nodes
6413 won't result in an error return from the function
6414 @type ignore_size: boolean
6415 @param ignore_size: if true, the current known size of the disk
6416 will not be used during the disk activation, useful for cases
6417 when the size is wrong
6418 @return: False if the operation failed, otherwise a list of
6419 (host, instance_visible_name, node_visible_name)
6420 with the mapping from node devices to instance devices
6425 iname = instance.name
6426 disks = _ExpandCheckDisks(instance, disks)
6428 # With the two-pass mechanism we try to reduce the window of
6429 # opportunity for the race condition of switching DRBD to primary
6430 # before handshaking has occurred, but we do not eliminate it
6432 # The proper fix would be to wait (with some limits) until the
6433 # connection has been made and drbd transitions from WFConnection
6434 # into any other network-connected state (Connected, SyncTarget,
6437 # 1st pass, assemble on all nodes in secondary mode
6438 for idx, inst_disk in enumerate(disks):
6439 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6441 node_disk = node_disk.Copy()
6442 node_disk.UnsetSize()
6443 lu.cfg.SetDiskID(node_disk, node)
6444 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6446 msg = result.fail_msg
6448 is_offline_secondary = (node in instance.secondary_nodes and
6450 lu.LogWarning("Could not prepare block device %s on node %s"
6451 " (is_primary=False, pass=1): %s",
6452 inst_disk.iv_name, node, msg)
6453 if not (ignore_secondaries or is_offline_secondary):
6456 # FIXME: race condition on drbd migration to primary
6458 # 2nd pass, do only the primary node
6459 for idx, inst_disk in enumerate(disks):
6462 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6463 if node != instance.primary_node:
6466 node_disk = node_disk.Copy()
6467 node_disk.UnsetSize()
6468 lu.cfg.SetDiskID(node_disk, node)
6469 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6471 msg = result.fail_msg
6473 lu.LogWarning("Could not prepare block device %s on node %s"
6474 " (is_primary=True, pass=2): %s",
6475 inst_disk.iv_name, node, msg)
6478 dev_path = result.payload
6480 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6482 # leave the disks configured for the primary node
6483 # this is a workaround that would be better fixed by
6484 # improving the logical/physical id handling
6486 lu.cfg.SetDiskID(disk, instance.primary_node)
6488 return disks_ok, device_info
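# Illustrative return value (sketch, values invented): disks_ok is a boolean
# and device_info lists (node, instance-visible name, node device path)
# tuples for the primary node, e.g.
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0"),
#           ("node1.example.com", "disk/1", "/dev/drbd1")])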
6491 def _StartInstanceDisks(lu, instance, force):
6492 """Start the disks of an instance.
6495 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6496 ignore_secondaries=force)
6498 _ShutdownInstanceDisks(lu, instance)
6499 if force is not None and not force:
6501 hint=("If the message above refers to a secondary node,"
6502 " you can retry the operation using '--force'"))
6503 raise errors.OpExecError("Disk consistency error")
6506 class LUInstanceDeactivateDisks(NoHooksLU):
6507 """Shutdown an instance's disks.
6512 def ExpandNames(self):
6513 self._ExpandAndLockInstance()
6514 self.needed_locks[locking.LEVEL_NODE] = []
6515 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6517 def DeclareLocks(self, level):
6518 if level == locking.LEVEL_NODE:
6519 self._LockInstancesNodes()
6521 def CheckPrereq(self):
6522 """Check prerequisites.
6524 This checks that the instance is in the cluster.
6527 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6528 assert self.instance is not None, \
6529 "Cannot retrieve locked instance %s" % self.op.instance_name
6531 def Exec(self, feedback_fn):
6532 """Deactivate the disks
6535 instance = self.instance
6537 _ShutdownInstanceDisks(self, instance)
6539 _SafeShutdownInstanceDisks(self, instance)
6542 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6543 """Shutdown block devices of an instance.
6545 This function checks if an instance is running, before calling
6546 _ShutdownInstanceDisks.
6549 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6550 _ShutdownInstanceDisks(lu, instance, disks=disks)
6553 def _ExpandCheckDisks(instance, disks):
6554 """Return the instance disks selected by the disks list
6556 @type disks: list of L{objects.Disk} or None
6557 @param disks: selected disks
6558 @rtype: list of L{objects.Disk}
6559 @return: selected instance disks to act on
6563 return instance.disks
6565 if not set(disks).issubset(instance.disks):
6566 raise errors.ProgrammerError("Can only act on disks belonging to the"
6571 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6572 """Shutdown block devices of an instance.
6574 This does the shutdown on all nodes of the instance.
6576 If ignore_primary is false, errors on the primary node are treated as failures; otherwise they are only logged as warnings.
6581 disks = _ExpandCheckDisks(instance, disks)
6584 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6585 lu.cfg.SetDiskID(top_disk, node)
6586 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6587 msg = result.fail_msg
6589 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6590 disk.iv_name, node, msg)
6591 if ((node == instance.primary_node and not ignore_primary) or
6592 (node != instance.primary_node and not result.offline)):
6597 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6598 """Checks if a node has enough free memory.
6600 This function checks if a given node has the needed amount of free
6601 memory. In case the node has less memory or we cannot get the
6602 information from the node, this function raises an OpPrereqError
6605 @type lu: C{LogicalUnit}
6606 @param lu: a logical unit from which we get configuration data
6608 @param node: the node to check
6609 @type reason: C{str}
6610 @param reason: string to use in the error message
6611 @type requested: C{int}
6612 @param requested: the amount of memory in MiB to check for
6613 @type hypervisor_name: C{str}
6614 @param hypervisor_name: the hypervisor to ask for memory stats
6616 @return: node current free memory
6617 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6618 we cannot check the node
6621 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
6622 nodeinfo[node].Raise("Can't get data from node %s" % node,
6623 prereq=True, ecode=errors.ECODE_ENVIRON)
6624 (_, _, (hv_info, )) = nodeinfo[node].payload
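# Illustrative payload (sketch; keys other than "memory_free" and "cpu_total"
# are assumptions): hv_info is the hypervisor-specific dict returned by
# call_node_info, e.g. {"memory_free": 3072, "memory_total": 8192,
# "cpu_total": 4}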
6626 free_mem = hv_info.get("memory_free", None)
6627 if not isinstance(free_mem, int):
6628 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6629 " was '%s'" % (node, free_mem),
6630 errors.ECODE_ENVIRON)
6631 if requested > free_mem:
6632 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6633 " needed %s MiB, available %s MiB" %
6634 (node, reason, requested, free_mem),
6639 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6640 """Checks if nodes have enough free disk space in all the VGs.
6642 This function checks if all given nodes have the needed amount of
6643 free disk. In case any node has less disk or we cannot get the
6644 information from the node, this function raises an OpPrereqError
6647 @type lu: C{LogicalUnit}
6648 @param lu: a logical unit from which we get configuration data
6649 @type nodenames: C{list}
6650 @param nodenames: the list of node names to check
6651 @type req_sizes: C{dict}
6652 @param req_sizes: the hash of vg and corresponding amount of disk in
6654 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6655 or we cannot check the node
6658 for vg, req_size in req_sizes.items():
6659 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
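# Illustrative req_sizes argument (sketch, names invented): a dict mapping
# each volume group to the total space required on it in MiB, e.g.
#   {"xenvg": 10240, "sharedvg": 2048}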
6662 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6663 """Checks if nodes have enough free disk space in the specified VG.
6665 This function checks if all given nodes have the needed amount of
6666 free disk. In case any node has less disk or we cannot get the
6667 information from the node, this function raises an OpPrereqError
6670 @type lu: C{LogicalUnit}
6671 @param lu: a logical unit from which we get configuration data
6672 @type nodenames: C{list}
6673 @param nodenames: the list of node names to check
6675 @param vg: the volume group to check
6676 @type requested: C{int}
6677 @param requested: the amount of disk in MiB to check for
6678 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6679 or we cannot check the node
6682 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
6683 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
6684 for node in nodenames:
6685 info = nodeinfo[node]
6686 info.Raise("Cannot get current information from node %s" % node,
6687 prereq=True, ecode=errors.ECODE_ENVIRON)
6688 (_, (vg_info, ), _) = info.payload
6689 vg_free = vg_info.get("vg_free", None)
6690 if not isinstance(vg_free, int):
6691 raise errors.OpPrereqError("Can't compute free disk space on node"
6692 " %s for vg %s, result was '%s'" %
6693 (node, vg, vg_free), errors.ECODE_ENVIRON)
6694 if requested > vg_free:
6695 raise errors.OpPrereqError("Not enough disk space on target node %s"
6696 " vg %s: required %d MiB, available %d MiB" %
6697 (node, vg, requested, vg_free),
6701 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6702 """Checks if nodes have enough physical CPUs
6704 This function checks if all given nodes have the needed number of
6705 physical CPUs. In case any node has less CPUs or we cannot get the
6706 information from the node, this function raises an OpPrereqError
6709 @type lu: C{LogicalUnit}
6710 @param lu: a logical unit from which we get configuration data
6711 @type nodenames: C{list}
6712 @param nodenames: the list of node names to check
6713 @type requested: C{int}
6714 @param requested: the minimum acceptable number of physical CPUs
6715 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6716 or we cannot check the node
6719 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
6720 for node in nodenames:
6721 info = nodeinfo[node]
6722 info.Raise("Cannot get current information from node %s" % node,
6723 prereq=True, ecode=errors.ECODE_ENVIRON)
6724 (_, _, (hv_info, )) = info.payload
6725 num_cpus = hv_info.get("cpu_total", None)
6726 if not isinstance(num_cpus, int):
6727 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6728 " on node %s, result was '%s'" %
6729 (node, num_cpus), errors.ECODE_ENVIRON)
6730 if requested > num_cpus:
6731 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6732 "required" % (node, num_cpus, requested),
6736 class LUInstanceStartup(LogicalUnit):
6737 """Starts an instance.
6740 HPATH = "instance-start"
6741 HTYPE = constants.HTYPE_INSTANCE
6744 def CheckArguments(self):
6746 if self.op.beparams:
6747 # fill the beparams dict
6748 objects.UpgradeBeParams(self.op.beparams)
6749 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6751 def ExpandNames(self):
6752 self._ExpandAndLockInstance()
6753 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6755 def DeclareLocks(self, level):
6756 if level == locking.LEVEL_NODE_RES:
6757 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6759 def BuildHooksEnv(self):
6762 This runs on master, primary and secondary nodes of the instance.
6766 "FORCE": self.op.force,
6769 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6773 def BuildHooksNodes(self):
6774 """Build hooks nodes.
6777 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6780 def CheckPrereq(self):
6781 """Check prerequisites.
6783 This checks that the instance is in the cluster.
6786 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6787 assert self.instance is not None, \
6788 "Cannot retrieve locked instance %s" % self.op.instance_name
6791 if self.op.hvparams:
6792 # check hypervisor parameter syntax (locally)
6793 cluster = self.cfg.GetClusterInfo()
6794 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6795 filled_hvp = cluster.FillHV(instance)
6796 filled_hvp.update(self.op.hvparams)
6797 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
6798 hv_type.CheckParameterSyntax(filled_hvp)
6799 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6801 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6803 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6805 if self.primary_offline and self.op.ignore_offline_nodes:
6806 self.LogWarning("Ignoring offline primary node")
6808 if self.op.hvparams or self.op.beparams:
6809 self.LogWarning("Overridden parameters are ignored")
6811 _CheckNodeOnline(self, instance.primary_node)
6813 bep = self.cfg.GetClusterInfo().FillBE(instance)
6814 bep.update(self.op.beparams)
6816 # check bridges existence
6817 _CheckInstanceBridgesExist(self, instance)
6819 remote_info = self.rpc.call_instance_info(instance.primary_node,
6821 instance.hypervisor)
6822 remote_info.Raise("Error checking node %s" % instance.primary_node,
6823 prereq=True, ecode=errors.ECODE_ENVIRON)
6824 if not remote_info.payload: # not running already
6825 _CheckNodeFreeMemory(self, instance.primary_node,
6826 "starting instance %s" % instance.name,
6827 bep[constants.BE_MINMEM], instance.hypervisor)
6829 def Exec(self, feedback_fn):
6830 """Start the instance.
6833 instance = self.instance
6834 force = self.op.force
6835 reason = self.op.reason
6837 if not self.op.no_remember:
6838 self.cfg.MarkInstanceUp(instance.name)
6840 if self.primary_offline:
6841 assert self.op.ignore_offline_nodes
6842 self.LogInfo("Primary node offline, marked instance as started")
6844 node_current = instance.primary_node
6846 _StartInstanceDisks(self, instance, force)
6849 self.rpc.call_instance_start(node_current,
6850 (instance, self.op.hvparams,
6852 self.op.startup_paused, reason)
6853 msg = result.fail_msg
6855 _ShutdownInstanceDisks(self, instance)
6856 raise errors.OpExecError("Could not start instance: %s" % msg)
6859 class LUInstanceReboot(LogicalUnit):
6860 """Reboot an instance.
6863 HPATH = "instance-reboot"
6864 HTYPE = constants.HTYPE_INSTANCE
6867 def ExpandNames(self):
6868 self._ExpandAndLockInstance()
6870 def BuildHooksEnv(self):
6873 This runs on master, primary and secondary nodes of the instance.
6877 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6878 "REBOOT_TYPE": self.op.reboot_type,
6879 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6882 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6886 def BuildHooksNodes(self):
6887 """Build hooks nodes.
6890 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6893 def CheckPrereq(self):
6894 """Check prerequisites.
6896 This checks that the instance is in the cluster.
6899 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6900 assert self.instance is not None, \
6901 "Cannot retrieve locked instance %s" % self.op.instance_name
6902 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6903 _CheckNodeOnline(self, instance.primary_node)
6905 # check bridges existence
6906 _CheckInstanceBridgesExist(self, instance)
6908 def Exec(self, feedback_fn):
6909 """Reboot the instance.
6912 instance = self.instance
6913 ignore_secondaries = self.op.ignore_secondaries
6914 reboot_type = self.op.reboot_type
6915 reason = self.op.reason
6917 remote_info = self.rpc.call_instance_info(instance.primary_node,
6919 instance.hypervisor)
6920 remote_info.Raise("Error checking node %s" % instance.primary_node)
6921 instance_running = bool(remote_info.payload)
6923 node_current = instance.primary_node
6925 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6926 constants.INSTANCE_REBOOT_HARD]:
6927 for disk in instance.disks:
6928 self.cfg.SetDiskID(disk, node_current)
6929 result = self.rpc.call_instance_reboot(node_current, instance,
6931 self.op.shutdown_timeout, reason)
6932 result.Raise("Could not reboot instance")
6934 if instance_running:
6935 result = self.rpc.call_instance_shutdown(node_current, instance,
6936 self.op.shutdown_timeout,
6938 result.Raise("Could not shutdown instance for full reboot")
6939 _ShutdownInstanceDisks(self, instance)
6941 self.LogInfo("Instance %s was already stopped, starting now",
6943 _StartInstanceDisks(self, instance, ignore_secondaries)
6944 result = self.rpc.call_instance_start(node_current,
6945 (instance, None, None), False,
6947 msg = result.fail_msg
6949 _ShutdownInstanceDisks(self, instance)
6950 raise errors.OpExecError("Could not start instance for"
6951 " full reboot: %s" % msg)
6953 self.cfg.MarkInstanceUp(instance.name)
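# Summary of the reboot paths above (sketch): soft/hard reboots of a running
# instance go through a single call_instance_reboot RPC; a full reboot (or
# rebooting a stopped instance) shuts the instance and its disks down, then
# reassembles the disks and starts it again.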
6956 class LUInstanceShutdown(LogicalUnit):
6957 """Shutdown an instance.
6960 HPATH = "instance-stop"
6961 HTYPE = constants.HTYPE_INSTANCE
6964 def ExpandNames(self):
6965 self._ExpandAndLockInstance()
6967 def BuildHooksEnv(self):
6970 This runs on master, primary and secondary nodes of the instance.
6973 env = _BuildInstanceHookEnvByObject(self, self.instance)
6974 env["TIMEOUT"] = self.op.timeout
6977 def BuildHooksNodes(self):
6978 """Build hooks nodes.
6981 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6984 def CheckPrereq(self):
6985 """Check prerequisites.
6987 This checks that the instance is in the cluster.
6990 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6991 assert self.instance is not None, \
6992 "Cannot retrieve locked instance %s" % self.op.instance_name
6994 if not self.op.force:
6995 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6997 self.LogWarning("Ignoring offline instance check")
6999 self.primary_offline = \
7000 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7002 if self.primary_offline and self.op.ignore_offline_nodes:
7003 self.LogWarning("Ignoring offline primary node")
7005 _CheckNodeOnline(self, self.instance.primary_node)
7007 def Exec(self, feedback_fn):
7008 """Shutdown the instance.
7011 instance = self.instance
7012 node_current = instance.primary_node
7013 timeout = self.op.timeout
7014 reason = self.op.reason
7016 # If the instance is offline we shouldn't mark it as down, as that
7017 # resets the offline flag.
7018 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7019 self.cfg.MarkInstanceDown(instance.name)
7021 if self.primary_offline:
7022 assert self.op.ignore_offline_nodes
7023 self.LogInfo("Primary node offline, marked instance as stopped")
7025 result = self.rpc.call_instance_shutdown(node_current, instance, timeout,
7027 msg = result.fail_msg
7029 self.LogWarning("Could not shutdown instance: %s", msg)
7031 _ShutdownInstanceDisks(self, instance)
7034 class LUInstanceReinstall(LogicalUnit):
7035 """Reinstall an instance.
7038 HPATH = "instance-reinstall"
7039 HTYPE = constants.HTYPE_INSTANCE
7042 def ExpandNames(self):
7043 self._ExpandAndLockInstance()
7045 def BuildHooksEnv(self):
7048 This runs on master, primary and secondary nodes of the instance.
7051 return _BuildInstanceHookEnvByObject(self, self.instance)
7053 def BuildHooksNodes(self):
7054 """Build hooks nodes.
7057 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7060 def CheckPrereq(self):
7061 """Check prerequisites.
7063 This checks that the instance is in the cluster and is not running.
7066 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7067 assert instance is not None, \
7068 "Cannot retrieve locked instance %s" % self.op.instance_name
7069 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7070 " offline, cannot reinstall")
7072 if instance.disk_template == constants.DT_DISKLESS:
7073 raise errors.OpPrereqError("Instance '%s' has no disks" %
7074 self.op.instance_name,
7076 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7078 if self.op.os_type is not None:
7080 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7081 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7082 instance_os = self.op.os_type
7084 instance_os = instance.os
7086 nodelist = list(instance.all_nodes)
7088 if self.op.osparams:
7089 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7090 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7091 self.os_inst = i_osdict # the new dict (without defaults)
7095 self.instance = instance
7097 def Exec(self, feedback_fn):
7098 """Reinstall the instance.
7101 inst = self.instance
7103 if self.op.os_type is not None:
7104 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7105 inst.os = self.op.os_type
7106 # Write to configuration
7107 self.cfg.Update(inst, feedback_fn)
7109 _StartInstanceDisks(self, inst, None)
7111 feedback_fn("Running the instance OS create scripts...")
7112 # FIXME: pass debug option from opcode to backend
7113 result = self.rpc.call_instance_os_add(inst.primary_node,
7114 (inst, self.os_inst), True,
7115 self.op.debug_level)
7116 result.Raise("Could not install OS for instance %s on node %s" %
7117 (inst.name, inst.primary_node))
7119 _ShutdownInstanceDisks(self, inst)
7122 class LUInstanceRecreateDisks(LogicalUnit):
7123 """Recreate an instance's missing disks.
7126 HPATH = "instance-recreate-disks"
7127 HTYPE = constants.HTYPE_INSTANCE
7130 _MODIFYABLE = compat.UniqueFrozenset([
7131 constants.IDISK_SIZE,
7132 constants.IDISK_MODE,
7135 # New or changed disk parameters may have different semantics
7136 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7137 constants.IDISK_ADOPT,
7139 # TODO: Implement support for changing the VG while recreating
7141 constants.IDISK_METAVG,
7142 constants.IDISK_PROVIDER,
7143 constants.IDISK_NAME,
7146 def _RunAllocator(self):
7147 """Run the allocator based on input opcode.
7150 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7153 # The allocator should actually run in "relocate" mode, but current
7154 # allocators don't support relocating all the nodes of an instance at
7155 # the same time. As a workaround we use "allocate" mode, but this is
7156 # suboptimal for two reasons:
7157 # - The instance name passed to the allocator is present in the list of
7158 # existing instances, so there could be a conflict within the
7159 # internal structures of the allocator. This doesn't happen with the
7160 # current allocators, but it's a liability.
7161 # - The allocator counts the resources used by the instance twice: once
7162 # because the instance exists already, and once because it tries to
7163 # allocate a new instance.
7164 # The allocator could choose some of the nodes on which the instance is
7165 # running, but that's not a problem. If the instance nodes are broken,
7166 # they should already be marked as drained or offline, and hence
7167 # skipped by the allocator. If instance disks have been lost for other
7168 # reasons, then recreating the disks on the same nodes should be fine.
7169 disk_template = self.instance.disk_template
7170 spindle_use = be_full[constants.BE_SPINDLE_USE]
7171 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7172 disk_template=disk_template,
7173 tags=list(self.instance.GetTags()),
7174 os=self.instance.os,
7176 vcpus=be_full[constants.BE_VCPUS],
7177 memory=be_full[constants.BE_MAXMEM],
7178 spindle_use=spindle_use,
7179 disks=[{constants.IDISK_SIZE: d.size,
7180 constants.IDISK_MODE: d.mode}
7181 for d in self.instance.disks],
7182 hypervisor=self.instance.hypervisor,
7183 node_whitelist=None)
7184 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7186 ial.Run(self.op.iallocator)
7188 assert req.RequiredNodes() == len(self.instance.all_nodes)
7191 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7192 " %s" % (self.op.iallocator, ial.info),
7195 self.op.nodes = ial.result
7196 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7197 self.op.instance_name, self.op.iallocator,
7198 utils.CommaJoin(ial.result))
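# Illustrative allocator result (sketch, names invented): ial.result is the
# list of node names chosen for the instance, e.g.
# ["node2.example.com", "node5.example.com"] for a DRBD instance; it is
# stored back into self.op.nodes above.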
7200 def CheckArguments(self):
7201 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7202 # Normalize and convert deprecated list of disk indices
7203 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
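# e.g. a deprecated index list [2, 0, 0, 1] is normalized above to
# [(0, {}), (1, {}), (2, {})] (illustrative values)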
7205 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7207 raise errors.OpPrereqError("Some disks have been specified more than"
7208 " once: %s" % utils.CommaJoin(duplicates),
7211 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7212 # when neither iallocator nor nodes are specified
7213 if self.op.iallocator or self.op.nodes:
7214 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7216 for (idx, params) in self.op.disks:
7217 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7218 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7220 raise errors.OpPrereqError("Parameters for disk %s try to change"
7221 " unmodifyable parameter(s): %s" %
7222 (idx, utils.CommaJoin(unsupported)),
7225 def ExpandNames(self):
7226 self._ExpandAndLockInstance()
7227 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7230 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7231 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7233 self.needed_locks[locking.LEVEL_NODE] = []
7234 if self.op.iallocator:
7235 # iallocator will select a new node in the same group
7236 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7237 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7239 self.needed_locks[locking.LEVEL_NODE_RES] = []
7241 def DeclareLocks(self, level):
7242 if level == locking.LEVEL_NODEGROUP:
7243 assert self.op.iallocator is not None
7244 assert not self.op.nodes
7245 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7246 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7247 # Lock the primary group used by the instance optimistically; this
7248 # requires going via the node before it's locked, requiring
7249 # verification later on
7250 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7251 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7253 elif level == locking.LEVEL_NODE:
7254 # If an allocator is used, then we lock all the nodes in the current
7255 # instance group, as we don't know yet which ones will be selected;
7256 # if we replace the nodes without using an allocator, locks are
7257 # already declared in ExpandNames; otherwise, we need to lock all the
7258 # instance nodes for disk re-creation
7259 if self.op.iallocator:
7260 assert not self.op.nodes
7261 assert not self.needed_locks[locking.LEVEL_NODE]
7262 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7264 # Lock member nodes of the group of the primary node
7265 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7266 self.needed_locks[locking.LEVEL_NODE].extend(
7267 self.cfg.GetNodeGroup(group_uuid).members)
7269 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7270 elif not self.op.nodes:
7271 self._LockInstancesNodes(primary_only=False)
7272 elif level == locking.LEVEL_NODE_RES:
7274 self.needed_locks[locking.LEVEL_NODE_RES] = \
7275 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7277 def BuildHooksEnv(self):
7280 This runs on master, primary and secondary nodes of the instance.
7283 return _BuildInstanceHookEnvByObject(self, self.instance)
7285 def BuildHooksNodes(self):
7286 """Build hooks nodes.
7289 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7292 def CheckPrereq(self):
7293 """Check prerequisites.
7295 This checks that the instance is in the cluster and is not running.
7298 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7299 assert instance is not None, \
7300 "Cannot retrieve locked instance %s" % self.op.instance_name
7302 if len(self.op.nodes) != len(instance.all_nodes):
7303 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7304 " %d replacement nodes were specified" %
7305 (instance.name, len(instance.all_nodes),
7306 len(self.op.nodes)),
7308 assert instance.disk_template != constants.DT_DRBD8 or \
7309 len(self.op.nodes) == 2
7310 assert instance.disk_template != constants.DT_PLAIN or \
7311 len(self.op.nodes) == 1
7312 primary_node = self.op.nodes[0]
7314 primary_node = instance.primary_node
7315 if not self.op.iallocator:
7316 _CheckNodeOnline(self, primary_node)
7318 if instance.disk_template == constants.DT_DISKLESS:
7319 raise errors.OpPrereqError("Instance '%s' has no disks" %
7320 self.op.instance_name, errors.ECODE_INVAL)
7322 # Verify if node group locks are still correct
7323 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7325 # Node group locks are acquired only for the primary node (and only
7326 # when the allocator is used)
7327 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7330 # if we replace nodes *and* the old primary is offline, we don't
7331 # check the instance state
7332 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7333 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7334 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7335 msg="cannot recreate disks")
7338 self.disks = dict(self.op.disks)
7340 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7342 maxidx = max(self.disks.keys())
7343 if maxidx >= len(instance.disks):
7344 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7347 if ((self.op.nodes or self.op.iallocator) and
7348 sorted(self.disks.keys()) != range(len(instance.disks))):
7349 raise errors.OpPrereqError("Can't recreate disks partially and"
7350 " change the nodes at the same time",
7353 self.instance = instance
7355 if self.op.iallocator:
7356 self._RunAllocator()
7357 # Release unneeded node and node resource locks
7358 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7359 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7360 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7362 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7364 def Exec(self, feedback_fn):
7365 """Recreate the disks.
7368 instance = self.instance
7370 assert (self.owned_locks(locking.LEVEL_NODE) ==
7371 self.owned_locks(locking.LEVEL_NODE_RES))
7374 mods = [] # keeps track of needed changes
7376 for idx, disk in enumerate(instance.disks):
7378 changes = self.disks[idx]
7380 # Disk should not be recreated
7384 # update secondaries for disks, if needed
7385 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7386 # need to update the nodes and minors
7387 assert len(self.op.nodes) == 2
7388 assert len(disk.logical_id) == 6 # otherwise disk internals are broken
7390 (_, _, old_port, _, _, old_secret) = disk.logical_id
7391 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7392 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7393 new_minors[0], new_minors[1], old_secret)
7394 assert len(disk.logical_id) == len(new_id)
7398 mods.append((idx, new_id, changes))
7400 # now that we have passed all asserts above, we can apply the mods
7401 # in a single run (to avoid partial changes)
7402 for idx, new_id, changes in mods:
7403 disk = instance.disks[idx]
7404 if new_id is not None:
7405 assert disk.dev_type == constants.LD_DRBD8
7406 disk.logical_id = new_id
7408 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7409 mode=changes.get(constants.IDISK_MODE, None))
7411 # change primary node, if needed
7413 instance.primary_node = self.op.nodes[0]
7414 self.LogWarning("Changing the instance's nodes, you will have to"
7415 " remove any disks left on the older nodes manually")
7418 self.cfg.Update(instance, feedback_fn)
7420 # All touched nodes must be locked
7421 mylocks = self.owned_locks(locking.LEVEL_NODE)
7422 assert mylocks.issuperset(frozenset(instance.all_nodes))
7423 _CreateDisks(self, instance, to_skip=to_skip)
7426 class LUInstanceRename(LogicalUnit):
7427 """Rename an instance.
7430 HPATH = "instance-rename"
7431 HTYPE = constants.HTYPE_INSTANCE
7433 def CheckArguments(self):
7437 if self.op.ip_check and not self.op.name_check:
7438 # TODO: make the ip check more flexible and not depend on the name check
7439 raise errors.OpPrereqError("IP address check requires a name check",
7442 def BuildHooksEnv(self):
7445 This runs on master, primary and secondary nodes of the instance.
7448 env = _BuildInstanceHookEnvByObject(self, self.instance)
7449 env["INSTANCE_NEW_NAME"] = self.op.new_name
7452 def BuildHooksNodes(self):
7453 """Build hooks nodes.
7456 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7459 def CheckPrereq(self):
7460 """Check prerequisites.
7462 This checks that the instance is in the cluster and is not running.
7465 self.op.instance_name = _ExpandInstanceName(self.cfg,
7466 self.op.instance_name)
7467 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7468 assert instance is not None
7469 _CheckNodeOnline(self, instance.primary_node)
7470 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7471 msg="cannot rename")
7472 self.instance = instance
7474 new_name = self.op.new_name
7475 if self.op.name_check:
7476 hostname = _CheckHostnameSane(self, new_name)
7477 new_name = self.op.new_name = hostname.name
7478 if (self.op.ip_check and
7479 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7480 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7481 (hostname.ip, new_name),
7482 errors.ECODE_NOTUNIQUE)
7484 instance_list = self.cfg.GetInstanceList()
7485 if new_name in instance_list and new_name != instance.name:
7486 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7487 new_name, errors.ECODE_EXISTS)
7489 def Exec(self, feedback_fn):
7490 """Rename the instance.
7493 inst = self.instance
7494 old_name = inst.name
7496 rename_file_storage = False
7497 if (inst.disk_template in constants.DTS_FILEBASED and
7498 self.op.new_name != inst.name):
7499 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7500 rename_file_storage = True
7502 self.cfg.RenameInstance(inst.name, self.op.new_name)
7503 # Change the instance lock. This is definitely safe while we hold the BGL.
7504 # Otherwise the new lock would have to be added in acquired mode.
7506 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7507 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7508 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7510 # re-read the instance from the configuration after rename
7511 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7513 if rename_file_storage:
7514 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7515 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7516 old_file_storage_dir,
7517 new_file_storage_dir)
7518 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7519 " (but the instance has been renamed in Ganeti)" %
7520 (inst.primary_node, old_file_storage_dir,
7521 new_file_storage_dir))
7523 _StartInstanceDisks(self, inst, None)
7524 # update info on disks
7525 info = _GetInstanceInfoText(inst)
7526 for (idx, disk) in enumerate(inst.disks):
7527 for node in inst.all_nodes:
7528 self.cfg.SetDiskID(disk, node)
7529 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7531 self.LogWarning("Error setting info on node %s for disk %s: %s",
7532 node, idx, result.fail_msg)
7534 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7535 old_name, self.op.debug_level)
7536 msg = result.fail_msg
7538 msg = ("Could not run OS rename script for instance %s on node %s"
7539 " (but the instance has been renamed in Ganeti): %s" %
7540 (inst.name, inst.primary_node, msg))
7541 self.LogWarning(msg)
7543 _ShutdownInstanceDisks(self, inst)
7548 class LUInstanceRemove(LogicalUnit):
7549 """Remove an instance.
7552 HPATH = "instance-remove"
7553 HTYPE = constants.HTYPE_INSTANCE
7556 def ExpandNames(self):
7557 self._ExpandAndLockInstance()
7558 self.needed_locks[locking.LEVEL_NODE] = []
7559 self.needed_locks[locking.LEVEL_NODE_RES] = []
7560 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7562 def DeclareLocks(self, level):
7563 if level == locking.LEVEL_NODE:
7564 self._LockInstancesNodes()
7565 elif level == locking.LEVEL_NODE_RES:
7567 self.needed_locks[locking.LEVEL_NODE_RES] = \
7568 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7570 def BuildHooksEnv(self):
7573 This runs on master, primary and secondary nodes of the instance.
7576 env = _BuildInstanceHookEnvByObject(self, self.instance)
7577 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7580 def BuildHooksNodes(self):
7581 """Build hooks nodes.
7584 nl = [self.cfg.GetMasterNode()]
7585 nl_post = list(self.instance.all_nodes) + nl
7586 return (nl, nl_post)
7588 def CheckPrereq(self):
7589 """Check prerequisites.
7591 This checks that the instance is in the cluster.
7594 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7595 assert self.instance is not None, \
7596 "Cannot retrieve locked instance %s" % self.op.instance_name
7598 def Exec(self, feedback_fn):
7599 """Remove the instance.
7602 instance = self.instance
7603 logging.info("Shutting down instance %s on node %s",
7604 instance.name, instance.primary_node)
7606 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7607 self.op.shutdown_timeout,
7609 msg = result.fail_msg
7611 if self.op.ignore_failures:
7612 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7614 raise errors.OpExecError("Could not shutdown instance %s on"
7616 (instance.name, instance.primary_node, msg))
7618 assert (self.owned_locks(locking.LEVEL_NODE) ==
7619 self.owned_locks(locking.LEVEL_NODE_RES))
7620 assert not (set(instance.all_nodes) -
7621 self.owned_locks(locking.LEVEL_NODE)), \
7622 "Not owning correct locks"
7624 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7627 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7628 """Utility function to remove an instance.
7631 logging.info("Removing block devices for instance %s", instance.name)
7633 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7634 if not ignore_failures:
7635 raise errors.OpExecError("Can't remove instance's disks")
7636 feedback_fn("Warning: can't remove instance's disks")
7638 logging.info("Removing instance %s out of cluster config", instance.name)
7640 lu.cfg.RemoveInstance(instance.name)
7642 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7643 "Instance lock removal conflict"
7645 # Remove lock for the instance
7646 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7649 class LUInstanceQuery(NoHooksLU):
7650 """Logical unit for querying instances.
7653 # pylint: disable=W0142
7656 def CheckArguments(self):
7657 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7658 self.op.output_fields, self.op.use_locking)
7660 def ExpandNames(self):
7661 self.iq.ExpandNames(self)
7663 def DeclareLocks(self, level):
7664 self.iq.DeclareLocks(self, level)
7666 def Exec(self, feedback_fn):
7667 return self.iq.OldStyleQuery(self)
7670 def _ExpandNamesForMigration(lu):
7671 """Expands names for use with L{TLMigrateInstance}.
7673 @type lu: L{LogicalUnit}
7676 if lu.op.target_node is not None:
7677 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7679 lu.needed_locks[locking.LEVEL_NODE] = []
7680 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7682 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7683 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7685 # The node allocation lock is actually only needed for externally replicated
7686 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
7687 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
7690 def _DeclareLocksForMigration(lu, level):
7691 """Declares locks for L{TLMigrateInstance}.
7693 @type lu: L{LogicalUnit}
7694 @param level: Lock level
7697 if level == locking.LEVEL_NODE_ALLOC:
7698 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
7700 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7702 # Node locks are already declared here rather than at LEVEL_NODE as we need
7703 # the instance object anyway to declare the node allocation lock.
7704 if instance.disk_template in constants.DTS_EXT_MIRROR:
7705 if lu.op.target_node is None:
7706 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7707 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7709 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7711 del lu.recalculate_locks[locking.LEVEL_NODE]
7713 lu._LockInstancesNodes() # pylint: disable=W0212
7715 elif level == locking.LEVEL_NODE:
7716 # Node locks are declared together with the node allocation lock
7717 assert (lu.needed_locks[locking.LEVEL_NODE] or
7718 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
7720 elif level == locking.LEVEL_NODE_RES:
7722 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7723 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
7726 class LUInstanceFailover(LogicalUnit):
7727 """Failover an instance.
7730 HPATH = "instance-failover"
7731 HTYPE = constants.HTYPE_INSTANCE
7734 def CheckArguments(self):
7735 """Check the arguments.
7738 self.iallocator = getattr(self.op, "iallocator", None)
7739 self.target_node = getattr(self.op, "target_node", None)
7741 def ExpandNames(self):
7742 self._ExpandAndLockInstance()
7743 _ExpandNamesForMigration(self)
7746 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7747 self.op.ignore_consistency, True,
7748 self.op.shutdown_timeout, self.op.ignore_ipolicy)
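# Positional arguments map to TLMigrateInstance's (cleanup, failover,
# fallback, ignore_consistency, allow_runtime_changes, shutdown_timeout,
# ignore_ipolicy) parameters: a failover is never a cleanup, is always a
# failover without fallback, and always allows runtime changes.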
7750 self.tasklets = [self._migrater]
7752 def DeclareLocks(self, level):
7753 _DeclareLocksForMigration(self, level)
7755 def BuildHooksEnv(self):
7758 This runs on master, primary and secondary nodes of the instance.
7761 instance = self._migrater.instance
7762 source_node = instance.primary_node
7763 target_node = self.op.target_node
7765 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7766 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7767 "OLD_PRIMARY": source_node,
7768 "NEW_PRIMARY": target_node,
7771 if instance.disk_template in constants.DTS_INT_MIRROR:
7772 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7773 env["NEW_SECONDARY"] = source_node
7775 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7777 env.update(_BuildInstanceHookEnvByObject(self, instance))
7781 def BuildHooksNodes(self):
7782 """Build hooks nodes.
7785 instance = self._migrater.instance
7786 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7787 return (nl, nl + [instance.primary_node])
7790 class LUInstanceMigrate(LogicalUnit):
7791 """Migrate an instance.
7793 This is migration without shutting down, compared to the failover,
7794 which is done with shutdown.
7797 HPATH = "instance-migrate"
7798 HTYPE = constants.HTYPE_INSTANCE
7801 def ExpandNames(self):
7802 self._ExpandAndLockInstance()
7803 _ExpandNamesForMigration(self)
7806 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7807 False, self.op.allow_failover, False,
7808 self.op.allow_runtime_changes,
7809 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7810 self.op.ignore_ipolicy)
7812 self.tasklets = [self._migrater]
7814 def DeclareLocks(self, level):
7815 _DeclareLocksForMigration(self, level)
7817 def BuildHooksEnv(self):
7820 This runs on master, primary and secondary nodes of the instance.
7823 instance = self._migrater.instance
7824 source_node = instance.primary_node
7825 target_node = self.op.target_node
7826 env = _BuildInstanceHookEnvByObject(self, instance)
7828 "MIGRATE_LIVE": self._migrater.live,
7829 "MIGRATE_CLEANUP": self.op.cleanup,
7830 "OLD_PRIMARY": source_node,
7831 "NEW_PRIMARY": target_node,
7832 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7835 if instance.disk_template in constants.DTS_INT_MIRROR:
7836 env["OLD_SECONDARY"] = target_node
7837 env["NEW_SECONDARY"] = source_node
7839 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7843 def BuildHooksNodes(self):
7844 """Build hooks nodes.
7847 instance = self._migrater.instance
7848 snodes = list(instance.secondary_nodes)
7849 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
7853 class LUInstanceMove(LogicalUnit):
7854 """Move an instance by data-copying.
7857 HPATH = "instance-move"
7858 HTYPE = constants.HTYPE_INSTANCE
7861 def ExpandNames(self):
7862 self._ExpandAndLockInstance()
7863 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7864 self.op.target_node = target_node
7865 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7866 self.needed_locks[locking.LEVEL_NODE_RES] = []
7867 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7869 def DeclareLocks(self, level):
7870 if level == locking.LEVEL_NODE:
7871 self._LockInstancesNodes(primary_only=True)
7872 elif level == locking.LEVEL_NODE_RES:
7874 self.needed_locks[locking.LEVEL_NODE_RES] = \
7875 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7877 def BuildHooksEnv(self):
7880 This runs on master, primary and secondary nodes of the instance.
7884 "TARGET_NODE": self.op.target_node,
7885 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7887 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7890 def BuildHooksNodes(self):
7891 """Build hooks nodes.
7895 self.cfg.GetMasterNode(),
7896 self.instance.primary_node,
7897 self.op.target_node,
7901 def CheckPrereq(self):
7902 """Check prerequisites.
7904 This checks that the instance is in the cluster.
7907 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7908 assert self.instance is not None, \
7909 "Cannot retrieve locked instance %s" % self.op.instance_name
7911 if instance.disk_template not in constants.DTS_COPYABLE:
7912 raise errors.OpPrereqError("Disk template %s not suitable for copying" %
7913 instance.disk_template, errors.ECODE_STATE)
7915 node = self.cfg.GetNodeInfo(self.op.target_node)
7916 assert node is not None, \
7917 "Cannot retrieve locked node %s" % self.op.target_node
7919 self.target_node = target_node = node.name
7921 if target_node == instance.primary_node:
7922 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7923 (instance.name, target_node),
7926 bep = self.cfg.GetClusterInfo().FillBE(instance)
7928 for idx, dsk in enumerate(instance.disks):
7929 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7930 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7931 " cannot copy" % idx, errors.ECODE_STATE)
7933 _CheckNodeOnline(self, target_node)
7934 _CheckNodeNotDrained(self, target_node)
7935 _CheckNodeVmCapable(self, target_node)
7936 cluster = self.cfg.GetClusterInfo()
7937 group_info = self.cfg.GetNodeGroup(node.group)
7938 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7939 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
7940 ignore=self.op.ignore_ipolicy)
7942 if instance.admin_state == constants.ADMINST_UP:
7943 # check memory requirements on the target node
7944 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7945 instance.name, bep[constants.BE_MAXMEM],
7946 instance.hypervisor)
7948 self.LogInfo("Not checking memory on the secondary node as"
7949 " instance will not be started")
7951 # check bridge existence
7952 _CheckInstanceBridgesExist(self, instance, node=target_node)
7954 def Exec(self, feedback_fn):
7955 """Move an instance.
7957 The move is done by shutting it down on its present node, copying
7958 the data over (slow) and starting it on the new node.
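This is normally the code path reached through the 'gnt-instance move'
command-line operation.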
7961 instance = self.instance
7963 source_node = instance.primary_node
7964 target_node = self.target_node
7966 self.LogInfo("Shutting down instance %s on source node %s",
7967 instance.name, source_node)
7969 assert (self.owned_locks(locking.LEVEL_NODE) ==
7970 self.owned_locks(locking.LEVEL_NODE_RES))
7972 result = self.rpc.call_instance_shutdown(source_node, instance,
7973 self.op.shutdown_timeout,
7975 msg = result.fail_msg
7977 if self.op.ignore_consistency:
7978 self.LogWarning("Could not shutdown instance %s on node %s."
7979 " Proceeding anyway. Please make sure node"
7980 " %s is down. Error details: %s",
7981 instance.name, source_node, source_node, msg)
7983 raise errors.OpExecError("Could not shutdown instance %s on"
7985 (instance.name, source_node, msg))
7987 # create the target disks
7989 _CreateDisks(self, instance, target_node=target_node)
7990 except errors.OpExecError:
7991 self.LogWarning("Device creation failed")
7992 self.cfg.ReleaseDRBDMinors(instance.name)
7995 cluster_name = self.cfg.GetClusterInfo().cluster_name
7998 # activate, get path, copy the data over
7999 for idx, disk in enumerate(instance.disks):
8000 self.LogInfo("Copying data for disk %d", idx)
8001 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8002 instance.name, True, idx)
8004 self.LogWarning("Can't assemble newly created disk %d: %s",
8005 idx, result.fail_msg)
8006 errs.append(result.fail_msg)
8008 dev_path = result.payload
8009 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8010 target_node, dev_path,
8013 self.LogWarning("Can't copy data over for disk %d: %s",
8014 idx, result.fail_msg)
8015 errs.append(result.fail_msg)
8019 self.LogWarning("Some disks failed to copy, aborting")
8021 _RemoveDisks(self, instance, target_node=target_node)
8023 self.cfg.ReleaseDRBDMinors(instance.name)
8024 raise errors.OpExecError("Errors during disk copy: %s" %
8027 instance.primary_node = target_node
8028 self.cfg.Update(instance, feedback_fn)
8030 self.LogInfo("Removing the disks on the original node")
8031 _RemoveDisks(self, instance, target_node=source_node)
8033 # Only start the instance if it's marked as up
8034 if instance.admin_state == constants.ADMINST_UP:
8035 self.LogInfo("Starting instance %s on node %s",
8036 instance.name, target_node)
8038 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8039 ignore_secondaries=True)
8041 _ShutdownInstanceDisks(self, instance)
8042 raise errors.OpExecError("Can't activate the instance's disks")
8044 result = self.rpc.call_instance_start(target_node,
8045 (instance, None, None), False,
8047 msg = result.fail_msg
8049 _ShutdownInstanceDisks(self, instance)
8050 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8051 (instance.name, target_node, msg))
8054 class LUNodeMigrate(LogicalUnit):
8055 """Migrate all instances from a node.
8058 HPATH = "node-migrate"
8059 HTYPE = constants.HTYPE_NODE
8062 def CheckArguments(self):
8065 def ExpandNames(self):
8066 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8068 self.share_locks = _ShareAll()
8069 self.needed_locks = {
8070 locking.LEVEL_NODE: [self.op.node_name],
8073 def BuildHooksEnv(self):
8076 This runs on the master, the primary and all the secondaries.
8080 "NODE_NAME": self.op.node_name,
8081 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8084 def BuildHooksNodes(self):
8085 """Build hooks nodes.
8088 nl = [self.cfg.GetMasterNode()]
8091 def CheckPrereq(self):
8094 def Exec(self, feedback_fn):
8095 # Prepare jobs for migration instances
8096 allow_runtime_changes = self.op.allow_runtime_changes
8098 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8101 iallocator=self.op.iallocator,
8102 target_node=self.op.target_node,
8103 allow_runtime_changes=allow_runtime_changes,
8104 ignore_ipolicy=self.op.ignore_ipolicy)]
8105 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8107 # TODO: Run iallocator in this opcode and pass correct placement options to
8108 # OpInstanceMigrate. Since other jobs can modify the cluster between
8109 # running the iallocator and the actual migration, a good consistency model
8110 # will have to be found.
8112 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8113 frozenset([self.op.node_name]))
8115 return ResultWithJobs(jobs)
8118 class TLMigrateInstance(Tasklet):
8119 """Tasklet class for instance migration.
8122 @ivar live: whether the migration will be done live or non-live;
8123 this variable is initialized only after CheckPrereq has run
8124 @type cleanup: boolean
8125 @ivar cleanup: Whether we clean up from a failed migration
8126 @type iallocator: string
8127 @ivar iallocator: The iallocator used to determine target_node
8128 @type target_node: string
8129 @ivar target_node: If given, the target_node to reallocate the instance to
8130 @type failover: boolean
8131 @ivar failover: Whether operation results in failover or migration
8132 @type fallback: boolean
8133 @ivar fallback: Whether fallback to failover is allowed if migration not
8135 @type ignore_consistency: boolean
8136 @ivar ignore_consistency: Whether we should ignore consistency between source
8138 @type shutdown_timeout: int
8139 @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
8140 @type ignore_ipolicy: bool
8141 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8146 _MIGRATION_POLL_INTERVAL = 1 # seconds
8147 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
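# These control the memory-transfer loop in _ExecMigration: the migration
# status is polled every _MIGRATION_POLL_INTERVAL seconds, while progress
# feedback is emitted at most every _MIGRATION_FEEDBACK_INTERVAL seconds.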
8149 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8150 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8152 """Initializes this class.
8155 Tasklet.__init__(self, lu)
8158 self.instance_name = instance_name
8159 self.cleanup = cleanup
8160 self.live = False # will be overridden later
8161 self.failover = failover
8162 self.fallback = fallback
8163 self.ignore_consistency = ignore_consistency
8164 self.shutdown_timeout = shutdown_timeout
8165 self.ignore_ipolicy = ignore_ipolicy
8166 self.allow_runtime_changes = allow_runtime_changes
8168 def CheckPrereq(self):
8169 """Check prerequisites.
8171 This checks that the instance is in the cluster.
8174 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8175 instance = self.cfg.GetInstanceInfo(instance_name)
8176 assert instance is not None
8177 self.instance = instance
8178 cluster = self.cfg.GetClusterInfo()
8180 if (not self.cleanup and
8181 not instance.admin_state == constants.ADMINST_UP and
8182 not self.failover and self.fallback):
8183 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8184 " switching to failover")
8185 self.failover = True
8187 if instance.disk_template not in constants.DTS_MIRRORED:
8192 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8193 " %s" % (instance.disk_template, text),
8196 if instance.disk_template in constants.DTS_EXT_MIRROR:
8197 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8199 if self.lu.op.iallocator:
8200 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8201 self._RunAllocator()
8203 # We set self.target_node as it is required by
8205 self.target_node = self.lu.op.target_node
8207 # Check that the target node is correct in terms of instance policy
8208 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8209 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8210 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8212 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8213 ignore=self.ignore_ipolicy)
8215 # self.target_node is already populated, either directly or by the
8217 target_node = self.target_node
8218 if self.target_node == instance.primary_node:
8219 raise errors.OpPrereqError("Cannot migrate instance %s"
8220 " to its primary (%s)" %
8221 (instance.name, instance.primary_node),
8224 if len(self.lu.tasklets) == 1:
8225 # It is safe to release locks only when we're the only tasklet
8227 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8228 keep=[instance.primary_node, self.target_node])
8229 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8232 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8234 secondary_nodes = instance.secondary_nodes
8235 if not secondary_nodes:
8236 raise errors.ConfigurationError("No secondary node but using"
8237 " %s disk template" %
8238 instance.disk_template)
8239 target_node = secondary_nodes[0]
8240 if self.lu.op.iallocator or (self.lu.op.target_node and
8241 self.lu.op.target_node != target_node):
8243 text = "failed over"
8246 raise errors.OpPrereqError("Instances with disk template %s cannot"
8247 " be %s to arbitrary nodes"
8248 " (neither an iallocator nor a target"
8249 " node can be passed)" %
8250 (instance.disk_template, text),
8252 nodeinfo = self.cfg.GetNodeInfo(target_node)
8253 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8254 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8256 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8257 ignore=self.ignore_ipolicy)
8259 i_be = cluster.FillBE(instance)
8261 # check memory requirements on the secondary node
8262 if (not self.cleanup and
8263 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8264 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8265 "migrating instance %s" %
8267 i_be[constants.BE_MINMEM],
8268 instance.hypervisor)
8270 self.lu.LogInfo("Not checking memory on the secondary node as"
8271 " instance will not be started")
8273 # check if failover must be forced instead of migration
8274 if (not self.cleanup and not self.failover and
8275 i_be[constants.BE_ALWAYS_FAILOVER]):
8276 self.lu.LogInfo("Instance configured to always failover; fallback"
8278 self.failover = True
8280 # check bridge existence
8281 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8283 if not self.cleanup:
8284 _CheckNodeNotDrained(self.lu, target_node)
8285 if not self.failover:
8286 result = self.rpc.call_instance_migratable(instance.primary_node,
8288 if result.fail_msg and self.fallback:
8289 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8291 self.failover = True
8293 result.Raise("Can't migrate, please use failover",
8294 prereq=True, ecode=errors.ECODE_STATE)
8296 assert not (self.failover and self.cleanup)
8298 if not self.failover:
8299 if self.lu.op.live is not None and self.lu.op.mode is not None:
8300 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8301 " parameters are accepted",
8303 if self.lu.op.live is not None:
8305 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8307 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8308 # reset the 'live' parameter to None so that repeated
8309 # invocations of CheckPrereq do not raise an exception
8310 self.lu.op.live = None
8311 elif self.lu.op.mode is None:
8312 # read the default value from the hypervisor
8313 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8314 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8316 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8318 # Failover is never live
8321 if not (self.failover or self.cleanup):
8322 remote_info = self.rpc.call_instance_info(instance.primary_node,
8324 instance.hypervisor)
8325 remote_info.Raise("Error checking instance on node %s" %
8326 instance.primary_node)
8327 instance_running = bool(remote_info.payload)
8328 if instance_running:
8329 self.current_mem = int(remote_info.payload["memory"])
8331 def _RunAllocator(self):
8332 """Run the allocator based on input opcode.
8335 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8337 # FIXME: add a self.ignore_ipolicy option
8338 req = iallocator.IAReqRelocate(name=self.instance_name,
8339 relocate_from=[self.instance.primary_node])
8340 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8342 ial.Run(self.lu.op.iallocator)
8345 raise errors.OpPrereqError("Can't compute nodes using"
8346 " iallocator '%s': %s" %
8347 (self.lu.op.iallocator, ial.info),
8349 self.target_node = ial.result[0]
8350 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8351 self.instance_name, self.lu.op.iallocator,
8352 utils.CommaJoin(ial.result))
8354 def _WaitUntilSync(self):
8355 """Poll with custom rpc for disk sync.
8357 This uses our own step-based rpc call.
8360 self.feedback_fn("* wait until resync is done")
8364 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8366 (self.instance.disks,
8369 for node, nres in result.items():
8370 nres.Raise("Cannot resync disks on node %s" % node)
8371 node_done, node_percent = nres.payload
8372 all_done = all_done and node_done
8373 if node_percent is not None:
8374 min_percent = min(min_percent, node_percent)
8376 if min_percent < 100:
8377 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8380 def _EnsureSecondary(self, node):
8381 """Demote a node to secondary.
8384 self.feedback_fn("* switching node %s to secondary mode" % node)
8386 for dev in self.instance.disks:
8387 self.cfg.SetDiskID(dev, node)
8389 result = self.rpc.call_blockdev_close(node, self.instance.name,
8390 self.instance.disks)
8391 result.Raise("Cannot change disk to secondary on node %s" % node)
8393 def _GoStandalone(self):
8394 """Disconnect from the network.
8397 self.feedback_fn("* changing into standalone mode")
8398 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8399 self.instance.disks)
8400 for node, nres in result.items():
8401 nres.Raise("Cannot disconnect disks node %s" % node)
8403 def _GoReconnect(self, multimaster):
8404 """Reconnect to the network.
8410 msg = "single-master"
8411 self.feedback_fn("* changing disks into %s mode" % msg)
8412 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8413 (self.instance.disks, self.instance),
8414 self.instance.name, multimaster)
8415 for node, nres in result.items():
8416 nres.Raise("Cannot change disks config on node %s" % node)
8418 def _ExecCleanup(self):
8419 """Try to cleanup after a failed migration.
8421 The cleanup is done by:
8422 - check that the instance is running only on one node
8423 (and update the config if needed)
8424 - change disks on its secondary node to secondary
8425 - wait until disks are fully synchronized
8426 - disconnect from the network
8427 - change disks into single-master mode
8428 - wait again until disks are fully synchronized
8431 instance = self.instance
8432 target_node = self.target_node
8433 source_node = self.source_node
8435 # check running on only one node
8436 self.feedback_fn("* checking where the instance actually runs"
8437 " (if this hangs, the hypervisor might be in"
8439 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8440 for node, result in ins_l.items():
8441 result.Raise("Can't contact node %s" % node)
8443 runningon_source = instance.name in ins_l[source_node].payload
8444 runningon_target = instance.name in ins_l[target_node].payload
8446 if runningon_source and runningon_target:
8447 raise errors.OpExecError("Instance seems to be running on two nodes,"
8448 " or the hypervisor is confused; you will have"
8449 " to ensure manually that it runs only on one"
8450 " and restart this operation")
8452 if not (runningon_source or runningon_target):
8453 raise errors.OpExecError("Instance does not seem to be running at all;"
8454 " in this case it's safer to repair by"
8455 " running 'gnt-instance stop' to ensure disk"
8456 " shutdown, and then restarting it")
8458 if runningon_target:
8459 # the migration has actually succeeded, we need to update the config
8460 self.feedback_fn("* instance running on secondary node (%s),"
8461 " updating config" % target_node)
8462 instance.primary_node = target_node
8463 self.cfg.Update(instance, self.feedback_fn)
8464 demoted_node = source_node
8466 self.feedback_fn("* instance confirmed to be running on its"
8467 " primary node (%s)" % source_node)
8468 demoted_node = target_node
8470 if instance.disk_template in constants.DTS_INT_MIRROR:
8471 self._EnsureSecondary(demoted_node)
8473 self._WaitUntilSync()
8474 except errors.OpExecError:
8475 # we ignore errors here, since if the device is standalone, it
8476 # won't be able to sync
8478 self._GoStandalone()
8479 self._GoReconnect(False)
8480 self._WaitUntilSync()
8482 self.feedback_fn("* done")
8484 def _RevertDiskStatus(self):
8485 """Try to revert the disk status after a failed migration.
8488 target_node = self.target_node
8489 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8493 self._EnsureSecondary(target_node)
8494 self._GoStandalone()
8495 self._GoReconnect(False)
8496 self._WaitUntilSync()
8497 except errors.OpExecError, err:
8498 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8499 " please try to recover the instance manually;"
8500 " error '%s'" % str(err))
8502 def _AbortMigration(self):
8503 """Call the hypervisor code to abort a started migration.
8506 instance = self.instance
8507 target_node = self.target_node
8508 source_node = self.source_node
8509 migration_info = self.migration_info
8511 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8515 abort_msg = abort_result.fail_msg
8517 logging.error("Aborting migration failed on target node %s: %s",
8518 target_node, abort_msg)
8519 # Don't raise an exception here, as we still have to try to revert the
8520 # disk status, even if this step failed.
8522 abort_result = self.rpc.call_instance_finalize_migration_src(
8523 source_node, instance, False, self.live)
8524 abort_msg = abort_result.fail_msg
8526 logging.error("Aborting migration failed on source node %s: %s",
8527 source_node, abort_msg)
8529 def _ExecMigration(self):
8530 """Migrate an instance.
8532 The migrate is done by:
8533 - change the disks into dual-master mode
8534 - wait until disks are fully synchronized again
8535 - migrate the instance
8536 - change disks on the new secondary node (the old primary) to secondary
8537 - wait until disks are fully synchronized
8538 - change disks into single-master mode
8541 instance = self.instance
8542 target_node = self.target_node
8543 source_node = self.source_node
8545 # Check for hypervisor version mismatch and warn the user.
8546 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8547 None, [self.instance.hypervisor], False)
8548 for ninfo in nodeinfo.values():
8549 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8551 (_, _, (src_info, )) = nodeinfo[source_node].payload
8552 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8554 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8555 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8556 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8557 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8558 if src_version != dst_version:
8559 self.feedback_fn("* warning: hypervisor version mismatch between"
8560 " source (%s) and target (%s) node" %
8561 (src_version, dst_version))
8563 self.feedback_fn("* checking disk consistency between source and target")
8564 for (idx, dev) in enumerate(instance.disks):
8565 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8566 raise errors.OpExecError("Disk %s is degraded or not fully"
8567 " synchronized on target node,"
8568 " aborting migration" % idx)
8570 if self.current_mem > self.tgt_free_mem:
8571 if not self.allow_runtime_changes:
8572 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8573 " free memory to fit instance %s on target"
8574 " node %s (have %dMB, need %dMB)" %
8575 (instance.name, target_node,
8576 self.tgt_free_mem, self.current_mem))
8577 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8578 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8581 rpcres.Raise("Cannot modify instance runtime memory")
8583 # First get the migration information from the remote node
8584 result = self.rpc.call_migration_info(source_node, instance)
8585 msg = result.fail_msg
8587 log_err = ("Failed fetching source migration information from %s: %s" %
8589 logging.error(log_err)
8590 raise errors.OpExecError(log_err)
8592 self.migration_info = migration_info = result.payload
8594 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8595 # Then switch the disks to master/master mode
8596 self._EnsureSecondary(target_node)
8597 self._GoStandalone()
8598 self._GoReconnect(True)
8599 self._WaitUntilSync()
8601 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8602 result = self.rpc.call_accept_instance(target_node,
8605 self.nodes_ip[target_node])
8607 msg = result.fail_msg
8609 logging.error("Instance pre-migration failed, trying to revert"
8610 " disk status: %s", msg)
8611 self.feedback_fn("Pre-migration failed, aborting")
8612 self._AbortMigration()
8613 self._RevertDiskStatus()
8614 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8615 (instance.name, msg))
8617 self.feedback_fn("* migrating instance to %s" % target_node)
8618 result = self.rpc.call_instance_migrate(source_node, instance,
8619 self.nodes_ip[target_node],
8621 msg = result.fail_msg
8623 logging.error("Instance migration failed, trying to revert"
8624 " disk status: %s", msg)
8625 self.feedback_fn("Migration failed, aborting")
8626 self._AbortMigration()
8627 self._RevertDiskStatus()
8628 raise errors.OpExecError("Could not migrate instance %s: %s" %
8629 (instance.name, msg))
8631 self.feedback_fn("* starting memory transfer")
8632 last_feedback = time.time()
8634 result = self.rpc.call_instance_get_migration_status(source_node,
8636 msg = result.fail_msg
8637 ms = result.payload # MigrationStatus instance
8638 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8639 logging.error("Instance migration failed, trying to revert"
8640 " disk status: %s", msg)
8641 self.feedback_fn("Migration failed, aborting")
8642 self._AbortMigration()
8643 self._RevertDiskStatus()
8645 msg = "hypervisor returned failure"
8646 raise errors.OpExecError("Could not migrate instance %s: %s" %
8647 (instance.name, msg))
8649 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8650 self.feedback_fn("* memory transfer complete")
8653 if (utils.TimeoutExpired(last_feedback,
8654 self._MIGRATION_FEEDBACK_INTERVAL) and
8655 ms.transferred_ram is not None):
8656 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8657 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8658 last_feedback = time.time()
8660 time.sleep(self._MIGRATION_POLL_INTERVAL)
8662 result = self.rpc.call_instance_finalize_migration_src(source_node,
8666 msg = result.fail_msg
8668 logging.error("Instance migration succeeded, but finalization failed"
8669 " on the source node: %s", msg)
8670 raise errors.OpExecError("Could not finalize instance migration: %s" %
8673 instance.primary_node = target_node
8675 # distribute new instance config to the other nodes
8676 self.cfg.Update(instance, self.feedback_fn)
8678 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8682 msg = result.fail_msg
8684 logging.error("Instance migration succeeded, but finalization failed"
8685 " on the target node: %s", msg)
8686 raise errors.OpExecError("Could not finalize instance migration: %s" %
8689 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8690 self._EnsureSecondary(source_node)
8691 self._WaitUntilSync()
8692 self._GoStandalone()
8693 self._GoReconnect(False)
8694 self._WaitUntilSync()
8696 # If the instance's disk template is `rbd' or `ext' and there was a
8697 # successful migration, unmap the device from the source node.
8698 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
8699 disks = _ExpandCheckDisks(instance, instance.disks)
8700 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8702 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8703 msg = result.fail_msg
8705 logging.error("Migration was successful, but couldn't unmap the"
8706 " block device %s on source node %s: %s",
8707 disk.iv_name, source_node, msg)
8708 logging.error("You need to unmap the device %s manually on %s",
8709 disk.iv_name, source_node)
8711 self.feedback_fn("* done")
8713 def _ExecFailover(self):
8714 """Failover an instance.
8716 The failover is done by shutting it down on its present node and
8717 starting it on the secondary.
8720 instance = self.instance
8721 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8723 source_node = instance.primary_node
8724 target_node = self.target_node
8726 if instance.admin_state == constants.ADMINST_UP:
8727 self.feedback_fn("* checking disk consistency between source and target")
8728 for (idx, dev) in enumerate(instance.disks):
8729 # for drbd, these are drbd over lvm
8730 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8732 if primary_node.offline:
8733 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8735 (primary_node.name, idx, target_node))
8736 elif not self.ignore_consistency:
8737 raise errors.OpExecError("Disk %s is degraded on target node,"
8738 " aborting failover" % idx)
8740 self.feedback_fn("* not checking disk consistency as instance is not"
8743 self.feedback_fn("* shutting down instance on source node")
8744 logging.info("Shutting down instance %s on node %s",
8745 instance.name, source_node)
8747 result = self.rpc.call_instance_shutdown(source_node, instance,
8748 self.shutdown_timeout,
8750 msg = result.fail_msg
8752 if self.ignore_consistency or primary_node.offline:
8753 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8754 " proceeding anyway; please make sure node"
8755 " %s is down; error details: %s",
8756 instance.name, source_node, source_node, msg)
8758 raise errors.OpExecError("Could not shutdown instance %s on"
8760 (instance.name, source_node, msg))
8762 self.feedback_fn("* deactivating the instance's disks on source node")
8763 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8764 raise errors.OpExecError("Can't shut down the instance's disks")
8766 instance.primary_node = target_node
8767 # distribute new instance config to the other nodes
8768 self.cfg.Update(instance, self.feedback_fn)
8770 # Only start the instance if it's marked as up
8771 if instance.admin_state == constants.ADMINST_UP:
8772 self.feedback_fn("* activating the instance's disks on target node %s" %
8774 logging.info("Starting instance %s on node %s",
8775 instance.name, target_node)
8777 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8778 ignore_secondaries=True)
8780 _ShutdownInstanceDisks(self.lu, instance)
8781 raise errors.OpExecError("Can't activate the instance's disks")
8783 self.feedback_fn("* starting the instance on the target node %s" %
8785 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8786 False, self.lu.op.reason)
8787 msg = result.fail_msg
8789 _ShutdownInstanceDisks(self.lu, instance)
8790 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8791 (instance.name, target_node, msg))
8793 def Exec(self, feedback_fn):
8794 """Perform the migration.
8797 self.feedback_fn = feedback_fn
8798 self.source_node = self.instance.primary_node
8800 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8801 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8802 self.target_node = self.instance.secondary_nodes[0]
8803 # Otherwise self.target_node has been populated either
8804 # directly, or through an iallocator.
8806 self.all_nodes = [self.source_node, self.target_node]
8807 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8808 in self.cfg.GetMultiNodeInfo(self.all_nodes))
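# Both the DRBD network reconfiguration and the hypervisor migration RPCs
# use the nodes' secondary IPs as endpoints (see _GoReconnect and the
# call_instance_migrate invocation in _ExecMigration).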
8811 feedback_fn("Failover instance %s" % self.instance.name)
8812 self._ExecFailover()
8814 feedback_fn("Migrating instance %s" % self.instance.name)
8817 return self._ExecCleanup()
8819 return self._ExecMigration()
8822 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8824 """Wrapper around L{_CreateBlockDevInner}.
8826 This method annotates the root device first.
8829 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8830 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
8831 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8832 force_open, excl_stor)
8835 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8836 info, force_open, excl_stor):
8837 """Create a tree of block devices on a given node.
8839 If this device type has to be created on secondaries, create it and
8842 If not, just recurse to children keeping the same 'force' value.
8844 @attention: The device has to be annotated already.
8846 @param lu: the lu on whose behalf we execute
8847 @param node: the node on which to create the device
8848 @type instance: L{objects.Instance}
8849 @param instance: the instance which owns the device
8850 @type device: L{objects.Disk}
8851 @param device: the device to create
8852 @type force_create: boolean
8853 @param force_create: whether to force creation of this device; this
8854 will be changed to True whenever we find a device which has
8855 CreateOnSecondary() attribute
8856 @param info: the extra 'metadata' we should attach to the device
8857 (this will be represented as a LVM tag)
8858 @type force_open: boolean
8859 @param force_open: this parameter will be passed to the
8860 L{backend.BlockdevCreate} function where it specifies
8861 whether we run on primary or not, and it affects both
8862 the child assembly and the device's own Open() execution
8863 @type excl_stor: boolean
8864 @param excl_stor: Whether exclusive_storage is active for the node
8866 @return: list of created devices
8868 created_devices = []
8870 if device.CreateOnSecondary():
8874 for child in device.children:
8875 devs = _CreateBlockDevInner(lu, node, instance, child, force_create,
8876 info, force_open, excl_stor)
8877 created_devices.extend(devs)
8879 if not force_create:
8880 return created_devices
8882 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
8884 # The device has been completely created, so there is no point in keeping
8885 # its subdevices in the list. We just add the device itself instead.
8886 created_devices = [(node, device)]
8887 return created_devices
8889 except errors.DeviceCreationError, e:
8890 e.created_devices.extend(created_devices)
8892 except errors.OpExecError, e:
8893 raise errors.DeviceCreationError(str(e), created_devices)
8896 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
8898 """Create a single block device on a given node.
8900 This will not recurse over children of the device, so they must be
8903 @param lu: the lu on whose behalf we execute
8904 @param node: the node on which to create the device
8905 @type instance: L{objects.Instance}
8906 @param instance: the instance which owns the device
8907 @type device: L{objects.Disk}
8908 @param device: the device to create
8909 @param info: the extra 'metadata' we should attach to the device
8910 (this will be represented as a LVM tag)
8911 @type force_open: boolean
8912 @param force_open: this parameter will be passed to the
8913 L{backend.BlockdevCreate} function where it specifies
8914 whether we run on primary or not, and it affects both
8915 the child assembly and the device's own Open() execution
8916 @type excl_stor: boolean
8917 @param excl_stor: Whether exclusive_storage is active for the node
8920 lu.cfg.SetDiskID(device, node)
8921 result = lu.rpc.call_blockdev_create(node, device, device.size,
8922 instance.name, force_open, info,
8924 result.Raise("Can't create block device %s on"
8925 " node %s for instance %s" % (device, node, instance.name))
8926 if device.physical_id is None:
8927 device.physical_id = result.payload
8930 def _GenerateUniqueNames(lu, exts):
8931 """Generate a suitable LV name.
8933 This will generate a logical volume name for the given instance.
8938 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8939 results.append("%s%s" % (new_id, val))
8943 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8944 iv_name, p_minor, s_minor):
8945 """Generate a drbd8 device complete with its children.
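The result is a single LD_DRBD8 disk whose two LV children hold the data
(of the requested size) and the DRBD metadata (of DRBD_META_SIZE), using
the two volume groups and LV names passed in.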
8948 assert len(vgnames) == len(names) == 2
8949 port = lu.cfg.AllocatePort()
8950 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8952 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8953 logical_id=(vgnames[0], names[0]),
8955 dev_data.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8956 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8957 size=constants.DRBD_META_SIZE,
8958 logical_id=(vgnames[1], names[1]),
8960 dev_meta.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8961 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8962 logical_id=(primary, secondary, port,
8965 children=[dev_data, dev_meta],
8966 iv_name=iv_name, params={})
8967 drbd_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8971 _DISK_TEMPLATE_NAME_PREFIX = {
8972 constants.DT_PLAIN: "",
8973 constants.DT_RBD: ".rbd",
8974 constants.DT_EXT: ".ext",
8978 _DISK_TEMPLATE_DEVICE_TYPE = {
8979 constants.DT_PLAIN: constants.LD_LV,
8980 constants.DT_FILE: constants.LD_FILE,
8981 constants.DT_SHARED_FILE: constants.LD_FILE,
8982 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8983 constants.DT_RBD: constants.LD_RBD,
8984 constants.DT_EXT: constants.LD_EXT,
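# Maps each remaining (non-DRBD, non-diskless) disk template to the
# logical-disk type used for its generated devices; together with
# _DISK_TEMPLATE_NAME_PREFIX above it drives the generic branch of
# _GenerateDiskTemplate below.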
8988 def _GenerateDiskTemplate(
8989 lu, template_name, instance_name, primary_node, secondary_nodes,
8990 disk_info, file_storage_dir, file_driver, base_index,
8991 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8992 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8993 """Generate the entire disk layout for a given template type.
8996 vgname = lu.cfg.GetVGName()
8997 disk_count = len(disk_info)
9000 if template_name == constants.DT_DISKLESS:
9002 elif template_name == constants.DT_DRBD8:
9003 if len(secondary_nodes) != 1:
9004 raise errors.ProgrammerError("Wrong template configuration")
9005 remote_node = secondary_nodes[0]
9006 minors = lu.cfg.AllocateDRBDMinor(
9007 [primary_node, remote_node] * len(disk_info), instance_name)
9009 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9011 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9014 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9015 for i in range(disk_count)]):
9016 names.append(lv_prefix + "_data")
9017 names.append(lv_prefix + "_meta")
9018 for idx, disk in enumerate(disk_info):
9019 disk_index = idx + base_index
9020 data_vg = disk.get(constants.IDISK_VG, vgname)
9021 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9022 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9023 disk[constants.IDISK_SIZE],
9025 names[idx * 2:idx * 2 + 2],
9026 "disk/%d" % disk_index,
9027 minors[idx * 2], minors[idx * 2 + 1])
9028 disk_dev.mode = disk[constants.IDISK_MODE]
9029 disk_dev.name = disk.get(constants.IDISK_NAME, None)
9030 disks.append(disk_dev)
9033 raise errors.ProgrammerError("Wrong template configuration")
9035 if template_name == constants.DT_FILE:
9037 elif template_name == constants.DT_SHARED_FILE:
9038 _req_shr_file_storage()
9040 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9041 if name_prefix is None:
9044 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9045 (name_prefix, base_index + i)
9046 for i in range(disk_count)])
9048 if template_name == constants.DT_PLAIN:
9050 def logical_id_fn(idx, _, disk):
9051 vg = disk.get(constants.IDISK_VG, vgname)
9052 return (vg, names[idx])
9054 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9056 lambda _, disk_index, disk: (file_driver,
9057 "%s/disk%d" % (file_storage_dir,
9059 elif template_name == constants.DT_BLOCK:
9061 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9062 disk[constants.IDISK_ADOPT])
9063 elif template_name == constants.DT_RBD:
9064 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9065 elif template_name == constants.DT_EXT:
9066 def logical_id_fn(idx, _, disk):
9067 provider = disk.get(constants.IDISK_PROVIDER, None)
9068 if provider is None:
9069 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9070 " not found", constants.DT_EXT,
9071 constants.IDISK_PROVIDER)
9072 return (provider, names[idx])
9074 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
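# At this point logical_id_fn knows how to build each disk's logical_id:
# (vg, lv_name) for plain, (file_driver, path) for file-based,
# (BLOCKDEV_DRIVER_MANUAL, adopted device) for block, ("rbd", name) for RBD
# and (provider, name) for ext.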
9076 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9078 for idx, disk in enumerate(disk_info):
9080 # Only for the Ext template, add disk_info to params
9081 if template_name == constants.DT_EXT:
9082 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9084 if key not in constants.IDISK_PARAMS:
9085 params[key] = disk[key]
9086 disk_index = idx + base_index
9087 size = disk[constants.IDISK_SIZE]
9088 feedback_fn("* disk %s, size %s" %
9089 (disk_index, utils.FormatUnit(size, "h")))
9090 disk_dev = objects.Disk(dev_type=dev_type, size=size,
9091 logical_id=logical_id_fn(idx, disk_index, disk),
9092 iv_name="disk/%d" % disk_index,
9093 mode=disk[constants.IDISK_MODE],
9095 disk_dev.name = disk.get(constants.IDISK_NAME, None)
9096 disk_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9097 disks.append(disk_dev)
9102 def _GetInstanceInfoText(instance):
9103 Compute the text that should be added to the disk's metadata.
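The returned string ("originstname+<instance name>") is attached to each
block device, e.g. as an LVM tag, so a device found on disk can be traced
back to its instance.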
9106 return "originstname+%s" % instance.name
9109 def _CalcEta(time_taken, written, total_size):
9110 """Calculates the ETA based on size written and total size.
9112 @param time_taken: The time taken so far
9113 @param written: amount written so far
9114 @param total_size: The total size of data to be written
9115 @return: The remaining time in seconds
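For example, if 200 MiB of a 1000 MiB total were written in 20 seconds,
the average is 0.1 s/MiB and the remaining 800 MiB give an ETA of 80
seconds.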
9118 avg_time = time_taken / float(written)
9119 return (total_size - written) * avg_time
9122 def _WipeDisks(lu, instance, disks=None):
9123 """Wipes instance disks.
9125 @type lu: L{LogicalUnit}
9126 @param lu: the logical unit on whose behalf we execute
9127 @type instance: L{objects.Instance}
9128 @param instance: the instance whose disks we should wipe
9129 @type disks: None or list of tuple of (number, L{objects.Disk}, number)
9130 @param disks: Disk details; tuple contains disk index, disk object and the
9134 node = instance.primary_node
9137 disks = [(idx, disk, 0)
9138 for (idx, disk) in enumerate(instance.disks)]
9140 for (_, device, _) in disks:
9141 lu.cfg.SetDiskID(device, node)
9143 logging.info("Pausing synchronization of disks of instance '%s'",
9145 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9146 (map(compat.snd, disks),
9149 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9151 for idx, success in enumerate(result.payload):
9153 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9154 " failed", idx, instance.name)
9157 for (idx, device, offset) in disks:
9158 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9159 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9161 int(min(constants.MAX_WIPE_CHUNK,
9162 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
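# Assuming the usual defaults (MAX_WIPE_CHUNK of 1 GiB,
# MIN_WIPE_CHUNK_PERCENT of 10), a 5 GiB disk is wiped in 512 MiB chunks,
# while anything above 10 GiB uses the full 1 GiB chunk.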
9166 start_time = time.time()
9171 info_text = (" (from %s to %s)" %
9172 (utils.FormatUnit(offset, "h"),
9173 utils.FormatUnit(size, "h")))
9175 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9177 logging.info("Wiping disk %d for instance %s on node %s using"
9178 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9180 while offset < size:
9181 wipe_size = min(wipe_chunk_size, size - offset)
9183 logging.debug("Wiping disk %d, offset %s, chunk %s",
9184 idx, offset, wipe_size)
9186 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9188 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9189 (idx, offset, wipe_size))
9193 if now - last_output >= 60:
9194 eta = _CalcEta(now - start_time, offset, size)
9195 lu.LogInfo(" - done: %.1f%% ETA: %s",
9196 offset / float(size) * 100, utils.FormatSeconds(eta))
9199 logging.info("Resuming synchronization of disks for instance '%s'",
9202 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9203 (map(compat.snd, disks),
9208 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9209 node, result.fail_msg)
9211 for idx, success in enumerate(result.payload):
9213 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9214 " failed", idx, instance.name)
9217 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9218 """Create all disks for an instance.
9220 This abstracts away some work from AddInstance.
9222 @type lu: L{LogicalUnit}
9223 @param lu: the logical unit on whose behalf we execute
9224 @type instance: L{objects.Instance}
9225 @param instance: the instance whose disks we should create
9227 @param to_skip: list of indices to skip
9228 @type target_node: string
9229 @param target_node: if passed, overrides the target node for creation
9231 @return: the success of the creation
9234 info = _GetInstanceInfoText(instance)
9235 if target_node is None:
9236 pnode = instance.primary_node
9237 all_nodes = instance.all_nodes
9242 if instance.disk_template in constants.DTS_FILEBASED:
9243 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9244 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9246 result.Raise("Failed to create directory '%s' on"
9247 " node %s" % (file_storage_dir, pnode))
9250 # Note: this needs to be kept in sync with adding of disks in
9251 # LUInstanceSetParams
9252 for idx, device in enumerate(instance.disks):
9253 if to_skip and idx in to_skip:
9255 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9257 for node in all_nodes:
9258 f_create = node == pnode
9260 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9261 disks_created.append((node, device))
9262 except errors.OpExecError:
9263 logging.warning("Creating disk %s for instance '%s' failed",
9265 except errors.DeviceCreationError, e:
9266 logging.warning("Creating disk %s for instance '%s' failed",
9268 disks_created.extend(e.created_devices)
9269 for (node, disk) in disks_created:
9270 lu.cfg.SetDiskID(disk, node)
9271 result = lu.rpc.call_blockdev_remove(node, disk)
9273 logging.warning("Failed to remove newly-created disk %s on node %s:"
9274 " %s", device, node, result.fail_msg)
9275 raise errors.OpExecError(e.message)
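# Editor's note: illustrative sketch only; not part of the original module.
# _CreateDisks above follows a "remember what was created, undo on failure"
# pattern.  The sketch below shows the same bookkeeping with plain callables
# instead of RPC calls; all names are hypothetical.
def _ExampleCreateWithRollback(items, create_fn, remove_fn):
  """Creates items one by one and removes the already-created ones on error."""
  created = []
  try:
    for item in items:
      create_fn(item)
      created.append(item)
  except Exception:
    # best-effort cleanup of everything that was already created
    for item in created:
      remove_fn(item)
    raise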
9278 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9279 """Remove all disks for an instance.
9281 This abstracts away some work from `AddInstance()` and
9282 `RemoveInstance()`. Note that in case some of the devices couldn't
9283 be removed, the removal will continue with the other ones.
9285 @type lu: L{LogicalUnit}
9286 @param lu: the logical unit on whose behalf we execute
9287 @type instance: L{objects.Instance}
9288 @param instance: the instance whose disks we should remove
9289 @type target_node: string
9290 @param target_node: used to override the node on which to remove the disks
9292 @return: the success of the removal
9295 logging.info("Removing block devices for instance %s", instance.name)
9298 ports_to_release = set()
9299 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9300 for (idx, device) in enumerate(anno_disks):
9302 edata = [(target_node, device)]
9304 edata = device.ComputeNodeTree(instance.primary_node)
9305 for node, disk in edata:
9306 lu.cfg.SetDiskID(disk, node)
9307 result = lu.rpc.call_blockdev_remove(node, disk)
9309 lu.LogWarning("Could not remove disk %s on node %s,"
9310 " continuing anyway: %s", idx, node, result.fail_msg)
9311 if not (result.offline and node != instance.primary_node):
9314 # if this is a DRBD disk, return its port to the pool
9315 if device.dev_type in constants.LDS_DRBD:
9316 ports_to_release.add(device.logical_id[2])
9318 if all_result or ignore_failures:
9319 for port in ports_to_release:
9320 lu.cfg.AddTcpUdpPort(port)
9322 if instance.disk_template in constants.DTS_FILEBASED:
9323 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9327 tgt = instance.primary_node
9328 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9330 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9331 file_storage_dir, instance.primary_node, result.fail_msg)
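# Editor's note: illustrative sketch only; not part of the original module.
# In _RemoveDisks above, DRBD-based disks return their network port to the
# cluster pool.  For DRBD8 the port is, to the editor's understanding, the
# third element of the disk's logical_id tuple; the sketch below only shows
# how such ports would be collected (names and tuple layout are assumptions).
def _ExampleCollectDrbdPorts(drbd_logical_ids):
  """Collects the TCP ports from a list of DRBD8-style logical_id tuples."""
  # each logical_id is assumed to look like
  # (node_a, node_b, port, minor_a, minor_b, secret)
  return set(lid[2] for lid in drbd_logical_ids)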
9337 def _ComputeDiskSizePerVG(disk_template, disks):
9338 """Compute disk size requirements in the volume group
9341 def _compute(disks, payload):
9342 """Universal algorithm.
9347 vgs[disk[constants.IDISK_VG]] = \
9348 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9352 # Required free disk space as a function of disk and swap space
9354 constants.DT_DISKLESS: {},
9355 constants.DT_PLAIN: _compute(disks, 0),
9356 # 128 MB are added for drbd metadata for each disk
9357 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9358 constants.DT_FILE: {},
9359 constants.DT_SHARED_FILE: {},
9362 if disk_template not in req_size_dict:
9363 raise errors.ProgrammerError("Disk template '%s' size requirement"
9364 " is unknown" % disk_template)
9366 return req_size_dict[disk_template]
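# Editor's note: illustrative sketch only; not part of the original module.
# It shows the per-VG accumulation performed by _ComputeDiskSizePerVG above
# with plain dictionaries: every disk adds its size plus a fixed per-disk
# overhead (e.g. DRBD metadata) to the total of its volume group.  The dict
# layout used here ("vg"/"size" keys) is hypothetical.
def _ExampleSizePerVG(disks, overhead):
  """Sums disk sizes (plus a per-disk overhead) per volume group."""
  vgs = {}
  for disk in disks:
    vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + overhead
  return vgs

# For example, two 1024 MiB disks in "xenvg" plus one 512 MiB disk in "ssdvg"
# with a 128 MiB overhead would yield {"xenvg": 2304, "ssdvg": 640}.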
9369 def _FilterVmNodes(lu, nodenames):
9370 """Filters out non-vm_capable nodes from a list.
9372 @type lu: L{LogicalUnit}
9373 @param lu: the logical unit for which we check
9374 @type nodenames: list
9375 @param nodenames: the list of nodes on which we should check
9377 @return: the list of vm-capable nodes
9380 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9381 return [name for name in nodenames if name not in vm_nodes]
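# Editor's note: illustrative sketch only; not part of the original module.
# _FilterVmNodes above builds a set of the *non*-vm_capable nodes and keeps
# only the names that are not in that set; the same pattern with plain lists:
def _ExampleFilterVmNodes(nodenames, non_vm_capable):
  """Returns the subset of nodenames not listed as non-vm_capable."""
  excluded = frozenset(non_vm_capable)
  return [name for name in nodenames if name not in excluded]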
9384 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9385 """Hypervisor parameter validation.
9387 This function abstracts the hypervisor parameter validation to be
9388 used in both instance create and instance modify.
9390 @type lu: L{LogicalUnit}
9391 @param lu: the logical unit for which we check
9392 @type nodenames: list
9393 @param nodenames: the list of nodes on which we should check
9394 @type hvname: string
9395 @param hvname: the name of the hypervisor we should use
9396 @type hvparams: dict
9397 @param hvparams: the parameters which we need to check
9398 @raise errors.OpPrereqError: if the parameters are not valid
9401 nodenames = _FilterVmNodes(lu, nodenames)
9403 cluster = lu.cfg.GetClusterInfo()
9404 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9406 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9407 for node in nodenames:
9411 info.Raise("Hypervisor parameter validation failed on node %s" % node)
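# Editor's note: illustrative sketch only; not part of the original module.
# _CheckHVParams above merges the cluster-level hypervisor defaults with the
# per-instance overrides before validation; the dictionary layering looks
# roughly like the plain-dict sketch below (parameter names and values are
# made up for illustration).
def _ExampleFillHvParams():
  """Shows cluster defaults being overridden by instance-level hvparams."""
  cluster_defaults = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda"}
  instance_overrides = {"root_path": "/dev/xvda"}
  filled = dict(cluster_defaults)
  filled.update(instance_overrides)
  # -> {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda"}
  return filled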
9414 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9415 """OS parameters validation.
9417 @type lu: L{LogicalUnit}
9418 @param lu: the logical unit for which we check
9419 @type required: boolean
9420 @param required: whether the validation should fail if the OS is not found
9422 @type nodenames: list
9423 @param nodenames: the list of nodes on which we should check
9424 @type osname: string
9425 @param osname: the name of the OS we should check
9426 @type osparams: dict
9427 @param osparams: the parameters which we need to check
9428 @raise errors.OpPrereqError: if the parameters are not valid
9431 nodenames = _FilterVmNodes(lu, nodenames)
9432 result = lu.rpc.call_os_validate(nodenames, required, osname,
9433 [constants.OS_VALIDATE_PARAMETERS],
9435 for node, nres in result.items():
9436 # we don't check for offline cases since this should be run only
9437 # against the master node and/or an instance's nodes
9438 nres.Raise("OS Parameters validation failed on node %s" % node)
9439 if not nres.payload:
9440 lu.LogInfo("OS %s not found on node %s, validation skipped",
9444 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9445 """Wrapper around IAReqInstanceAlloc.
9447 @param op: The instance opcode
9448 @param disks: The computed disks
9449 @param nics: The computed nics
9450 @param beparams: The fully filled beparams
9451 @param node_whitelist: List of nodes which should appear as online to the
9452 allocator (unless the node is already marked offline)
9454 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9457 spindle_use = beparams[constants.BE_SPINDLE_USE]
9458 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9459 disk_template=op.disk_template,
9462 vcpus=beparams[constants.BE_VCPUS],
9463 memory=beparams[constants.BE_MAXMEM],
9464 spindle_use=spindle_use,
9466 nics=[n.ToDict() for n in nics],
9467 hypervisor=op.hypervisor,
9468 node_whitelist=node_whitelist)
9471 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9472 """Computes the nics.
9474 @param op: The instance opcode
9475 @param cluster: Cluster configuration object
9476 @param default_ip: The default ip to assign
9477 @param cfg: An instance of the configuration object
9478 @param ec_id: Execution context ID
9480 @returns: The list of built NIC objects
9485 nic_mode_req = nic.get(constants.INIC_MODE, None)
9486 nic_mode = nic_mode_req
9487 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9488 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9490 net = nic.get(constants.INIC_NETWORK, None)
9491 link = nic.get(constants.NIC_LINK, None)
9492 ip = nic.get(constants.INIC_IP, None)
9494 if net is None or net.lower() == constants.VALUE_NONE:
9497 if nic_mode_req is not None or link is not None:
9498 raise errors.OpPrereqError("If network is given, no mode or link"
9499 " is allowed to be passed",
9502 # ip validity checks
9503 if ip is None or ip.lower() == constants.VALUE_NONE:
9505 elif ip.lower() == constants.VALUE_AUTO:
9506 if not op.name_check:
9507 raise errors.OpPrereqError("IP address set to auto but name checks"
9508 " have been skipped",
9512 # We defer pool operations until later, so that the iallocator has
9513 # filled in the instance's node(s)
9514 if ip.lower() == constants.NIC_IP_POOL:
9516 raise errors.OpPrereqError("if ip=pool, parameter network"
9517 " must be passed too",
9520 elif not netutils.IPAddress.IsValid(ip):
9521 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9526 # TODO: check the ip address for uniqueness
9527 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9528 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9531 # MAC address verification
9532 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9533 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9534 mac = utils.NormalizeAndValidateMac(mac)
9537 # TODO: We need to factor this out
9538 cfg.ReserveMAC(mac, ec_id)
9539 except errors.ReservationError:
9540 raise errors.OpPrereqError("MAC address %s already in use"
9541 " in cluster" % mac,
9542 errors.ECODE_NOTUNIQUE)
9544 # Build nic parameters
9547 nicparams[constants.NIC_MODE] = nic_mode
9549 nicparams[constants.NIC_LINK] = link
9551 check_params = cluster.SimpleFillNIC(nicparams)
9552 objects.NIC.CheckParameterSyntax(check_params)
9553 net_uuid = cfg.LookupNetwork(net)
9554 name = nic.get(constants.INIC_NAME, None)
9555 if name is not None and name.lower() == constants.VALUE_NONE:
9557 nic_obj = objects.NIC(mac=mac, ip=nic_ip, name=name,
9558 network=net_uuid, nicparams=nicparams)
9559 nic_obj.uuid = cfg.GenerateUniqueID(ec_id)
9560 nics.append(nic_obj)
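# Editor's note: illustrative sketch only; not part of the original module.
# It condenses the IP handling rules applied per NIC in _ComputeNics above:
# "none" means no IP, "auto" requires name checking, "pool" requires a
# network, and anything else must be a syntactically valid address.  The
# is_valid_ip argument stands in for netutils.IPAddress.IsValid; all names
# are hypothetical.
def _ExampleNicIpPolicy(ip, name_check, has_network, is_valid_ip):
  """Returns the effective NIC IP or raises ValueError on bad combinations."""
  if ip is None or ip.lower() == "none":
    return None
  if ip.lower() == "auto":
    if not name_check:
      raise ValueError("ip=auto requires name checking")
    return ip  # resolved later from the instance name
  if ip.lower() == "pool":
    if not has_network:
      raise ValueError("ip=pool requires a network")
    return ip  # allocated later from the network's address pool
  if not is_valid_ip(ip):
    raise ValueError("invalid IP address %s" % ip)
  return ip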
9565 def _ComputeDisks(op, default_vg):
9566 """Computes the instance disks.
9568 @param op: The instance opcode
9569 @param default_vg: The default_vg to assume
9571 @return: The computed disks
9575 for disk in op.disks:
9576 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9577 if mode not in constants.DISK_ACCESS_SET:
9578 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9579 mode, errors.ECODE_INVAL)
9580 size = disk.get(constants.IDISK_SIZE, None)
9582 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9585 except (TypeError, ValueError):
9586 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9589 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9590 if ext_provider and op.disk_template != constants.DT_EXT:
9591 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9592 " disk template, not %s" %
9593 (constants.IDISK_PROVIDER, constants.DT_EXT,
9594 op.disk_template), errors.ECODE_INVAL)
9596 data_vg = disk.get(constants.IDISK_VG, default_vg)
9597 name = disk.get(constants.IDISK_NAME, None)
9598 if name is not None and name.lower() == constants.VALUE_NONE:
9601 constants.IDISK_SIZE: size,
9602 constants.IDISK_MODE: mode,
9603 constants.IDISK_VG: data_vg,
9604 constants.IDISK_NAME: name,
9607 if constants.IDISK_METAVG in disk:
9608 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9609 if constants.IDISK_ADOPT in disk:
9610 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9612 # For extstorage, demand the `provider' option and add any
9613 # additional parameters (ext-params) to the dict
9614 if op.disk_template == constants.DT_EXT:
9616 new_disk[constants.IDISK_PROVIDER] = ext_provider
9618 if key not in constants.IDISK_PARAMS:
9619 new_disk[key] = disk[key]
9621 raise errors.OpPrereqError("Missing provider for template '%s'" %
9622 constants.DT_EXT, errors.ECODE_INVAL)
9624 disks.append(new_disk)
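# Editor's note: illustrative sketch only; not part of the original module.
# It shows the kind of normalisation _ComputeDisks above performs on a single
# user-supplied disk specification; keys are written out literally here
# instead of using the constants.IDISK_* names, and the defaults are assumed.
def _ExampleNormalizeDiskSpec(spec, default_vg):
  """Fills in mode and volume group defaults for one disk specification."""
  size = int(spec["size"])          # a missing or invalid size is an error
  mode = spec.get("mode", "rw")     # read-write unless stated otherwise
  new_disk = {
    "size": size,
    "mode": mode,
    "vg": spec.get("vg", default_vg),
    "name": spec.get("name"),
    }
  return new_disk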
9629 def _ComputeFullBeParams(op, cluster):
9630 """Computes the full beparams.
9632 @param op: The instance opcode
9633 @param cluster: The cluster config object
9635 @return: The fully filled beparams
9638 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9639 for param, value in op.beparams.iteritems():
9640 if value == constants.VALUE_AUTO:
9641 op.beparams[param] = default_beparams[param]
9642 objects.UpgradeBeParams(op.beparams)
9643 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9644 return cluster.SimpleFillBE(op.beparams)
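# Editor's note: illustrative sketch only; not part of the original module.
# _ComputeFullBeParams above replaces "auto" values with the cluster defaults
# before filling the remaining gaps; the same two-step idea with plain dicts
# (all names here are hypothetical):
def _ExampleFillBeParams(requested, cluster_defaults):
  """Resolves "auto" entries and then layers the result over the defaults."""
  resolved = dict(requested)
  for param, value in resolved.items():
    if value == "auto":
      resolved[param] = cluster_defaults[param]
  filled = dict(cluster_defaults)
  filled.update(resolved)
  return filled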
9647 def _CheckOpportunisticLocking(op):
9648 """Generate error if opportunistic locking is not possible.
9651 if op.opportunistic_locking and not op.iallocator:
9652 raise errors.OpPrereqError("Opportunistic locking is only available in"
9653 " combination with an instance allocator",
9657 class LUInstanceCreate(LogicalUnit):
9658 """Create an instance.
9661 HPATH = "instance-add"
9662 HTYPE = constants.HTYPE_INSTANCE
9665 def CheckArguments(self):
9669 # do not require name_check to ease forward/backward compatibility
9671 if self.op.no_install and self.op.start:
9672 self.LogInfo("No-installation mode selected, disabling startup")
9673 self.op.start = False
9674 # validate/normalize the instance name
9675 self.op.instance_name = \
9676 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9678 if self.op.ip_check and not self.op.name_check:
9679 # TODO: make the ip check more flexible and not depend on the name check
9680 raise errors.OpPrereqError("Cannot do IP address check without a name"
9681 " check", errors.ECODE_INVAL)
9683 # check nics' parameter names
9684 for nic in self.op.nics:
9685 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9686 # check that NIC's parameters names are unique and valid
9687 utils.ValidateDeviceNames("NIC", self.op.nics)
9689 # check that disk's names are unique and valid
9690 utils.ValidateDeviceNames("disk", self.op.disks)
9692 cluster = self.cfg.GetClusterInfo()
9693 if not self.op.disk_template in cluster.enabled_disk_templates:
9694 raise errors.OpPrereqError("Cannot create an instance with disk template"
9695 " '%s', because it is not enabled in the"
9696 " cluster. Enabled disk templates are: %s." %
9697 (self.op.disk_template,
9698 ",".join(cluster.enabled_disk_templates)))
9700 # check disks. parameter names and consistent adopt/no-adopt strategy
9701 has_adopt = has_no_adopt = False
9702 for disk in self.op.disks:
9703 if self.op.disk_template != constants.DT_EXT:
9704 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9705 if constants.IDISK_ADOPT in disk:
9709 if has_adopt and has_no_adopt:
9710 raise errors.OpPrereqError("Either all disks are adopted or none is",
9713 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9714 raise errors.OpPrereqError("Disk adoption is not supported for the"
9715 " '%s' disk template" %
9716 self.op.disk_template,
9718 if self.op.iallocator is not None:
9719 raise errors.OpPrereqError("Disk adoption not allowed with an"
9720 " iallocator script", errors.ECODE_INVAL)
9721 if self.op.mode == constants.INSTANCE_IMPORT:
9722 raise errors.OpPrereqError("Disk adoption not allowed for"
9723 " instance import", errors.ECODE_INVAL)
9725 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9726 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9727 " but no 'adopt' parameter given" %
9728 self.op.disk_template,
9731 self.adopt_disks = has_adopt
9733 # instance name verification
9734 if self.op.name_check:
9735 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9736 self.op.instance_name = self.hostname1.name
9737 # used in CheckPrereq for ip ping check
9738 self.check_ip = self.hostname1.ip
9740 self.check_ip = None
9742 # file storage checks
9743 if (self.op.file_driver and
9744 not self.op.file_driver in constants.FILE_DRIVER):
9745 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9746 self.op.file_driver, errors.ECODE_INVAL)
9748 if self.op.disk_template == constants.DT_FILE:
9749 opcodes.RequireFileStorage()
9750 elif self.op.disk_template == constants.DT_SHARED_FILE:
9751 opcodes.RequireSharedFileStorage()
9753 ### Node/iallocator related checks
9754 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9756 if self.op.pnode is not None:
9757 if self.op.disk_template in constants.DTS_INT_MIRROR:
9758 if self.op.snode is None:
9759 raise errors.OpPrereqError("The networked disk templates need"
9760 " a mirror node", errors.ECODE_INVAL)
9762 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9764 self.op.snode = None
9766 _CheckOpportunisticLocking(self.op)
9768 self._cds = _GetClusterDomainSecret()
9770 if self.op.mode == constants.INSTANCE_IMPORT:
9771 # On import force_variant must be True, because if we forced it at
9772 # initial install, our only chance when importing it back is that it
9774 self.op.force_variant = True
9776 if self.op.no_install:
9777 self.LogInfo("No-installation mode has no effect during import")
9779 elif self.op.mode == constants.INSTANCE_CREATE:
9780 if self.op.os_type is None:
9781 raise errors.OpPrereqError("No guest OS specified",
9783 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9784 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9785 " installation" % self.op.os_type,
9787 if self.op.disk_template is None:
9788 raise errors.OpPrereqError("No disk template specified",
9791 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9792 # Check handshake to ensure both clusters have the same domain secret
9793 src_handshake = self.op.source_handshake
9794 if not src_handshake:
9795 raise errors.OpPrereqError("Missing source handshake",
9798 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9801 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9804 # Load and check source CA
9805 self.source_x509_ca_pem = self.op.source_x509_ca
9806 if not self.source_x509_ca_pem:
9807 raise errors.OpPrereqError("Missing source X509 CA",
9811 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9813 except OpenSSL.crypto.Error, err:
9814 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9815 (err, ), errors.ECODE_INVAL)
9817 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9818 if errcode is not None:
9819 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9822 self.source_x509_ca = cert
9824 src_instance_name = self.op.source_instance_name
9825 if not src_instance_name:
9826 raise errors.OpPrereqError("Missing source instance name",
9829 self.source_instance_name = \
9830 netutils.GetHostname(name=src_instance_name).name
9833 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9834 self.op.mode, errors.ECODE_INVAL)
9836 def ExpandNames(self):
9837 """ExpandNames for CreateInstance.
9839 Figure out the right locks for instance creation.
9842 self.needed_locks = {}
9844 instance_name = self.op.instance_name
9845 # this is just a preventive check, but someone might still add this
9846 # instance in the meantime, and creation will fail at lock-add time
9847 if instance_name in self.cfg.GetInstanceList():
9848 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9849 instance_name, errors.ECODE_EXISTS)
9851 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9853 if self.op.iallocator:
9854 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9855 # specifying a group on instance creation and then selecting nodes from
9857 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9858 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9860 if self.op.opportunistic_locking:
9861 self.opportunistic_locks[locking.LEVEL_NODE] = True
9862 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
9864 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9865 nodelist = [self.op.pnode]
9866 if self.op.snode is not None:
9867 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9868 nodelist.append(self.op.snode)
9869 self.needed_locks[locking.LEVEL_NODE] = nodelist
9871 # in case of import lock the source node too
9872 if self.op.mode == constants.INSTANCE_IMPORT:
9873 src_node = self.op.src_node
9874 src_path = self.op.src_path
9876 if src_path is None:
9877 self.op.src_path = src_path = self.op.instance_name
9879 if src_node is None:
9880 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9881 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9882 self.op.src_node = None
9883 if os.path.isabs(src_path):
9884 raise errors.OpPrereqError("Importing an instance from a path"
9885 " requires a source node option",
9888 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9889 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9890 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9891 if not os.path.isabs(src_path):
9892 self.op.src_path = src_path = \
9893 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9895 self.needed_locks[locking.LEVEL_NODE_RES] = \
9896 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9898 def _RunAllocator(self):
9899 """Run the allocator based on input opcode.
9902 if self.op.opportunistic_locking:
9903 # Only consider nodes for which a lock is held
9904 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
9906 node_whitelist = None
9908 #TODO Export network to iallocator so that it chooses a pnode
9909 # in a nodegroup that has the desired network connected to
9910 req = _CreateInstanceAllocRequest(self.op, self.disks,
9911 self.nics, self.be_full,
9913 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9915 ial.Run(self.op.iallocator)
9918 # When opportunistic locks are used only a temporary failure is generated
9919 if self.op.opportunistic_locking:
9920 ecode = errors.ECODE_TEMP_NORES
9922 ecode = errors.ECODE_NORES
9924 raise errors.OpPrereqError("Can't compute nodes using"
9925 " iallocator '%s': %s" %
9926 (self.op.iallocator, ial.info),
9929 self.op.pnode = ial.result[0]
9930 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9931 self.op.instance_name, self.op.iallocator,
9932 utils.CommaJoin(ial.result))
9934 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9936 if req.RequiredNodes() == 2:
9937 self.op.snode = ial.result[1]
9939 def BuildHooksEnv(self):
9942 This runs on master, primary and secondary nodes of the instance.
9946 "ADD_MODE": self.op.mode,
9948 if self.op.mode == constants.INSTANCE_IMPORT:
9949 env["SRC_NODE"] = self.op.src_node
9950 env["SRC_PATH"] = self.op.src_path
9951 env["SRC_IMAGES"] = self.src_images
9953 env.update(_BuildInstanceHookEnv(
9954 name=self.op.instance_name,
9955 primary_node=self.op.pnode,
9956 secondary_nodes=self.secondaries,
9957 status=self.op.start,
9958 os_type=self.op.os_type,
9959 minmem=self.be_full[constants.BE_MINMEM],
9960 maxmem=self.be_full[constants.BE_MAXMEM],
9961 vcpus=self.be_full[constants.BE_VCPUS],
9962 nics=_NICListToTuple(self, self.nics),
9963 disk_template=self.op.disk_template,
9964 disks=[(d[constants.IDISK_NAME], d[constants.IDISK_SIZE],
9965 d[constants.IDISK_MODE]) for d in self.disks],
9968 hypervisor_name=self.op.hypervisor,
9974 def BuildHooksNodes(self):
9975 """Build hooks nodes.
9978 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9981 def _ReadExportInfo(self):
9982 """Reads the export information from disk.
9984 It will override the opcode source node and path with the actual
9985 information, if these two were not specified before.
9987 @return: the export information
9990 assert self.op.mode == constants.INSTANCE_IMPORT
9992 src_node = self.op.src_node
9993 src_path = self.op.src_path
9995 if src_node is None:
9996 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9997 exp_list = self.rpc.call_export_list(locked_nodes)
9999 for node in exp_list:
10000 if exp_list[node].fail_msg:
10002 if src_path in exp_list[node].payload:
10004 self.op.src_node = src_node = node
10005 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10009 raise errors.OpPrereqError("No export found for relative path %s" %
10010 src_path, errors.ECODE_INVAL)
10012 _CheckNodeOnline(self, src_node)
10013 result = self.rpc.call_export_info(src_node, src_path)
10014 result.Raise("No export or invalid export found in dir %s" % src_path)
10016 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10017 if not export_info.has_section(constants.INISECT_EXP):
10018 raise errors.ProgrammerError("Corrupted export config",
10019 errors.ECODE_ENVIRON)
10021 ei_version = export_info.get(constants.INISECT_EXP, "version")
10022 if (int(ei_version) != constants.EXPORT_VERSION):
10023 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10024 (ei_version, constants.EXPORT_VERSION),
10025 errors.ECODE_ENVIRON)
10028 def _ReadExportParams(self, einfo):
10029 """Use export parameters as defaults.
10031 If the opcode doesn't specify (i.e. override) some instance
10032 parameters, try to take them from the export information, if
10033 it declares them.
10036 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10038 if self.op.disk_template is None:
10039 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10040 self.op.disk_template = einfo.get(constants.INISECT_INS,
10042 if self.op.disk_template not in constants.DISK_TEMPLATES:
10043 raise errors.OpPrereqError("Disk template specified in configuration"
10044 " file is not one of the allowed values:"
10046 " ".join(constants.DISK_TEMPLATES),
10047 errors.ECODE_INVAL)
10049 raise errors.OpPrereqError("No disk template specified and the export"
10050 " is missing the disk_template information",
10051 errors.ECODE_INVAL)
10053 if not self.op.disks:
10055 # TODO: import the disk iv_name too
10056 for idx in range(constants.MAX_DISKS):
10057 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10058 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10059 disks.append({constants.IDISK_SIZE: disk_sz})
10060 self.op.disks = disks
10061 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10062 raise errors.OpPrereqError("No disk info specified and the export"
10063 " is missing the disk information",
10064 errors.ECODE_INVAL)
10066 if not self.op.nics:
10068 for idx in range(constants.MAX_NICS):
10069 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10071 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10072 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10077 self.op.nics = nics
10079 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10080 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10082 if (self.op.hypervisor is None and
10083 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10084 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10086 if einfo.has_section(constants.INISECT_HYP):
10087 # use the export parameters but do not override the ones
10088 # specified by the user
10089 for name, value in einfo.items(constants.INISECT_HYP):
10090 if name not in self.op.hvparams:
10091 self.op.hvparams[name] = value
10093 if einfo.has_section(constants.INISECT_BEP):
10094 # use the parameters, without overriding
10095 for name, value in einfo.items(constants.INISECT_BEP):
10096 if name not in self.op.beparams:
10097 self.op.beparams[name] = value
10098 # Compatibility for the old "memory" be param
10099 if name == constants.BE_MEMORY:
10100 if constants.BE_MAXMEM not in self.op.beparams:
10101 self.op.beparams[constants.BE_MAXMEM] = value
10102 if constants.BE_MINMEM not in self.op.beparams:
10103 self.op.beparams[constants.BE_MINMEM] = value
10105 # try to read the parameters old style, from the main section
10106 for name in constants.BES_PARAMETERS:
10107 if (name not in self.op.beparams and
10108 einfo.has_option(constants.INISECT_INS, name)):
10109 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10111 if einfo.has_section(constants.INISECT_OSP):
10112 # use the parameters, without overriding
10113 for name, value in einfo.items(constants.INISECT_OSP):
10114 if name not in self.op.osparams:
10115 self.op.osparams[name] = value
10117 def _RevertToDefaults(self, cluster):
10118 """Revert the instance parameters to the default values.
10122 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10123 for name in self.op.hvparams.keys():
10124 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10125 del self.op.hvparams[name]
10127 be_defs = cluster.SimpleFillBE({})
10128 for name in self.op.beparams.keys():
10129 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10130 del self.op.beparams[name]
10132 nic_defs = cluster.SimpleFillNIC({})
10133 for nic in self.op.nics:
10134 for name in constants.NICS_PARAMETERS:
10135 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10138 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10139 for name in self.op.osparams.keys():
10140 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10141 del self.op.osparams[name]
10143 def _CalculateFileStorageDir(self):
10144 """Calculate final instance file storage dir.
10147 # file storage dir calculation/check
10148 self.instance_file_storage_dir = None
10149 if self.op.disk_template in constants.DTS_FILEBASED:
10150 # build the full file storage dir path
10153 if self.op.disk_template == constants.DT_SHARED_FILE:
10154 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10156 get_fsd_fn = self.cfg.GetFileStorageDir
10158 cfg_storagedir = get_fsd_fn()
10159 if not cfg_storagedir:
10160 raise errors.OpPrereqError("Cluster file storage dir not defined",
10161 errors.ECODE_STATE)
10162 joinargs.append(cfg_storagedir)
10164 if self.op.file_storage_dir is not None:
10165 joinargs.append(self.op.file_storage_dir)
10167 joinargs.append(self.op.instance_name)
10169 # pylint: disable=W0142
10170 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10172 def CheckPrereq(self): # pylint: disable=R0914
10173 """Check prerequisites.
10176 self._CalculateFileStorageDir()
10178 if self.op.mode == constants.INSTANCE_IMPORT:
10179 export_info = self._ReadExportInfo()
10180 self._ReadExportParams(export_info)
10181 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10183 self._old_instance_name = None
10185 if (not self.cfg.GetVGName() and
10186 self.op.disk_template not in constants.DTS_NOT_LVM):
10187 raise errors.OpPrereqError("Cluster does not support lvm-based"
10188 " instances", errors.ECODE_STATE)
10190 if (self.op.hypervisor is None or
10191 self.op.hypervisor == constants.VALUE_AUTO):
10192 self.op.hypervisor = self.cfg.GetHypervisorType()
10194 cluster = self.cfg.GetClusterInfo()
10195 enabled_hvs = cluster.enabled_hypervisors
10196 if self.op.hypervisor not in enabled_hvs:
10197 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10199 (self.op.hypervisor, ",".join(enabled_hvs)),
10200 errors.ECODE_STATE)
10202 # Check tag validity
10203 for tag in self.op.tags:
10204 objects.TaggableObject.ValidateTag(tag)
10206 # check hypervisor parameter syntax (locally)
10207 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10208 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10210 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10211 hv_type.CheckParameterSyntax(filled_hvp)
10212 self.hv_full = filled_hvp
10213 # check that we don't specify global parameters on an instance
10214 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10215 "instance", "cluster")
10217 # fill and remember the beparams dict
10218 self.be_full = _ComputeFullBeParams(self.op, cluster)
10220 # build os parameters
10221 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10223 # now that hvp/bep are in final format, let's reset to defaults,
10225 if self.op.identify_defaults:
10226 self._RevertToDefaults(cluster)
10229 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10230 self.proc.GetECId())
10232 # disk checks/pre-build
10233 default_vg = self.cfg.GetVGName()
10234 self.disks = _ComputeDisks(self.op, default_vg)
10236 if self.op.mode == constants.INSTANCE_IMPORT:
10238 for idx in range(len(self.disks)):
10239 option = "disk%d_dump" % idx
10240 if export_info.has_option(constants.INISECT_INS, option):
10241 # FIXME: are the old os-es, disk sizes, etc. useful?
10242 export_name = export_info.get(constants.INISECT_INS, option)
10243 image = utils.PathJoin(self.op.src_path, export_name)
10244 disk_images.append(image)
10246 disk_images.append(False)
10248 self.src_images = disk_images
10250 if self.op.instance_name == self._old_instance_name:
10251 for idx, nic in enumerate(self.nics):
10252 if nic.mac == constants.VALUE_AUTO:
10253 nic_mac_ini = "nic%d_mac" % idx
10254 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10256 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10258 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10259 if self.op.ip_check:
10260 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10261 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10262 (self.check_ip, self.op.instance_name),
10263 errors.ECODE_NOTUNIQUE)
10265 #### mac address generation
10266 # By generating here the mac address both the allocator and the hooks get
10267 # the real final mac address rather than the 'auto' or 'generate' value.
10268 # There is a race condition between the generation and the instance object
10269 # creation, which means that we know the mac is valid now, but we're not
10270 # sure it will be when we actually add the instance. If things go bad
10271 # adding the instance will abort because of a duplicate mac, and the
10272 # creation job will fail.
10273 for nic in self.nics:
10274 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10275 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10279 if self.op.iallocator is not None:
10280 self._RunAllocator()
10282 # Release all unneeded node locks
10283 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10284 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10285 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10286 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10288 assert (self.owned_locks(locking.LEVEL_NODE) ==
10289 self.owned_locks(locking.LEVEL_NODE_RES)), \
10290 "Node locks differ from node resource locks"
10292 #### node related checks
10294 # check primary node
10295 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10296 assert self.pnode is not None, \
10297 "Cannot retrieve locked node %s" % self.op.pnode
10299 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10300 pnode.name, errors.ECODE_STATE)
10302 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10303 pnode.name, errors.ECODE_STATE)
10304 if not pnode.vm_capable:
10305 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10306 " '%s'" % pnode.name, errors.ECODE_STATE)
10308 self.secondaries = []
10310 # Fill in any IPs from IP pools. This must happen here, because we need to
10311 # know the nic's primary node, as specified by the iallocator
10312 for idx, nic in enumerate(self.nics):
10313 net_uuid = nic.network
10314 if net_uuid is not None:
10315 nobj = self.cfg.GetNetwork(net_uuid)
10316 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10317 if netparams is None:
10318 raise errors.OpPrereqError("No netparams found for network"
10319 " %s. Propably not connected to"
10320 " node's %s nodegroup" %
10321 (nobj.name, self.pnode.name),
10322 errors.ECODE_INVAL)
10323 self.LogInfo("NIC/%d inherits netparams %s" %
10324 (idx, netparams.values()))
10325 nic.nicparams = dict(netparams)
10326 if nic.ip is not None:
10327 if nic.ip.lower() == constants.NIC_IP_POOL:
10329 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10330 except errors.ReservationError:
10331 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10332 " from the address pool" % idx,
10333 errors.ECODE_STATE)
10334 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10337 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10338 except errors.ReservationError:
10339 raise errors.OpPrereqError("IP address %s already in use"
10340 " or does not belong to network %s" %
10341 (nic.ip, nobj.name),
10342 errors.ECODE_NOTUNIQUE)
10344 # net is None, ip None or given
10345 elif self.op.conflicts_check:
10346 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10348 # mirror node verification
10349 if self.op.disk_template in constants.DTS_INT_MIRROR:
10350 if self.op.snode == pnode.name:
10351 raise errors.OpPrereqError("The secondary node cannot be the"
10352 " primary node", errors.ECODE_INVAL)
10353 _CheckNodeOnline(self, self.op.snode)
10354 _CheckNodeNotDrained(self, self.op.snode)
10355 _CheckNodeVmCapable(self, self.op.snode)
10356 self.secondaries.append(self.op.snode)
10358 snode = self.cfg.GetNodeInfo(self.op.snode)
10359 if pnode.group != snode.group:
10360 self.LogWarning("The primary and secondary nodes are in two"
10361 " different node groups; the disk parameters"
10362 " from the first disk's node group will be"
10365 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10367 if self.op.disk_template in constants.DTS_INT_MIRROR:
10368 nodes.append(snode)
10369 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10370 if compat.any(map(has_es, nodes)):
10371 raise errors.OpPrereqError("Disk template %s not supported with"
10372 " exclusive storage" % self.op.disk_template,
10373 errors.ECODE_STATE)
10375 nodenames = [pnode.name] + self.secondaries
10377 if not self.adopt_disks:
10378 if self.op.disk_template == constants.DT_RBD:
10379 # _CheckRADOSFreeSpace() is just a placeholder.
10380 # Any function that checks prerequisites can be placed here.
10381 # Check if there is enough space on the RADOS cluster.
10382 _CheckRADOSFreeSpace()
10383 elif self.op.disk_template == constants.DT_EXT:
10384 # FIXME: Function that checks prereqs if needed
10387 # Check lv size requirements, if not adopting
10388 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10389 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10391 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10392 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10393 disk[constants.IDISK_ADOPT])
10394 for disk in self.disks])
10395 if len(all_lvs) != len(self.disks):
10396 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10397 errors.ECODE_INVAL)
10398 for lv_name in all_lvs:
10400 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10401 # to ReserveLV use the same syntax
10402 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10403 except errors.ReservationError:
10404 raise errors.OpPrereqError("LV named %s used by another instance" %
10405 lv_name, errors.ECODE_NOTUNIQUE)
10407 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10408 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10410 node_lvs = self.rpc.call_lv_list([pnode.name],
10411 vg_names.payload.keys())[pnode.name]
10412 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10413 node_lvs = node_lvs.payload
10415 delta = all_lvs.difference(node_lvs.keys())
10417 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10418 utils.CommaJoin(delta),
10419 errors.ECODE_INVAL)
10420 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10422 raise errors.OpPrereqError("Online logical volumes found, cannot"
10423 " adopt: %s" % utils.CommaJoin(online_lvs),
10424 errors.ECODE_STATE)
10425 # update the size of disk based on what is found
10426 for dsk in self.disks:
10427 dsk[constants.IDISK_SIZE] = \
10428 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10429 dsk[constants.IDISK_ADOPT])][0]))
10431 elif self.op.disk_template == constants.DT_BLOCK:
10432 # Normalize and de-duplicate device paths
10433 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10434 for disk in self.disks])
10435 if len(all_disks) != len(self.disks):
10436 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10437 errors.ECODE_INVAL)
10438 baddisks = [d for d in all_disks
10439 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10441 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10442 " cannot be adopted" %
10443 (utils.CommaJoin(baddisks),
10444 constants.ADOPTABLE_BLOCKDEV_ROOT),
10445 errors.ECODE_INVAL)
10447 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10448 list(all_disks))[pnode.name]
10449 node_disks.Raise("Cannot get block device information from node %s" %
10451 node_disks = node_disks.payload
10452 delta = all_disks.difference(node_disks.keys())
10454 raise errors.OpPrereqError("Missing block device(s): %s" %
10455 utils.CommaJoin(delta),
10456 errors.ECODE_INVAL)
10457 for dsk in self.disks:
10458 dsk[constants.IDISK_SIZE] = \
10459 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10461 # Verify instance specs
10462 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10464 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10465 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10466 constants.ISPEC_DISK_COUNT: len(self.disks),
10467 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10468 for disk in self.disks],
10469 constants.ISPEC_NIC_COUNT: len(self.nics),
10470 constants.ISPEC_SPINDLE_USE: spindle_use,
10473 group_info = self.cfg.GetNodeGroup(pnode.group)
10474 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10475 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
10476 self.op.disk_template)
10477 if not self.op.ignore_ipolicy and res:
10478 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10479 (pnode.group, group_info.name, utils.CommaJoin(res)))
10480 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10482 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10484 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10485 # check OS parameters (remotely)
10486 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10488 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10490 #TODO: _CheckExtParams (remotely)
10491 # Check parameters for extstorage
10493 # memory check on primary node
10494 #TODO(dynmem): use MINMEM for checking
10496 _CheckNodeFreeMemory(self, self.pnode.name,
10497 "creating instance %s" % self.op.instance_name,
10498 self.be_full[constants.BE_MAXMEM],
10499 self.op.hypervisor)
10501 self.dry_run_result = list(nodenames)
10503 def Exec(self, feedback_fn):
10504 """Create and add the instance to the cluster.
10507 instance = self.op.instance_name
10508 pnode_name = self.pnode.name
10510 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10511 self.owned_locks(locking.LEVEL_NODE)), \
10512 "Node locks differ from node resource locks"
10513 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10515 ht_kind = self.op.hypervisor
10516 if ht_kind in constants.HTS_REQ_PORT:
10517 network_port = self.cfg.AllocatePort()
10519 network_port = None
10521 # This is ugly, but we have a chicken-and-egg problem here
10522 # We can only take the group disk parameters, as the instance
10523 # has no disks yet (we are generating them right here).
10524 node = self.cfg.GetNodeInfo(pnode_name)
10525 nodegroup = self.cfg.GetNodeGroup(node.group)
10526 disks = _GenerateDiskTemplate(self,
10527 self.op.disk_template,
10528 instance, pnode_name,
10531 self.instance_file_storage_dir,
10532 self.op.file_driver,
10535 self.cfg.GetGroupDiskParams(nodegroup))
10537 iobj = objects.Instance(name=instance, os=self.op.os_type,
10538 primary_node=pnode_name,
10539 nics=self.nics, disks=disks,
10540 disk_template=self.op.disk_template,
10541 admin_state=constants.ADMINST_DOWN,
10542 network_port=network_port,
10543 beparams=self.op.beparams,
10544 hvparams=self.op.hvparams,
10545 hypervisor=self.op.hypervisor,
10546 osparams=self.op.osparams,
10550 for tag in self.op.tags:
10553 if self.adopt_disks:
10554 if self.op.disk_template == constants.DT_PLAIN:
10555 # rename LVs to the newly-generated names; we need to construct
10556 # 'fake' LV disks with the old data, plus the new unique_id
10557 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10559 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10560 rename_to.append(t_dsk.logical_id)
10561 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10562 self.cfg.SetDiskID(t_dsk, pnode_name)
10563 result = self.rpc.call_blockdev_rename(pnode_name,
10564 zip(tmp_disks, rename_to))
10565 result.Raise("Failed to rename adoped LVs")
10567 feedback_fn("* creating instance disks...")
10569 _CreateDisks(self, iobj)
10570 except errors.OpExecError:
10571 self.LogWarning("Device creation failed")
10572 self.cfg.ReleaseDRBDMinors(instance)
10575 feedback_fn("adding instance %s to cluster config" % instance)
10577 self.cfg.AddInstance(iobj, self.proc.GetECId())
10579 # Declare that we don't want to remove the instance lock anymore, as we've
10580 # added the instance to the config
10581 del self.remove_locks[locking.LEVEL_INSTANCE]
10583 if self.op.mode == constants.INSTANCE_IMPORT:
10584 # Release unused nodes
10585 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10587 # Release all nodes
10588 _ReleaseLocks(self, locking.LEVEL_NODE)
10591 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10592 feedback_fn("* wiping instance disks...")
10594 _WipeDisks(self, iobj)
10595 except errors.OpExecError, err:
10596 logging.exception("Wiping disks failed")
10597 self.LogWarning("Wiping instance disks failed (%s)", err)
10601 # Something is already wrong with the disks, don't do anything else
10603 elif self.op.wait_for_sync:
10604 disk_abort = not _WaitForSync(self, iobj)
10605 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10606 # make sure the disks are not degraded (still sync-ing is ok)
10607 feedback_fn("* checking mirrors status")
10608 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10613 _RemoveDisks(self, iobj)
10614 self.cfg.RemoveInstance(iobj.name)
10615 # Make sure the instance lock gets removed
10616 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10617 raise errors.OpExecError("There are some degraded disks for"
10620 # Release all node resource locks
10621 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10623 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10624 # we need to set the disks ID to the primary node, since the
10625 # preceding code might or might not have done it, depending on
10626 # disk template and other options
10627 for disk in iobj.disks:
10628 self.cfg.SetDiskID(disk, pnode_name)
10629 if self.op.mode == constants.INSTANCE_CREATE:
10630 if not self.op.no_install:
10631 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10632 not self.op.wait_for_sync)
10634 feedback_fn("* pausing disk sync to install instance OS")
10635 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10638 for idx, success in enumerate(result.payload):
10640 logging.warn("pause-sync of instance %s for disk %d failed",
10643 feedback_fn("* running the instance OS create scripts...")
10644 # FIXME: pass debug option from opcode to backend
10646 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10647 self.op.debug_level)
10649 feedback_fn("* resuming disk sync")
10650 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10653 for idx, success in enumerate(result.payload):
10655 logging.warn("resume-sync of instance %s for disk %d failed",
10658 os_add_result.Raise("Could not add os for instance %s"
10659 " on node %s" % (instance, pnode_name))
10662 if self.op.mode == constants.INSTANCE_IMPORT:
10663 feedback_fn("* running the instance OS import scripts...")
10667 for idx, image in enumerate(self.src_images):
10671 # FIXME: pass debug option from opcode to backend
10672 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10673 constants.IEIO_FILE, (image, ),
10674 constants.IEIO_SCRIPT,
10675 (iobj.disks[idx], idx),
10677 transfers.append(dt)
10680 masterd.instance.TransferInstanceData(self, feedback_fn,
10681 self.op.src_node, pnode_name,
10682 self.pnode.secondary_ip,
10684 if not compat.all(import_result):
10685 self.LogWarning("Some disks for instance %s on node %s were not"
10686 " imported successfully" % (instance, pnode_name))
10688 rename_from = self._old_instance_name
10690 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10691 feedback_fn("* preparing remote import...")
10692 # The source cluster will stop the instance before attempting to make
10693 # a connection. In some cases stopping an instance can take a long
10694 # time, hence the shutdown timeout is added to the connection
10696 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10697 self.op.source_shutdown_timeout)
10698 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10700 assert iobj.primary_node == self.pnode.name
10702 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10703 self.source_x509_ca,
10704 self._cds, timeouts)
10705 if not compat.all(disk_results):
10706 # TODO: Should the instance still be started, even if some disks
10707 # failed to import (valid for local imports, too)?
10708 self.LogWarning("Some disks for instance %s on node %s were not"
10709 " imported successfully" % (instance, pnode_name))
10711 rename_from = self.source_instance_name
10714 # also checked in the prereq part
10715 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10718 # Run rename script on newly imported instance
10719 assert iobj.name == instance
10720 feedback_fn("Running rename script for %s" % instance)
10721 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10723 self.op.debug_level)
10724 if result.fail_msg:
10725 self.LogWarning("Failed to run rename script for %s on node"
10726 " %s: %s" % (instance, pnode_name, result.fail_msg))
10728 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10731 iobj.admin_state = constants.ADMINST_UP
10732 self.cfg.Update(iobj, feedback_fn)
10733 logging.info("Starting instance %s on node %s", instance, pnode_name)
10734 feedback_fn("* starting instance...")
10735 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10736 False, self.op.reason)
10737 result.Raise("Could not start instance")
10739 return list(iobj.all_nodes)
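# Editor's note: illustrative sketch only; not part of the original module.
# LUInstanceCreate.Exec above pauses disk synchronisation while the OS create
# scripts run (when the instance uses an internally mirrored disk template and
# the caller did not ask to wait for sync), and resumes it afterwards.  A
# condensed view of that decision, with hypothetical callables:
def _ExampleInstallWithPausedSync(disk_template, wait_for_sync,
                                  pause_fn, install_fn, resume_fn):
  """Runs install_fn, pausing/resuming disk sync around it when sensible."""
  mirrored_templates = frozenset(["drbd"])   # stand-in for DTS_INT_MIRROR
  pause_sync = disk_template in mirrored_templates and not wait_for_sync
  if pause_sync:
    pause_fn()
  install_fn()
  if pause_sync:
    resume_fn()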
10742 class LUInstanceMultiAlloc(NoHooksLU):
10743 """Allocates multiple instances at the same time.
10748 def CheckArguments(self):
10749 """Check arguments.
10753 for inst in self.op.instances:
10754 if inst.iallocator is not None:
10755 raise errors.OpPrereqError("iallocator are not allowed to be set on"
10756 " instance objects", errors.ECODE_INVAL)
10757 nodes.append(bool(inst.pnode))
10758 if inst.disk_template in constants.DTS_INT_MIRROR:
10759 nodes.append(bool(inst.snode))
10761 has_nodes = compat.any(nodes)
10762 if compat.all(nodes) ^ has_nodes:
10763 raise errors.OpPrereqError("There are instance objects providing"
10764 " pnode/snode while others do not",
10765 errors.ECODE_INVAL)
10767 if self.op.iallocator is None:
10768 default_iallocator = self.cfg.GetDefaultIAllocator()
10769 if default_iallocator and has_nodes:
10770 self.op.iallocator = default_iallocator
10772 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10773 " given and no cluster-wide default"
10774 " iallocator found; please specify either"
10775 " an iallocator or nodes on the instances"
10776 " or set a cluster-wide default iallocator",
10777 errors.ECODE_INVAL)
10779 _CheckOpportunisticLocking(self.op)
10781 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10783 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10784 utils.CommaJoin(dups), errors.ECODE_INVAL)
10786 def ExpandNames(self):
10787 """Calculate the locks.
10790 self.share_locks = _ShareAll()
10791 self.needed_locks = {
10792 # iallocator will select nodes and even if no iallocator is used,
10793 # collisions with LUInstanceCreate should be avoided
10794 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
10797 if self.op.iallocator:
10798 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10799 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10801 if self.op.opportunistic_locking:
10802 self.opportunistic_locks[locking.LEVEL_NODE] = True
10803 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10806 for inst in self.op.instances:
10807 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10808 nodeslist.append(inst.pnode)
10809 if inst.snode is not None:
10810 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10811 nodeslist.append(inst.snode)
10813 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10814 # Lock resources of instance's primary and secondary nodes (copy to
10815 # prevent accidental modification)
10816 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10818 def CheckPrereq(self):
10819 """Check prerequisite.
10822 cluster = self.cfg.GetClusterInfo()
10823 default_vg = self.cfg.GetVGName()
10824 ec_id = self.proc.GetECId()
10826 if self.op.opportunistic_locking:
10827 # Only consider nodes for which a lock is held
10828 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10830 node_whitelist = None
10832 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10833 _ComputeNics(op, cluster, None,
10835 _ComputeFullBeParams(op, cluster),
10837 for op in self.op.instances]
10839 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10840 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10842 ial.Run(self.op.iallocator)
10844 if not ial.success:
10845 raise errors.OpPrereqError("Can't compute nodes using"
10846 " iallocator '%s': %s" %
10847 (self.op.iallocator, ial.info),
10848 errors.ECODE_NORES)
10850 self.ia_result = ial.result
10852 if self.op.dry_run:
10853 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10854 constants.JOB_IDS_KEY: [],
10857 def _ConstructPartialResult(self):
10858 """Contructs the partial result.
10861 (allocatable, failed) = self.ia_result
10863 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10864 map(compat.fst, allocatable),
10865 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10868 def Exec(self, feedback_fn):
10869 """Executes the opcode.
10872 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10873 (allocatable, failed) = self.ia_result
10876 for (name, nodes) in allocatable:
10877 op = op2inst.pop(name)
10880 (op.pnode, op.snode) = nodes
10882 (op.pnode,) = nodes
10886 missing = set(op2inst.keys()) - set(failed)
10887 assert not missing, \
10888 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10890 return ResultWithJobs(jobs, **self._ConstructPartialResult())
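# Editor's note: illustrative sketch only; not part of the original module.
# The iallocator result consumed by LUInstanceMultiAlloc above is a pair
# (allocatable, failed): allocatable is a list of (instance_name, nodes)
# tuples and failed is a list of instance names.  The partial result exposed
# to the caller keeps only the names, roughly as sketched below (key names
# here are simplified stand-ins for the opcode constants).
def _ExamplePartialResult(ia_result):
  """Builds an {allocatable, failed} summary from a multi-alloc result pair."""
  allocatable, failed = ia_result
  return {
    "allocatable": [name for (name, _nodes) in allocatable],
    "failed": list(failed),
    }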
10893 def _CheckRADOSFreeSpace():
10894 """Compute disk size requirements inside the RADOS cluster.
10897 # For the RADOS cluster we assume there is always enough space.
10901 class LUInstanceConsole(NoHooksLU):
10902 """Connect to an instance's console.
10904 This is somewhat special in that it returns the command line that
10905 you need to run on the master node in order to connect to the console.
10911 def ExpandNames(self):
10912 self.share_locks = _ShareAll()
10913 self._ExpandAndLockInstance()
10915 def CheckPrereq(self):
10916 """Check prerequisites.
10918 This checks that the instance is in the cluster.
10921 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10922 assert self.instance is not None, \
10923 "Cannot retrieve locked instance %s" % self.op.instance_name
10924 _CheckNodeOnline(self, self.instance.primary_node)
10926 def Exec(self, feedback_fn):
10927 """Connect to the console of an instance
10930 instance = self.instance
10931 node = instance.primary_node
10933 node_insts = self.rpc.call_instance_list([node],
10934 [instance.hypervisor])[node]
10935 node_insts.Raise("Can't get node information from %s" % node)
10937 if instance.name not in node_insts.payload:
10938 if instance.admin_state == constants.ADMINST_UP:
10939 state = constants.INSTST_ERRORDOWN
10940 elif instance.admin_state == constants.ADMINST_DOWN:
10941 state = constants.INSTST_ADMINDOWN
10943 state = constants.INSTST_ADMINOFFLINE
10944 raise errors.OpExecError("Instance %s is not running (state %s)" %
10945 (instance.name, state))
10947 logging.debug("Connecting to console of %s on %s", instance.name, node)
10949 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10952 def _GetInstanceConsole(cluster, instance):
10953 """Returns console information for an instance.
10955 @type cluster: L{objects.Cluster}
10956 @type instance: L{objects.Instance}
10960 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
10961 # beparams and hvparams are passed separately, to avoid editing the
10962 # instance and then saving the defaults in the instance itself.
10963 hvparams = cluster.FillHV(instance)
10964 beparams = cluster.FillBE(instance)
10965 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10967 assert console.instance == instance.name
10968 assert console.Validate()
10970 return console.ToDict()
10973 class LUInstanceReplaceDisks(LogicalUnit):
10974 """Replace the disks of an instance.
10977 HPATH = "mirrors-replace"
10978 HTYPE = constants.HTYPE_INSTANCE
10981 def CheckArguments(self):
10982 """Check arguments.
10985 remote_node = self.op.remote_node
10986 ialloc = self.op.iallocator
10987 if self.op.mode == constants.REPLACE_DISK_CHG:
10988 if remote_node is None and ialloc is None:
10989 raise errors.OpPrereqError("When changing the secondary either an"
10990 " iallocator script must be used or the"
10991 " new node given", errors.ECODE_INVAL)
10993 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10995 elif remote_node is not None or ialloc is not None:
10996 # Not replacing the secondary
10997 raise errors.OpPrereqError("The iallocator and new node options can"
10998 " only be used when changing the"
10999 " secondary node", errors.ECODE_INVAL)
11001 def ExpandNames(self):
11002 self._ExpandAndLockInstance()
11004 assert locking.LEVEL_NODE not in self.needed_locks
11005 assert locking.LEVEL_NODE_RES not in self.needed_locks
11006 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11008 assert self.op.iallocator is None or self.op.remote_node is None, \
11009 "Conflicting options"
11011 if self.op.remote_node is not None:
11012 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11014 # Warning: do not remove the locking of the new secondary here
11015 # unless DRBD8.AddChildren is changed to work in parallel;
11016 # currently it doesn't since parallel invocations of
11017 # FindUnusedMinor will conflict
11018 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11019 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11021 self.needed_locks[locking.LEVEL_NODE] = []
11022 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11024 if self.op.iallocator is not None:
11025 # iallocator will select a new node in the same group
11026 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11027 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11029 self.needed_locks[locking.LEVEL_NODE_RES] = []
11031 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11032 self.op.iallocator, self.op.remote_node,
11033 self.op.disks, self.op.early_release,
11034 self.op.ignore_ipolicy)
11036 self.tasklets = [self.replacer]
11038 def DeclareLocks(self, level):
11039 if level == locking.LEVEL_NODEGROUP:
11040 assert self.op.remote_node is None
11041 assert self.op.iallocator is not None
11042 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11044 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11045 # Lock all groups used by instance optimistically; this requires going
11046 # via the node before it's locked, requiring verification later on
11047 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11048 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11050 elif level == locking.LEVEL_NODE:
11051 if self.op.iallocator is not None:
11052 assert self.op.remote_node is None
11053 assert not self.needed_locks[locking.LEVEL_NODE]
11054 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11056 # Lock member nodes of all locked groups
11057 self.needed_locks[locking.LEVEL_NODE] = \
11059 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11060 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11062 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11064 self._LockInstancesNodes()
11066 elif level == locking.LEVEL_NODE_RES:
11068 self.needed_locks[locking.LEVEL_NODE_RES] = \
11069 self.needed_locks[locking.LEVEL_NODE]
11071 def BuildHooksEnv(self):
11072 """Build hooks env.
11074 This runs on the master, the primary and all the secondaries.
11077 instance = self.replacer.instance
11079 "MODE": self.op.mode,
11080 "NEW_SECONDARY": self.op.remote_node,
11081 "OLD_SECONDARY": instance.secondary_nodes[0],
11083 env.update(_BuildInstanceHookEnvByObject(self, instance))
11086 def BuildHooksNodes(self):
11087 """Build hooks nodes.
11090 instance = self.replacer.instance
11092 self.cfg.GetMasterNode(),
11093 instance.primary_node,
11095 if self.op.remote_node is not None:
11096 nl.append(self.op.remote_node)
11099 def CheckPrereq(self):
11100 """Check prerequisites.
11103 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11104 self.op.iallocator is None)
11106 # Verify if node group locks are still correct
11107 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11109 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11111 return LogicalUnit.CheckPrereq(self)
11114 class TLReplaceDisks(Tasklet):
11115 """Replaces disks for an instance.
11117 Note: Locking is not within the scope of this class.
11120 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11121 disks, early_release, ignore_ipolicy):
11122 """Initializes this class.
11125 Tasklet.__init__(self, lu)
11128 self.instance_name = instance_name
11130 self.iallocator_name = iallocator_name
11131 self.remote_node = remote_node
11133 self.early_release = early_release
11134 self.ignore_ipolicy = ignore_ipolicy
11137 self.instance = None
11138 self.new_node = None
11139 self.target_node = None
11140 self.other_node = None
11141 self.remote_node_info = None
11142 self.node_secondary_ip = None
11145 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11146 """Compute a new secondary node using an IAllocator.
11149 req = iallocator.IAReqRelocate(name=instance_name,
11150 relocate_from=list(relocate_from))
11151 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11153 ial.Run(iallocator_name)
11155 if not ial.success:
11156 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11157 " %s" % (iallocator_name, ial.info),
11158 errors.ECODE_NORES)
11160 remote_node_name = ial.result[0]
11162 lu.LogInfo("Selected new secondary for instance '%s': %s",
11163 instance_name, remote_node_name)
11165 return remote_node_name
11167 def _FindFaultyDisks(self, node_name):
11168 """Wrapper for L{_FindFaultyInstanceDisks}.
11171 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11174 def _CheckDisksActivated(self, instance):
11175 """Checks if the instance disks are activated.
11177 @param instance: The instance to check disks
11178 @return: True if they are activated, False otherwise
11181 nodes = instance.all_nodes
11183 for idx, dev in enumerate(instance.disks):
11185 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11186 self.cfg.SetDiskID(dev, node)
11188 result = _BlockdevFind(self, node, dev, instance)
11192 elif result.fail_msg or not result.payload:
11197 def CheckPrereq(self):
11198 """Check prerequisites.
11200 This checks that the instance is in the cluster.
11203 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11204 assert instance is not None, \
11205 "Cannot retrieve locked instance %s" % self.instance_name
11207 if instance.disk_template != constants.DT_DRBD8:
11208 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11209 " instances", errors.ECODE_INVAL)
11211 if len(instance.secondary_nodes) != 1:
11212 raise errors.OpPrereqError("The instance has a strange layout,"
11213 " expected one secondary but found %d" %
11214 len(instance.secondary_nodes),
11215 errors.ECODE_FAULT)
11217 instance = self.instance
11218 secondary_node = instance.secondary_nodes[0]
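# The layout check above guarantees exactly one secondary node for the
# DRBD 8 instance; the replace modes below rely on that.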
11220 if self.iallocator_name is None:
11221 remote_node = self.remote_node
11223 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11224 instance.name, instance.secondary_nodes)
11226 if remote_node is None:
11227 self.remote_node_info = None
11229 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11230 "Remote node '%s' is not locked" % remote_node
11232 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11233 assert self.remote_node_info is not None, \
11234 "Cannot retrieve locked node %s" % remote_node
11236 if remote_node == self.instance.primary_node:
11237 raise errors.OpPrereqError("The specified node is the primary node of"
11238 " the instance", errors.ECODE_INVAL)
11240 if remote_node == secondary_node:
11241 raise errors.OpPrereqError("The specified node is already the"
11242 " secondary node of the instance",
11243 errors.ECODE_INVAL)
11245 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11246 constants.REPLACE_DISK_CHG):
11247 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11248 errors.ECODE_INVAL)
11250 if self.mode == constants.REPLACE_DISK_AUTO:
11251 if not self._CheckDisksActivated(instance):
11252 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11253 " first" % self.instance_name,
11254 errors.ECODE_STATE)
11255 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11256 faulty_secondary = self._FindFaultyDisks(secondary_node)
11258 if faulty_primary and faulty_secondary:
11259 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11260 " one node and can not be repaired"
11261 " automatically" % self.instance_name,
11262 errors.ECODE_STATE)
11265 self.disks = faulty_primary
11266 self.target_node = instance.primary_node
11267 self.other_node = secondary_node
11268 check_nodes = [self.target_node, self.other_node]
11269 elif faulty_secondary:
11270 self.disks = faulty_secondary
11271 self.target_node = secondary_node
11272 self.other_node = instance.primary_node
11273 check_nodes = [self.target_node, self.other_node]
11279 # Non-automatic modes
11280 if self.mode == constants.REPLACE_DISK_PRI:
11281 self.target_node = instance.primary_node
11282 self.other_node = secondary_node
11283 check_nodes = [self.target_node, self.other_node]
11285 elif self.mode == constants.REPLACE_DISK_SEC:
11286 self.target_node = secondary_node
11287 self.other_node = instance.primary_node
11288 check_nodes = [self.target_node, self.other_node]
11290 elif self.mode == constants.REPLACE_DISK_CHG:
11291 self.new_node = remote_node
11292 self.other_node = instance.primary_node
11293 self.target_node = secondary_node
11294 check_nodes = [self.new_node, self.other_node]
11296 _CheckNodeNotDrained(self.lu, remote_node)
11297 _CheckNodeVmCapable(self.lu, remote_node)
11299 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11300 assert old_node_info is not None
11301 if old_node_info.offline and not self.early_release:
11302 # doesn't make sense to delay the release
11303 self.early_release = True
11304 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11305 " early-release mode", secondary_node)
11308 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11311 # If not specified all disks should be replaced
11313 self.disks = range(len(self.instance.disks))
11315 # TODO: This is ugly, but right now we can't distinguish between an
11316 # internally submitted opcode and an external one. We should fix that.
11317 if self.remote_node_info:
11318 # We change the node; let's verify it still meets the instance policy
11319 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11320 cluster = self.cfg.GetClusterInfo()
11321 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11323 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11324 self.cfg, ignore=self.ignore_ipolicy)
11326 for node in check_nodes:
11327 _CheckNodeOnline(self.lu, node)
11329 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11332 if node_name is not None)
11334 # Release unneeded node and node resource locks
11335 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11336 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11337 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11339 # Release any owned node group
11340 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11342 # Check whether disks are valid
11343 for disk_idx in self.disks:
11344 instance.FindDisk(disk_idx)
11346 # Get secondary node IP addresses
11347 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11348 in self.cfg.GetMultiNodeInfo(touched_nodes))
11350 def Exec(self, feedback_fn):
11351 """Execute disk replacement.
11353 This dispatches the disk replacement to the appropriate handler.
11357 # Verify owned locks before starting operation
11358 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11359 assert set(owned_nodes) == set(self.node_secondary_ip), \
11360 ("Incorrect node locks, owning %s, expected %s" %
11361 (owned_nodes, self.node_secondary_ip.keys()))
11362 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11363 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11364 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11366 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11367 assert list(owned_instances) == [self.instance_name], \
11368 "Instance '%s' not locked" % self.instance_name
11370 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11371 "Should not own any node group lock at this point"
11374 feedback_fn("No disks need replacement for instance '%s'" %
11375 self.instance.name)
11378 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11379 (utils.CommaJoin(self.disks), self.instance.name))
11380 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11381 feedback_fn("Current seconary node: %s" %
11382 utils.CommaJoin(self.instance.secondary_nodes))
11384 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11386 # Activate the instance disks if we're replacing them on a down instance
11388 _StartInstanceDisks(self.lu, self.instance, True)
11391 # Should we replace the secondary node?
11392 if self.new_node is not None:
11393 fn = self._ExecDrbd8Secondary
11395 fn = self._ExecDrbd8DiskOnly
11397 result = fn(feedback_fn)
11399 # Deactivate the instance disks if we're replacing them on a
11402 _SafeShutdownInstanceDisks(self.lu, self.instance)
11404 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11407 # Verify owned locks
11408 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11409 nodes = frozenset(self.node_secondary_ip)
11410 assert ((self.early_release and not owned_nodes) or
11411 (not self.early_release and not (set(owned_nodes) - nodes))), \
11412 ("Not owning the correct locks, early_release=%s, owned=%r,"
11413 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11417 def _CheckVolumeGroup(self, nodes):
11418 self.lu.LogInfo("Checking volume groups")
11420 vgname = self.cfg.GetVGName()
11422 # Make sure volume group exists on all involved nodes
11423 results = self.rpc.call_vg_list(nodes)
11425 raise errors.OpExecError("Can't list volume groups on the nodes")
11428 res = results[node]
11429 res.Raise("Error checking node %s" % node)
11430 if vgname not in res.payload:
11431 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11434 def _CheckDisksExistence(self, nodes):
11435 # Check disk existence
11436 for idx, dev in enumerate(self.instance.disks):
11437 if idx not in self.disks:
11441 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11442 self.cfg.SetDiskID(dev, node)
11444 result = _BlockdevFind(self, node, dev, self.instance)
11446 msg = result.fail_msg
11447 if msg or not result.payload:
11449 msg = "disk not found"
11450 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11453 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11454 for idx, dev in enumerate(self.instance.disks):
11455 if idx not in self.disks:
11458 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11461 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11462 on_primary, ldisk=ldisk):
11463 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11464 " replace disks for instance %s" %
11465 (node_name, self.instance.name))
11467 def _CreateNewStorage(self, node_name):
11468 """Create new storage on the primary or secondary node.
11470 This is only used for same-node replaces, not for changing the
11471 secondary node, hence we don't want to modify the existing disk.
11476 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11477 for idx, dev in enumerate(disks):
11478 if idx not in self.disks:
11481 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11483 self.cfg.SetDiskID(dev, node_name)
11485 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11486 names = _GenerateUniqueNames(self.lu, lv_names)
11488 (data_disk, meta_disk) = dev.children
11489 vg_data = data_disk.logical_id[0]
11490 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11491 logical_id=(vg_data, names[0]),
11492 params=data_disk.params)
11493 vg_meta = meta_disk.logical_id[0]
11494 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11495 size=constants.DRBD_META_SIZE,
11496 logical_id=(vg_meta, names[1]),
11497 params=meta_disk.params)
11499 new_lvs = [lv_data, lv_meta]
11500 old_lvs = [child.Copy() for child in dev.children]
11501 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11502 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11504 # we pass force_create=True to force the LVM creation
11505 for new_lv in new_lvs:
11506 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11507 _GetInstanceInfoText(self.instance), False,
11512 def _CheckDevices(self, node_name, iv_names):
11513 for name, (dev, _, _) in iv_names.iteritems():
11514 self.cfg.SetDiskID(dev, node_name)
11516 result = _BlockdevFind(self, node_name, dev, self.instance)
11518 msg = result.fail_msg
11519 if msg or not result.payload:
11521 msg = "disk not found"
11522 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11525 if result.payload.is_degraded:
11526 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11528 def _RemoveOldStorage(self, node_name, iv_names):
11529 for name, (_, old_lvs, _) in iv_names.iteritems():
11530 self.lu.LogInfo("Remove logical volumes for %s", name)
11533 self.cfg.SetDiskID(lv, node_name)
11535 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11537 self.lu.LogWarning("Can't remove old LV: %s", msg,
11538 hint="remove unused LVs manually")
11540 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11541 """Replace a disk on the primary or secondary for DRBD 8.
11543 The algorithm for replace is quite complicated:
11545 1. for each disk to be replaced:
11547 1. create new LVs on the target node with unique names
11548 1. detach old LVs from the drbd device
11549 1. rename old LVs to name_replaced.<time_t>
11550 1. rename new LVs to old LVs
11551 1. attach the new LVs (with the old names now) to the drbd device
11553 1. wait for sync across all devices
11555 1. for each modified disk:
11557 1. remove old LVs (which have the name name_replaces.<time_t>)
11559 Failures are not very well handled.
11564 # Step: check device activation
11565 self.lu.LogStep(1, steps_total, "Check device existence")
11566 self._CheckDisksExistence([self.other_node, self.target_node])
11567 self._CheckVolumeGroup([self.target_node, self.other_node])
11569 # Step: check other node consistency
11570 self.lu.LogStep(2, steps_total, "Check peer consistency")
11571 self._CheckDisksConsistency(self.other_node,
11572 self.other_node == self.instance.primary_node,
11575 # Step: create new storage
11576 self.lu.LogStep(3, steps_total, "Allocate new storage")
11577 iv_names = self._CreateNewStorage(self.target_node)
11579 # Step: for each lv, detach+rename*2+attach
11580 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11581 for dev, old_lvs, new_lvs in iv_names.itervalues():
11582 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11584 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11586 result.Raise("Can't detach drbd from local storage on node"
11587 " %s for device %s" % (self.target_node, dev.iv_name))
11589 #cfg.Update(instance)
11591 # ok, we created the new LVs, so now we know we have the needed
11592 # storage; as such, we proceed on the target node to rename
11593 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11594 # using the assumption that logical_id == physical_id (which in
11595 # turn is the unique_id on that node)
11597 # FIXME(iustin): use a better name for the replaced LVs
11598 temp_suffix = int(time.time())
11599 ren_fn = lambda d, suff: (d.physical_id[0],
11600 d.physical_id[1] + "_replaced-%s" % suff)
11602 # Build the rename list based on what LVs exist on the node
11603 rename_old_to_new = []
11604 for to_ren in old_lvs:
11605 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11606 if not result.fail_msg and result.payload:
11608 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11610 self.lu.LogInfo("Renaming the old LVs on the target node")
11611 result = self.rpc.call_blockdev_rename(self.target_node,
11613 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11615 # Now we rename the new LVs to the old LVs
11616 self.lu.LogInfo("Renaming the new LVs on the target node")
11617 rename_new_to_old = [(new, old.physical_id)
11618 for old, new in zip(old_lvs, new_lvs)]
11619 result = self.rpc.call_blockdev_rename(self.target_node,
11621 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11623 # Intermediate steps of in memory modifications
11624 for old, new in zip(old_lvs, new_lvs):
11625 new.logical_id = old.logical_id
11626 self.cfg.SetDiskID(new, self.target_node)
11628 # We need to modify old_lvs so that removal later removes the
11629 # right LVs, not the newly added ones; note that old_lvs is a copy here
11631 for disk in old_lvs:
11632 disk.logical_id = ren_fn(disk, temp_suffix)
11633 self.cfg.SetDiskID(disk, self.target_node)
11635 # Now that the new lvs have the old name, we can add them to the device
11636 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11637 result = self.rpc.call_blockdev_addchildren(self.target_node,
11638 (dev, self.instance), new_lvs)
11639 msg = result.fail_msg
11641 for new_lv in new_lvs:
11642 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11645 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11646 hint=("cleanup manually the unused logical"
11648 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11650 cstep = itertools.count(5)
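# Steps 1-4 are fixed; the numbering of the remaining steps depends on
# whether the old storage is removed before (early release) or after the
# sync, so the step numbers are generated from this counter.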
11652 if self.early_release:
11653 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11654 self._RemoveOldStorage(self.target_node, iv_names)
11655 # TODO: Check if releasing locks early still makes sense
11656 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11658 # Release all resource locks except those used by the instance
11659 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11660 keep=self.node_secondary_ip.keys())
11662 # Release all node locks while waiting for sync
11663 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11665 # TODO: Can the instance lock be downgraded here? Take the optional disk
11666 # shutdown in the caller into consideration.
11669 # This can fail as the old devices are degraded and _WaitForSync
11670 # does a combined result over all disks, so we don't check its return value
11671 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11672 _WaitForSync(self.lu, self.instance)
11674 # Check all devices manually
11675 self._CheckDevices(self.instance.primary_node, iv_names)
11677 # Step: remove old storage
11678 if not self.early_release:
11679 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11680 self._RemoveOldStorage(self.target_node, iv_names)
11682 def _ExecDrbd8Secondary(self, feedback_fn):
11683 """Replace the secondary node for DRBD 8.
11685 The algorithm for replace is quite complicated:
11686 - for all disks of the instance:
11687 - create new LVs on the new node with same names
11688 - shutdown the drbd device on the old secondary
11689 - disconnect the drbd network on the primary
11690 - create the drbd device on the new secondary
11691 - network attach the drbd on the primary, using an artifice:
11692 the drbd code for Attach() will connect to the network if it
11693 finds a device which is connected to the good local disks but
11694 not network enabled
11695 - wait for sync across all devices
11696 - remove all disks from the old secondary
11698 Failures are not very well handled.
11703 pnode = self.instance.primary_node
11705 # Step: check device activation
11706 self.lu.LogStep(1, steps_total, "Check device existence")
11707 self._CheckDisksExistence([self.instance.primary_node])
11708 self._CheckVolumeGroup([self.instance.primary_node])
11710 # Step: check other node consistency
11711 self.lu.LogStep(2, steps_total, "Check peer consistency")
11712 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11714 # Step: create new storage
11715 self.lu.LogStep(3, steps_total, "Allocate new storage")
11716 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11717 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
11718 for idx, dev in enumerate(disks):
11719 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11720 (self.new_node, idx))
11721 # we pass force_create=True to force LVM creation
11722 for new_lv in dev.children:
11723 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11724 True, _GetInstanceInfoText(self.instance), False,
11727 # Step 4: drbd minors and drbd setup changes
11728 # after this, we must manually remove the drbd minors on both the
11729 # error and the success paths
11730 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11731 minors = self.cfg.AllocateDRBDMinor([self.new_node
11732 for dev in self.instance.disks],
11733 self.instance.name)
11734 logging.debug("Allocated minors %r", minors)
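# One minor was requested per instance disk on the new node, so the minors
# can simply be paired with the disks by position below.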
11737 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11738 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11739 (self.new_node, idx))
11740 # create new devices on new_node; note that we create two IDs:
11741 # one without port, so the drbd will be activated without
11742 # networking information on the new node at this stage, and one
11743 # with network, for the latter activation in step 4
11744 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11745 if self.instance.primary_node == o_node1:
11748 assert self.instance.primary_node == o_node2, "Three-node instance?"
11751 new_alone_id = (self.instance.primary_node, self.new_node, None,
11752 p_minor, new_minor, o_secret)
11753 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11754 p_minor, new_minor, o_secret)
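# Remember the original device, its children (the local LVs) and the
# network-enabled ID; the latter becomes the device's logical_id once the
# configuration is switched over to the new secondary.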
11756 iv_names[idx] = (dev, dev.children, new_net_id)
11757 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11759 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11760 logical_id=new_alone_id,
11761 children=dev.children,
11764 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11767 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11769 _GetInstanceInfoText(self.instance), False,
11771 except errors.GenericError:
11772 self.cfg.ReleaseDRBDMinors(self.instance.name)
11775 # We have new devices, shutdown the drbd on the old secondary
11776 for idx, dev in enumerate(self.instance.disks):
11777 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11778 self.cfg.SetDiskID(dev, self.target_node)
11779 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11780 (dev, self.instance)).fail_msg
11782 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11783 "node: %s" % (idx, msg),
11784 hint=("Please cleanup this device manually as"
11785 " soon as possible"))
11787 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11788 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11789 self.instance.disks)[pnode]
11791 msg = result.fail_msg
11793 # detaches didn't succeed (unlikely)
11794 self.cfg.ReleaseDRBDMinors(self.instance.name)
11795 raise errors.OpExecError("Can't detach the disks from the network on"
11796 " old node: %s" % (msg,))
11798 # if we managed to detach at least one, we update all the disks of
11799 # the instance to point to the new secondary
11800 self.lu.LogInfo("Updating instance configuration")
11801 for dev, _, new_logical_id in iv_names.itervalues():
11802 dev.logical_id = new_logical_id
11803 self.cfg.SetDiskID(dev, self.instance.primary_node)
11805 self.cfg.Update(self.instance, feedback_fn)
11807 # Release all node locks (the configuration has been updated)
11808 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11810 # and now perform the drbd attach
11811 self.lu.LogInfo("Attaching primary drbds to new secondary"
11812 " (standalone => connected)")
11813 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11815 self.node_secondary_ip,
11816 (self.instance.disks, self.instance),
11817 self.instance.name,
11819 for to_node, to_result in result.items():
11820 msg = to_result.fail_msg
11822 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11824 hint=("please do a gnt-instance info to see the"
11825 " status of disks"))
11827 cstep = itertools.count(5)
11829 if self.early_release:
11830 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11831 self._RemoveOldStorage(self.target_node, iv_names)
11832 # TODO: Check if releasing locks early still makes sense
11833 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11835 # Release all resource locks except those used by the instance
11836 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11837 keep=self.node_secondary_ip.keys())
11839 # TODO: Can the instance lock be downgraded here? Take the optional disk
11840 # shutdown in the caller into consideration.
11843 # This can fail as the old devices are degraded and _WaitForSync
11844 # does a combined result over all disks, so we don't check its return value
11845 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11846 _WaitForSync(self.lu, self.instance)
11848 # Check all devices manually
11849 self._CheckDevices(self.instance.primary_node, iv_names)
11851 # Step: remove old storage
11852 if not self.early_release:
11853 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11854 self._RemoveOldStorage(self.target_node, iv_names)
11857 class LURepairNodeStorage(NoHooksLU):
11858 """Repairs the volume group on a node.
11863 def CheckArguments(self):
11864 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11866 storage_type = self.op.storage_type
11868 if (constants.SO_FIX_CONSISTENCY not in
11869 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11870 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11871 " repaired" % storage_type,
11872 errors.ECODE_INVAL)
11874 def ExpandNames(self):
11875 self.needed_locks = {
11876 locking.LEVEL_NODE: [self.op.node_name],
11879 def _CheckFaultyDisks(self, instance, node_name):
11880 """Ensure faulty disks abort the opcode or at least warn."""
11882 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11884 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11885 " node '%s'" % (instance.name, node_name),
11886 errors.ECODE_STATE)
11887 except errors.OpPrereqError, err:
11888 if self.op.ignore_consistency:
11889 self.LogWarning(str(err.args[0]))
11893 def CheckPrereq(self):
11894 """Check prerequisites.
11897 # Check whether any instance on this node has faulty disks
11898 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11899 if inst.admin_state != constants.ADMINST_UP:
11901 check_nodes = set(inst.all_nodes)
11902 check_nodes.discard(self.op.node_name)
11903 for inst_node_name in check_nodes:
11904 self._CheckFaultyDisks(inst, inst_node_name)
11906 def Exec(self, feedback_fn):
11907 feedback_fn("Repairing storage unit '%s' on %s ..." %
11908 (self.op.name, self.op.node_name))
11910 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11911 result = self.rpc.call_storage_execute(self.op.node_name,
11912 self.op.storage_type, st_args,
11914 constants.SO_FIX_CONSISTENCY)
11915 result.Raise("Failed to repair storage unit '%s' on %s" %
11916 (self.op.name, self.op.node_name))
11919 class LUNodeEvacuate(NoHooksLU):
11920 """Evacuates instances off a list of nodes.
11925 _MODE2IALLOCATOR = {
11926 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11927 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11928 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11930 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11931 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11932 constants.IALLOCATOR_NEVAC_MODES)
11934 def CheckArguments(self):
11935 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11937 def ExpandNames(self):
11938 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11940 if self.op.remote_node is not None:
11941 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11942 assert self.op.remote_node
11944 if self.op.remote_node == self.op.node_name:
11945 raise errors.OpPrereqError("Can not use evacuated node as a new"
11946 " secondary node", errors.ECODE_INVAL)
11948 if self.op.mode != constants.NODE_EVAC_SEC:
11949 raise errors.OpPrereqError("Without the use of an iallocator only"
11950 " secondary instances can be evacuated",
11951 errors.ECODE_INVAL)
11954 self.share_locks = _ShareAll()
11955 self.needed_locks = {
11956 locking.LEVEL_INSTANCE: [],
11957 locking.LEVEL_NODEGROUP: [],
11958 locking.LEVEL_NODE: [],
11961 # Determine nodes (via group) optimistically, needs verification once locks
11962 # have been acquired
11963 self.lock_nodes = self._DetermineNodes()
11965 def _DetermineNodes(self):
11966 """Gets the list of nodes to operate on.
11969 if self.op.remote_node is None:
11970 # Iallocator will choose any node(s) in the same group
11971 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11973 group_nodes = frozenset([self.op.remote_node])
11975 # Determine nodes to be locked
11976 return set([self.op.node_name]) | group_nodes
11978 def _DetermineInstances(self):
11979 """Builds list of instances to operate on.
11982 assert self.op.mode in constants.NODE_EVAC_MODES
11984 if self.op.mode == constants.NODE_EVAC_PRI:
11985 # Primary instances only
11986 inst_fn = _GetNodePrimaryInstances
11987 assert self.op.remote_node is None, \
11988 "Evacuating primary instances requires iallocator"
11989 elif self.op.mode == constants.NODE_EVAC_SEC:
11990 # Secondary instances only
11991 inst_fn = _GetNodeSecondaryInstances
11994 assert self.op.mode == constants.NODE_EVAC_ALL
11995 inst_fn = _GetNodeInstances
11996 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11998 raise errors.OpPrereqError("Due to an issue with the iallocator"
11999 " interface it is not possible to evacuate"
12000 " all instances at once; specify explicitly"
12001 " whether to evacuate primary or secondary"
12003 errors.ECODE_INVAL)
12005 return inst_fn(self.cfg, self.op.node_name)
12007 def DeclareLocks(self, level):
12008 if level == locking.LEVEL_INSTANCE:
12009 # Lock instances optimistically, needs verification once node and group
12010 # locks have been acquired
12011 self.needed_locks[locking.LEVEL_INSTANCE] = \
12012 set(i.name for i in self._DetermineInstances())
12014 elif level == locking.LEVEL_NODEGROUP:
12015 # Lock node groups for all potential target nodes optimistically, needs
12016 # verification once nodes have been acquired
12017 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12018 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12020 elif level == locking.LEVEL_NODE:
12021 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12023 def CheckPrereq(self):
12025 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12026 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12027 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
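# Locks were computed optimistically in ExpandNames/DeclareLocks; re-derive
# the node, group and instance sets and abort if the cluster changed while
# the locks were being acquired.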
12029 need_nodes = self._DetermineNodes()
12031 if not owned_nodes.issuperset(need_nodes):
12032 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12033 " locks were acquired, current nodes are"
12034 " are '%s', used to be '%s'; retry the"
12036 (self.op.node_name,
12037 utils.CommaJoin(need_nodes),
12038 utils.CommaJoin(owned_nodes)),
12039 errors.ECODE_STATE)
12041 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12042 if owned_groups != wanted_groups:
12043 raise errors.OpExecError("Node groups changed since locks were acquired,"
12044 " current groups are '%s', used to be '%s';"
12045 " retry the operation" %
12046 (utils.CommaJoin(wanted_groups),
12047 utils.CommaJoin(owned_groups)))
12049 # Determine affected instances
12050 self.instances = self._DetermineInstances()
12051 self.instance_names = [i.name for i in self.instances]
12053 if set(self.instance_names) != owned_instances:
12054 raise errors.OpExecError("Instances on node '%s' changed since locks"
12055 " were acquired, current instances are '%s',"
12056 " used to be '%s'; retry the operation" %
12057 (self.op.node_name,
12058 utils.CommaJoin(self.instance_names),
12059 utils.CommaJoin(owned_instances)))
12061 if self.instance_names:
12062 self.LogInfo("Evacuating instances from node '%s': %s",
12064 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12066 self.LogInfo("No instances to evacuate from node '%s'",
12069 if self.op.remote_node is not None:
12070 for i in self.instances:
12071 if i.primary_node == self.op.remote_node:
12072 raise errors.OpPrereqError("Node %s is the primary node of"
12073 " instance %s, cannot use it as"
12075 (self.op.remote_node, i.name),
12076 errors.ECODE_INVAL)
12078 def Exec(self, feedback_fn):
12079 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12081 if not self.instance_names:
12082 # No instances to evacuate
12085 elif self.op.iallocator is not None:
12086 # TODO: Implement relocation to other group
12087 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12088 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12089 instances=list(self.instance_names))
12090 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12092 ial.Run(self.op.iallocator)
12094 if not ial.success:
12095 raise errors.OpPrereqError("Can't compute node evacuation using"
12096 " iallocator '%s': %s" %
12097 (self.op.iallocator, ial.info),
12098 errors.ECODE_NORES)
12100 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12102 elif self.op.remote_node is not None:
12103 assert self.op.mode == constants.NODE_EVAC_SEC
12105 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12106 remote_node=self.op.remote_node,
12108 mode=constants.REPLACE_DISK_CHG,
12109 early_release=self.op.early_release)]
12110 for instance_name in self.instance_names]
12113 raise errors.ProgrammerError("No iallocator or remote node")
12115 return ResultWithJobs(jobs)
12118 def _SetOpEarlyRelease(early_release, op):
12119 """Sets C{early_release} flag on opcodes if available.
12123 op.early_release = early_release
12124 except AttributeError:
12125 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12130 def _NodeEvacDest(use_nodes, group, nodes):
12131 """Returns group or nodes depending on caller's choice.
12135 return utils.CommaJoin(nodes)
12140 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12141 """Unpacks the result of change-group and node-evacuate iallocator requests.
12143 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12144 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12146 @type lu: L{LogicalUnit}
12147 @param lu: Logical unit instance
12148 @type alloc_result: tuple/list
12149 @param alloc_result: Result from iallocator
12150 @type early_release: bool
12151 @param early_release: Whether to release locks early if possible
12152 @type use_nodes: bool
12153 @param use_nodes: Whether to display node names instead of groups
12156 (moved, failed, jobs) = alloc_result
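# The iallocator returns a triple: instances that can be moved (with their
# target group and nodes), instances that failed (with a reason), and
# per-job lists of serialized opcodes.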
12159 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12160 for (name, reason) in failed)
12161 lu.LogWarning("Unable to evacuate instances %s", failreason)
12162 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12165 lu.LogInfo("Instances to be moved: %s",
12166 utils.CommaJoin("%s (to %s)" %
12167 (name, _NodeEvacDest(use_nodes, group, nodes))
12168 for (name, group, nodes) in moved))
12170 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12171 map(opcodes.OpCode.LoadOpCode, ops))
12175 def _DiskSizeInBytesToMebibytes(lu, size):
12176 """Converts a disk size in bytes to mebibytes.
12178 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12181 (mib, remainder) = divmod(size, 1024 * 1024)
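# divmod yields the size in whole MiB plus the leftover bytes; a non-zero
# remainder triggers the warning below and the size is rounded up to the
# next full MiB, as described in the docstring.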
12184 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12185 " to not overwrite existing data (%s bytes will not be"
12186 " wiped)", (1024 * 1024) - remainder)
12192 class LUInstanceGrowDisk(LogicalUnit):
12193 """Grow a disk of an instance.
12196 HPATH = "disk-grow"
12197 HTYPE = constants.HTYPE_INSTANCE
12200 def ExpandNames(self):
12201 self._ExpandAndLockInstance()
12202 self.needed_locks[locking.LEVEL_NODE] = []
12203 self.needed_locks[locking.LEVEL_NODE_RES] = []
12204 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12205 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12207 def DeclareLocks(self, level):
12208 if level == locking.LEVEL_NODE:
12209 self._LockInstancesNodes()
12210 elif level == locking.LEVEL_NODE_RES:
12212 self.needed_locks[locking.LEVEL_NODE_RES] = \
12213 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12215 def BuildHooksEnv(self):
12216 """Build hooks env.
12218 This runs on the master, the primary and all the secondaries.
12222 "DISK": self.op.disk,
12223 "AMOUNT": self.op.amount,
12224 "ABSOLUTE": self.op.absolute,
12226 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12229 def BuildHooksNodes(self):
12230 """Build hooks nodes.
12233 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12236 def CheckPrereq(self):
12237 """Check prerequisites.
12239 This checks that the instance is in the cluster.
12242 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12243 assert instance is not None, \
12244 "Cannot retrieve locked instance %s" % self.op.instance_name
12245 nodenames = list(instance.all_nodes)
12246 for node in nodenames:
12247 _CheckNodeOnline(self, node)
12249 self.instance = instance
12251 if instance.disk_template not in constants.DTS_GROWABLE:
12252 raise errors.OpPrereqError("Instance's disk layout does not support"
12253 " growing", errors.ECODE_INVAL)
12255 self.disk = instance.FindDisk(self.op.disk)
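# The requested amount is either an absolute target size or a relative
# increment; both are normalised into self.delta (the growth) and
# self.target (the resulting size).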
12257 if self.op.absolute:
12258 self.target = self.op.amount
12259 self.delta = self.target - self.disk.size
12261 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12262 "current disk size (%s)" %
12263 (utils.FormatUnit(self.target, "h"),
12264 utils.FormatUnit(self.disk.size, "h")),
12265 errors.ECODE_STATE)
12267 self.delta = self.op.amount
12268 self.target = self.disk.size + self.delta
12270 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12271 utils.FormatUnit(self.delta, "h"),
12272 errors.ECODE_INVAL)
12274 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12276 def _CheckDiskSpace(self, nodenames, req_vgspace):
12277 template = self.instance.disk_template
12278 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12279 # TODO: check the free disk space for file, when that feature will be
12281 nodes = map(self.cfg.GetNodeInfo, nodenames)
12282 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12285 # With exclusive storage we need to do something smarter than just looking
12286 # at free space; for now, let's simply abort the operation.
12287 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12288 " is enabled", errors.ECODE_STATE)
12289 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12291 def Exec(self, feedback_fn):
12292 """Execute disk grow.
12295 instance = self.instance
12298 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12299 assert (self.owned_locks(locking.LEVEL_NODE) ==
12300 self.owned_locks(locking.LEVEL_NODE_RES))
12302 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
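# If the cluster wipes newly allocated disk space, remember the current disk
# size (queried from the primary node below) so that only the newly grown
# area needs to be wiped after the resize.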
12304 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12306 raise errors.OpExecError("Cannot activate block device to grow")
12308 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12309 (self.op.disk, instance.name,
12310 utils.FormatUnit(self.delta, "h"),
12311 utils.FormatUnit(self.target, "h")))
12313 # First run all grow ops in dry-run mode
12314 for node in instance.all_nodes:
12315 self.cfg.SetDiskID(disk, node)
12316 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12318 result.Raise("Dry-run grow request failed to node %s" % node)
12321 # Get disk size from primary node for wiping
12322 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12323 result.Raise("Failed to retrieve disk size from node '%s'" %
12324 instance.primary_node)
12326 (disk_size_in_bytes, ) = result.payload
12328 if disk_size_in_bytes is None:
12329 raise errors.OpExecError("Failed to retrieve disk size from primary"
12330 " node '%s'" % instance.primary_node)
12332 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12334 assert old_disk_size >= disk.size, \
12335 ("Retrieved disk size too small (got %s, should be at least %s)" %
12336 (old_disk_size, disk.size))
12338 old_disk_size = None
12340 # We know that (as far as we can test) operations across different
12341 # nodes will succeed; time to run it for real on the backing storage
12342 for node in instance.all_nodes:
12343 self.cfg.SetDiskID(disk, node)
12344 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12346 result.Raise("Grow request failed to node %s" % node)
12348 # And now execute it for logical storage, on the primary node
12349 node = instance.primary_node
12350 self.cfg.SetDiskID(disk, node)
12351 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12353 result.Raise("Grow request failed to node %s" % node)
12355 disk.RecordGrow(self.delta)
12356 self.cfg.Update(instance, feedback_fn)
12358 # Changes have been recorded, release node lock
12359 _ReleaseLocks(self, locking.LEVEL_NODE)
12361 # Downgrade lock while waiting for sync
12362 self.glm.downgrade(locking.LEVEL_INSTANCE)
12364 assert wipe_disks ^ (old_disk_size is None)
12367 assert instance.disks[self.op.disk] == disk
12369 # Wipe newly added disk space
12370 _WipeDisks(self, instance,
12371 disks=[(self.op.disk, disk, old_disk_size)])
12373 if self.op.wait_for_sync:
12374 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12376 self.LogWarning("Disk syncing has not returned a good status; check"
12378 if instance.admin_state != constants.ADMINST_UP:
12379 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12380 elif instance.admin_state != constants.ADMINST_UP:
12381 self.LogWarning("Not shutting down the disk even if the instance is"
12382 " not supposed to be running because no wait for"
12383 " sync mode was requested")
12385 assert self.owned_locks(locking.LEVEL_NODE_RES)
12386 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12389 class LUInstanceQueryData(NoHooksLU):
12390 """Query runtime instance data.
12395 def ExpandNames(self):
12396 self.needed_locks = {}
12398 # Use locking if requested or when non-static information is wanted
12399 if not (self.op.static or self.op.use_locking):
12400 self.LogWarning("Non-static data requested, locks need to be acquired")
12401 self.op.use_locking = True
12403 if self.op.instances or not self.op.use_locking:
12404 # Expand instance names right here
12405 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12407 # Will use acquired locks
12408 self.wanted_names = None
12410 if self.op.use_locking:
12411 self.share_locks = _ShareAll()
12413 if self.wanted_names is None:
12414 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12416 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12418 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12419 self.needed_locks[locking.LEVEL_NODE] = []
12420 self.needed_locks[locking.LEVEL_NETWORK] = []
12421 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12423 def DeclareLocks(self, level):
12424 if self.op.use_locking:
12425 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12426 if level == locking.LEVEL_NODEGROUP:
12428 # Lock all groups used by instances optimistically; this requires going
12429 # via the node before it's locked, requiring verification later on
12430 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12431 frozenset(group_uuid
12432 for instance_name in owned_instances
12434 self.cfg.GetInstanceNodeGroups(instance_name))
12436 elif level == locking.LEVEL_NODE:
12437 self._LockInstancesNodes()
12439 elif level == locking.LEVEL_NETWORK:
12440 self.needed_locks[locking.LEVEL_NETWORK] = \
12442 for instance_name in owned_instances
12444 self.cfg.GetInstanceNetworks(instance_name))
12446 def CheckPrereq(self):
12447 """Check prerequisites.
12449 This only checks the optional instance list against the existing names.
12452 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12453 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12454 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12455 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
12457 if self.wanted_names is None:
12458 assert self.op.use_locking, "Locking was not used"
12459 self.wanted_names = owned_instances
12461 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12463 if self.op.use_locking:
12464 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12467 assert not (owned_instances or owned_groups or
12468 owned_nodes or owned_networks)
12470 self.wanted_instances = instances.values()
12472 def _ComputeBlockdevStatus(self, node, instance, dev):
12473 """Returns the status of a block device
12476 if self.op.static or not node:
12479 self.cfg.SetDiskID(dev, node)
12481 result = self.rpc.call_blockdev_find(node, dev)
12485 result.Raise("Can't compute disk status for %s" % instance.name)
12487 status = result.payload
12491 return (status.dev_path, status.major, status.minor,
12492 status.sync_percent, status.estimated_time,
12493 status.is_degraded, status.ldisk_status)
12495 def _ComputeDiskStatus(self, instance, snode, dev):
12496 """Compute block device status.
12499 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12501 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12503 def _ComputeDiskStatusInner(self, instance, snode, dev):
12504 """Compute block device status.
12506 @attention: The device has to be annotated already.
12509 if dev.dev_type in constants.LDS_DRBD:
12510 # we change the snode then (otherwise we use the one passed in)
12511 if dev.logical_id[0] == instance.primary_node:
12512 snode = dev.logical_id[1]
12514 snode = dev.logical_id[0]
12516 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12518 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12521 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12528 "iv_name": dev.iv_name,
12529 "dev_type": dev.dev_type,
12530 "logical_id": dev.logical_id,
12531 "physical_id": dev.physical_id,
12532 "pstatus": dev_pstatus,
12533 "sstatus": dev_sstatus,
12534 "children": dev_children,
12541 def Exec(self, feedback_fn):
12542 """Gather and return data"""
12545 cluster = self.cfg.GetClusterInfo()
12547 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12548 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12550 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12551 for node in nodes.values()))
12553 group2name_fn = lambda uuid: groups[uuid].name
12554 for instance in self.wanted_instances:
12555 pnode = nodes[instance.primary_node]
12557 if self.op.static or pnode.offline:
12558 remote_state = None
12560 self.LogWarning("Primary node %s is marked offline, returning static"
12561 " information only for instance %s" %
12562 (pnode.name, instance.name))
12564 remote_info = self.rpc.call_instance_info(instance.primary_node,
12566 instance.hypervisor)
12567 remote_info.Raise("Error checking node %s" % instance.primary_node)
12568 remote_info = remote_info.payload
12569 if remote_info and "state" in remote_info:
12570 remote_state = "up"
12572 if instance.admin_state == constants.ADMINST_UP:
12573 remote_state = "down"
12575 remote_state = instance.admin_state
12577 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12580 snodes_group_uuids = [nodes[snode_name].group
12581 for snode_name in instance.secondary_nodes]
12583 result[instance.name] = {
12584 "name": instance.name,
12585 "config_state": instance.admin_state,
12586 "run_state": remote_state,
12587 "pnode": instance.primary_node,
12588 "pnode_group_uuid": pnode.group,
12589 "pnode_group_name": group2name_fn(pnode.group),
12590 "snodes": instance.secondary_nodes,
12591 "snodes_group_uuids": snodes_group_uuids,
12592 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12594 # this happens to be the same format used for hooks
12595 "nics": _NICListToTuple(self, instance.nics),
12596 "disk_template": instance.disk_template,
12598 "hypervisor": instance.hypervisor,
12599 "network_port": instance.network_port,
12600 "hv_instance": instance.hvparams,
12601 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12602 "be_instance": instance.beparams,
12603 "be_actual": cluster.FillBE(instance),
12604 "os_instance": instance.osparams,
12605 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12606 "serial_no": instance.serial_no,
12607 "mtime": instance.mtime,
12608 "ctime": instance.ctime,
12609 "uuid": instance.uuid,
12615 def PrepareContainerMods(mods, private_fn):
12616 """Prepares a list of container modifications by adding a private data field.
12618 @type mods: list of tuples; (operation, index, parameters)
12619 @param mods: List of modifications
12620 @type private_fn: callable or None
12621 @param private_fn: Callable for constructing a private data field for a
12626 if private_fn is None:
12631 return [(op, idx, params, fn()) for (op, idx, params) in mods]
12634 def GetItemFromContainer(identifier, kind, container):
12635 """Return the item refered by the identifier.
12637 @type identifier: string
12638 @param identifier: Item index or name or UUID
12640 @param kind: One-word item description
12641 @type container: list
12642 @param container: Container to get the item from
12647 idx = int(identifier)
12650 absidx = len(container) - 1
12652 raise IndexError("Not accepting negative indices other than -1")
12653 elif idx > len(container):
12654 raise IndexError("Got %s index %s, but there are only %s" %
12655 (kind, idx, len(container)))
12658 return (absidx, container[idx])
12662 for idx, item in enumerate(container):
12663 if item.uuid == identifier or item.name == identifier:
12666 raise errors.OpPrereqError("Cannot find %s with identifier %s" %
12667 (kind, identifier), errors.ECODE_NOENT)
12670 #: Type description for changes as returned by L{ApplyContainerMods}'s
12672 _TApplyContModsCbChanges = \
12673 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12674 ht.TNonEmptyString,
12679 def ApplyContainerMods(kind, container, chgdesc, mods,
12680 create_fn, modify_fn, remove_fn):
12681 """Applies descriptions in C{mods} to C{container}.
12684 @param kind: One-word item description
12685 @type container: list
12686 @param container: Container to modify
12687 @type chgdesc: None or list
12688 @param chgdesc: List of applied changes
12690 @param mods: Modifications as returned by L{PrepareContainerMods}
12691 @type create_fn: callable
12692 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12693 receives absolute item index, parameters and private data object as added
12694 by L{PrepareContainerMods}, returns tuple containing new item and changes
12696 @type modify_fn: callable
12697 @param modify_fn: Callback for modifying an existing item
12698 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12699 and private data object as added by L{PrepareContainerMods}, returns
12701 @type remove_fn: callable
12702 @param remove_fn: Callback on removing item; receives absolute item index,
12703 item and private data object as added by L{PrepareContainerMods}
12706 for (op, identifier, params, private) in mods:
12709 if op == constants.DDM_ADD:
12710 # Calculate where item will be added
12711 # When adding an item, identifier can only be an index
12713 idx = int(identifier)
12715 raise errors.OpPrereqError("Only possitive integer or -1 is accepted as"
12716 " identifier for %s" % constants.DDM_ADD,
12717 errors.ECODE_INVAL)
12719 addidx = len(container)
12722 raise IndexError("Not accepting negative indices other than -1")
12723 elif idx > len(container):
12724 raise IndexError("Got %s index %s, but there are only %s" %
12725 (kind, idx, len(container)))
12728 if create_fn is None:
12731 (item, changes) = create_fn(addidx, params, private)
12734 container.append(item)
12737 assert idx <= len(container)
12738 # list.insert does so before the specified index
12739 container.insert(idx, item)
12741 # Retrieve existing item
12742 (absidx, item) = GetItemFromContainer(identifier, kind, container)
12744 if op == constants.DDM_REMOVE:
12747 if remove_fn is not None:
12748 remove_fn(absidx, item, private)
12750 changes = [("%s/%s" % (kind, absidx), "remove")]
12752 assert container[absidx] == item
12753 del container[absidx]
12754 elif op == constants.DDM_MODIFY:
12755 if modify_fn is not None:
12756 changes = modify_fn(absidx, item, params, private)
12758 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12760 assert _TApplyContModsCbChanges(changes)
12762 if not (chgdesc is None or changes is None):
12763 chgdesc.extend(changes)
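# Illustrative sketch (hypothetical callbacks, values made up): applying a
# prepared modification list to a plain list while collecting the reported
# change descriptions.
#
#   def _create(idx, params, private):
#     return (params["value"], [("item/%d" % idx, "add")])
#
#   def _modify(idx, item, params, private):
#     return [("item/%d" % idx, params["value"])]
#
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, {"value": "c"})], None)
#   ApplyContainerMods("item", container, chgdesc, mods, _create, _modify, None)
#   # container == ["a", "b", "c"], chgdesc == [("item/2", "add")]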
12766 def _UpdateIvNames(base_index, disks):
12767 """Updates the C{iv_name} attribute of disks.
12769 @type disks: list of L{objects.Disk}
12772 for (idx, disk) in enumerate(disks):
12773 disk.iv_name = "disk/%s" % (base_index + idx, )
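# Illustrative sketch: after removing disk 1 of three, renumbering restores
# contiguous "disk/N" names on the remaining objects.Disk entries.
#
#   del instance.disks[1]
#   _UpdateIvNames(0, instance.disks)
#   # instance.disks[0].iv_name == "disk/0"
#   # instance.disks[1].iv_name == "disk/1"   (was "disk/2" before)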
12776 class _InstNicModPrivate:
12777 """Data structure for network interface modifications.
12779 Used by L{LUInstanceSetParams}.
12782 def __init__(self):
12783 self.params = None
12784 self.filled = None
12787 class LUInstanceSetParams(LogicalUnit):
12788 """Modifies an instances's parameters.
12791 HPATH = "instance-modify"
12792 HTYPE = constants.HTYPE_INSTANCE
12796 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12797 assert ht.TList(mods)
12798 assert not mods or len(mods[0]) in (2, 3)
12800 if mods and len(mods[0]) == 2:
12804 for op, params in mods:
12805 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12806 result.append((op, -1, params))
12810 raise errors.OpPrereqError("Only one %s add or remove operation is"
12811 " supported at a time" % kind,
12812 errors.ECODE_INVAL)
12814 result.append((constants.DDM_MODIFY, op, params))
12816 assert verify_fn(result)
12817 else:
12818 result = mods
12820 return result
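# Illustrative sketch (placeholder parameters): the legacy two-element syntax
# is rewritten into the (operation, identifier, params) triples used by the
# rest of this LU.
#
#   _UpgradeDiskNicMods("NIC", [(constants.DDM_ADD, {}),
#                               (0, {constants.INIC_IP: "192.0.2.5"})], verify_fn)
#   # -> [(constants.DDM_ADD, -1, {}),
#   #     (constants.DDM_MODIFY, 0, {constants.INIC_IP: "192.0.2.5"})]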
12823 def _CheckMods(kind, mods, key_types, item_fn):
12824 """Ensures requested disk/NIC modifications are valid.
12827 for (op, _, params) in mods:
12828 assert ht.TDict(params)
12830 # If 'key_types' is an empty dict, we assume we have an
12831 # 'ext' template and thus do not ForceDictType
12832 if key_types:
12833 utils.ForceDictType(params, key_types)
12835 if op == constants.DDM_REMOVE:
12836 if params:
12837 raise errors.OpPrereqError("No settings should be passed when"
12838 " removing a %s" % kind,
12839 errors.ECODE_INVAL)
12840 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12841 item_fn(op, params)
12843 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12846 def _VerifyDiskModification(op, params):
12847 """Verifies a disk modification.
12850 if op == constants.DDM_ADD:
12851 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12852 if mode not in constants.DISK_ACCESS_SET:
12853 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12854 errors.ECODE_INVAL)
12856 size = params.get(constants.IDISK_SIZE, None)
12857 if size is None:
12858 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12859 constants.IDISK_SIZE, errors.ECODE_INVAL)
12861 try:
12862 size = int(size)
12863 except (TypeError, ValueError), err:
12864 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12865 errors.ECODE_INVAL)
12867 params[constants.IDISK_SIZE] = size
12868 name = params.get(constants.IDISK_NAME, None)
12869 if name is not None and name.lower() == constants.VALUE_NONE:
12870 params[constants.IDISK_NAME] = None
12872 elif op == constants.DDM_MODIFY:
12873 if constants.IDISK_SIZE in params:
12874 raise errors.OpPrereqError("Disk size change not possible, use"
12875 " grow-disk", errors.ECODE_INVAL)
12876 if len(params) > 2:
12877 raise errors.OpPrereqError("Disk modification doesn't support"
12878 " additional arbitrary parameters",
12879 errors.ECODE_INVAL)
12880 name = params.get(constants.IDISK_NAME, None)
12881 if name is not None and name.lower() == constants.VALUE_NONE:
12882 params[constants.IDISK_NAME] = None
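# Illustrative sketch of parameter dicts accepted by this check (sizes in
# MiB, values made up):
#
#   _VerifyDiskModification(constants.DDM_ADD,
#                           {constants.IDISK_SIZE: 2048})  # mode defaults to rw
#   _VerifyDiskModification(constants.DDM_MODIFY,
#                           {constants.IDISK_MODE: constants.DISK_RDONLY})
#
# Passing IDISK_SIZE with DDM_MODIFY is rejected ("use grow-disk" instead).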
12885 def _VerifyNicModification(op, params):
12886 """Verifies a network interface modification.
12889 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12890 ip = params.get(constants.INIC_IP, None)
12891 name = params.get(constants.INIC_NAME, None)
12892 req_net = params.get(constants.INIC_NETWORK, None)
12893 link = params.get(constants.NIC_LINK, None)
12894 mode = params.get(constants.NIC_MODE, None)
12895 if name is not None and name.lower() == constants.VALUE_NONE:
12896 params[constants.INIC_NAME] = None
12897 if req_net is not None:
12898 if req_net.lower() == constants.VALUE_NONE:
12899 params[constants.INIC_NETWORK] = None
12901 elif link is not None or mode is not None:
12902 raise errors.OpPrereqError("If network is given"
12903 " mode or link should not",
12904 errors.ECODE_INVAL)
12906 if op == constants.DDM_ADD:
12907 macaddr = params.get(constants.INIC_MAC, None)
12908 if macaddr is None:
12909 params[constants.INIC_MAC] = constants.VALUE_AUTO
12912 if ip.lower() == constants.VALUE_NONE:
12913 params[constants.INIC_IP] = None
12915 if ip.lower() == constants.NIC_IP_POOL:
12916 if op == constants.DDM_ADD and req_net is None:
12917 raise errors.OpPrereqError("If ip=pool, parameter network"
12919 errors.ECODE_INVAL)
12921 if not netutils.IPAddress.IsValid(ip):
12922 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12923 errors.ECODE_INVAL)
12925 if constants.INIC_MAC in params:
12926 macaddr = params[constants.INIC_MAC]
12927 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12928 macaddr = utils.NormalizeAndValidateMac(macaddr)
12930 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12931 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12932 " modifying an existing NIC",
12933 errors.ECODE_INVAL)
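# Illustrative sketch of the normalisation done above (values made up): the
# literal string "none" becomes None and a missing MAC on an add defaults to
# constants.VALUE_AUTO.
#
#   params = {constants.INIC_IP: "none", constants.INIC_NETWORK: "none"}
#   _VerifyNicModification(constants.DDM_ADD, params)
#   # params == {INIC_IP: None, INIC_NETWORK: None, INIC_MAC: "auto"}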
12935 def CheckArguments(self):
12936 if not (self.op.nics or self.op.disks or self.op.disk_template or
12937 self.op.hvparams or self.op.beparams or self.op.os_name or
12938 self.op.offline is not None or self.op.runtime_mem or
12940 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12942 if self.op.hvparams:
12943 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
12944 "hypervisor", "instance", "cluster")
12946 self.op.disks = self._UpgradeDiskNicMods(
12947 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12948 self.op.nics = self._UpgradeDiskNicMods(
12949 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12951 if self.op.disks and self.op.disk_template is not None:
12952 raise errors.OpPrereqError("Disk template conversion and other disk"
12953 " changes not supported at the same time",
12954 errors.ECODE_INVAL)
12956 if (self.op.disk_template and
12957 self.op.disk_template in constants.DTS_INT_MIRROR and
12958 self.op.remote_node is None):
12959 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12960 " one requires specifying a secondary node",
12961 errors.ECODE_INVAL)
12963 # Check NIC modifications
12964 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12965 self._VerifyNicModification)
12968 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
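# Illustrative sketch (hypothetical instance and values) of the normalised
# modification lists this LU works with, i.e. (operation, identifier, params)
# triples; the legacy two-element form is upgraded by _UpgradeDiskNicMods
# above:
#
#   opcodes.OpInstanceSetParams(
#     instance_name="inst1.example.com",
#     disks=[(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})],
#     nics=[(constants.DDM_MODIFY, 0, {constants.INIC_IP: "192.0.2.10"})])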
12970 def ExpandNames(self):
12971 self._ExpandAndLockInstance()
12972 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12973 # Can't even acquire node locks in shared mode as upcoming changes in
12974 # Ganeti 2.6 will start to modify the node object on disk conversion
12975 self.needed_locks[locking.LEVEL_NODE] = []
12976 self.needed_locks[locking.LEVEL_NODE_RES] = []
12977 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12978 # Lock the node group to be able to look up the ipolicy
12979 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12981 def DeclareLocks(self, level):
12982 if level == locking.LEVEL_NODEGROUP:
12983 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12984 # Acquire locks for the instance's nodegroups optimistically. Needs
12985 # to be verified in CheckPrereq
12986 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12987 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12988 elif level == locking.LEVEL_NODE:
12989 self._LockInstancesNodes()
12990 if self.op.disk_template and self.op.remote_node:
12991 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12992 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12993 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12995 self.needed_locks[locking.LEVEL_NODE_RES] = \
12996 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12998 def BuildHooksEnv(self):
12999 """Build hooks env.
13001 This runs on the master, primary and secondaries.
13005 if constants.BE_MINMEM in self.be_new:
13006 args["minmem"] = self.be_new[constants.BE_MINMEM]
13007 if constants.BE_MAXMEM in self.be_new:
13008 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13009 if constants.BE_VCPUS in self.be_new:
13010 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13011 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13012 # information at all.
13014 if self._new_nics is not None:
13017 for nic in self._new_nics:
13018 n = copy.deepcopy(nic)
13019 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13020 n.nicparams = nicparams
13021 nics.append(_NICToTuple(self, n))
13023 args["nics"] = nics
13025 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13026 if self.op.disk_template:
13027 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13028 if self.op.runtime_mem:
13029 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13033 def BuildHooksNodes(self):
13034 """Build hooks nodes.
13037 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13040 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13041 old_params, cluster, pnode):
13043 update_params_dict = dict([(key, params[key])
13044 for key in constants.NICS_PARAMETERS
13045 if key in params])
13047 req_link = update_params_dict.get(constants.NIC_LINK, None)
13048 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13050 new_net_uuid = None
13051 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13052 if new_net_uuid_or_name:
13053 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13054 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
13057 old_net_obj = self.cfg.GetNetwork(old_net_uuid)
13060 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
13062 raise errors.OpPrereqError("No netparams found for the network"
13063 " %s, probably not connected" %
13064 new_net_obj.name, errors.ECODE_INVAL)
13065 new_params = dict(netparams)
13067 new_params = _GetUpdatedParams(old_params, update_params_dict)
13069 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13071 new_filled_params = cluster.SimpleFillNIC(new_params)
13072 objects.NIC.CheckParameterSyntax(new_filled_params)
13074 new_mode = new_filled_params[constants.NIC_MODE]
13075 if new_mode == constants.NIC_MODE_BRIDGED:
13076 bridge = new_filled_params[constants.NIC_LINK]
13077 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13078 if msg:
13079 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13080 if self.op.force:
13081 self.warn.append(msg)
13082 else:
13083 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13085 elif new_mode == constants.NIC_MODE_ROUTED:
13086 ip = params.get(constants.INIC_IP, old_ip)
13087 if ip is None:
13088 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13089 " on a routed NIC", errors.ECODE_INVAL)
13091 elif new_mode == constants.NIC_MODE_OVS:
13092 # TODO: check OVS link
13093 self.LogInfo("OVS links are currently not checked for correctness")
13095 if constants.INIC_MAC in params:
13096 mac = params[constants.INIC_MAC]
13098 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13099 errors.ECODE_INVAL)
13100 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13101 # otherwise generate the MAC address
13102 params[constants.INIC_MAC] = \
13103 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13105 # or validate/reserve the current one
13107 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13108 except errors.ReservationError:
13109 raise errors.OpPrereqError("MAC address '%s' already in use"
13110 " in cluster" % mac,
13111 errors.ECODE_NOTUNIQUE)
13112 elif new_net_uuid != old_net_uuid:
13114 def get_net_prefix(net_uuid):
13117 nobj = self.cfg.GetNetwork(net_uuid)
13118 mac_prefix = nobj.mac_prefix
13122 new_prefix = get_net_prefix(new_net_uuid)
13123 old_prefix = get_net_prefix(old_net_uuid)
13124 if old_prefix != new_prefix:
13125 params[constants.INIC_MAC] = \
13126 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13128 # if there is a change in (ip, network) tuple
13129 new_ip = params.get(constants.INIC_IP, old_ip)
13130 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13132 # if IP is pool then require a network and generate one IP
13133 if new_ip.lower() == constants.NIC_IP_POOL:
13136 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13137 except errors.ReservationError:
13138 raise errors.OpPrereqError("Unable to get a free IP"
13139 " from the address pool",
13140 errors.ECODE_STATE)
13141 self.LogInfo("Chose IP %s from network %s",
13144 params[constants.INIC_IP] = new_ip
13146 raise errors.OpPrereqError("ip=pool, but no network found",
13147 errors.ECODE_INVAL)
13148 # Reserve new IP if in the new network if any
13151 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13152 self.LogInfo("Reserving IP %s in network %s",
13153 new_ip, new_net_obj.name)
13154 except errors.ReservationError:
13155 raise errors.OpPrereqError("IP %s not available in network %s" %
13156 (new_ip, new_net_obj.name),
13157 errors.ECODE_NOTUNIQUE)
13158 # new network is None so check if new IP is a conflicting IP
13159 elif self.op.conflicts_check:
13160 _CheckForConflictingIp(self, new_ip, pnode)
13162 # release old IP if old network is not None
13163 if old_ip and old_net_uuid:
13165 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13166 except errors.AddressPoolError:
13167 logging.warning("Release IP %s not contained in network %s",
13168 old_ip, old_net_obj.name)
13170 # there are no changes in (ip, network) tuple and old network is not None
13171 elif (old_net_uuid is not None and
13172 (req_link is not None or req_mode is not None)):
13173 raise errors.OpPrereqError("Not allowed to change link or mode of"
13174 " a NIC that is connected to a network",
13175 errors.ECODE_INVAL)
13177 private.params = new_params
13178 private.filled = new_filled_params
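# Illustrative sketch (hypothetical names): moving NIC 0 to another network
# with an address taken from that network's pool. When the new network's MAC
# prefix differs from the old one a fresh MAC is generated, and the old IP is
# released while the new one is reserved, as implemented above.
#
#   opcodes.OpInstanceSetParams(
#     instance_name="inst1.example.com",
#     nics=[(constants.DDM_MODIFY, 0,
#            {constants.INIC_NETWORK: "backend-net",
#             constants.INIC_IP: constants.NIC_IP_POOL})])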
13180 def _PreCheckDiskTemplate(self, pnode_info):
13181 """CheckPrereq checks related to a new disk template."""
13182 # Arguments are passed to avoid configuration lookups
13183 instance = self.instance
13184 pnode = instance.primary_node
13185 cluster = self.cluster
13186 if instance.disk_template == self.op.disk_template:
13187 raise errors.OpPrereqError("Instance already has disk template %s" %
13188 instance.disk_template, errors.ECODE_INVAL)
13190 if (instance.disk_template,
13191 self.op.disk_template) not in self._DISK_CONVERSIONS:
13192 raise errors.OpPrereqError("Unsupported disk template conversion from"
13193 " %s to %s" % (instance.disk_template,
13194 self.op.disk_template),
13195 errors.ECODE_INVAL)
13196 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13197 msg="cannot change disk template")
13198 if self.op.disk_template in constants.DTS_INT_MIRROR:
13199 if self.op.remote_node == pnode:
13200 raise errors.OpPrereqError("Given new secondary node %s is the same"
13201 " as the primary node of the instance" %
13202 self.op.remote_node, errors.ECODE_STATE)
13203 _CheckNodeOnline(self, self.op.remote_node)
13204 _CheckNodeNotDrained(self, self.op.remote_node)
13205 # FIXME: here we assume that the old instance type is DT_PLAIN
13206 assert instance.disk_template == constants.DT_PLAIN
13207 disks = [{constants.IDISK_SIZE: d.size,
13208 constants.IDISK_VG: d.logical_id[0]}
13209 for d in instance.disks]
13210 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13211 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13213 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13214 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13215 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13217 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
13218 ignore=self.op.ignore_ipolicy)
13219 if pnode_info.group != snode_info.group:
13220 self.LogWarning("The primary and secondary nodes are in two"
13221 " different node groups; the disk parameters"
13222 " from the first disk's node group will be"
13225 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
13226 # Make sure none of the nodes require exclusive storage
13227 nodes = [pnode_info]
13228 if self.op.disk_template in constants.DTS_INT_MIRROR:
13230 nodes.append(snode_info)
13231 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13232 if compat.any(map(has_es, nodes)):
13233 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13234 " storage is enabled" % (instance.disk_template,
13235 self.op.disk_template))
13236 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13238 def CheckPrereq(self):
13239 """Check prerequisites.
13241 This only checks the instance list against the existing names.
13244 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13245 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13247 cluster = self.cluster = self.cfg.GetClusterInfo()
13248 assert self.instance is not None, \
13249 "Cannot retrieve locked instance %s" % self.op.instance_name
13251 pnode = instance.primary_node
13255 if (self.op.pnode is not None and self.op.pnode != pnode and
13256 not self.op.force):
13257 # verify that the instance is not up
13258 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13259 instance.hypervisor)
13260 if instance_info.fail_msg:
13261 self.warn.append("Can't get instance runtime information: %s" %
13262 instance_info.fail_msg)
13263 elif instance_info.payload:
13264 raise errors.OpPrereqError("Instance is still running on %s" % pnode,
13265 errors.ECODE_STATE)
13267 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13268 nodelist = list(instance.all_nodes)
13269 pnode_info = self.cfg.GetNodeInfo(pnode)
13270 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13272 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13273 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13274 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13276 # dictionary with instance information after the modification
13279 # Check disk modifications. This is done here and not in CheckArguments
13280 # (as with NICs), because we need to know the instance's disk template
13281 if instance.disk_template == constants.DT_EXT:
13282 self._CheckMods("disk", self.op.disks, {},
13283 self._VerifyDiskModification)
13285 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13286 self._VerifyDiskModification)
13288 # Prepare disk/NIC modifications
13289 self.diskmod = PrepareContainerMods(self.op.disks, None)
13290 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13292 # Check the validity of the `provider' parameter
13293 if instance.disk_template in constants.DT_EXT:
13294 for mod in self.diskmod:
13295 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13296 if mod[0] == constants.DDM_ADD:
13297 if ext_provider is None:
13298 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13299 " '%s' missing, during disk add" %
13301 constants.IDISK_PROVIDER),
13302 errors.ECODE_NOENT)
13303 elif mod[0] == constants.DDM_MODIFY:
13305 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13307 constants.IDISK_PROVIDER,
13308 errors.ECODE_INVAL)
13310 for mod in self.diskmod:
13311 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13312 if ext_provider is not None:
13313 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13314 " instances of type '%s'" %
13315 (constants.IDISK_PROVIDER,
13317 errors.ECODE_INVAL)
13320 if self.op.os_name and not self.op.force:
13321 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13322 self.op.force_variant)
13323 instance_os = self.op.os_name
13325 instance_os = instance.os
13327 assert not (self.op.disk_template and self.op.disks), \
13328 "Can't modify disk template and apply disk changes at the same time"
13330 if self.op.disk_template:
13331 self._PreCheckDiskTemplate(pnode_info)
13333 # hvparams processing
13334 if self.op.hvparams:
13335 hv_type = instance.hypervisor
13336 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13337 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13338 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13341 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13342 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13343 self.hv_proposed = self.hv_new = hv_new # the new actual values
13344 self.hv_inst = i_hvdict # the new dict (without defaults)
13346 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13348 self.hv_new = self.hv_inst = {}
13350 # beparams processing
13351 if self.op.beparams:
13352 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13354 objects.UpgradeBeParams(i_bedict)
13355 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13356 be_new = cluster.SimpleFillBE(i_bedict)
13357 self.be_proposed = self.be_new = be_new # the new actual values
13358 self.be_inst = i_bedict # the new dict (without defaults)
13360 self.be_new = self.be_inst = {}
13361 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13362 be_old = cluster.FillBE(instance)
13364 # CPU param validation -- checking every time a parameter is
13365 # changed to cover all cases where either CPU mask or vcpus have
13367 if (constants.BE_VCPUS in self.be_proposed and
13368 constants.HV_CPU_MASK in self.hv_proposed):
13370 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13371 # Verify mask is consistent with number of vCPUs. Can skip this
13372 # test if only 1 entry in the CPU mask, which means same mask
13373 # is applied to all vCPUs.
13374 if (len(cpu_list) > 1 and
13375 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13376 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13378 (self.be_proposed[constants.BE_VCPUS],
13379 self.hv_proposed[constants.HV_CPU_MASK]),
13380 errors.ECODE_INVAL)
13382 # Only perform this test if a new CPU mask is given
13383 if constants.HV_CPU_MASK in self.hv_new:
13384 # Calculate the largest CPU number requested
13385 max_requested_cpu = max(map(max, cpu_list))
13386 # Check that all of the instance's nodes have enough physical CPUs to
13387 # satisfy the requested CPU mask
13388 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13389 max_requested_cpu + 1, instance.hypervisor)
13391 # osparams processing
13392 if self.op.osparams:
13393 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13394 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13395 self.os_inst = i_osdict # the new dict (without defaults)
13399 #TODO(dynmem): do the appropriate check involving MINMEM
13400 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13401 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13402 mem_check_list = [pnode]
13403 if be_new[constants.BE_AUTO_BALANCE]:
13404 # either we changed auto_balance to yes or it was from before
13405 mem_check_list.extend(instance.secondary_nodes)
13406 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13407 instance.hypervisor)
13408 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13409 [instance.hypervisor], False)
13410 pninfo = nodeinfo[pnode]
13411 msg = pninfo.fail_msg
13413 # Assume the primary node is unreachable and go ahead
13414 self.warn.append("Can't get info from primary node %s: %s" %
13417 (_, _, (pnhvinfo, )) = pninfo.payload
13418 if not isinstance(pnhvinfo.get("memory_free", None), int):
13419 self.warn.append("Node data from primary node %s doesn't contain"
13420 " free memory information" % pnode)
13421 elif instance_info.fail_msg:
13422 self.warn.append("Can't get instance runtime information: %s" %
13423 instance_info.fail_msg)
13425 if instance_info.payload:
13426 current_mem = int(instance_info.payload["memory"])
13428 # Assume instance not running
13429 # (there is a slight race condition here, but it's not very
13430 # probable, and we have no other way to check)
13431 # TODO: Describe race condition
13433 #TODO(dynmem): do the appropriate check involving MINMEM
13434 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13435 pnhvinfo["memory_free"])
13437 raise errors.OpPrereqError("This change will prevent the instance"
13438 " from starting, due to %d MB of memory"
13439 " missing on its primary node" %
13440 miss_mem, errors.ECODE_NORES)
13442 if be_new[constants.BE_AUTO_BALANCE]:
13443 for node, nres in nodeinfo.items():
13444 if node not in instance.secondary_nodes:
13446 nres.Raise("Can't get info from secondary node %s" % node,
13447 prereq=True, ecode=errors.ECODE_STATE)
13448 (_, _, (nhvinfo, )) = nres.payload
13449 if not isinstance(nhvinfo.get("memory_free", None), int):
13450 raise errors.OpPrereqError("Secondary node %s didn't return free"
13451 " memory information" % node,
13452 errors.ECODE_STATE)
13453 #TODO(dynmem): do the appropriate check involving MINMEM
13454 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13455 raise errors.OpPrereqError("This change will prevent the instance"
13456 " from failover to its secondary node"
13457 " %s, due to not enough memory" % node,
13458 errors.ECODE_STATE)
13460 if self.op.runtime_mem:
13461 remote_info = self.rpc.call_instance_info(instance.primary_node,
13463 instance.hypervisor)
13464 remote_info.Raise("Error checking node %s" % instance.primary_node)
13465 if not remote_info.payload: # not running already
13466 raise errors.OpPrereqError("Instance %s is not running" %
13467 instance.name, errors.ECODE_STATE)
13469 current_memory = remote_info.payload["memory"]
13470 if (not self.op.force and
13471 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13472 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13473 raise errors.OpPrereqError("Instance %s must have memory between %d"
13474 " and %d MB of memory unless --force is"
13477 self.be_proposed[constants.BE_MINMEM],
13478 self.be_proposed[constants.BE_MAXMEM]),
13479 errors.ECODE_INVAL)
13481 delta = self.op.runtime_mem - current_memory
13483 _CheckNodeFreeMemory(self, instance.primary_node,
13484 "ballooning memory for instance %s" %
13485 instance.name, delta, instance.hypervisor)
13487 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13488 raise errors.OpPrereqError("Disk operations not supported for"
13489 " diskless instances", errors.ECODE_INVAL)
13491 def _PrepareNicCreate(_, params, private):
13492 self._PrepareNicModification(params, private, None, None,
13493 {}, cluster, pnode)
13494 return (None, None)
13496 def _PrepareNicMod(_, nic, params, private):
13497 self._PrepareNicModification(params, private, nic.ip, nic.network,
13498 nic.nicparams, cluster, pnode)
13501 def _PrepareNicRemove(_, params, __):
13503 net = params.network
13504 if net is not None and ip is not None:
13505 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13507 # Verify NIC changes (operating on copy)
13508 nics = instance.nics[:]
13509 ApplyContainerMods("NIC", nics, None, self.nicmod,
13510 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13511 if len(nics) > constants.MAX_NICS:
13512 raise errors.OpPrereqError("Instance has too many network interfaces"
13513 " (%d), cannot add more" % constants.MAX_NICS,
13514 errors.ECODE_STATE)
13516 def _PrepareDiskMod(_, disk, params, __):
13517 disk.name = params.get(constants.IDISK_NAME, None)
13519 # Verify disk changes (operating on a copy)
13520 disks = copy.deepcopy(instance.disks)
13521 ApplyContainerMods("disk", disks, None, self.diskmod, None, _PrepareDiskMod,
13523 utils.ValidateDeviceNames("disk", disks)
13524 if len(disks) > constants.MAX_DISKS:
13525 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13526 " more" % constants.MAX_DISKS,
13527 errors.ECODE_STATE)
13528 disk_sizes = [disk.size for disk in instance.disks]
13529 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13530 self.diskmod if op == constants.DDM_ADD)
13531 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13532 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13534 if self.op.offline is not None and self.op.offline:
13535 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13536 msg="can't change to offline")
13538 # Pre-compute NIC changes (necessary to use result in hooks)
13539 self._nic_chgdesc = []
13541 # Operate on copies as this is still in prereq
13542 nics = [nic.Copy() for nic in instance.nics]
13543 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13544 self._CreateNewNic, self._ApplyNicMods, None)
13545 # Verify that NIC names are unique and valid
13546 utils.ValidateDeviceNames("NIC", nics)
13547 self._new_nics = nics
13548 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13550 self._new_nics = None
13551 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13553 if not self.op.ignore_ipolicy:
13554 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13557 # Fill ispec with backend parameters
13558 ispec[constants.ISPEC_SPINDLE_USE] = \
13559 self.be_new.get(constants.BE_SPINDLE_USE, None)
13560 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13563 # Copy ispec to verify parameters with min/max values separately
13564 if self.op.disk_template:
13565 new_disk_template = self.op.disk_template
13567 new_disk_template = instance.disk_template
13568 ispec_max = ispec.copy()
13569 ispec_max[constants.ISPEC_MEM_SIZE] = \
13570 self.be_new.get(constants.BE_MAXMEM, None)
13571 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
13573 ispec_min = ispec.copy()
13574 ispec_min[constants.ISPEC_MEM_SIZE] = \
13575 self.be_new.get(constants.BE_MINMEM, None)
13576 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
13579 if (res_max or res_min):
13580 # FIXME: Improve error message by including information about whether
13581 # the upper or lower limit of the parameter fails the ipolicy.
13582 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13583 (group_info, group_info.name,
13584 utils.CommaJoin(set(res_max + res_min))))
13585 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13587 def _ConvertPlainToDrbd(self, feedback_fn):
13588 """Converts an instance from plain to drbd.
13591 feedback_fn("Converting template to drbd")
13592 instance = self.instance
13593 pnode = instance.primary_node
13594 snode = self.op.remote_node
13596 assert instance.disk_template == constants.DT_PLAIN
13598 # create a fake disk info for _GenerateDiskTemplate
13599 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13600 constants.IDISK_VG: d.logical_id[0],
13601 constants.IDISK_NAME: d.name}
13602 for d in instance.disks]
13603 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13604 instance.name, pnode, [snode],
13605 disk_info, None, None, 0, feedback_fn,
13607 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13609 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13610 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13611 info = _GetInstanceInfoText(instance)
13612 feedback_fn("Creating additional volumes...")
13613 # first, create the missing data and meta devices
13614 for disk in anno_disks:
13615 # unfortunately this is... not too nice
13616 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13617 info, True, p_excl_stor)
13618 for child in disk.children:
13619 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13621 # at this stage, all new LVs have been created, we can rename the
13623 feedback_fn("Renaming original volumes...")
13624 rename_list = [(o, n.children[0].logical_id)
13625 for (o, n) in zip(instance.disks, new_disks)]
13626 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13627 result.Raise("Failed to rename original LVs")
13629 feedback_fn("Initializing DRBD devices...")
13630 # all child devices are in place, we can now create the DRBD devices
13632 for disk in anno_disks:
13633 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13634 f_create = node == pnode
13635 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13637 except errors.GenericError, e:
13638 feedback_fn("Initializing of DRBD devices failed;"
13639 " renaming back original volumes...")
13640 for disk in new_disks:
13641 self.cfg.SetDiskID(disk, pnode)
13642 rename_back_list = [(n.children[0], o.logical_id)
13643 for (n, o) in zip(new_disks, instance.disks)]
13644 result = self.rpc.call_blockdev_rename(pnode, rename_back_list)
13645 result.Raise("Failed to rename LVs back after error %s" % str(e))
13648 # at this point, the instance has been modified
13649 instance.disk_template = constants.DT_DRBD8
13650 instance.disks = new_disks
13651 self.cfg.Update(instance, feedback_fn)
13653 # Release node locks while waiting for sync
13654 _ReleaseLocks(self, locking.LEVEL_NODE)
13656 # disks are created, waiting for sync
13657 disk_abort = not _WaitForSync(self, instance,
13658 oneshot=not self.op.wait_for_sync)
13660 raise errors.OpExecError("There are some degraded disks for"
13661 " this instance, please cleanup manually")
13663 # Node resource locks will be released by caller
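# Illustrative sketch of the conversion performed above (volume group and LV
# names are examples only; the exact names come from _GenerateDiskTemplate):
#
#   before (plain):  disk/0 = LV xenvg/<uuid>.disk0
#   after  (drbd8):  disk/0 = DRBD8 device
#                      +- data LV xenvg/<uuid>.disk0_data  (the renamed original)
#                      +- meta LV xenvg/<uuid>.disk0_meta  (newly created)
#
# The secondary node gets freshly created data and meta LVs before the DRBD
# device is assembled and left to resynchronise.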
13665 def _ConvertDrbdToPlain(self, feedback_fn):
13666 """Converts an instance from drbd to plain.
13669 instance = self.instance
13671 assert len(instance.secondary_nodes) == 1
13672 assert instance.disk_template == constants.DT_DRBD8
13674 pnode = instance.primary_node
13675 snode = instance.secondary_nodes[0]
13676 feedback_fn("Converting template to plain")
13678 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13679 new_disks = [d.children[0] for d in instance.disks]
13681 # copy over size, mode and name
13682 for parent, child in zip(old_disks, new_disks):
13683 child.size = parent.size
13684 child.mode = parent.mode
13685 child.name = parent.name
13687 # this is a DRBD disk, return its port to the pool
13688 # NOTE: this must be done right before the call to cfg.Update!
13689 for disk in old_disks:
13690 tcp_port = disk.logical_id[2]
13691 self.cfg.AddTcpUdpPort(tcp_port)
13693 # update instance structure
13694 instance.disks = new_disks
13695 instance.disk_template = constants.DT_PLAIN
13696 _UpdateIvNames(0, instance.disks)
13697 self.cfg.Update(instance, feedback_fn)
13699 # Release locks in case removing disks takes a while
13700 _ReleaseLocks(self, locking.LEVEL_NODE)
13702 feedback_fn("Removing volumes on the secondary node...")
13703 for disk in old_disks:
13704 self.cfg.SetDiskID(disk, snode)
13705 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13707 self.LogWarning("Could not remove block device %s on node %s,"
13708 " continuing anyway: %s", disk.iv_name, snode, msg)
13710 feedback_fn("Removing unneeded volumes on the primary node...")
13711 for idx, disk in enumerate(old_disks):
13712 meta = disk.children[1]
13713 self.cfg.SetDiskID(meta, pnode)
13714 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13716 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13717 " continuing anyway: %s", idx, pnode, msg)
13719 def _CreateNewDisk(self, idx, params, _):
13720 """Creates a new disk.
13723 instance = self.instance
13726 if instance.disk_template in constants.DTS_FILEBASED:
13727 (file_driver, file_path) = instance.disks[0].logical_id
13728 file_path = os.path.dirname(file_path)
13730 file_driver = file_path = None
13733 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13734 instance.primary_node, instance.secondary_nodes,
13735 [params], file_path, file_driver, idx,
13736 self.Log, self.diskparams)[0]
13738 info = _GetInstanceInfoText(instance)
13740 logging.info("Creating volume %s for instance %s",
13741 disk.iv_name, instance.name)
13742 # Note: this needs to be kept in sync with _CreateDisks
13744 for node in instance.all_nodes:
13745 f_create = (node == instance.primary_node)
13747 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13748 except errors.OpExecError, err:
13749 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13750 disk.iv_name, disk, node, err)
13752 if self.cluster.prealloc_wipe_disks:
13754 _WipeDisks(self, instance,
13755 disks=[(idx, disk, 0)])
13758 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13762 def _ModifyDisk(idx, disk, params, _):
13763 """Modifies a disk.
13767 mode = params.get(constants.IDISK_MODE, None)
13770 changes.append(("disk.mode/%d" % idx, disk.mode))
13772 name = params.get(constants.IDISK_NAME, None)
13774 changes.append(("disk.name/%d" % idx, disk.name))
13778 def _RemoveDisk(self, idx, root, _):
13782 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13783 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13784 self.cfg.SetDiskID(disk, node)
13785 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13787 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13788 " continuing anyway", idx, node, msg)
13790 # if this is a DRBD disk, return its port to the pool
13791 if root.dev_type in constants.LDS_DRBD:
13792 self.cfg.AddTcpUdpPort(root.logical_id[2])
13794 def _CreateNewNic(self, idx, params, private):
13795 """Creates data structure for a new network interface.
13798 mac = params[constants.INIC_MAC]
13799 ip = params.get(constants.INIC_IP, None)
13800 net = params.get(constants.INIC_NETWORK, None)
13801 name = params.get(constants.INIC_NAME, None)
13802 net_uuid = self.cfg.LookupNetwork(net)
13803 #TODO: not private.filled?? can a nic have no nicparams??
13804 nicparams = private.filled
13805 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, name=name,
13806 nicparams=nicparams)
13807 nobj.uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13811 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13812 (mac, ip, private.filled[constants.NIC_MODE],
13813 private.filled[constants.NIC_LINK],
13817 def _ApplyNicMods(self, idx, nic, params, private):
13818 """Modifies a network interface.
13823 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NAME]:
13824 if key in params:
13825 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13826 setattr(nic, key, params[key])
13828 new_net = params.get(constants.INIC_NETWORK, nic.network)
13829 new_net_uuid = self.cfg.LookupNetwork(new_net)
13830 if new_net_uuid != nic.network:
13831 changes.append(("nic.network/%d" % idx, new_net))
13832 nic.network = new_net_uuid
13835 nic.nicparams = private.filled
13837 for (key, val) in nic.nicparams.items():
13838 changes.append(("nic.%s/%d" % (key, idx), val))
13842 def Exec(self, feedback_fn):
13843 """Modifies an instance.
13845 All parameters take effect only at the next restart of the instance.
13848 # Process here the warnings from CheckPrereq, as we don't have a
13849 # feedback_fn there.
13850 # TODO: Replace with self.LogWarning
13851 for warn in self.warn:
13852 feedback_fn("WARNING: %s" % warn)
13854 assert ((self.op.disk_template is None) ^
13855 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13856 "Not owning any node resource locks"
13859 instance = self.instance
13863 instance.primary_node = self.op.pnode
13866 if self.op.runtime_mem:
13867 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13869 self.op.runtime_mem)
13870 rpcres.Raise("Cannot modify instance runtime memory")
13871 result.append(("runtime_memory", self.op.runtime_mem))
13873 # Apply disk changes
13874 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13875 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13876 _UpdateIvNames(0, instance.disks)
13878 if self.op.disk_template:
13880 check_nodes = set(instance.all_nodes)
13881 if self.op.remote_node:
13882 check_nodes.add(self.op.remote_node)
13883 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13884 owned = self.owned_locks(level)
13885 assert not (check_nodes - owned), \
13886 ("Not owning the correct locks, owning %r, expected at least %r" %
13887 (owned, check_nodes))
13889 r_shut = _ShutdownInstanceDisks(self, instance)
13891 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13892 " proceed with disk template conversion")
13893 mode = (instance.disk_template, self.op.disk_template)
13895 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13897 self.cfg.ReleaseDRBDMinors(instance.name)
13899 result.append(("disk_template", self.op.disk_template))
13901 assert instance.disk_template == self.op.disk_template, \
13902 ("Expected disk template '%s', found '%s'" %
13903 (self.op.disk_template, instance.disk_template))
13905 # Release node and resource locks if there are any (they might already have
13906 # been released during disk conversion)
13907 _ReleaseLocks(self, locking.LEVEL_NODE)
13908 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13910 # Apply NIC changes
13911 if self._new_nics is not None:
13912 instance.nics = self._new_nics
13913 result.extend(self._nic_chgdesc)
13916 if self.op.hvparams:
13917 instance.hvparams = self.hv_inst
13918 for key, val in self.op.hvparams.iteritems():
13919 result.append(("hv/%s" % key, val))
13922 if self.op.beparams:
13923 instance.beparams = self.be_inst
13924 for key, val in self.op.beparams.iteritems():
13925 result.append(("be/%s" % key, val))
13928 if self.op.os_name:
13929 instance.os = self.op.os_name
13932 if self.op.osparams:
13933 instance.osparams = self.os_inst
13934 for key, val in self.op.osparams.iteritems():
13935 result.append(("os/%s" % key, val))
13937 if self.op.offline is None:
13938 # Ignore
13939 pass
13940 elif self.op.offline:
13941 # Mark instance as offline
13942 self.cfg.MarkInstanceOffline(instance.name)
13943 result.append(("admin_state", constants.ADMINST_OFFLINE))
13945 # Mark instance as online, but stopped
13946 self.cfg.MarkInstanceDown(instance.name)
13947 result.append(("admin_state", constants.ADMINST_DOWN))
13949 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13951 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13952 self.owned_locks(locking.LEVEL_NODE)), \
13953 "All node locks should have been released by now"
13957 _DISK_CONVERSIONS = {
13958 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13959 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
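# Illustrative sketch: these conversions are triggered through
# OpInstanceSetParams with a new disk_template; DRBD additionally requires
# the future secondary node (hypothetical names below). The rough CLI
# equivalent is "gnt-instance modify -t drbd -n node2.example.com inst1",
# run while the instance is stopped.
#
#   opcodes.OpInstanceSetParams(instance_name="inst1.example.com",
#                               disk_template=constants.DT_DRBD8,
#                               remote_node="node2.example.com")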
13963 class LUInstanceChangeGroup(LogicalUnit):
13964 HPATH = "instance-change-group"
13965 HTYPE = constants.HTYPE_INSTANCE
13968 def ExpandNames(self):
13969 self.share_locks = _ShareAll()
13971 self.needed_locks = {
13972 locking.LEVEL_NODEGROUP: [],
13973 locking.LEVEL_NODE: [],
13974 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
13977 self._ExpandAndLockInstance()
13979 if self.op.target_groups:
13980 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13981 self.op.target_groups)
13983 self.req_target_uuids = None
13985 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13987 def DeclareLocks(self, level):
13988 if level == locking.LEVEL_NODEGROUP:
13989 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13991 if self.req_target_uuids:
13992 lock_groups = set(self.req_target_uuids)
13994 # Lock all groups used by instance optimistically; this requires going
13995 # via the node before it's locked, requiring verification later on
13996 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13997 lock_groups.update(instance_groups)
13999 # No target groups, need to lock all of them
14000 lock_groups = locking.ALL_SET
14002 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14004 elif level == locking.LEVEL_NODE:
14005 if self.req_target_uuids:
14006 # Lock all nodes used by instances
14007 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14008 self._LockInstancesNodes()
14010 # Lock all nodes in all potential target groups
14011 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14012 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14013 member_nodes = [node_name
14014 for group in lock_groups
14015 for node_name in self.cfg.GetNodeGroup(group).members]
14016 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14018 # Lock all nodes as all groups are potential targets
14019 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14021 def CheckPrereq(self):
14022 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14023 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14024 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14026 assert (self.req_target_uuids is None or
14027 owned_groups.issuperset(self.req_target_uuids))
14028 assert owned_instances == set([self.op.instance_name])
14030 # Get instance information
14031 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14033 # Check if node groups for locked instance are still correct
14034 assert owned_nodes.issuperset(self.instance.all_nodes), \
14035 ("Instance %s's nodes changed while we kept the lock" %
14036 self.op.instance_name)
14038 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14041 if self.req_target_uuids:
14042 # User requested specific target groups
14043 self.target_uuids = frozenset(self.req_target_uuids)
14045 # All groups except those used by the instance are potential targets
14046 self.target_uuids = owned_groups - inst_groups
14048 conflicting_groups = self.target_uuids & inst_groups
14049 if conflicting_groups:
14050 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14051 " used by the instance '%s'" %
14052 (utils.CommaJoin(conflicting_groups),
14053 self.op.instance_name),
14054 errors.ECODE_INVAL)
14056 if not self.target_uuids:
14057 raise errors.OpPrereqError("There are no possible target groups",
14058 errors.ECODE_INVAL)
14060 def BuildHooksEnv(self):
14061 """Build hooks env.
14064 assert self.target_uuids
14067 "TARGET_GROUPS": " ".join(self.target_uuids),
14070 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14074 def BuildHooksNodes(self):
14075 """Build hooks nodes.
14078 mn = self.cfg.GetMasterNode()
14079 return ([mn], [mn])
14081 def Exec(self, feedback_fn):
14082 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14084 assert instances == [self.op.instance_name], "Instance not locked"
14086 req = iallocator.IAReqGroupChange(instances=instances,
14087 target_groups=list(self.target_uuids))
14088 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14090 ial.Run(self.op.iallocator)
14092 if not ial.success:
14093 raise errors.OpPrereqError("Can't compute solution for changing group of"
14094 " instance '%s' using iallocator '%s': %s" %
14095 (self.op.instance_name, self.op.iallocator,
14096 ial.info), errors.ECODE_NORES)
14098 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14100 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14101 " instance '%s'", len(jobs), self.op.instance_name)
14103 return ResultWithJobs(jobs)
14106 class LUBackupQuery(NoHooksLU):
14107 """Query the exports list
14112 def CheckArguments(self):
14113 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14114 ["node", "export"], self.op.use_locking)
14116 def ExpandNames(self):
14117 self.expq.ExpandNames(self)
14119 def DeclareLocks(self, level):
14120 self.expq.DeclareLocks(self, level)
14122 def Exec(self, feedback_fn):
14125 for (node, expname) in self.expq.OldStyleQuery(self):
14126 if expname is None:
14127 result[node] = False
14129 result.setdefault(node, []).append(expname)
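# Illustrative sketch of the old-style result built here (hypothetical node
# and export names): nodes whose export list could not be fetched map to
# False, all others map to the export names found on them; nodes without any
# exports simply do not appear.
#
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node3.example.com": False}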
14134 class _ExportQuery(_QueryBase):
14135 FIELDS = query.EXPORT_FIELDS
14137 #: The node name is not a unique key for this query
14138 SORT_FIELD = "node"
14140 def ExpandNames(self, lu):
14141 lu.needed_locks = {}
14143 # The following variables interact with _QueryBase._GetNames
14145 self.wanted = _GetWantedNodes(lu, self.names)
14147 self.wanted = locking.ALL_SET
14149 self.do_locking = self.use_locking
14151 if self.do_locking:
14152 lu.share_locks = _ShareAll()
14153 lu.needed_locks = {
14154 locking.LEVEL_NODE: self.wanted,
14158 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14160 def DeclareLocks(self, lu, level):
14163 def _GetQueryData(self, lu):
14164 """Computes the list of nodes and their attributes.
14167 # Locking is not used
14169 assert not (compat.any(lu.glm.is_owned(level)
14170 for level in locking.LEVELS
14171 if level != locking.LEVEL_CLUSTER) or
14172 self.do_locking or self.use_locking)
14174 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14178 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14180 result.append((node, None))
14182 result.extend((node, expname) for expname in nres.payload)
14187 class LUBackupPrepare(NoHooksLU):
14188 """Prepares an instance for an export and returns useful information.
14193 def ExpandNames(self):
14194 self._ExpandAndLockInstance()
14196 def CheckPrereq(self):
14197 """Check prerequisites.
14200 instance_name = self.op.instance_name
14202 self.instance = self.cfg.GetInstanceInfo(instance_name)
14203 assert self.instance is not None, \
14204 "Cannot retrieve locked instance %s" % self.op.instance_name
14205 _CheckNodeOnline(self, self.instance.primary_node)
14207 self._cds = _GetClusterDomainSecret()
14209 def Exec(self, feedback_fn):
14210 """Prepares an instance for an export.
14213 instance = self.instance
14215 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14216 salt = utils.GenerateSecret(8)
14218 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14219 result = self.rpc.call_x509_cert_create(instance.primary_node,
14220 constants.RIE_CERT_VALIDITY)
14221 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14223 (name, cert_pem) = result.payload
14225 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14229 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14230 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14232 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14238 class LUBackupExport(LogicalUnit):
14239 """Export an instance to an image in the cluster.
14242 HPATH = "instance-export"
14243 HTYPE = constants.HTYPE_INSTANCE
14246 def CheckArguments(self):
14247 """Check the arguments.
14250 self.x509_key_name = self.op.x509_key_name
14251 self.dest_x509_ca_pem = self.op.destination_x509_ca
14253 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14254 if not self.x509_key_name:
14255 raise errors.OpPrereqError("Missing X509 key name for encryption",
14256 errors.ECODE_INVAL)
14258 if not self.dest_x509_ca_pem:
14259 raise errors.OpPrereqError("Missing destination X509 CA",
14260 errors.ECODE_INVAL)
14262 def ExpandNames(self):
14263 self._ExpandAndLockInstance()
14265 # Lock all nodes for local exports
14266 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14267 # FIXME: lock only instance primary and destination node
14269 # Sad but true, for now we have to lock all nodes, as we don't know where
14270 # the previous export might be, and in this LU we search for it and
14271 # remove it from its current node. In the future we could fix this by:
14272 # - making a tasklet to search (share-lock all), then create the
14273 # new one, then one to remove, after
14274 # - removing the removal operation altogether
14275 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14277 # Allocations should be stopped while this LU runs with node locks, but
14278 # it doesn't have to be exclusive
14279 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14280 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14282 def DeclareLocks(self, level):
14283 """Last minute lock declaration."""
14284 # All nodes are locked anyway, so nothing to do here.
14286 def BuildHooksEnv(self):
14287 """Build hooks env.
14289 This will run on the master, primary node and target node.
14293 "EXPORT_MODE": self.op.mode,
14294 "EXPORT_NODE": self.op.target_node,
14295 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14296 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14297 # TODO: Generic function for boolean env variables
14298 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14301 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14305 def BuildHooksNodes(self):
14306 """Build hooks nodes.
14309 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14311 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14312 nl.append(self.op.target_node)
14316 def CheckPrereq(self):
14317 """Check prerequisites.
14319 This checks that the instance and node names are valid.
14322 instance_name = self.op.instance_name
14324 self.instance = self.cfg.GetInstanceInfo(instance_name)
14325 assert self.instance is not None, \
14326 "Cannot retrieve locked instance %s" % self.op.instance_name
14327 _CheckNodeOnline(self, self.instance.primary_node)
14329 if (self.op.remove_instance and
14330 self.instance.admin_state == constants.ADMINST_UP and
14331 not self.op.shutdown):
14332 raise errors.OpPrereqError("Can not remove instance without shutting it"
14333 " down before", errors.ECODE_STATE)
14335 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14336 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14337 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14338 assert self.dst_node is not None
14340 _CheckNodeOnline(self, self.dst_node.name)
14341 _CheckNodeNotDrained(self, self.dst_node.name)
14344 self.dest_disk_info = None
14345 self.dest_x509_ca = None
14347 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14348 self.dst_node = None
14350 if len(self.op.target_node) != len(self.instance.disks):
14351 raise errors.OpPrereqError(("Received destination information for %s"
14352 " disks, but instance %s has %s disks") %
14353 (len(self.op.target_node), instance_name,
14354 len(self.instance.disks)),
14355 errors.ECODE_INVAL)
14357 cds = _GetClusterDomainSecret()
14359 # Check X509 key name
14361 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14362 except (TypeError, ValueError), err:
14363 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14364 errors.ECODE_INVAL)
14366 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14367 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14368 errors.ECODE_INVAL)
14370 # Load and verify CA
14372 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14373 except OpenSSL.crypto.Error, err:
14374 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14375 (err, ), errors.ECODE_INVAL)
14377 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14378 if errcode is not None:
14379 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14380 (msg, ), errors.ECODE_INVAL)
14382 self.dest_x509_ca = cert
14384 # Verify target information
14386 for idx, disk_data in enumerate(self.op.target_node):
14388 (host, port, magic) = \
14389 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14390 except errors.GenericError, err:
14391 raise errors.OpPrereqError("Target info for disk %s: %s" %
14392 (idx, err), errors.ECODE_INVAL)
14394 disk_info.append((host, port, magic))
14396 assert len(disk_info) == len(self.op.target_node)
14397 self.dest_disk_info = disk_info
14400 raise errors.ProgrammerError("Unhandled export mode %r" %
14403 # instance disk type verification
14404 # TODO: Implement export support for file-based disks
14405 for disk in self.instance.disks:
14406 if disk.dev_type == constants.LD_FILE:
14407 raise errors.OpPrereqError("Export not supported for instances with"
14408 " file-based disks", errors.ECODE_INVAL)
14410 def _CleanupExports(self, feedback_fn):
14411 """Removes exports of current instance from all other nodes.
14413 If an instance in a cluster with nodes A..D was exported to node C, its
14414 exports will be removed from the nodes A, B and D.
14417 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14419 nodelist = self.cfg.GetNodeList()
14420 nodelist.remove(self.dst_node.name)
14422 # on one-node clusters nodelist will be empty after the removal
14423 # if we proceeded, the backup would be removed because OpBackupQuery
14424 # substitutes an empty list with the full cluster node list.
14425 iname = self.instance.name
14427 feedback_fn("Removing old exports for instance %s" % iname)
14428 exportlist = self.rpc.call_export_list(nodelist)
14429 for node in exportlist:
14430 if exportlist[node].fail_msg:
14432 if iname in exportlist[node].payload:
14433 msg = self.rpc.call_export_remove(node, iname).fail_msg
14435 self.LogWarning("Could not remove older export for instance %s"
14436 " on node %s: %s", iname, node, msg)
14438 def Exec(self, feedback_fn):
14439 """Export an instance to an image in the cluster.
14442 assert self.op.mode in constants.EXPORT_MODES
14444 instance = self.instance
14445 src_node = instance.primary_node
14447 if self.op.shutdown:
14448 # shutdown the instance, but not the disks
14449 feedback_fn("Shutting down instance %s" % instance.name)
14450 result = self.rpc.call_instance_shutdown(src_node, instance,
14451 self.op.shutdown_timeout,
14453 # TODO: Maybe ignore failures if ignore_remove_failures is set
14454 result.Raise("Could not shutdown instance %s on"
14455 " node %s" % (instance.name, src_node))
14457 # set the disks ID correctly since call_instance_start needs the
14458 # correct drbd minor to create the symlinks
14459 for disk in instance.disks:
14460 self.cfg.SetDiskID(disk, src_node)
14462 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14465 # Activate the instance disks if we're exporting a stopped instance
14466 feedback_fn("Activating disks for %s" % instance.name)
14467 _StartInstanceDisks(self, instance, None)
14470 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14473 helper.CreateSnapshots()
14475 if (self.op.shutdown and
14476 instance.admin_state == constants.ADMINST_UP and
14477 not self.op.remove_instance):
14478 assert not activate_disks
14479 feedback_fn("Starting instance %s" % instance.name)
14480 result = self.rpc.call_instance_start(src_node,
14481 (instance, None, None), False,
14483 msg = result.fail_msg
14485 feedback_fn("Failed to start instance: %s" % msg)
14486 _ShutdownInstanceDisks(self, instance)
14487 raise errors.OpExecError("Could not start instance: %s" % msg)
14489 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14490 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14491 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14492 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14493 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14495 (key_name, _, _) = self.x509_key_name
14498 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14501 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14502 key_name, dest_ca_pem,
14507 # Check for backwards compatibility
14508 assert len(dresults) == len(instance.disks)
14509 assert compat.all(isinstance(i, bool) for i in dresults), \
14510 "Not all results are boolean: %r" % dresults
14514 feedback_fn("Deactivating disks for %s" % instance.name)
14515 _ShutdownInstanceDisks(self, instance)
14517 if not (compat.all(dresults) and fin_resu):
14520 failures.append("export finalization")
14521 if not compat.all(dresults):
14522 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14524 failures.append("disk export: disk(s) %s" % fdsk)
14526 raise errors.OpExecError("Export failed, errors in %s" %
14527 utils.CommaJoin(failures))
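# Minimal standalone sketch of the failure aggregation above (not part of this
# LU); fin_resu and dresults are stand-ins for the values returned by
# LocalExport/RemoteExport.
def _sketch_export_failures(fin_resu, dresults):
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  bad_disks = [str(idx) for (idx, ok) in enumerate(dresults) if not ok]
  if bad_disks:
    failures.append("disk export: disk(s) %s" % ", ".join(bad_disks))
  return failures

# Example: _sketch_export_failures(True, [True, False])
#          -> ["disk export: disk(s) 1"]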
14529 # At this point, the export was successful, we can cleanup/finish
14531 # Remove instance if requested
14532 if self.op.remove_instance:
14533 feedback_fn("Removing instance %s" % instance.name)
14534 _RemoveInstance(self, feedback_fn, instance,
14535 self.op.ignore_remove_failures)
14537 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14538 self._CleanupExports(feedback_fn)
14540 return fin_resu, dresults
14543 class LUBackupRemove(NoHooksLU):
14544 """Remove exports related to the named instance.
14549 def ExpandNames(self):
14550 self.needed_locks = {
14551 # We need all nodes to be locked in order for RemoveExport to work, but
14552 # we don't need to lock the instance itself, as nothing will happen to it
14553 # (and we can remove exports also for a removed instance)
14554 locking.LEVEL_NODE: locking.ALL_SET,
14556 # Removing backups is quick, so blocking allocations is justified
14557 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14560 # Allocations should be stopped while this LU runs with node locks, but it
14561 # doesn't have to be exclusive
14562 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14564 def Exec(self, feedback_fn):
14565 """Remove any export.
14568 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14569 # If the instance was not found we'll try with the name that was passed in.
14570 # This will only work if it was an FQDN, though.
14572 if not instance_name:
14574 instance_name = self.op.instance_name
14576 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14577 exportlist = self.rpc.call_export_list(locked_nodes)
14579 for node in exportlist:
14580 msg = exportlist[node].fail_msg
14582 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14584 if instance_name in exportlist[node].payload:
14586 result = self.rpc.call_export_remove(node, instance_name)
14587 msg = result.fail_msg
14589 logging.error("Could not remove export for instance %s"
14590 " on node %s: %s", instance_name, node, msg)
14592 if fqdn_warn and not found:
14593 feedback_fn("Export not found. If trying to remove an export belonging"
14594 " to a deleted instance please use its Fully Qualified"
14598 class LUGroupAdd(LogicalUnit):
14599 """Logical unit for creating node groups.
14602 HPATH = "group-add"
14603 HTYPE = constants.HTYPE_GROUP
14606 def ExpandNames(self):
14607 # We need the new group's UUID here so that we can create and acquire the
14608 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14609 # that it should not check whether the UUID exists in the configuration.
14610 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14611 self.needed_locks = {}
14612 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14614 def CheckPrereq(self):
14615 """Check prerequisites.
14617 This checks that the given group name is not an existing node group
14622 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14623 except errors.OpPrereqError:
14626 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14627 " node group (UUID: %s)" %
14628 (self.op.group_name, existing_uuid),
14629 errors.ECODE_EXISTS)
14631 if self.op.ndparams:
14632 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14634 if self.op.hv_state:
14635 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14637 self.new_hv_state = None
14639 if self.op.disk_state:
14640 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14642 self.new_disk_state = None
14644 if self.op.diskparams:
14645 for templ in constants.DISK_TEMPLATES:
14646 if templ in self.op.diskparams:
14647 utils.ForceDictType(self.op.diskparams[templ],
14648 constants.DISK_DT_TYPES)
14649 self.new_diskparams = self.op.diskparams
14651 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14652 except errors.OpPrereqError, err:
14653 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14654 errors.ECODE_INVAL)
14656 self.new_diskparams = {}
14658 if self.op.ipolicy:
14659 cluster = self.cfg.GetClusterInfo()
14660 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14662 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14663 except errors.ConfigurationError, err:
14664 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14665 errors.ECODE_INVAL)
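# Hedged sketch of the per-template validation done in CheckPrereq above: each
# template present in the request has its sub-dict type-checked against an
# allowed key/type map. The map and function below are hypothetical,
# standalone illustrations; the real maps live in constants.DISK_DT_TYPES and
# constants.DISK_DT_DEFAULTS.
_SKETCH_DT_TYPES = {"resync-rate": int, "stripes": int}  # hypothetical

def _sketch_check_diskparams(diskparams):
  for templ, params in diskparams.items():
    for key, value in params.items():
      expected = _SKETCH_DT_TYPES.get(key)
      if expected is None or not isinstance(value, expected):
        raise ValueError("invalid diskparam %s/%s=%r" % (templ, key, value))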
14667 def BuildHooksEnv(self):
14668 """Build hooks env.
14672 "GROUP_NAME": self.op.group_name,
14675 def BuildHooksNodes(self):
14676 """Build hooks nodes.
14679 mn = self.cfg.GetMasterNode()
14680 return ([mn], [mn])
14682 def Exec(self, feedback_fn):
14683 """Add the node group to the cluster.
14686 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14687 uuid=self.group_uuid,
14688 alloc_policy=self.op.alloc_policy,
14689 ndparams=self.op.ndparams,
14690 diskparams=self.new_diskparams,
14691 ipolicy=self.op.ipolicy,
14692 hv_state_static=self.new_hv_state,
14693 disk_state_static=self.new_disk_state)
14695 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14696 del self.remove_locks[locking.LEVEL_NODEGROUP]
14699 class LUGroupAssignNodes(NoHooksLU):
14700 """Logical unit for assigning nodes to groups.
14705 def ExpandNames(self):
14706 # These raise errors.OpPrereqError on their own:
14707 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14708 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14710 # We want to lock all the affected nodes and groups. We have readily
14711 # available the list of nodes, and the *destination* group. To gather the
14712 # list of "source" groups, we need to fetch node information later on.
14713 self.needed_locks = {
14714 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14715 locking.LEVEL_NODE: self.op.nodes,
14718 def DeclareLocks(self, level):
14719 if level == locking.LEVEL_NODEGROUP:
14720 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14722 # Try to get all affected nodes' groups without having the group or node
14723 # lock yet. Needs verification later in the code flow.
14724 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14726 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14728 def CheckPrereq(self):
14729 """Check prerequisites.
14732 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14733 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14734 frozenset(self.op.nodes))
14736 expected_locks = (set([self.group_uuid]) |
14737 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14738 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14739 if actual_locks != expected_locks:
14740 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14741 " current groups are '%s', used to be '%s'" %
14742 (utils.CommaJoin(expected_locks),
14743 utils.CommaJoin(actual_locks)))
14745 self.node_data = self.cfg.GetAllNodesInfo()
14746 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14747 instance_data = self.cfg.GetAllInstancesInfo()
14749 if self.group is None:
14750 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14751 (self.op.group_name, self.group_uuid))
14753 (new_splits, previous_splits) = \
14754 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14755 for node in self.op.nodes],
14756 self.node_data, instance_data)
14759 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14761 if not self.op.force:
14762 raise errors.OpExecError("The following instances get split by this"
14763 " change and --force was not given: %s" %
14766 self.LogWarning("This operation will split the following instances: %s",
14769 if previous_splits:
14770 self.LogWarning("In addition, these already-split instances continue"
14771 " to be split across groups: %s",
14772 utils.CommaJoin(utils.NiceSort(previous_splits)))
14774 def Exec(self, feedback_fn):
14775 """Assign nodes to a new group.
14778 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14780 self.cfg.AssignGroupNodes(mods)
14783 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14784 """Check for split instances after a node assignment.
14786 This method considers a series of node assignments as an atomic operation,
14787 and returns information about split instances after applying the set of
14790 In particular, it returns information about newly split instances, and
14791 instances that were already split, and remain so after the change.
14793 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14796 @type changes: list of (node_name, new_group_uuid) pairs.
14797 @param changes: list of node assignments to consider.
14798 @param node_data: a dict with data for all nodes
14799 @param instance_data: a dict with all instances to consider
14800 @rtype: a two-tuple
14801 @return: a list of instances that were previously okay and result split as a
14802 consequence of this change, and a list of instances that were previously
14803 split and this change does not fix.
14806 changed_nodes = dict((node, group) for node, group in changes
14807 if node_data[node].group != group)
14809 all_split_instances = set()
14810 previously_split_instances = set()
14812 def InstanceNodes(instance):
14813 return [instance.primary_node] + list(instance.secondary_nodes)
14815 for inst in instance_data.values():
14816 if inst.disk_template not in constants.DTS_INT_MIRROR:
14819 instance_nodes = InstanceNodes(inst)
14821 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14822 previously_split_instances.add(inst.name)
14824 if len(set(changed_nodes.get(node, node_data[node].group)
14825 for node in instance_nodes)) > 1:
14826 all_split_instances.add(inst.name)
14828 return (list(all_split_instances - previously_split_instances),
14829 list(previously_split_instances & all_split_instances))
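# Self-contained sketch of the split computation above with hypothetical data
# (not part of this LU): an instance is "split" when its nodes end up in more
# than one group once the proposed assignments are applied.
def _sketch_is_split(instance_nodes, node_to_group, changes):
  # changes: dict mapping node name -> new group for the proposed assignment
  groups = set(changes.get(node, node_to_group[node])
               for node in instance_nodes)
  return len(groups) > 1

# Example: moving only node1 to "group2" splits a mirrored instance that runs
# on node1+node2:
# _sketch_is_split(["node1", "node2"],
#                  {"node1": "group1", "node2": "group1"},
#                  {"node1": "group2"}) -> True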
14832 class _GroupQuery(_QueryBase):
14833 FIELDS = query.GROUP_FIELDS
14835 def ExpandNames(self, lu):
14836 lu.needed_locks = {}
14838 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14839 self._cluster = lu.cfg.GetClusterInfo()
14840 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14843 self.wanted = [name_to_uuid[name]
14844 for name in utils.NiceSort(name_to_uuid.keys())]
14846 # Accept names to be either names or UUIDs.
14849 all_uuid = frozenset(self._all_groups.keys())
14851 for name in self.names:
14852 if name in all_uuid:
14853 self.wanted.append(name)
14854 elif name in name_to_uuid:
14855 self.wanted.append(name_to_uuid[name])
14857 missing.append(name)
14860 raise errors.OpPrereqError("Some groups do not exist: %s" %
14861 utils.CommaJoin(missing),
14862 errors.ECODE_NOENT)
14864 def DeclareLocks(self, lu, level):
14867 def _GetQueryData(self, lu):
14868 """Computes the list of node groups and their attributes.
14871 do_nodes = query.GQ_NODE in self.requested_data
14872 do_instances = query.GQ_INST in self.requested_data
14874 group_to_nodes = None
14875 group_to_instances = None
14877 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14878 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14879 # latter GetAllInstancesInfo() is not enough, for we have to go through
14880 # instance->node. Hence, we will need to process nodes even if we only need
14881 # instance information.
14882 if do_nodes or do_instances:
14883 all_nodes = lu.cfg.GetAllNodesInfo()
14884 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14887 for node in all_nodes.values():
14888 if node.group in group_to_nodes:
14889 group_to_nodes[node.group].append(node.name)
14890 node_to_group[node.name] = node.group
14893 all_instances = lu.cfg.GetAllInstancesInfo()
14894 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14896 for instance in all_instances.values():
14897 node = instance.primary_node
14898 if node in node_to_group:
14899 group_to_instances[node_to_group[node]].append(instance.name)
14902 # Do not pass on node information if it was not requested.
14903 group_to_nodes = None
14905 return query.GroupQueryData(self._cluster,
14906 [self._all_groups[uuid]
14907 for uuid in self.wanted],
14908 group_to_nodes, group_to_instances,
14909 query.GQ_DISKPARAMS in self.requested_data)
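# Standalone sketch of the two mappings built above (hypothetical data, not
# part of the query class): nodes are grouped by their group UUID, and
# instances are attributed to the group of their primary node.
def _sketch_group_maps(nodes, instances):
  # nodes: dict node name -> group uuid; instances: dict name -> primary node
  group_to_nodes = {}
  for name, group in nodes.items():
    group_to_nodes.setdefault(group, []).append(name)
  group_to_instances = {}
  for iname, pnode in instances.items():
    group_to_instances.setdefault(nodes[pnode], []).append(iname)
  return group_to_nodes, group_to_instances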
14912 class LUGroupQuery(NoHooksLU):
14913 """Logical unit for querying node groups.
14918 def CheckArguments(self):
14919 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14920 self.op.output_fields, False)
14922 def ExpandNames(self):
14923 self.gq.ExpandNames(self)
14925 def DeclareLocks(self, level):
14926 self.gq.DeclareLocks(self, level)
14928 def Exec(self, feedback_fn):
14929 return self.gq.OldStyleQuery(self)
14932 class LUGroupSetParams(LogicalUnit):
14933 """Modifies the parameters of a node group.
14936 HPATH = "group-modify"
14937 HTYPE = constants.HTYPE_GROUP
14940 def CheckArguments(self):
14943 self.op.diskparams,
14944 self.op.alloc_policy,
14946 self.op.disk_state,
14950 if all_changes.count(None) == len(all_changes):
14951 raise errors.OpPrereqError("Please pass at least one modification",
14952 errors.ECODE_INVAL)
14954 def ExpandNames(self):
14955 # This raises errors.OpPrereqError on its own:
14956 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14958 self.needed_locks = {
14959 locking.LEVEL_INSTANCE: [],
14960 locking.LEVEL_NODEGROUP: [self.group_uuid],
14963 self.share_locks[locking.LEVEL_INSTANCE] = 1
14965 def DeclareLocks(self, level):
14966 if level == locking.LEVEL_INSTANCE:
14967 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14969 # Lock instances optimistically, needs verification once group lock has
14971 self.needed_locks[locking.LEVEL_INSTANCE] = \
14972 self.cfg.GetNodeGroupInstances(self.group_uuid)
14975 def _UpdateAndVerifyDiskParams(old, new):
14976 """Updates and verifies disk parameters.
14979 new_params = _GetUpdatedParams(old, new)
14980 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14983 def CheckPrereq(self):
14984 """Check prerequisites.
14987 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14989 # Check if locked instances are still correct
14990 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14992 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14993 cluster = self.cfg.GetClusterInfo()
14995 if self.group is None:
14996 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14997 (self.op.group_name, self.group_uuid))
14999 if self.op.ndparams:
15000 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15001 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15002 self.new_ndparams = new_ndparams
15004 if self.op.diskparams:
15005 diskparams = self.group.diskparams
15006 uavdp = self._UpdateAndVerifyDiskParams
15007 # For each disktemplate subdict update and verify the values
15008 new_diskparams = dict((dt,
15009 uavdp(diskparams.get(dt, {}),
15010 self.op.diskparams[dt]))
15011 for dt in constants.DISK_TEMPLATES
15012 if dt in self.op.diskparams)
15013 # Now that all diskparams subdicts are ready, merge the updated
15014 # subdicts into the full dict
15015 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15017 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15018 except errors.OpPrereqError, err:
15019 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15020 errors.ECODE_INVAL)
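# Hedged sketch of the update-then-merge pattern above (standalone, not part
# of this LU): each requested template sub-dict is overlaid on the existing
# one, and the merged result replaces only the templates that were actually
# modified. Plain dicts stand in for _GetUpdatedParams/objects.FillDict, so
# special markers such as "default" values are ignored here.
def _sketch_merge_diskparams(old, updates):
  merged = dict(old)
  for templ, params in updates.items():
    new_params = dict(old.get(templ, {}))
    new_params.update(params)
    merged[templ] = new_params
  return merged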
15022 if self.op.hv_state:
15023 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15024 self.group.hv_state_static)
15026 if self.op.disk_state:
15027 self.new_disk_state = \
15028 _MergeAndVerifyDiskState(self.op.disk_state,
15029 self.group.disk_state_static)
15031 if self.op.ipolicy:
15032 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15036 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15037 inst_filter = lambda inst: inst.name in owned_instances
15038 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15039 gmi = ganeti.masterd.instance
15041 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15043 new_ipolicy, instances, self.cfg)
15046 self.LogWarning("After the ipolicy change the following instances"
15047 " violate them: %s",
15048 utils.CommaJoin(violations))
15050 def BuildHooksEnv(self):
15051 """Build hooks env.
15055 "GROUP_NAME": self.op.group_name,
15056 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15059 def BuildHooksNodes(self):
15060 """Build hooks nodes.
15063 mn = self.cfg.GetMasterNode()
15064 return ([mn], [mn])
15066 def Exec(self, feedback_fn):
15067 """Modifies the node group.
15072 if self.op.ndparams:
15073 self.group.ndparams = self.new_ndparams
15074 result.append(("ndparams", str(self.group.ndparams)))
15076 if self.op.diskparams:
15077 self.group.diskparams = self.new_diskparams
15078 result.append(("diskparams", str(self.group.diskparams)))
15080 if self.op.alloc_policy:
15081 self.group.alloc_policy = self.op.alloc_policy
15083 if self.op.hv_state:
15084 self.group.hv_state_static = self.new_hv_state
15086 if self.op.disk_state:
15087 self.group.disk_state_static = self.new_disk_state
15089 if self.op.ipolicy:
15090 self.group.ipolicy = self.new_ipolicy
15092 self.cfg.Update(self.group, feedback_fn)
15096 class LUGroupRemove(LogicalUnit):
15097 HPATH = "group-remove"
15098 HTYPE = constants.HTYPE_GROUP
15101 def ExpandNames(self):
15102 # This will raise errors.OpPrereqError on its own:
15103 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15104 self.needed_locks = {
15105 locking.LEVEL_NODEGROUP: [self.group_uuid],
15108 def CheckPrereq(self):
15109 """Check prerequisites.
15111 This checks that the given group name exists as a node group, that it is
15112 empty (i.e., contains no nodes), and that it is not the last group in the cluster.
15116 # Verify that the group is empty.
15117 group_nodes = [node.name
15118 for node in self.cfg.GetAllNodesInfo().values()
15119 if node.group == self.group_uuid]
15122 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15124 (self.op.group_name,
15125 utils.CommaJoin(utils.NiceSort(group_nodes))),
15126 errors.ECODE_STATE)
15128 # Verify the cluster would not be left group-less.
15129 if len(self.cfg.GetNodeGroupList()) == 1:
15130 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15131 " removed" % self.op.group_name,
15132 errors.ECODE_STATE)
15134 def BuildHooksEnv(self):
15135 """Build hooks env.
15139 "GROUP_NAME": self.op.group_name,
15142 def BuildHooksNodes(self):
15143 """Build hooks nodes.
15146 mn = self.cfg.GetMasterNode()
15147 return ([mn], [mn])
15149 def Exec(self, feedback_fn):
15150 """Remove the node group.
15154 self.cfg.RemoveNodeGroup(self.group_uuid)
15155 except errors.ConfigurationError:
15156 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15157 (self.op.group_name, self.group_uuid))
15159 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15162 class LUGroupRename(LogicalUnit):
15163 HPATH = "group-rename"
15164 HTYPE = constants.HTYPE_GROUP
15167 def ExpandNames(self):
15168 # This raises errors.OpPrereqError on its own:
15169 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15171 self.needed_locks = {
15172 locking.LEVEL_NODEGROUP: [self.group_uuid],
15175 def CheckPrereq(self):
15176 """Check prerequisites.
15178 Ensures requested new name is not yet used.
15182 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15183 except errors.OpPrereqError:
15186 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15187 " node group (UUID: %s)" %
15188 (self.op.new_name, new_name_uuid),
15189 errors.ECODE_EXISTS)
15191 def BuildHooksEnv(self):
15192 """Build hooks env.
15196 "OLD_NAME": self.op.group_name,
15197 "NEW_NAME": self.op.new_name,
15200 def BuildHooksNodes(self):
15201 """Build hooks nodes.
15204 mn = self.cfg.GetMasterNode()
15206 all_nodes = self.cfg.GetAllNodesInfo()
15207 all_nodes.pop(mn, None)
15210 run_nodes.extend(node.name for node in all_nodes.values()
15211 if node.group == self.group_uuid)
15213 return (run_nodes, run_nodes)
15215 def Exec(self, feedback_fn):
15216 """Rename the node group.
15219 group = self.cfg.GetNodeGroup(self.group_uuid)
15222 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15223 (self.op.group_name, self.group_uuid))
15225 group.name = self.op.new_name
15226 self.cfg.Update(group, feedback_fn)
15228 return self.op.new_name
15231 class LUGroupEvacuate(LogicalUnit):
15232 HPATH = "group-evacuate"
15233 HTYPE = constants.HTYPE_GROUP
15236 def ExpandNames(self):
15237 # This raises errors.OpPrereqError on its own:
15238 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15240 if self.op.target_groups:
15241 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15242 self.op.target_groups)
15244 self.req_target_uuids = []
15246 if self.group_uuid in self.req_target_uuids:
15247 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15248 " as a target group (targets are %s)" %
15250 utils.CommaJoin(self.req_target_uuids)),
15251 errors.ECODE_INVAL)
15253 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15255 self.share_locks = _ShareAll()
15256 self.needed_locks = {
15257 locking.LEVEL_INSTANCE: [],
15258 locking.LEVEL_NODEGROUP: [],
15259 locking.LEVEL_NODE: [],
15262 def DeclareLocks(self, level):
15263 if level == locking.LEVEL_INSTANCE:
15264 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15266 # Lock instances optimistically, needs verification once node and group
15267 # locks have been acquired
15268 self.needed_locks[locking.LEVEL_INSTANCE] = \
15269 self.cfg.GetNodeGroupInstances(self.group_uuid)
15271 elif level == locking.LEVEL_NODEGROUP:
15272 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15274 if self.req_target_uuids:
15275 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15277 # Lock all groups used by instances optimistically; this requires going
15278 # via the node before it's locked, requiring verification later on
15279 lock_groups.update(group_uuid
15280 for instance_name in
15281 self.owned_locks(locking.LEVEL_INSTANCE)
15283 self.cfg.GetInstanceNodeGroups(instance_name))
15285 # No target groups, need to lock all of them
15286 lock_groups = locking.ALL_SET
15288 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15290 elif level == locking.LEVEL_NODE:
15291 # This will only lock the nodes in the group to be evacuated which
15292 # contain actual instances
15293 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15294 self._LockInstancesNodes()
15296 # Lock all nodes in group to be evacuated and target groups
15297 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15298 assert self.group_uuid in owned_groups
15299 member_nodes = [node_name
15300 for group in owned_groups
15301 for node_name in self.cfg.GetNodeGroup(group).members]
15302 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
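# Hedged sketch of the optimistic-locking pattern used in DeclareLocks above
# (standalone, not part of this LU): the group set is derived from the
# configuration before the locks are actually held, so CheckPrereq has to
# re-verify it afterwards. The helper below is a hypothetical stand-in for
# checks such as _CheckInstancesNodeGroups.
def _sketch_recheck_groups(owned_groups, current_groups):
  missing = set(current_groups) - set(owned_groups)
  if missing:
    raise RuntimeError("node groups changed while acquiring locks: %s"
                       % ", ".join(sorted(missing)))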
15304 def CheckPrereq(self):
15305 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15306 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15307 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15309 assert owned_groups.issuperset(self.req_target_uuids)
15310 assert self.group_uuid in owned_groups
15312 # Check if locked instances are still correct
15313 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15315 # Get instance information
15316 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15318 # Check if node groups for locked instances are still correct
15319 _CheckInstancesNodeGroups(self.cfg, self.instances,
15320 owned_groups, owned_nodes, self.group_uuid)
15322 if self.req_target_uuids:
15323 # User requested specific target groups
15324 self.target_uuids = self.req_target_uuids
15326 # All groups except the one to be evacuated are potential targets
15327 self.target_uuids = [group_uuid for group_uuid in owned_groups
15328 if group_uuid != self.group_uuid]
15330 if not self.target_uuids:
15331 raise errors.OpPrereqError("There are no possible target groups",
15332 errors.ECODE_INVAL)
15334 def BuildHooksEnv(self):
15335 """Build hooks env.
15339 "GROUP_NAME": self.op.group_name,
15340 "TARGET_GROUPS": " ".join(self.target_uuids),
15343 def BuildHooksNodes(self):
15344 """Build hooks nodes.
15347 mn = self.cfg.GetMasterNode()
15349 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15351 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15353 return (run_nodes, run_nodes)
15355 def Exec(self, feedback_fn):
15356 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15358 assert self.group_uuid not in self.target_uuids
15360 req = iallocator.IAReqGroupChange(instances=instances,
15361 target_groups=self.target_uuids)
15362 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15364 ial.Run(self.op.iallocator)
15366 if not ial.success:
15367 raise errors.OpPrereqError("Can't compute group evacuation using"
15368 " iallocator '%s': %s" %
15369 (self.op.iallocator, ial.info),
15370 errors.ECODE_NORES)
15372 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15374 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15375 len(jobs), self.op.group_name)
15377 return ResultWithJobs(jobs)
15380 class TagsLU(NoHooksLU): # pylint: disable=W0223
15381 """Generic tags LU.
15383 This is an abstract class which is the parent of all the other tags LUs.
15386 def ExpandNames(self):
15387 self.group_uuid = None
15388 self.needed_locks = {}
15390 if self.op.kind == constants.TAG_NODE:
15391 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15392 lock_level = locking.LEVEL_NODE
15393 lock_name = self.op.name
15394 elif self.op.kind == constants.TAG_INSTANCE:
15395 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15396 lock_level = locking.LEVEL_INSTANCE
15397 lock_name = self.op.name
15398 elif self.op.kind == constants.TAG_NODEGROUP:
15399 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15400 lock_level = locking.LEVEL_NODEGROUP
15401 lock_name = self.group_uuid
15402 elif self.op.kind == constants.TAG_NETWORK:
15403 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15404 lock_level = locking.LEVEL_NETWORK
15405 lock_name = self.network_uuid
15410 if lock_level and getattr(self.op, "use_locking", True):
15411 self.needed_locks[lock_level] = lock_name
15413 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15414 # not possible to acquire the BGL based on opcode parameters)
15416 def CheckPrereq(self):
15417 """Check prerequisites.
15420 if self.op.kind == constants.TAG_CLUSTER:
15421 self.target = self.cfg.GetClusterInfo()
15422 elif self.op.kind == constants.TAG_NODE:
15423 self.target = self.cfg.GetNodeInfo(self.op.name)
15424 elif self.op.kind == constants.TAG_INSTANCE:
15425 self.target = self.cfg.GetInstanceInfo(self.op.name)
15426 elif self.op.kind == constants.TAG_NODEGROUP:
15427 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15428 elif self.op.kind == constants.TAG_NETWORK:
15429 self.target = self.cfg.GetNetwork(self.network_uuid)
15431 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15432 str(self.op.kind), errors.ECODE_INVAL)
15435 class LUTagsGet(TagsLU):
15436 """Returns the tags of a given object.
15441 def ExpandNames(self):
15442 TagsLU.ExpandNames(self)
15444 # Share locks as this is only a read operation
15445 self.share_locks = _ShareAll()
15447 def Exec(self, feedback_fn):
15448 """Returns the tag list.
15451 return list(self.target.GetTags())
15454 class LUTagsSearch(NoHooksLU):
15455 """Searches the tags for a given pattern.
15460 def ExpandNames(self):
15461 self.needed_locks = {}
15463 def CheckPrereq(self):
15464 """Check prerequisites.
15466 This checks the pattern passed for validity by compiling it.
15470 self.re = re.compile(self.op.pattern)
15471 except re.error, err:
15472 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15473 (self.op.pattern, err), errors.ECODE_INVAL)
15475 def Exec(self, feedback_fn):
15476 """Returns the tag list.
15480 tgts = [("/cluster", cfg.GetClusterInfo())]
15481 ilist = cfg.GetAllInstancesInfo().values()
15482 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15483 nlist = cfg.GetAllNodesInfo().values()
15484 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15485 tgts.extend(("/nodegroup/%s" % n.name, n)
15486 for n in cfg.GetAllNodeGroupsInfo().values())
15488 for path, target in tgts:
15489 for tag in target.GetTags():
15490 if self.re.search(tag):
15491 results.append((path, tag))
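# Minimal sketch of the search performed above, over a plain path -> tags
# mapping (hypothetical data, not part of this LU).
import re as _re_sketch

def _sketch_search_tags(pattern, tags_by_path):
  rx = _re_sketch.compile(pattern)
  return [(path, tag)
          for path, tags in sorted(tags_by_path.items())
          for tag in tags if rx.search(tag)]

# Example:
# _sketch_search_tags("^web", {"/instances/inst1": ["webserver", "prod"]})
# -> [("/instances/inst1", "webserver")]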
15495 class LUTagsSet(TagsLU):
15496 """Sets a tag on a given object.
15501 def CheckPrereq(self):
15502 """Check prerequisites.
15504 This checks the type and length of the tag name and value.
15507 TagsLU.CheckPrereq(self)
15508 for tag in self.op.tags:
15509 objects.TaggableObject.ValidateTag(tag)
15511 def Exec(self, feedback_fn):
15516 for tag in self.op.tags:
15517 self.target.AddTag(tag)
15518 except errors.TagError, err:
15519 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15520 self.cfg.Update(self.target, feedback_fn)
15523 class LUTagsDel(TagsLU):
15524 """Delete a list of tags from a given object.
15529 def CheckPrereq(self):
15530 """Check prerequisites.
15532 This checks that we have the given tag.
15535 TagsLU.CheckPrereq(self)
15536 for tag in self.op.tags:
15537 objects.TaggableObject.ValidateTag(tag)
15538 del_tags = frozenset(self.op.tags)
15539 cur_tags = self.target.GetTags()
15541 diff_tags = del_tags - cur_tags
15543 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15544 raise errors.OpPrereqError("Tag(s) %s not found" %
15545 (utils.CommaJoin(diff_names), ),
15546 errors.ECODE_NOENT)
15548 def Exec(self, feedback_fn):
15549 """Remove the tag from the object.
15552 for tag in self.op.tags:
15553 self.target.RemoveTag(tag)
15554 self.cfg.Update(self.target, feedback_fn)
15557 class LUTestDelay(NoHooksLU):
15558 """Sleep for a specified amount of time.
15560 This LU sleeps on the master and/or nodes for a specified amount of
15566 def ExpandNames(self):
15567 """Expand names and set required locks.
15569 This expands the node list, if any.
15572 self.needed_locks = {}
15573 if self.op.on_nodes:
15574 # _GetWantedNodes can be used here, but is not always appropriate to use
15575 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15576 # more information.
15577 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15578 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15580 def _TestDelay(self):
15581 """Do the actual sleep.
15584 if self.op.on_master:
15585 if not utils.TestDelay(self.op.duration):
15586 raise errors.OpExecError("Error during master delay test")
15587 if self.op.on_nodes:
15588 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15589 for node, node_result in result.items():
15590 node_result.Raise("Failure during rpc call to node %s" % node)
15592 def Exec(self, feedback_fn):
15593 """Execute the test delay opcode, with the wanted repetitions.
15596 if self.op.repeat == 0:
15599 top_value = self.op.repeat - 1
15600 for i in range(self.op.repeat):
15601 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15605 class LURestrictedCommand(NoHooksLU):
15606 """Logical unit for executing restricted commands.
15611 def ExpandNames(self):
15613 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15615 self.needed_locks = {
15616 locking.LEVEL_NODE: self.op.nodes,
15618 self.share_locks = {
15619 locking.LEVEL_NODE: not self.op.use_locking,
15622 def CheckPrereq(self):
15623 """Check prerequisites.
15627 def Exec(self, feedback_fn):
15628 """Execute restricted command and return output.
15631 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15633 # Check if correct locks are held
15634 assert set(self.op.nodes).issubset(owned_nodes)
15636 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15640 for node_name in self.op.nodes:
15641 nres = rpcres[node_name]
15643 msg = ("Command '%s' on node '%s' failed: %s" %
15644 (self.op.command, node_name, nres.fail_msg))
15645 result.append((False, msg))
15647 result.append((True, nres.payload))
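# Standalone sketch of the per-node result aggregation above (not part of this
# LU): each node yields either (False, error message) or (True, command
# output). The results dict below is a hypothetical stand-in for the RPC
# result objects and their fail_msg/payload attributes.
def _sketch_collect_results(node_names, results):
  out = []
  for name in node_names:
    err, payload = results[name]   # hypothetical (fail_msg, payload) pair
    if err:
      out.append((False, "Command on node '%s' failed: %s" % (name, err)))
    else:
      out.append((True, payload))
  return out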
15652 class LUTestJqueue(NoHooksLU):
15653 """Utility LU to test some aspects of the job queue.
15658 # Must be lower than default timeout for WaitForJobChange to see whether it
15659 # notices changed jobs
15660 _CLIENT_CONNECT_TIMEOUT = 20.0
15661 _CLIENT_CONFIRM_TIMEOUT = 60.0
15664 def _NotifyUsingSocket(cls, cb, errcls):
15665 """Opens a Unix socket and waits for another program to connect.
15668 @param cb: Callback to send socket name to client
15669 @type errcls: class
15670 @param errcls: Exception class to use for errors
15673 # Using a temporary directory as there's no easy way to create temporary
15674 # sockets without writing a custom loop around tempfile.mktemp and
15676 tmpdir = tempfile.mkdtemp()
15678 tmpsock = utils.PathJoin(tmpdir, "sock")
15680 logging.debug("Creating temporary socket at %s", tmpsock)
15681 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15686 # Send details to client
15689 # Wait for client to connect before continuing
15690 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15692 (conn, _) = sock.accept()
15693 except socket.error, err:
15694 raise errcls("Client didn't connect in time (%s)" % err)
15698 # Remove as soon as client is connected
15699 shutil.rmtree(tmpdir)
15701 # Wait for client to close
15704 # pylint: disable=E1101
15705 # Instance of '_socketobject' has no ... member
15706 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15708 except socket.error, err:
15709 raise errcls("Client failed to confirm notification (%s)" % err)
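# Hedged, self-contained sketch of the notification socket handling above (not
# part of this LU): create a socket in a fresh temporary directory, hand the
# path to the client, then wait (with a timeout) for it to connect. Error
# handling and cleanup are reduced to the essentials.
import os as _os_sketch
import shutil as _shutil_sketch
import socket as _socket_sketch
import tempfile as _tempfile_sketch

def _sketch_wait_for_client(notify_fn, timeout):
  tmpdir = _tempfile_sketch.mkdtemp()
  try:
    path = _os_sketch.path.join(tmpdir, "sock")
    sock = _socket_sketch.socket(_socket_sketch.AF_UNIX,
                                 _socket_sketch.SOCK_STREAM)
    sock.bind(path)
    sock.listen(1)
    notify_fn(path)              # tell the client where to connect
    sock.settimeout(timeout)
    (conn, _) = sock.accept()    # raises socket.timeout if nobody connects
    conn.close()
    sock.close()
  finally:
    _shutil_sketch.rmtree(tmpdir)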
15713 def _SendNotification(self, test, arg, sockname):
15714 """Sends a notification to the client.
15717 @param test: Test name
15718 @param arg: Test argument (depends on test)
15719 @type sockname: string
15720 @param sockname: Socket path
15723 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15725 def _Notify(self, prereq, test, arg):
15726 """Notifies the client of a test.
15729 @param prereq: Whether this is a prereq-phase test
15731 @param test: Test name
15732 @param arg: Test argument (depends on test)
15736 errcls = errors.OpPrereqError
15738 errcls = errors.OpExecError
15740 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15744 def CheckArguments(self):
15745 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15746 self.expandnames_calls = 0
15748 def ExpandNames(self):
15749 checkargs_calls = getattr(self, "checkargs_calls", 0)
15750 if checkargs_calls < 1:
15751 raise errors.ProgrammerError("CheckArguments was not called")
15753 self.expandnames_calls += 1
15755 if self.op.notify_waitlock:
15756 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15758 self.LogInfo("Expanding names")
15760 # Get lock on master node (just to get a lock, not for a particular reason)
15761 self.needed_locks = {
15762 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15765 def Exec(self, feedback_fn):
15766 if self.expandnames_calls < 1:
15767 raise errors.ProgrammerError("ExpandNames was not called")
15769 if self.op.notify_exec:
15770 self._Notify(False, constants.JQT_EXEC, None)
15772 self.LogInfo("Executing")
15774 if self.op.log_messages:
15775 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15776 for idx, msg in enumerate(self.op.log_messages):
15777 self.LogInfo("Sending log message %s", idx + 1)
15778 feedback_fn(constants.JQT_MSGPREFIX + msg)
15779 # Report how many test messages have been sent
15780 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15783 raise errors.OpExecError("Opcode failure was requested")
15788 class LUTestAllocator(NoHooksLU):
15789 """Run allocator tests.
15791 This LU runs the allocator tests
15794 def CheckPrereq(self):
15795 """Check prerequisites.
15797 This checks the opcode parameters depending on the direction and mode of the test.
15800 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15801 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15802 for attr in ["memory", "disks", "disk_template",
15803 "os", "tags", "nics", "vcpus"]:
15804 if not hasattr(self.op, attr):
15805 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15806 attr, errors.ECODE_INVAL)
15807 iname = self.cfg.ExpandInstanceName(self.op.name)
15808 if iname is not None:
15809 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15810 iname, errors.ECODE_EXISTS)
15811 if not isinstance(self.op.nics, list):
15812 raise errors.OpPrereqError("Invalid parameter 'nics'",
15813 errors.ECODE_INVAL)
15814 if not isinstance(self.op.disks, list):
15815 raise errors.OpPrereqError("Invalid parameter 'disks'",
15816 errors.ECODE_INVAL)
15817 for row in self.op.disks:
15818 if (not isinstance(row, dict) or
15819 constants.IDISK_SIZE not in row or
15820 not isinstance(row[constants.IDISK_SIZE], int) or
15821 constants.IDISK_MODE not in row or
15822 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15823 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15824 " parameter", errors.ECODE_INVAL)
15825 if self.op.hypervisor is None:
15826 self.op.hypervisor = self.cfg.GetHypervisorType()
15827 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15828 fname = _ExpandInstanceName(self.cfg, self.op.name)
15829 self.op.name = fname
15830 self.relocate_from = \
15831 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15832 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15833 constants.IALLOCATOR_MODE_NODE_EVAC):
15834 if not self.op.instances:
15835 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15836 self.op.instances = _GetWantedInstances(self, self.op.instances)
15838 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15839 self.op.mode, errors.ECODE_INVAL)
15841 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15842 if self.op.iallocator is None:
15843 raise errors.OpPrereqError("Missing allocator name",
15844 errors.ECODE_INVAL)
15845 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15846 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15847 self.op.direction, errors.ECODE_INVAL)
15849 def Exec(self, feedback_fn):
15850 """Run the allocator test.
15853 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15854 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15855 memory=self.op.memory,
15856 disks=self.op.disks,
15857 disk_template=self.op.disk_template,
15861 vcpus=self.op.vcpus,
15862 spindle_use=self.op.spindle_use,
15863 hypervisor=self.op.hypervisor,
15864 node_whitelist=None)
15865 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15866 req = iallocator.IAReqRelocate(name=self.op.name,
15867 relocate_from=list(self.relocate_from))
15868 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15869 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15870 target_groups=self.op.target_groups)
15871 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15872 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15873 evac_mode=self.op.evac_mode)
15874 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15875 disk_template = self.op.disk_template
15876 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15877 memory=self.op.memory,
15878 disks=self.op.disks,
15879 disk_template=disk_template,
15883 vcpus=self.op.vcpus,
15884 spindle_use=self.op.spindle_use,
15885 hypervisor=self.op.hypervisor)
15886 for idx in range(self.op.count)]
15887 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15889 raise errors.ProgrammerError("Unhandled mode %s in"
15890 " LUTestAllocator.Exec", self.op.mode)
15892 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15893 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15894 result = ial.in_text
15896 ial.Run(self.op.iallocator, validate=False)
15897 result = ial.out_text
15901 class LUNetworkAdd(LogicalUnit):
15902 """Logical unit for creating networks.
15905 HPATH = "network-add"
15906 HTYPE = constants.HTYPE_NETWORK
15909 def BuildHooksNodes(self):
15910 """Build hooks nodes.
15913 mn = self.cfg.GetMasterNode()
15914 return ([mn], [mn])
15916 def CheckArguments(self):
15917 if self.op.mac_prefix:
15918 self.op.mac_prefix = \
15919 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15921 def ExpandNames(self):
15922 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15924 if self.op.conflicts_check:
15925 self.share_locks[locking.LEVEL_NODE] = 1
15926 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
15927 self.needed_locks = {
15928 locking.LEVEL_NODE: locking.ALL_SET,
15929 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
15932 self.needed_locks = {}
15934 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15936 def CheckPrereq(self):
15937 if self.op.network is None:
15938 raise errors.OpPrereqError("Network must be given",
15939 errors.ECODE_INVAL)
15942 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
15943 except errors.OpPrereqError:
15946 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
15947 " network (UUID: %s)" %
15948 (self.op.network_name, existing_uuid),
15949 errors.ECODE_EXISTS)
15951 # Check tag validity
15952 for tag in self.op.tags:
15953 objects.TaggableObject.ValidateTag(tag)
15955 def BuildHooksEnv(self):
15956 """Build hooks env.
15960 "name": self.op.network_name,
15961 "subnet": self.op.network,
15962 "gateway": self.op.gateway,
15963 "network6": self.op.network6,
15964 "gateway6": self.op.gateway6,
15965 "mac_prefix": self.op.mac_prefix,
15966 "tags": self.op.tags,
15968 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15970 def Exec(self, feedback_fn):
15971 """Add the ip pool to the cluster.
15974 nobj = objects.Network(name=self.op.network_name,
15975 network=self.op.network,
15976 gateway=self.op.gateway,
15977 network6=self.op.network6,
15978 gateway6=self.op.gateway6,
15979 mac_prefix=self.op.mac_prefix,
15980 uuid=self.network_uuid)
15981 # Initialize the associated address pool
15983 pool = network.AddressPool.InitializeNetwork(nobj)
15984 except errors.AddressPoolError, err:
15985 raise errors.OpExecError("Cannot create IP address pool for network"
15986 " '%s': %s" % (self.op.network_name, err))
15988 # Check if we need to reserve the nodes and the cluster master IP
15989 # These may not be allocated to any instances in routed mode, as
15990 # they wouldn't function anyway.
15991 if self.op.conflicts_check:
15992 for node in self.cfg.GetAllNodesInfo().values():
15993 for ip in [node.primary_ip, node.secondary_ip]:
15995 if pool.Contains(ip):
15997 self.LogInfo("Reserved IP address of node '%s' (%s)",
15999 except errors.AddressPoolError, err:
16000 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
16001 ip, node.name, err)
16003 master_ip = self.cfg.GetClusterInfo().master_ip
16005 if pool.Contains(master_ip):
16006 pool.Reserve(master_ip)
16007 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16008 except errors.AddressPoolError, err:
16009 self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
16012 if self.op.add_reserved_ips:
16013 for ip in self.op.add_reserved_ips:
16015 pool.Reserve(ip, external=True)
16016 except errors.AddressPoolError, err:
16017 raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
16021 for tag in self.op.tags:
16024 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16025 del self.remove_locks[locking.LEVEL_NETWORK]
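# Hedged sketch of the reservation loop above with a toy pool (standalone, not
# part of this LU): only addresses contained in the network are reserved, and
# conflicts are reported rather than aborting the operation. A real
# network.AddressPool tracks a bitmap of the subnet; this one just uses sets
# of hypothetical addresses.
class _SketchPool(object):
  def __init__(self, contained):
    self._contained = set(contained)   # addresses belonging to the subnet
    self.reserved = set()

  def Contains(self, ip):
    return ip in self._contained

  def Reserve(self, ip):
    if ip in self.reserved:
      raise ValueError("address %s already reserved" % ip)
    self.reserved.add(ip)

# pool = _SketchPool(["192.0.2.10", "192.0.2.11"])
# for ip in ["192.0.2.10", "198.51.100.7"]:
#   if pool.Contains(ip):
#     pool.Reserve(ip)   # 198.51.100.7 is outside the pool and skipped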
16028 class LUNetworkRemove(LogicalUnit):
16029 HPATH = "network-remove"
16030 HTYPE = constants.HTYPE_NETWORK
16033 def ExpandNames(self):
16034 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16036 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16037 self.needed_locks = {
16038 locking.LEVEL_NETWORK: [self.network_uuid],
16039 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16042 def CheckPrereq(self):
16043 """Check prerequisites.
16045 This checks that the given network name exists as a network and that it
16046 is not connected to any node group.
16050 # Verify that the network is not connected.
16051 node_groups = [group.name
16052 for group in self.cfg.GetAllNodeGroupsInfo().values()
16053 if self.network_uuid in group.networks]
16056 self.LogWarning("Network '%s' is connected to the following"
16057 " node groups: %s" %
16058 (self.op.network_name,
16059 utils.CommaJoin(utils.NiceSort(node_groups))))
16060 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16062 def BuildHooksEnv(self):
16063 """Build hooks env.
16067 "NETWORK_NAME": self.op.network_name,
16070 def BuildHooksNodes(self):
16071 """Build hooks nodes.
16074 mn = self.cfg.GetMasterNode()
16075 return ([mn], [mn])
16077 def Exec(self, feedback_fn):
16078 """Remove the network.
16082 self.cfg.RemoveNetwork(self.network_uuid)
16083 except errors.ConfigurationError:
16084 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16085 (self.op.network_name, self.network_uuid))
16088 class LUNetworkSetParams(LogicalUnit):
16089 """Modifies the parameters of a network.
16092 HPATH = "network-modify"
16093 HTYPE = constants.HTYPE_NETWORK
16096 def CheckArguments(self):
16097 if (self.op.gateway and
16098 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16099 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16100 " at once", errors.ECODE_INVAL)
16102 def ExpandNames(self):
16103 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16105 self.needed_locks = {
16106 locking.LEVEL_NETWORK: [self.network_uuid],
16109 def CheckPrereq(self):
16110 """Check prerequisites.
16113 self.network = self.cfg.GetNetwork(self.network_uuid)
16114 self.gateway = self.network.gateway
16115 self.mac_prefix = self.network.mac_prefix
16116 self.network6 = self.network.network6
16117 self.gateway6 = self.network.gateway6
16118 self.tags = self.network.tags
16120 self.pool = network.AddressPool(self.network)
16122 if self.op.gateway:
16123 if self.op.gateway == constants.VALUE_NONE:
16124 self.gateway = None
16126 self.gateway = self.op.gateway
16127 if self.pool.IsReserved(self.gateway):
16128 raise errors.OpPrereqError("Gateway IP address '%s' is already"
16129 " reserved" % self.gateway,
16130 errors.ECODE_STATE)
16132 if self.op.mac_prefix:
16133 if self.op.mac_prefix == constants.VALUE_NONE:
16134 self.mac_prefix = None
16136 self.mac_prefix = \
16137 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16139 if self.op.gateway6:
16140 if self.op.gateway6 == constants.VALUE_NONE:
16141 self.gateway6 = None
16143 self.gateway6 = self.op.gateway6
16145 if self.op.network6:
16146 if self.op.network6 == constants.VALUE_NONE:
16147 self.network6 = None
16149 self.network6 = self.op.network6
16151 def BuildHooksEnv(self):
16152 """Build hooks env.
16156 "name": self.op.network_name,
16157 "subnet": self.network.network,
16158 "gateway": self.gateway,
16159 "network6": self.network6,
16160 "gateway6": self.gateway6,
16161 "mac_prefix": self.mac_prefix,
16164 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16166 def BuildHooksNodes(self):
16167 """Build hooks nodes.
16170 mn = self.cfg.GetMasterNode()
16171 return ([mn], [mn])
16173 def Exec(self, feedback_fn):
16174 """Modifies the network.
16177 #TODO: reserve/release via temporary reservation manager
16178 # extend cfg.ReserveIp/ReleaseIp with the external flag
16179 if self.op.gateway:
16180 if self.gateway == self.network.gateway:
16181 self.LogWarning("Gateway is already %s", self.gateway)
16184 self.pool.Reserve(self.gateway, external=True)
16185 if self.network.gateway:
16186 self.pool.Release(self.network.gateway, external=True)
16187 self.network.gateway = self.gateway
16189 if self.op.add_reserved_ips:
16190 for ip in self.op.add_reserved_ips:
16192 if self.pool.IsReserved(ip):
16193 self.LogWarning("IP address %s is already reserved", ip)
16195 self.pool.Reserve(ip, external=True)
16196 except errors.AddressPoolError, err:
16197 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16199 if self.op.remove_reserved_ips:
16200 for ip in self.op.remove_reserved_ips:
16201 if ip == self.network.gateway:
16202 self.LogWarning("Cannot unreserve Gateway's IP")
16205 if not self.pool.IsReserved(ip):
16206 self.LogWarning("IP address %s is already unreserved", ip)
16208 self.pool.Release(ip, external=True)
16209 except errors.AddressPoolError, err:
16210 self.LogWarning("Cannot release IP address %s: %s", ip, err)
16212 if self.op.mac_prefix:
16213 self.network.mac_prefix = self.mac_prefix
16215 if self.op.network6:
16216 self.network.network6 = self.network6
16218 if self.op.gateway6:
16219 self.network.gateway6 = self.gateway6
16221 self.pool.Validate()
16223 self.cfg.Update(self.network, feedback_fn)
16226 class _NetworkQuery(_QueryBase):
16227 FIELDS = query.NETWORK_FIELDS
16229 def ExpandNames(self, lu):
16230 lu.needed_locks = {}
16231 lu.share_locks = _ShareAll()
16233 self.do_locking = self.use_locking
16235 all_networks = lu.cfg.GetAllNetworksInfo()
16236 name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
16242 for name in self.names:
16243 if name in name_to_uuid:
16244 self.wanted.append(name_to_uuid[name])
16246 missing.append(name)
16249 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16250 errors.ECODE_NOENT)
16252 self.wanted = locking.ALL_SET
16254 if self.do_locking:
16255 lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
16256 if query.NETQ_INST in self.requested_data:
16257 lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
16258 if query.NETQ_GROUP in self.requested_data:
16259 lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
16261 def DeclareLocks(self, lu, level):
16264 def _GetQueryData(self, lu):
16265 """Computes the list of networks and their attributes.
16268 all_networks = lu.cfg.GetAllNetworksInfo()
16270 network_uuids = self._GetNames(lu, all_networks.keys(),
16271 locking.LEVEL_NETWORK)
16273 do_instances = query.NETQ_INST in self.requested_data
16274 do_groups = query.NETQ_GROUP in self.requested_data
16276 network_to_instances = None
16277 network_to_groups = None
16279 # For NETQ_GROUP, we need to map network->[groups]
16281 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16282 network_to_groups = dict((uuid, []) for uuid in network_uuids)
16283 for _, group in all_groups.iteritems():
16284 for net_uuid in network_uuids:
16285 netparams = group.networks.get(net_uuid, None)
16287 info = (group.name, netparams[constants.NIC_MODE],
16288 netparams[constants.NIC_LINK])
16290 network_to_groups[net_uuid].append(info)
16293 all_instances = lu.cfg.GetAllInstancesInfo()
16294 network_to_instances = dict((uuid, []) for uuid in network_uuids)
16295 for instance in all_instances.values():
16296 for nic in instance.nics:
16297 if nic.network in network_uuids:
16298 network_to_instances[nic.network].append(instance.name)
16301 if query.NETQ_STATS in self.requested_data:
16304 self._GetStats(network.AddressPool(all_networks[uuid])))
16305 for uuid in network_uuids)
16309 return query.NetworkQueryData([all_networks[uuid]
16310 for uuid in network_uuids],
16312 network_to_instances,
16316 def _GetStats(pool):
16317 """Returns statistics for a network address pool.
16321 "free_count": pool.GetFreeCount(),
16322 "reserved_count": pool.GetReservedCount(),
16323 "map": pool.GetMap(),
16324 "external_reservations":
16325 utils.CommaJoin(pool.GetExternalReservations()),
16329 class LUNetworkQuery(NoHooksLU):
16330 """Logical unit for querying networks.
16335 def CheckArguments(self):
16336 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16337 self.op.output_fields, self.op.use_locking)
16339 def ExpandNames(self):
16340 self.nq.ExpandNames(self)
16342 def Exec(self, feedback_fn):
16343 return self.nq.OldStyleQuery(self)

class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    if self.op.conflicts_check:
      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True

    # check only if not already connected
    elif self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to", owned_instances)

  def Exec(self, feedback_fn):
    # Connect the network and update the group only if not already connected
    if not self.connected:
      self.group.networks[self.network_uuid] = self.netparams
      self.cfg.Update(self.group, feedback_fn)
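
# Summary of the connect flow: ExpandNames resolves the network and group
# names to UUIDs and takes shared instance locks, CheckPrereq validates the
# NIC parameters and (when conflicts_check is set) scans the owned instances'
# NICs for IP addresses that already fall into the network, and Exec simply
# records the netparams in group.networks and writes the configuration back.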

def _NetworkConflictCheck(lu, check_fn, action, instances):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found; remove/modify"
                               " the corresponding network interfaces",
                               errors.ECODE_STATE)

def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
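
# Illustrative example: for details == [(0, "192.0.2.11"), (2, "192.0.2.14")]
# the helper above returns "nic0/192.0.2.11, nic2/192.0.2.14".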

class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False

    # We need this check only if the network is actually connected
    else:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
                            "disconnect from", owned_instances)

  def Exec(self, feedback_fn):
    # Disconnect the network and update the group only if network is connected
    if self.connected:
      del self.group.networks[self.network_uuid]
      self.cfg.Update(self.group, feedback_fn)
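
# Note that a disconnect is refused by _NetworkConflictCheck while any NIC of
# an instance in the group still references the network, so the operation
# cannot silently strand configured interfaces.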

#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
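
# The assertion above ensures that every query resource reachable through an
# opcode (constants.QR_VIA_OP) has a matching implementation here, and that no
# implementation is registered for a resource opcodes cannot request.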

def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

def _CheckForConflictingIp(lu, ip, node):
  """Raise an error in case of a conflicting IP address.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("The requested IP address (%s) belongs to"
                                " network %s, but the target NIC does not." %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)
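
# Descriptive note: when no conflicting network is found the helper returns
# (None, None); callers in this module are mainly interested in its side
# effect, i.e. the OpPrereqError raised on a conflict.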