4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import compat
46 from ganeti import masterd
47 from ganeti import netutils
48 from ganeti import query
49 from ganeti import qlang
50 from ganeti import opcodes
52 from ganeti import rpc
53 from ganeti import pathutils
54 from ganeti import network
55 from ganeti.masterd import iallocator
57 from ganeti.cmdlib.base import ResultWithJobs, LogicalUnit, NoHooksLU, \
58 _QueryBase
59 from ganeti.cmdlib.common import _ExpandInstanceName, _ExpandItemName, \
60 _ExpandNodeName, _ShareAll, _CheckNodeGroupInstances, _GetWantedNodes, \
61 _GetWantedInstances, _RunPostHook, _RedistributeAncillaryFiles, \
62 _MergeAndVerifyHvState, _MergeAndVerifyDiskState, _GetUpdatedIPolicy, \
63 _ComputeNewInstanceViolations, _GetUpdatedParams, _CheckOSParams, \
64 _CheckHVParams, _AdjustCandidatePool, _CheckNodePVs, \
65 _ComputeIPolicyInstanceViolation, _AnnotateDiskParams, _SupportsOob, \
66 _ComputeIPolicySpecViolation
68 from ganeti.cmdlib.cluster import LUClusterActivateMasterIp, \
69 LUClusterDeactivateMasterIp, LUClusterConfigQuery, LUClusterDestroy, \
70 LUClusterPostInit, _ClusterQuery, LUClusterQuery, LUClusterRedistConf, \
71 LUClusterRename, LUClusterRepairDiskSizes, LUClusterSetParams, \
72 LUClusterVerify, LUClusterVerifyConfig, LUClusterVerifyGroup, \
73 LUClusterVerifyDisks
74 from ganeti.cmdlib.tags import LUTagsGet, LUTagsSearch, LUTagsSet, LUTagsDel
75 from ganeti.cmdlib.network import LUNetworkAdd, LUNetworkRemove, \
76 LUNetworkSetParams, _NetworkQuery, LUNetworkQuery, LUNetworkConnect, \
77 LUNetworkDisconnect
78 from ganeti.cmdlib.test import LUTestDelay, LUTestJqueue, LUTestAllocator
80 import ganeti.masterd.instance # pylint: disable=W0611
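# Groupings of instance admin states, intended for use as the req_states
# argument of _CheckInstanceState() below.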
84 INSTANCE_DOWN = [constants.ADMINST_DOWN]
85 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
86 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
88 #: Instance status in which an instance can be marked as offline/online
89 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
90 constants.ADMINST_OFFLINE,
91 ]))
94 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
95 cur_group_uuid=None):
96 """Checks if node groups for locked instances are still correct.
98 @type cfg: L{config.ConfigWriter}
99 @param cfg: Cluster configuration
100 @type instances: dict; string as key, L{objects.Instance} as value
101 @param instances: Dictionary, instance name as key, instance object as value
102 @type owned_groups: iterable of string
103 @param owned_groups: List of owned groups
104 @type owned_nodes: iterable of string
105 @param owned_nodes: List of owned nodes
106 @type cur_group_uuid: string or None
107 @param cur_group_uuid: Optional group UUID to check against instance's groups
110 for (name, inst) in instances.items():
111 assert owned_nodes.issuperset(inst.all_nodes), \
112 "Instance %s's nodes changed while we kept the lock" % name
114 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
116 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
117 "Instance %s has no node in group %s" % (name, cur_group_uuid)
120 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
121 primary_only=False):
122 """Checks if the owned node groups are still correct for an instance.
124 @type cfg: L{config.ConfigWriter}
125 @param cfg: The cluster configuration
126 @type instance_name: string
127 @param instance_name: Instance name
128 @type owned_groups: set or frozenset
129 @param owned_groups: List of currently owned node groups
130 @type primary_only: boolean
131 @param primary_only: Whether to check node groups for only the primary node
134 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
136 if not owned_groups.issuperset(inst_groups):
137 raise errors.OpPrereqError("Instance %s's node groups changed since"
138 " locks were acquired, current groups"
139 " are '%s', owning groups '%s'; retry the"
140 " operation" %
141 (instance_name,
142 utils.CommaJoin(inst_groups),
143 utils.CommaJoin(owned_groups)),
144 errors.ECODE_STATE)
149 def _IsExclusiveStorageEnabledNode(cfg, node):
150 """Whether exclusive_storage is in effect for the given node.
152 @type cfg: L{config.ConfigWriter}
153 @param cfg: The cluster configuration
154 @type node: L{objects.Node}
155 @param node: The node
157 @return: The effective value of exclusive_storage
160 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
163 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
164 """Whether exclusive_storage is in effect for the given node.
166 @type cfg: L{config.ConfigWriter}
167 @param cfg: The cluster configuration
168 @type nodename: string
169 @param nodename: The node
171 @return: The effective value of exclusive_storage
172 @raise errors.OpPrereqError: if no node exists with the given name
175 ni = cfg.GetNodeInfo(nodename)
176 if ni is None:
177 raise errors.OpPrereqError("Invalid node name %s" % nodename,
178 errors.ECODE_NOENT)
179 return _IsExclusiveStorageEnabledNode(cfg, ni)
182 def _CopyLockList(names):
183 """Makes a copy of a list of lock names.
185 Handles L{locking.ALL_SET} correctly.
188 if names == locking.ALL_SET:
189 return locking.ALL_SET
190 else:
191 return names[:]
194 def _ReleaseLocks(lu, level, names=None, keep=None):
195 """Releases locks owned by an LU.
197 @type lu: L{LogicalUnit}
198 @param level: Lock level
199 @type names: list or None
200 @param names: Names of locks to release
201 @type keep: list or None
202 @param keep: Names of locks to retain
205 assert not (keep is not None and names is not None), \
206 "Only one of the 'names' and the 'keep' parameters can be given"
208 if names is not None:
209 should_release = names.__contains__
211 should_release = lambda name: name not in keep
213 should_release = None
215 owned = lu.owned_locks(level)
217 # Not owning any lock at this level, do nothing
224 # Determine which locks to release
226 if should_release(name):
231 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
233 # Release just some locks
234 lu.glm.release(level, names=release)
236 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
239 lu.glm.release(level)
241 assert not lu.glm.is_owned(level), "No locks should be owned"
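# Usage sketch (illustrative, not taken from a specific LU): once most node
# locks are no longer needed, an LU can either keep only its instance's nodes
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.instance.all_nodes)
# or drop an explicit set of names
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=[node_name])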
244 def _MapInstanceDisksToNodes(instances):
245 """Creates a map from (node, volume) to instance name.
247 @type instances: list of L{objects.Instance}
248 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
251 return dict(((node, vol), inst.name)
252 for inst in instances
253 for (node, vols) in inst.MapLVsByNode().items()
254 for vol in vols)
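# The resulting mapping looks like (values illustrative):
#   {("node1.example.com", "xenvg/disk0"): "inst1", ...}
# i.e. (node name, volume name) keys mapped to the owning instance's name.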
257 def _CheckOutputFields(static, dynamic, selected):
258 """Checks whether all selected fields are valid.
260 @type static: L{utils.FieldSet}
261 @param static: static fields set
262 @type dynamic: L{utils.FieldSet}
263 @param dynamic: dynamic fields set
270 delta = f.NonMatching(selected)
272 raise errors.OpPrereqError("Unknown output fields selected: %s"
273 % ",".join(delta), errors.ECODE_INVAL)
276 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
277 """Make sure that none of the given paramters is global.
279 If a global parameter is found, an L{errors.OpPrereqError} exception is
280 raised. This is used to avoid setting global parameters for individual nodes.
282 @type params: dictionary
283 @param params: Parameters to check
284 @type glob_pars: dictionary
285 @param glob_pars: Forbidden parameters
287 @param kind: Kind of parameters (e.g. "node")
288 @type bad_levels: string
289 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
291 @type good_levels: string
292 @param good_levels: Level(s) at which the parameters are allowed (e.g.
296 used_globals = glob_pars.intersection(params)
298 msg = ("The following %s parameters are global and cannot"
299 " be customized at %s level, please modify them at"
301 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
302 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
305 def _CheckNodeOnline(lu, node, msg=None):
306 """Ensure that a given node is online.
308 @param lu: the LU on behalf of which we make the check
309 @param node: the node to check
310 @param msg: if passed, should be a message to replace the default one
311 @raise errors.OpPrereqError: if the node is offline
315 msg = "Can't use offline node"
316 if lu.cfg.GetNodeInfo(node).offline:
317 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
320 def _CheckNodeNotDrained(lu, node):
321 """Ensure that a given node is not drained.
323 @param lu: the LU on behalf of which we make the check
324 @param node: the node to check
325 @raise errors.OpPrereqError: if the node is drained
328 if lu.cfg.GetNodeInfo(node).drained:
329 raise errors.OpPrereqError("Can't use drained node %s" % node,
333 def _CheckNodeVmCapable(lu, node):
334 """Ensure that a given node is vm capable.
336 @param lu: the LU on behalf of which we make the check
337 @param node: the node to check
338 @raise errors.OpPrereqError: if the node is not vm capable
341 if not lu.cfg.GetNodeInfo(node).vm_capable:
342 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
346 def _CheckNodeHasOS(lu, node, os_name, force_variant):
347 """Ensure that a node supports a given OS.
349 @param lu: the LU on behalf of which we make the check
350 @param node: the node to check
351 @param os_name: the OS to query about
352 @param force_variant: whether to ignore variant errors
353 @raise errors.OpPrereqError: if the node is not supporting the OS
356 result = lu.rpc.call_os_get(node, os_name)
357 result.Raise("OS '%s' not in supported OS list for node %s" %
358 (os_name, node),
359 prereq=True, ecode=errors.ECODE_INVAL)
360 if not force_variant:
361 _CheckOSVariant(result.payload, os_name)
364 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
365 """Ensure that a node has the given secondary ip.
367 @type lu: L{LogicalUnit}
368 @param lu: the LU on behalf of which we make the check
370 @param node: the node to check
371 @type secondary_ip: string
372 @param secondary_ip: the ip to check
373 @type prereq: boolean
374 @param prereq: whether to throw a prerequisite or an execute error
375 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
376 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
379 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
380 result.Raise("Failure checking secondary ip on node %s" % node,
381 prereq=prereq, ecode=errors.ECODE_ENVIRON)
382 if not result.payload:
383 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
384 " please fix and re-run this command" % secondary_ip)
385 if prereq:
386 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
387 else:
388 raise errors.OpExecError(msg)
391 def _GetClusterDomainSecret():
392 """Reads the cluster domain secret.
395 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
399 def _CheckInstanceState(lu, instance, req_states, msg=None):
400 """Ensure that an instance is in one of the required states.
402 @param lu: the LU on behalf of which we make the check
403 @param instance: the instance to check
404 @param msg: if passed, should be a message to replace the default one
405 @raise errors.OpPrereqError: if the instance is not in the required state
409 msg = ("can't use instance from outside %s states" %
410 utils.CommaJoin(req_states))
411 if instance.admin_state not in req_states:
412 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
413 (instance.name, instance.admin_state, msg),
416 if constants.ADMINST_UP not in req_states:
417 pnode = instance.primary_node
418 if not lu.cfg.GetNodeInfo(pnode).offline:
419 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
420 ins_l.Raise("Can't contact node %s for instance information" % pnode,
421 prereq=True, ecode=errors.ECODE_ENVIRON)
422 if instance.name in ins_l.payload:
423 raise errors.OpPrereqError("Instance %s is running, %s" %
424 (instance.name, msg), errors.ECODE_STATE)
426 lu.LogWarning("Primary node offline, ignoring check that instance"
430 def _ComputeIPolicyInstanceSpecViolation(
431 ipolicy, instance_spec, disk_template,
432 _compute_fn=_ComputeIPolicySpecViolation):
433 """Compute if an instance spec meets the specs of the ipolicy.
436 @param ipolicy: The ipolicy to verify against
437 @type instance_spec: dict
438 @param instance_spec: The instance spec to verify
439 @type disk_template: string
440 @param disk_template: the disk template of the instance
441 @param _compute_fn: The function to verify ipolicy (unittest only)
442 @see: L{_ComputeIPolicySpecViolation}
445 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
446 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
447 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
448 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
449 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
450 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
452 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
453 disk_sizes, spindle_use, disk_template)
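# Illustrative call (values are examples only): checking a prospective spec
# against a policy before creating an instance:
#   spec = {constants.ISPEC_MEM_SIZE: 512, constants.ISPEC_CPU_COUNT: 1,
#           constants.ISPEC_DISK_COUNT: 1, constants.ISPEC_DISK_SIZE: [1024],
#           constants.ISPEC_NIC_COUNT: 1}
#   violations = _ComputeIPolicyInstanceSpecViolation(ipolicy, spec,
#                                                     constants.DT_PLAIN)
# An empty result means the spec satisfies the policy.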
456 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
458 _compute_fn=_ComputeIPolicyInstanceViolation):
459 """Compute if instance meets the specs of the new target group.
461 @param ipolicy: The ipolicy to verify
462 @param instance: The instance object to verify
463 @param current_group: The current group of the instance
464 @param target_group: The new group of the instance
465 @type cfg: L{config.ConfigWriter}
466 @param cfg: Cluster configuration
467 @param _compute_fn: The function to verify ipolicy (unittest only)
468 @see: L{_ComputeIPolicySpecViolation}
471 if current_group == target_group:
474 return _compute_fn(ipolicy, instance, cfg)
477 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
478 _compute_fn=_ComputeIPolicyNodeViolation):
479 """Checks that the target node is correct in terms of instance policy.
481 @param ipolicy: The ipolicy to verify
482 @param instance: The instance object to verify
483 @param node: The new node to relocate
484 @type cfg: L{config.ConfigWriter}
485 @param cfg: Cluster configuration
486 @param ignore: Ignore violations of the ipolicy
487 @param _compute_fn: The function to verify ipolicy (unittest only)
488 @see: L{_ComputeIPolicySpecViolation}
491 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
492 res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)
495 msg = ("Instance does not meet target node group's (%s) instance"
496 " policy: %s") % (node.group, utils.CommaJoin(res))
500 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
503 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
504 minmem, maxmem, vcpus, nics, disk_template, disks,
505 bep, hvp, hypervisor_name, tags):
506 """Builds instance related env variables for hooks
508 This builds the hook environment from individual variables.
511 @param name: the name of the instance
512 @type primary_node: string
513 @param primary_node: the name of the instance's primary node
514 @type secondary_nodes: list
515 @param secondary_nodes: list of secondary nodes as strings
516 @type os_type: string
517 @param os_type: the name of the instance's OS
519 @param status: the desired status of the instance
521 @param minmem: the minimum memory size of the instance
523 @param maxmem: the maximum memory size of the instance
525 @param vcpus: the count of VCPUs the instance has
527 @param nics: list of tuples (name, uuid, ip, mac, mode, link, net, netinfo)
528 representing the NICs the instance has
529 @type disk_template: string
530 @param disk_template: the disk template of the instance
532 @param disks: list of tuples (name, uuid, size, mode)
534 @param bep: the backend parameters for the instance
536 @param hvp: the hypervisor parameters for the instance
537 @type hypervisor_name: string
538 @param hypervisor_name: the hypervisor for the instance
540 @param tags: list of instance tags as strings
542 @return: the hook environment for this instance
547 "INSTANCE_NAME": name,
548 "INSTANCE_PRIMARY": primary_node,
549 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
550 "INSTANCE_OS_TYPE": os_type,
551 "INSTANCE_STATUS": status,
552 "INSTANCE_MINMEM": minmem,
553 "INSTANCE_MAXMEM": maxmem,
554 # TODO(2.9) remove deprecated "memory" value
555 "INSTANCE_MEMORY": maxmem,
556 "INSTANCE_VCPUS": vcpus,
557 "INSTANCE_DISK_TEMPLATE": disk_template,
558 "INSTANCE_HYPERVISOR": hypervisor_name,
561 nic_count = len(nics)
562 for idx, (name, _, ip, mac, mode, link, net, netinfo) in enumerate(nics):
565 env["INSTANCE_NIC%d_NAME" % idx] = name
566 env["INSTANCE_NIC%d_IP" % idx] = ip
567 env["INSTANCE_NIC%d_MAC" % idx] = mac
568 env["INSTANCE_NIC%d_MODE" % idx] = mode
569 env["INSTANCE_NIC%d_LINK" % idx] = link
571 nobj = objects.Network.FromDict(netinfo)
572 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
574 # FIXME: broken network reference: the instance NIC specifies a
575 # network, but the relevant network entry was not in the config. This
576 # should be made impossible.
577 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
578 if mode == constants.NIC_MODE_BRIDGED:
579 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
583 env["INSTANCE_NIC_COUNT"] = nic_count
586 disk_count = len(disks)
587 for idx, (name, size, mode) in enumerate(disks):
588 env["INSTANCE_DISK%d_NAME" % idx] = name
589 env["INSTANCE_DISK%d_SIZE" % idx] = size
590 env["INSTANCE_DISK%d_MODE" % idx] = mode
594 env["INSTANCE_DISK_COUNT"] = disk_count
599 env["INSTANCE_TAGS"] = " ".join(tags)
601 for source, kind in [(bep, "BE"), (hvp, "HV")]:
602 for key, value in source.items():
603 env["INSTANCE_%s_%s" % (kind, key)] = value
605 return env
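# The resulting variable names follow the patterns built above, e.g.
# INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC0_MAC, INSTANCE_DISK0_SIZE and
# INSTANCE_BE_<param>/INSTANCE_HV_<param> for backend/hypervisor parameters.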
608 def _NICToTuple(lu, nic):
609 """Build a tuple of nic information.
611 @type lu: L{LogicalUnit}
612 @param lu: the logical unit on whose behalf we execute
613 @type nic: L{objects.NIC}
614 @param nic: nic to convert to hooks tuple
617 cluster = lu.cfg.GetClusterInfo()
618 filled_params = cluster.SimpleFillNIC(nic.nicparams)
619 mode = filled_params[constants.NIC_MODE]
620 link = filled_params[constants.NIC_LINK]
623 nobj = lu.cfg.GetNetwork(nic.network)
624 netinfo = objects.Network.ToDict(nobj)
625 return (nic.name, nic.uuid, nic.ip, nic.mac, mode, link, nic.network, netinfo)
628 def _NICListToTuple(lu, nics):
629 """Build a list of nic information tuples.
631 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
632 value in LUInstanceQueryData.
634 @type lu: L{LogicalUnit}
635 @param lu: the logical unit on whose behalf we execute
636 @type nics: list of L{objects.NIC}
637 @param nics: list of nics to convert to hooks tuples
642 hooks_nics.append(_NICToTuple(lu, nic))
646 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
647 """Builds instance related env variables for hooks from an object.
649 @type lu: L{LogicalUnit}
650 @param lu: the logical unit on whose behalf we execute
651 @type instance: L{objects.Instance}
652 @param instance: the instance for which we should build the
655 @param override: dictionary with key/values that will override
658 @return: the hook environment dictionary
661 cluster = lu.cfg.GetClusterInfo()
662 bep = cluster.FillBE(instance)
663 hvp = cluster.FillHV(instance)
665 "name": instance.name,
666 "primary_node": instance.primary_node,
667 "secondary_nodes": instance.secondary_nodes,
668 "os_type": instance.os,
669 "status": instance.admin_state,
670 "maxmem": bep[constants.BE_MAXMEM],
671 "minmem": bep[constants.BE_MINMEM],
672 "vcpus": bep[constants.BE_VCPUS],
673 "nics": _NICListToTuple(lu, instance.nics),
674 "disk_template": instance.disk_template,
675 "disks": [(disk.name, disk.size, disk.mode)
676 for disk in instance.disks],
679 "hypervisor_name": instance.hypervisor,
680 "tags": instance.tags,
683 args.update(override)
684 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
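# Typical use from an LU's BuildHooksEnv() (illustrative sketch):
#   return _BuildInstanceHookEnvByObject(self, self.instance)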
687 def _DecideSelfPromotion(lu, exceptions=None):
688 """Decide whether I should promote myself as a master candidate.
691 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
692 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
693 # the new node will increase mc_max with one, so:
694 mc_should = min(mc_should + 1, cp_size)
695 return mc_now < mc_should
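# Worked example (illustrative numbers): with candidate_pool_size=10, mc_now=3
# and mc_should=4 reported by GetMasterCandidateStats(), the adjusted value is
# min(4 + 1, 10) = 5, so 3 < 5 and the node promotes itself.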
698 def _CheckNicsBridgesExist(lu, target_nics, target_node):
699 """Check that the bridges needed by a list of nics exist.
702 cluster = lu.cfg.GetClusterInfo()
703 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
704 brlist = [params[constants.NIC_LINK] for params in paramslist
705 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
707 result = lu.rpc.call_bridges_exist(target_node, brlist)
708 result.Raise("Error checking bridges on destination node '%s'" %
709 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
712 def _CheckInstanceBridgesExist(lu, instance, node=None):
713 """Check that the bridges needed by an instance exist.
717 node = instance.primary_node
718 _CheckNicsBridgesExist(lu, instance.nics, node)
721 def _CheckOSVariant(os_obj, name):
722 """Check whether an OS name conforms to the os variants specification.
724 @type os_obj: L{objects.OS}
725 @param os_obj: OS object to check
727 @param name: OS name passed by the user, to check for validity
730 variant = objects.OS.GetVariant(name)
731 if not os_obj.supported_variants:
733 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
734 " passed)" % (os_obj.name, variant),
738 raise errors.OpPrereqError("OS name must include a variant",
741 if variant not in os_obj.supported_variants:
742 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
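# Example (illustrative): for the name "debian-etch+default", GetVariant()
# yields "default", which must appear in os_obj.supported_variants; a bare
# "debian-etch" is rejected when the OS declares variants.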
745 def _GetNodeInstancesInner(cfg, fn):
746 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
749 def _GetNodeInstances(cfg, node_name):
750 """Returns a list of all primary and secondary instances on a node.
754 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
757 def _GetNodePrimaryInstances(cfg, node_name):
758 """Returns primary instances on a node.
761 return _GetNodeInstancesInner(cfg,
762 lambda inst: node_name == inst.primary_node)
765 def _GetNodeSecondaryInstances(cfg, node_name):
766 """Returns secondary instances on a node.
769 return _GetNodeInstancesInner(cfg,
770 lambda inst: node_name in inst.secondary_nodes)
773 def _GetStorageTypeArgs(cfg, storage_type):
774 """Returns the arguments for a storage type.
777 # Special case for file storage
778 if storage_type == constants.ST_FILE:
779 # storage.FileStorage wants a list of storage directories
780 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
785 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
788 for dev in instance.disks:
789 cfg.SetDiskID(dev, node_name)
791 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
793 result.Raise("Failed to get disk status from node %s" % node_name,
794 prereq=prereq, ecode=errors.ECODE_ENVIRON)
796 for idx, bdev_status in enumerate(result.payload):
797 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
803 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
804 """Check the sanity of iallocator and node arguments and use the
805 cluster-wide iallocator if appropriate.
807 Check that at most one of (iallocator, node) is specified. If none is
808 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
809 then the LU's opcode's iallocator slot is filled with the cluster-wide
812 @type iallocator_slot: string
813 @param iallocator_slot: the name of the opcode iallocator slot
814 @type node_slot: string
815 @param node_slot: the name of the opcode target node slot
818 node = getattr(lu.op, node_slot, None)
819 ialloc = getattr(lu.op, iallocator_slot, None)
823 if node is not None and ialloc is not None:
824 raise errors.OpPrereqError("Do not specify both, iallocator and node",
826 elif ((node is None and ialloc is None) or
827 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
828 default_iallocator = lu.cfg.GetDefaultIAllocator()
829 if default_iallocator:
830 setattr(lu.op, iallocator_slot, default_iallocator)
832 raise errors.OpPrereqError("No iallocator or node given and no"
833 " cluster-wide default iallocator found;"
834 " please specify either an iallocator or a"
835 " node, or set a cluster-wide default"
836 " iallocator", errors.ECODE_INVAL)
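# Behaviour summary (sketch of the cases above):
#   both node and iallocator given         -> OpPrereqError
#   neither given, or the default shortcut -> opcode's iallocator slot filled
#                                             with the cluster-wide default
#                                             (error if none is configured)
#   exactly one of the two given           -> left untouched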
839 def _GetDefaultIAllocator(cfg, ialloc):
840 """Decides on which iallocator to use.
842 @type cfg: L{config.ConfigWriter}
843 @param cfg: Cluster configuration object
844 @type ialloc: string or None
845 @param ialloc: Iallocator specified in opcode
847 @return: Iallocator name
851 # Use default iallocator
852 ialloc = cfg.GetDefaultIAllocator()
855 raise errors.OpPrereqError("No iallocator was specified, neither in the"
856 " opcode nor as a cluster-wide default",
862 def _CheckHostnameSane(lu, name):
863 """Ensures that a given hostname resolves to a 'sane' name.
865 The given name is required to be a prefix of the resolved hostname,
866 to prevent accidental mismatches.
868 @param lu: the logical unit on behalf of which we're checking
869 @param name: the name we should resolve and check
870 @return: the resolved hostname object
873 hostname = netutils.GetHostname(name=name)
874 if hostname.name != name:
875 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
876 if not utils.MatchNameComponent(name, [hostname.name]):
877 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
878 " same as given hostname '%s'") %
879 (hostname.name, name), errors.ECODE_INVAL)
880 return hostname
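# Example (illustrative): a request for "inst7" that resolves to
# "inst7.example.com" is accepted; a resolution to "other.example.com" raises
# OpPrereqError, since the given name must match a prefix component of the
# resolved hostname.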
883 class LUGroupVerifyDisks(NoHooksLU):
884 """Verifies the status of all disks in a node group.
889 def ExpandNames(self):
890 # Raises errors.OpPrereqError on its own if group can't be found
891 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
893 self.share_locks = _ShareAll()
894 self.needed_locks = {
895 locking.LEVEL_INSTANCE: [],
896 locking.LEVEL_NODEGROUP: [],
897 locking.LEVEL_NODE: [],
899 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
900 # starts one instance of this opcode for every group, which means all
901 # nodes will be locked for a short amount of time, so it's better to
902 # acquire the node allocation lock as well.
903 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
906 def DeclareLocks(self, level):
907 if level == locking.LEVEL_INSTANCE:
908 assert not self.needed_locks[locking.LEVEL_INSTANCE]
910 # Lock instances optimistically, needs verification once node and group
911 # locks have been acquired
912 self.needed_locks[locking.LEVEL_INSTANCE] = \
913 self.cfg.GetNodeGroupInstances(self.group_uuid)
915 elif level == locking.LEVEL_NODEGROUP:
916 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
918 self.needed_locks[locking.LEVEL_NODEGROUP] = \
919 set([self.group_uuid] +
920 # Lock all groups used by instances optimistically; this requires
921 # going via the node before it's locked, requiring verification
924 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
925 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
927 elif level == locking.LEVEL_NODE:
928 # This will only lock the nodes in the group to be verified which contain
929 # actual instances
930 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
931 self._LockInstancesNodes()
933 # Lock all nodes in group to be verified
934 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
935 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
936 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
938 def CheckPrereq(self):
939 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
940 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
941 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
943 assert self.group_uuid in owned_groups
945 # Check if locked instances are still correct
946 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
948 # Get instance information
949 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
951 # Check if node groups for locked instances are still correct
952 _CheckInstancesNodeGroups(self.cfg, self.instances,
953 owned_groups, owned_nodes, self.group_uuid)
955 def Exec(self, feedback_fn):
956 """Verify integrity of cluster disks.
958 @rtype: tuple of three items
959 @return: a tuple of (dict of node-to-node_error, list of instances
960 which need activate-disks, dict of instance: (node, volume) for
961 missing volumes)
965 res_instances = set()
968 nv_dict = _MapInstanceDisksToNodes(
969 [inst for inst in self.instances.values()
970 if inst.admin_state == constants.ADMINST_UP])
973 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
974 set(self.cfg.GetVmCapableNodeList()))
976 node_lvs = self.rpc.call_lv_list(nodes, [])
978 for (node, node_res) in node_lvs.items():
982 msg = node_res.fail_msg
984 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
985 res_nodes[node] = msg
988 for lv_name, (_, _, lv_online) in node_res.payload.items():
989 inst = nv_dict.pop((node, lv_name), None)
990 if not (lv_online or inst is None):
991 res_instances.add(inst)
993 # any leftover items in nv_dict are missing LVs, let's arrange the data
995 for key, inst in nv_dict.iteritems():
996 res_missing.setdefault(inst, []).append(list(key))
998 return (res_nodes, list(res_instances), res_missing)
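# Example return value (all names illustrative):
#   ({"node1": "rpc failure ..."},           # nodes that could not be queried
#    ["inst2"],                              # instances needing activate-disks
#    {"inst3": [["node2", "xenvg/disk1"]]})  # instances with missing volumes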
1001 def _WaitForSync(lu, instance, disks=None, oneshot=False):
1002 """Sleep and poll for an instance's disks to sync.
1005 if not instance.disks or disks is not None and not disks:
1008 disks = _ExpandCheckDisks(instance, disks)
1011 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
1013 node = instance.primary_node
1016 lu.cfg.SetDiskID(dev, node)
1018 # TODO: Convert to utils.Retry
1021 degr_retries = 10 # in seconds, as we sleep 1 second each time
1025 cumul_degraded = False
1026 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
1027 msg = rstats.fail_msg
1029 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1032 raise errors.RemoteError("Can't contact node %s for mirror data,"
1033 " aborting." % node)
1036 rstats = rstats.payload
1038 for i, mstat in enumerate(rstats):
1040 lu.LogWarning("Can't compute data for node %s/%s",
1041 node, disks[i].iv_name)
1044 cumul_degraded = (cumul_degraded or
1045 (mstat.is_degraded and mstat.sync_percent is None))
1046 if mstat.sync_percent is not None:
1048 if mstat.estimated_time is not None:
1049 rem_time = ("%s remaining (estimated)" %
1050 utils.FormatSeconds(mstat.estimated_time))
1051 max_time = mstat.estimated_time
1053 rem_time = "no time estimate"
1054 lu.LogInfo("- device %s: %5.2f%% done, %s",
1055 disks[i].iv_name, mstat.sync_percent, rem_time)
1057 # if we're done but degraded, let's do a few small retries, to
1058 # make sure we see a stable and not transient situation; therefore
1059 # we force restart of the loop
1060 if (done or oneshot) and cumul_degraded and degr_retries > 0:
1061 logging.info("Degraded disks found, %d retries left", degr_retries)
1069 time.sleep(min(60, max_time))
1072 lu.LogInfo("Instance %s's disks are in sync", instance.name)
1074 return not cumul_degraded
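# The boolean result is True when no disk was still degraded at the end of the
# wait loop, False otherwise.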
1077 def _BlockdevFind(lu, node, dev, instance):
1078 """Wrapper around call_blockdev_find to annotate diskparams.
1080 @param lu: A reference to the lu object
1081 @param node: The node to call out
1082 @param dev: The device to find
1083 @param instance: The instance object the device belongs to
1084 @return: The result of the rpc call
1087 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
1088 return lu.rpc.call_blockdev_find(node, disk)
1091 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
1092 """Wrapper around L{_CheckDiskConsistencyInner}.
1095 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
1096 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
1097 ldisk=ldisk)
1100 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
1102 """Check that mirrors are not degraded.
1104 @attention: The device has to be annotated already.
1106 The ldisk parameter, if True, will change the test from the
1107 is_degraded attribute (which represents overall non-ok status for
1108 the device(s)) to the ldisk (representing the local storage status).
1111 lu.cfg.SetDiskID(dev, node)
1115 if on_primary or dev.AssembleOnSecondary():
1116 rstats = lu.rpc.call_blockdev_find(node, dev)
1117 msg = rstats.fail_msg
1119 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1121 elif not rstats.payload:
1122 lu.LogWarning("Can't find disk on node %s", node)
1126 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
1128 result = result and not rstats.payload.is_degraded
1131 for child in dev.children:
1132 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
1138 class LUOobCommand(NoHooksLU):
1139 """Logical unit for OOB handling.
1143 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
1145 def ExpandNames(self):
1146 """Gather locks we need.
1149 if self.op.node_names:
1150 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
1151 lock_names = self.op.node_names
1153 lock_names = locking.ALL_SET
1155 self.needed_locks = {
1156 locking.LEVEL_NODE: lock_names,
1159 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
1161 if not self.op.node_names:
1162 # Acquire node allocation lock only if all nodes are affected
1163 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
1165 def CheckPrereq(self):
1166 """Check prerequisites.
1169 - the node exists in the configuration
1172 Any errors are signaled by raising errors.OpPrereqError.
1176 self.master_node = self.cfg.GetMasterNode()
1178 assert self.op.power_delay >= 0.0
1180 if self.op.node_names:
1181 if (self.op.command in self._SKIP_MASTER and
1182 self.master_node in self.op.node_names):
1183 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
1184 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
1186 if master_oob_handler:
1187 additional_text = ("run '%s %s %s' if you want to operate on the"
1188 " master regardless") % (master_oob_handler,
1192 additional_text = "it does not support out-of-band operations"
1194 raise errors.OpPrereqError(("Operating on the master node %s is not"
1195 " allowed for %s; %s") %
1196 (self.master_node, self.op.command,
1197 additional_text), errors.ECODE_INVAL)
1199 self.op.node_names = self.cfg.GetNodeList()
1200 if self.op.command in self._SKIP_MASTER:
1201 self.op.node_names.remove(self.master_node)
1203 if self.op.command in self._SKIP_MASTER:
1204 assert self.master_node not in self.op.node_names
1206 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
1208 raise errors.OpPrereqError("Node %s not found" % node_name,
1211 self.nodes.append(node)
1213 if (not self.op.ignore_status and
1214 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
1215 raise errors.OpPrereqError(("Cannot power off node %s because it is"
1216 " not marked offline") % node_name,
1219 def Exec(self, feedback_fn):
1220 """Execute OOB and return result if we expect any.
1223 master_node = self.master_node
1226 for idx, node in enumerate(utils.NiceSort(self.nodes,
1227 key=lambda node: node.name)):
1228 node_entry = [(constants.RS_NORMAL, node.name)]
1229 ret.append(node_entry)
1231 oob_program = _SupportsOob(self.cfg, node)
1234 node_entry.append((constants.RS_UNAVAIL, None))
1237 logging.info("Executing out-of-band command '%s' using '%s' on %s",
1238 self.op.command, oob_program, node.name)
1239 result = self.rpc.call_run_oob(master_node, oob_program,
1240 self.op.command, node.name,
1244 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
1245 node.name, result.fail_msg)
1246 node_entry.append((constants.RS_NODATA, None))
1249 self._CheckPayload(result)
1250 except errors.OpExecError, err:
1251 self.LogWarning("Payload returned by node '%s' is not valid: %s",
1253 node_entry.append((constants.RS_NODATA, None))
1255 if self.op.command == constants.OOB_HEALTH:
1256 # For health we should log important events
1257 for item, status in result.payload:
1258 if status in [constants.OOB_STATUS_WARNING,
1259 constants.OOB_STATUS_CRITICAL]:
1260 self.LogWarning("Item '%s' on node '%s' has status '%s'",
1261 item, node.name, status)
1263 if self.op.command == constants.OOB_POWER_ON:
1265 elif self.op.command == constants.OOB_POWER_OFF:
1266 node.powered = False
1267 elif self.op.command == constants.OOB_POWER_STATUS:
1268 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
1269 if powered != node.powered:
1270 logging.warning(("Recorded power state (%s) of node '%s' does not"
1271 " match actual power state (%s)"), node.powered,
1274 # For configuration changing commands we should update the node
1275 if self.op.command in (constants.OOB_POWER_ON,
1276 constants.OOB_POWER_OFF):
1277 self.cfg.Update(node, feedback_fn)
1279 node_entry.append((constants.RS_NORMAL, result.payload))
1281 if (self.op.command == constants.OOB_POWER_ON and
1282 idx < len(self.nodes) - 1):
1283 time.sleep(self.op.power_delay)
1287 def _CheckPayload(self, result):
1288 """Checks if the payload is valid.
1290 @param result: RPC result
1291 @raises errors.OpExecError: If payload is not valid
1295 if self.op.command == constants.OOB_HEALTH:
1296 if not isinstance(result.payload, list):
1297 errs.append("command 'health' is expected to return a list but got %s" %
1298 type(result.payload))
1300 for item, status in result.payload:
1301 if status not in constants.OOB_STATUSES:
1302 errs.append("health item '%s' has invalid status '%s'" %
1305 if self.op.command == constants.OOB_POWER_STATUS:
1306 if not isinstance(result.payload, dict):
1307 errs.append("power-status is expected to return a dict but got %s" %
1308 type(result.payload))
1310 if self.op.command in [
1311 constants.OOB_POWER_ON,
1312 constants.OOB_POWER_OFF,
1313 constants.OOB_POWER_CYCLE,
1315 if result.payload is not None:
1316 errs.append("%s is expected to not return payload but got '%s'" %
1317 (self.op.command, result.payload))
1320 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
1321 utils.CommaJoin(errs))
1324 class _OsQuery(_QueryBase):
1325 FIELDS = query.OS_FIELDS
1327 def ExpandNames(self, lu):
1328 # Lock all nodes in shared mode
1329 # Temporary removal of locks, should be reverted later
1330 # TODO: reintroduce locks when they are lighter-weight
1331 lu.needed_locks = {}
1332 #self.share_locks[locking.LEVEL_NODE] = 1
1333 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1335 # The following variables interact with _QueryBase._GetNames
1337 self.wanted = self.names
1339 self.wanted = locking.ALL_SET
1341 self.do_locking = self.use_locking
1343 def DeclareLocks(self, lu, level):
1347 def _DiagnoseByOS(rlist):
1348 """Remaps a per-node return list into a per-os per-node dictionary
1350 @param rlist: a map with node names as keys and OS objects as values
1353 @return: a dictionary with osnames as keys and as value another
1354 map, with nodes as keys and tuples of (path, status, diagnose,
1355 variants, parameters, api_versions) as values, eg::
1357 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
1358 (/srv/..., False, "invalid api")],
1359 "node2": [(/srv/..., True, "", [], [])]}
1364 # we build here the list of nodes that didn't fail the RPC (at RPC
1365 # level), so that nodes with a non-responding node daemon don't
1366 # make all OSes invalid
1367 good_nodes = [node_name for node_name in rlist
1368 if not rlist[node_name].fail_msg]
1369 for node_name, nr in rlist.items():
1370 if nr.fail_msg or not nr.payload:
1372 for (name, path, status, diagnose, variants,
1373 params, api_versions) in nr.payload:
1374 if name not in all_os:
1375 # build a list of nodes for this os containing empty lists
1376 # for each node in node_list
1378 for nname in good_nodes:
1379 all_os[name][nname] = []
1380 # convert params from [name, help] to (name, help)
1381 params = [tuple(v) for v in params]
1382 all_os[name][node_name].append((path, status, diagnose,
1383 variants, params, api_versions))
1386 def _GetQueryData(self, lu):
1387 """Computes the list of OSes and their attributes.
1390 # Locking is not used
1391 assert not (compat.any(lu.glm.is_owned(level)
1392 for level in locking.LEVELS
1393 if level != locking.LEVEL_CLUSTER) or
1394 self.do_locking or self.use_locking)
1396 valid_nodes = [node.name
1397 for node in lu.cfg.GetAllNodesInfo().values()
1398 if not node.offline and node.vm_capable]
1399 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
1400 cluster = lu.cfg.GetClusterInfo()
1404 for (os_name, os_data) in pol.items():
1405 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
1406 hidden=(os_name in cluster.hidden_os),
1407 blacklisted=(os_name in cluster.blacklisted_os))
1411 api_versions = set()
1413 for idx, osl in enumerate(os_data.values()):
1414 info.valid = bool(info.valid and osl and osl[0][1])
1418 (node_variants, node_params, node_api) = osl[0][3:6]
1421 variants.update(node_variants)
1422 parameters.update(node_params)
1423 api_versions.update(node_api)
1425 # Filter out inconsistent values
1426 variants.intersection_update(node_variants)
1427 parameters.intersection_update(node_params)
1428 api_versions.intersection_update(node_api)
1430 info.variants = list(variants)
1431 info.parameters = list(parameters)
1432 info.api_versions = list(api_versions)
1434 data[os_name] = info
1436 # Prepare data in requested order
1437 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
1441 class LUOsDiagnose(NoHooksLU):
1442 """Logical unit for OS diagnose/query.
1448 def _BuildFilter(fields, names):
1449 """Builds a filter for querying OSes.
1452 name_filter = qlang.MakeSimpleFilter("name", names)
1454 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
1455 # respective field is not requested
1456 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
1457 for fname in ["hidden", "blacklisted"]
1458 if fname not in fields]
1459 if "valid" not in fields:
1460 status_filter.append([qlang.OP_TRUE, "valid"])
1463 status_filter.insert(0, qlang.OP_AND)
1465 status_filter = None
1467 if name_filter and status_filter:
1468 return [qlang.OP_AND, name_filter, status_filter]
1472 return status_filter
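# Example (illustrative): _BuildFilter(["name", "valid"], ["debian-etch"])
# yields a filter requiring name == "debian-etch" plus "not hidden" and
# "not blacklisted"; no validity clause is added because "valid" itself was
# requested as an output field.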
1474 def CheckArguments(self):
1475 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
1476 self.op.output_fields, False)
1478 def ExpandNames(self):
1479 self.oq.ExpandNames(self)
1481 def Exec(self, feedback_fn):
1482 return self.oq.OldStyleQuery(self)
1485 class _ExtStorageQuery(_QueryBase):
1486 FIELDS = query.EXTSTORAGE_FIELDS
1488 def ExpandNames(self, lu):
1489 # Lock all nodes in shared mode
1490 # Temporary removal of locks, should be reverted later
1491 # TODO: reintroduce locks when they are lighter-weight
1492 lu.needed_locks = {}
1493 #self.share_locks[locking.LEVEL_NODE] = 1
1494 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1496 # The following variables interact with _QueryBase._GetNames
1498 self.wanted = self.names
1500 self.wanted = locking.ALL_SET
1502 self.do_locking = self.use_locking
1504 def DeclareLocks(self, lu, level):
1508 def _DiagnoseByProvider(rlist):
1509 """Remaps a per-node return list into a per-provider per-node dictionary
1511 @param rlist: a map with node names as keys and ExtStorage objects as values
1514 @return: a dictionary with extstorage providers as keys and as
1515 value another map, with nodes as keys and tuples of
1516 (path, status, diagnose, parameters) as values, eg::
1518 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
1519 "node2": [(/srv/..., False, "missing file")]
1520 "node3": [(/srv/..., True, "", [])]
1525 # we build here the list of nodes that didn't fail the RPC (at RPC
1526 # level), so that nodes with a non-responding node daemon don't
1527 # make all OSes invalid
1528 good_nodes = [node_name for node_name in rlist
1529 if not rlist[node_name].fail_msg]
1530 for node_name, nr in rlist.items():
1531 if nr.fail_msg or not nr.payload:
1533 for (name, path, status, diagnose, params) in nr.payload:
1534 if name not in all_es:
1535 # build a list of nodes for this os containing empty lists
1536 # for each node in node_list
1538 for nname in good_nodes:
1539 all_es[name][nname] = []
1540 # convert params from [name, help] to (name, help)
1541 params = [tuple(v) for v in params]
1542 all_es[name][node_name].append((path, status, diagnose, params))
1545 def _GetQueryData(self, lu):
1546 """Computes the list of ExtStorage providers and their attributes.
1549 # Locking is not used
1550 assert not (compat.any(lu.glm.is_owned(level)
1551 for level in locking.LEVELS
1552 if level != locking.LEVEL_CLUSTER) or
1553 self.do_locking or self.use_locking)
1555 valid_nodes = [node.name
1556 for node in lu.cfg.GetAllNodesInfo().values()
1557 if not node.offline and node.vm_capable]
1558 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
1562 nodegroup_list = lu.cfg.GetNodeGroupList()
1564 for (es_name, es_data) in pol.items():
1565 # For every provider compute the nodegroup validity.
1566 # To do this we need to check the validity of each node in es_data
1567 # and then construct the corresponding nodegroup dict:
1568 # { nodegroup1: status
1569 # nodegroup2: status
1572 for nodegroup in nodegroup_list:
1573 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
1575 nodegroup_nodes = ndgrp.members
1576 nodegroup_name = ndgrp.name
1579 for node in nodegroup_nodes:
1580 if node in valid_nodes:
1581 if es_data[node] != []:
1582 node_status = es_data[node][0][1]
1583 node_statuses.append(node_status)
1585 node_statuses.append(False)
1587 if False in node_statuses:
1588 ndgrp_data[nodegroup_name] = False
1590 ndgrp_data[nodegroup_name] = True
1592 # Compute the provider's parameters
1594 for idx, esl in enumerate(es_data.values()):
1595 valid = bool(esl and esl[0][1])
1599 node_params = esl[0][3]
1602 parameters.update(node_params)
1604 # Filter out inconsistent values
1605 parameters.intersection_update(node_params)
1607 params = list(parameters)
1609 # Now fill all the info for this provider
1610 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
1611 nodegroup_status=ndgrp_data,
1614 data[es_name] = info
1616 # Prepare data in requested order
1617 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
1621 class LUExtStorageDiagnose(NoHooksLU):
1622 """Logical unit for ExtStorage diagnose/query.
1627 def CheckArguments(self):
1628 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
1629 self.op.output_fields, False)
1631 def ExpandNames(self):
1632 self.eq.ExpandNames(self)
1634 def Exec(self, feedback_fn):
1635 return self.eq.OldStyleQuery(self)
1638 class LUNodeRemove(LogicalUnit):
1639 """Logical unit for removing a node.
1642 HPATH = "node-remove"
1643 HTYPE = constants.HTYPE_NODE
1645 def BuildHooksEnv(self):
1650 "OP_TARGET": self.op.node_name,
1651 "NODE_NAME": self.op.node_name,
1654 def BuildHooksNodes(self):
1655 """Build hooks nodes.
1657 This doesn't run on the target node in the pre phase as a failed
1658 node would then be impossible to remove.
1661 all_nodes = self.cfg.GetNodeList()
1663 all_nodes.remove(self.op.node_name)
1666 return (all_nodes, all_nodes)
1668 def CheckPrereq(self):
1669 """Check prerequisites.
1672 - the node exists in the configuration
1673 - it does not have primary or secondary instances
1674 - it's not the master
1676 Any errors are signaled by raising errors.OpPrereqError.
1679 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
1680 node = self.cfg.GetNodeInfo(self.op.node_name)
1681 assert node is not None
1683 masternode = self.cfg.GetMasterNode()
1684 if node.name == masternode:
1685 raise errors.OpPrereqError("Node is the master node, failover to another"
1686 " node is required", errors.ECODE_INVAL)
1688 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
1689 if node.name in instance.all_nodes:
1690 raise errors.OpPrereqError("Instance %s is still running on the node,"
1691 " please remove first" % instance_name,
1693 self.op.node_name = node.name
1696 def Exec(self, feedback_fn):
1697 """Removes the node from the cluster.
1701 logging.info("Stopping the node daemon and removing configs from node %s",
1704 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1706 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
1709 # Promote nodes to master candidate as needed
1710 _AdjustCandidatePool(self, exceptions=[node.name])
1711 self.context.RemoveNode(node.name)
1713 # Run post hooks on the node before it's removed
1714 _RunPostHook(self, node.name)
1716 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
1717 msg = result.fail_msg
1719 self.LogWarning("Errors encountered on the remote node while leaving"
1720 " the cluster: %s", msg)
1722 # Remove node from our /etc/hosts
1723 if self.cfg.GetClusterInfo().modify_etc_hosts:
1724 master_node = self.cfg.GetMasterNode()
1725 result = self.rpc.call_etc_hosts_modify(master_node,
1726 constants.ETC_HOSTS_REMOVE,
1728 result.Raise("Can't update hosts file with new host data")
1729 _RedistributeAncillaryFiles(self)
1732 class _NodeQuery(_QueryBase):
1733 FIELDS = query.NODE_FIELDS
1735 def ExpandNames(self, lu):
1736 lu.needed_locks = {}
1737 lu.share_locks = _ShareAll()
1740 self.wanted = _GetWantedNodes(lu, self.names)
1742 self.wanted = locking.ALL_SET
1744 self.do_locking = (self.use_locking and
1745 query.NQ_LIVE in self.requested_data)
1748 # If any non-static field is requested we need to lock the nodes
1749 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
1750 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
1752 def DeclareLocks(self, lu, level):
1755 def _GetQueryData(self, lu):
1756 """Computes the list of nodes and their attributes.
1759 all_info = lu.cfg.GetAllNodesInfo()
1761 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
1763 # Gather data as requested
1764 if query.NQ_LIVE in self.requested_data:
1765 # filter out non-vm_capable nodes
1766 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
1768 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
1769 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
1770 [lu.cfg.GetHypervisorType()], es_flags)
1771 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
1772 for (name, nresult) in node_data.items()
1773 if not nresult.fail_msg and nresult.payload)
1777 if query.NQ_INST in self.requested_data:
1778 node_to_primary = dict([(name, set()) for name in nodenames])
1779 node_to_secondary = dict([(name, set()) for name in nodenames])
1781 inst_data = lu.cfg.GetAllInstancesInfo()
1783 for inst in inst_data.values():
1784 if inst.primary_node in node_to_primary:
1785 node_to_primary[inst.primary_node].add(inst.name)
1786 for secnode in inst.secondary_nodes:
1787 if secnode in node_to_secondary:
1788 node_to_secondary[secnode].add(inst.name)
1790 node_to_primary = None
1791 node_to_secondary = None
1793 if query.NQ_OOB in self.requested_data:
1794 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
1795 for name, node in all_info.iteritems())
1799 if query.NQ_GROUP in self.requested_data:
1800 groups = lu.cfg.GetAllNodeGroupsInfo()
1804 return query.NodeQueryData([all_info[name] for name in nodenames],
1805 live_data, lu.cfg.GetMasterNode(),
1806 node_to_primary, node_to_secondary, groups,
1807 oob_support, lu.cfg.GetClusterInfo())
1810 class LUNodeQuery(NoHooksLU):
1811 """Logical unit for querying nodes.
1814 # pylint: disable=W0142
1817 def CheckArguments(self):
1818 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
1819 self.op.output_fields, self.op.use_locking)
1821 def ExpandNames(self):
1822 self.nq.ExpandNames(self)
1824 def DeclareLocks(self, level):
1825 self.nq.DeclareLocks(self, level)
1827 def Exec(self, feedback_fn):
1828 return self.nq.OldStyleQuery(self)
1831 class LUNodeQueryvols(NoHooksLU):
1832 """Logical unit for getting volumes on node(s).
1836 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
1837 _FIELDS_STATIC = utils.FieldSet("node")
1839 def CheckArguments(self):
1840 _CheckOutputFields(static=self._FIELDS_STATIC,
1841 dynamic=self._FIELDS_DYNAMIC,
1842 selected=self.op.output_fields)
1844 def ExpandNames(self):
1845 self.share_locks = _ShareAll()
1848 self.needed_locks = {
1849 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
1852 self.needed_locks = {
1853 locking.LEVEL_NODE: locking.ALL_SET,
1854 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
1857 def Exec(self, feedback_fn):
1858 """Computes the list of volumes and their attributes.
1861 nodenames = self.owned_locks(locking.LEVEL_NODE)
1862 volumes = self.rpc.call_node_volumes(nodenames)
1864 ilist = self.cfg.GetAllInstancesInfo()
1865 vol2inst = _MapInstanceDisksToNodes(ilist.values())
1868 for node in nodenames:
1869 nresult = volumes[node]
1872 msg = nresult.fail_msg
1874 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
1877 node_vols = sorted(nresult.payload,
1878 key=operator.itemgetter("dev"))
1880 for vol in node_vols:
1882 for field in self.op.output_fields:
1885 elif field == "phys":
1889 elif field == "name":
1891 elif field == "size":
1892 val = int(float(vol["size"]))
1893 elif field == "instance":
1894 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
1896 raise errors.ParameterError(field)
1897 node_output.append(str(val))
1899 output.append(node_output)
1904 class LUNodeQueryStorage(NoHooksLU):
1905 """Logical unit for getting information on storage units on node(s).
1908 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
1911 def CheckArguments(self):
1912 _CheckOutputFields(static=self._FIELDS_STATIC,
1913 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
1914 selected=self.op.output_fields)
1916 def ExpandNames(self):
1917 self.share_locks = _ShareAll()
1920 self.needed_locks = {
1921 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
1924 self.needed_locks = {
1925 locking.LEVEL_NODE: locking.ALL_SET,
1926 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
1929 def Exec(self, feedback_fn):
1930 """Computes the list of storage units and their attributes.
1933 self.nodes = self.owned_locks(locking.LEVEL_NODE)
1935 # Always get name to sort by
1936 if constants.SF_NAME in self.op.output_fields:
1937 fields = self.op.output_fields[:]
1939 fields = [constants.SF_NAME] + self.op.output_fields
1941 # Never ask for node or type as it's only known to the LU
1942 for extra in [constants.SF_NODE, constants.SF_TYPE]:
1943 while extra in fields:
1944 fields.remove(extra)
1946 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
1947 name_idx = field_idx[constants.SF_NAME]
1949 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
1950 data = self.rpc.call_storage_list(self.nodes,
1951 self.op.storage_type, st_args,
1952 self.op.name, fields)
1956 for node in utils.NiceSort(self.nodes):
1957 nresult = data[node]
1961 msg = nresult.fail_msg
1963 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
1966 rows = dict([(row[name_idx], row) for row in nresult.payload])
1968 for name in utils.NiceSort(rows.keys()):
1973 for field in self.op.output_fields:
1974 if field == constants.SF_NODE:
1976 elif field == constants.SF_TYPE:
1977 val = self.op.storage_type
1978 elif field in field_idx:
1979 val = row[field_idx[field]]
1981 raise errors.ParameterError(field)
1990 class _InstanceQuery(_QueryBase):
1991 FIELDS = query.INSTANCE_FIELDS
1993 def ExpandNames(self, lu):
1994 lu.needed_locks = {}
1995 lu.share_locks = _ShareAll()
1998 self.wanted = _GetWantedInstances(lu, self.names)
2000 self.wanted = locking.ALL_SET
2002 self.do_locking = (self.use_locking and
2003 query.IQ_LIVE in self.requested_data)
2005 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
2006 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
2007 lu.needed_locks[locking.LEVEL_NODE] = []
2008 lu.needed_locks[locking.LEVEL_NETWORK] = []
2009 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2011 self.do_grouplocks = (self.do_locking and
2012 query.IQ_NODES in self.requested_data)
2014 def DeclareLocks(self, lu, level):
2016 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
2017 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
2019 # Lock all groups used by instances optimistically; this requires going
2020 # via the node before it's locked, requiring verification later on
2021 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
2023 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
2024 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
2025 elif level == locking.LEVEL_NODE:
2026 lu._LockInstancesNodes() # pylint: disable=W0212
2028 elif level == locking.LEVEL_NETWORK:
2029 lu.needed_locks[locking.LEVEL_NETWORK] = \
2031 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
2032 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
2035 def _CheckGroupLocks(lu):
2036 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
2037 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
2039 # Check if node groups for locked instances are still correct
2040 for instance_name in owned_instances:
2041 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
2043 def _GetQueryData(self, lu):
2044 """Computes the list of instances and their attributes.
2047 if self.do_grouplocks:
2048 self._CheckGroupLocks(lu)
2050 cluster = lu.cfg.GetClusterInfo()
2051 all_info = lu.cfg.GetAllInstancesInfo()
2053 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
2055 instance_list = [all_info[name] for name in instance_names]
2056 nodes = frozenset(itertools.chain(*(inst.all_nodes
2057 for inst in instance_list)))
2058 hv_list = list(set([inst.hypervisor for inst in instance_list]))
2061 wrongnode_inst = set()
2063 # Gather data as requested
2064 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
2066 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
2068 result = node_data[name]
2070 # offline nodes will be in both lists
2071 assert result.fail_msg
2072 offline_nodes.append(name)
2074 bad_nodes.append(name)
2075 elif result.payload:
2076 for inst in result.payload:
2077 if inst in all_info:
2078 if all_info[inst].primary_node == name:
2079 live_data.update(result.payload)
2081 wrongnode_inst.add(inst)
2083 # orphan instance; we don't list it here as we don't
2084 # handle this case yet in the output of instance listing
2085 logging.warning("Orphan instance '%s' found on node %s",
2087 # else no instance is alive
2091 if query.IQ_DISKUSAGE in self.requested_data:
2092 gmi = ganeti.masterd.instance
2093 disk_usage = dict((inst.name,
2094 gmi.ComputeDiskSize(inst.disk_template,
2095 [{constants.IDISK_SIZE: disk.size}
2096 for disk in inst.disks]))
2097 for inst in instance_list)
2101 if query.IQ_CONSOLE in self.requested_data:
2103 for inst in instance_list:
2104 if inst.name in live_data:
2105 # Instance is running
2106 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
2108 consinfo[inst.name] = None
2109 assert set(consinfo.keys()) == set(instance_names)
2113 if query.IQ_NODES in self.requested_data:
2114 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
2116 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
2117 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
2118 for uuid in set(map(operator.attrgetter("group"),
2124 if query.IQ_NETWORKS in self.requested_data:
2125 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
2126 for i in instance_list))
2127 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
2131 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
2132 disk_usage, offline_nodes, bad_nodes,
2133 live_data, wrongnode_inst, consinfo,
2134 nodes, groups, networks)
2137 class LUQuery(NoHooksLU):
2138 """Query for resources/items of a certain kind.
2141 # pylint: disable=W0142
2144 def CheckArguments(self):
2145 qcls = _GetQueryImplementation(self.op.what)
2147 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
2149 def ExpandNames(self):
2150 self.impl.ExpandNames(self)
2152 def DeclareLocks(self, level):
2153 self.impl.DeclareLocks(self, level)
2155 def Exec(self, feedback_fn):
2156 return self.impl.NewStyleQuery(self)
2159 class LUQueryFields(NoHooksLU):
2160 """Query for resources/items of a certain kind.
2163 # pylint: disable=W0142
2166 def CheckArguments(self):
2167 self.qcls = _GetQueryImplementation(self.op.what)
2169 def ExpandNames(self):
2170 self.needed_locks = {}
2172 def Exec(self, feedback_fn):
2173 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
2176 class LUNodeModifyStorage(NoHooksLU):
2177 """Logical unit for modifying a storage volume on a node.
2182 def CheckArguments(self):
2183 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2185 storage_type = self.op.storage_type
2188 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2190 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
2191 " modified" % storage_type,
2194 diff = set(self.op.changes.keys()) - modifiable
2196 raise errors.OpPrereqError("The following fields cannot be modified for"
2197 " storage units of type '%s': %r" %
2198 (storage_type, list(diff)),
2201 def ExpandNames(self):
2202 self.needed_locks = {
2203 locking.LEVEL_NODE: self.op.node_name,
2206 def Exec(self, feedback_fn):
2207 """Modifies a storage volume on a node.
2210 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2211 result = self.rpc.call_storage_modify(self.op.node_name,
2212 self.op.storage_type, st_args,
2213 self.op.name, self.op.changes)
2214 result.Raise("Failed to modify storage unit '%s' on %s" %
2215 (self.op.name, self.op.node_name))
2218 class LUNodeAdd(LogicalUnit):
2219 """Logical unit for adding a node to the cluster.
2223 HTYPE = constants.HTYPE_NODE
2224 _NFLAGS = ["master_capable", "vm_capable"]
2226 def CheckArguments(self):
2227 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
2228 # validate/normalize the node name
2229 self.hostname = netutils.GetHostname(name=self.op.node_name,
2230 family=self.primary_ip_family)
2231 self.op.node_name = self.hostname.name
2233 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
2234 raise errors.OpPrereqError("Cannot readd the master node",
2237 if self.op.readd and self.op.group:
2238 raise errors.OpPrereqError("Cannot pass a node group when a node is"
2239 " being readded", errors.ECODE_INVAL)
2241 def BuildHooksEnv(self):
2244 This will run on all nodes before, and on all nodes + the new node after.
2248 "OP_TARGET": self.op.node_name,
2249 "NODE_NAME": self.op.node_name,
2250 "NODE_PIP": self.op.primary_ip,
2251 "NODE_SIP": self.op.secondary_ip,
2252 "MASTER_CAPABLE": str(self.op.master_capable),
2253 "VM_CAPABLE": str(self.op.vm_capable),
2256 def BuildHooksNodes(self):
2257 """Build hooks nodes.
2260 # Exclude added node
2261 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
2262 post_nodes = pre_nodes + [self.op.node_name, ]
2264 return (pre_nodes, post_nodes)
2266 def CheckPrereq(self):
2267 """Check prerequisites.
2270 - the new node is not already in the config
2272 - its parameters (single/dual homed) match the cluster
2274 Any errors are signaled by raising errors.OpPrereqError.
2278 hostname = self.hostname
2279 node = hostname.name
2280 primary_ip = self.op.primary_ip = hostname.ip
2281 if self.op.secondary_ip is None:
2282 if self.primary_ip_family == netutils.IP6Address.family:
2283 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
2284 " IPv4 address must be given as secondary",
2286 self.op.secondary_ip = primary_ip
2288 secondary_ip = self.op.secondary_ip
2289 if not netutils.IP4Address.IsValid(secondary_ip):
2290 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
2291 " address" % secondary_ip, errors.ECODE_INVAL)
2293 node_list = cfg.GetNodeList()
2294 if not self.op.readd and node in node_list:
2295 raise errors.OpPrereqError("Node %s is already in the configuration" %
2296 node, errors.ECODE_EXISTS)
2297 elif self.op.readd and node not in node_list:
2298 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
2301 self.changed_primary_ip = False
2303 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
2304 if self.op.readd and node == existing_node_name:
2305 if existing_node.secondary_ip != secondary_ip:
2306 raise errors.OpPrereqError("Readded node doesn't have the same IP"
2307 " address configuration as before",
2309 if existing_node.primary_ip != primary_ip:
2310 self.changed_primary_ip = True
2314 if (existing_node.primary_ip == primary_ip or
2315 existing_node.secondary_ip == primary_ip or
2316 existing_node.primary_ip == secondary_ip or
2317 existing_node.secondary_ip == secondary_ip):
2318 raise errors.OpPrereqError("New node ip address(es) conflict with"
2319 " existing node %s" % existing_node.name,
2320 errors.ECODE_NOTUNIQUE)
2322 # After this 'if' block, None is no longer a valid value for the
2323 # _capable op attributes
2325 old_node = self.cfg.GetNodeInfo(node)
2326 assert old_node is not None, "Can't retrieve locked node %s" % node
2327 for attr in self._NFLAGS:
2328 if getattr(self.op, attr) is None:
2329 setattr(self.op, attr, getattr(old_node, attr))
2331 for attr in self._NFLAGS:
2332 if getattr(self.op, attr) is None:
2333 setattr(self.op, attr, True)
2335 if self.op.readd and not self.op.vm_capable:
2336 pri, sec = cfg.GetNodeInstances(node)
2338 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
2339 " flag set to false, but it already holds"
2340 " instances" % node,
2343 # check that the type of the node (single versus dual homed) is the
2344 # same as for the master
2345 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2346 master_singlehomed = myself.secondary_ip == myself.primary_ip
2347 newbie_singlehomed = secondary_ip == primary_ip
2348 if master_singlehomed != newbie_singlehomed:
2349 if master_singlehomed:
2350 raise errors.OpPrereqError("The master has no secondary ip but the"
2351 " new node has one",
2354 raise errors.OpPrereqError("The master has a secondary ip but the"
2355 " new node doesn't have one",
2358 # checks reachability
2359 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2360 raise errors.OpPrereqError("Node not reachable by ping",
2361 errors.ECODE_ENVIRON)
2363 if not newbie_singlehomed:
2364 # check reachability from my secondary ip to newbie's secondary ip
2365 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2366 source=myself.secondary_ip):
2367 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2368 " based ping to node daemon port",
2369 errors.ECODE_ENVIRON)
2376 if self.op.master_capable:
2377 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
2379 self.master_candidate = False
2382 self.new_node = old_node
2384 node_group = cfg.LookupNodeGroup(self.op.group)
2385 self.new_node = objects.Node(name=node,
2386 primary_ip=primary_ip,
2387 secondary_ip=secondary_ip,
2388 master_candidate=self.master_candidate,
2389 offline=False, drained=False,
2390 group=node_group, ndparams={})
2392 if self.op.ndparams:
2393 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2394 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
2395 "node", "cluster or group")
2397 if self.op.hv_state:
2398 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
2400 if self.op.disk_state:
2401 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
2403 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
2404 # it a property on the base class.
2405 rpcrunner = rpc.DnsOnlyRunner()
2406 result = rpcrunner.call_version([node])[node]
2407 result.Raise("Can't get version information from node %s" % node)
2408 if constants.PROTOCOL_VERSION == result.payload:
2409 logging.info("Communication to node %s fine, sw version %s match",
2410 node, result.payload)
2412 raise errors.OpPrereqError("Version mismatch master version %s,"
2413 " node version %s" %
2414 (constants.PROTOCOL_VERSION, result.payload),
2415 errors.ECODE_ENVIRON)
2417 vg_name = cfg.GetVGName()
2418 if vg_name is not None:
2419 vparams = {constants.NV_PVLIST: [vg_name]}
2420 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
2421 cname = self.cfg.GetClusterName()
2422 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
2423 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
2425 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
2426 "; ".join(errmsgs), errors.ECODE_ENVIRON)
2428 def Exec(self, feedback_fn):
2429 """Adds the new node to the cluster.
2432 new_node = self.new_node
2433 node = new_node.name
2435 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
2438 # We are adding a new node, so we assume it's powered
2439 new_node.powered = True
2441 # for re-adds, reset the offline/drained/master-candidate flags;
2442 # we need to reset here, otherwise offline would prevent RPC calls
2443 # later in the procedure; this also means that if the re-add
2444 # fails, we are left with a non-offlined, broken node
2446 new_node.drained = new_node.offline = False # pylint: disable=W0201
2447 self.LogInfo("Readding a node, the offline/drained flags were reset")
2448 # if we demote the node, we do cleanup later in the procedure
2449 new_node.master_candidate = self.master_candidate
2450 if self.changed_primary_ip:
2451 new_node.primary_ip = self.op.primary_ip
2453 # copy the master/vm_capable flags
2454 for attr in self._NFLAGS:
2455 setattr(new_node, attr, getattr(self.op, attr))
2457 # notify the user about any possible mc promotion
2458 if new_node.master_candidate:
2459 self.LogInfo("Node will be a master candidate")
2461 if self.op.ndparams:
2462 new_node.ndparams = self.op.ndparams
2464 new_node.ndparams = {}
2466 if self.op.hv_state:
2467 new_node.hv_state_static = self.new_hv_state
2469 if self.op.disk_state:
2470 new_node.disk_state_static = self.new_disk_state
2472 # Add node to our /etc/hosts, and add key to known_hosts
2473 if self.cfg.GetClusterInfo().modify_etc_hosts:
2474 master_node = self.cfg.GetMasterNode()
2475 result = self.rpc.call_etc_hosts_modify(master_node,
2476 constants.ETC_HOSTS_ADD,
2479 result.Raise("Can't update hosts file with new host data")
2481 if new_node.secondary_ip != new_node.primary_ip:
2482 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
2485 node_verify_list = [self.cfg.GetMasterNode()]
2486 node_verify_param = {
2487 constants.NV_NODELIST: ([node], {}),
2488 # TODO: do a node-net-test as well?
2491 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2492 self.cfg.GetClusterName())
2493 for verifier in node_verify_list:
2494 result[verifier].Raise("Cannot communicate with node %s" % verifier)
2495 nl_payload = result[verifier].payload[constants.NV_NODELIST]
2497 for failed in nl_payload:
2498 feedback_fn("ssh/hostname verification failed"
2499 " (checking from %s): %s" %
2500 (verifier, nl_payload[failed]))
2501 raise errors.OpExecError("ssh/hostname verification failed")
2504 _RedistributeAncillaryFiles(self)
2505 self.context.ReaddNode(new_node)
2506 # make sure we redistribute the config
2507 self.cfg.Update(new_node, feedback_fn)
2508 # and make sure the new node will not have old files around
2509 if not new_node.master_candidate:
2510 result = self.rpc.call_node_demote_from_mc(new_node.name)
2511 msg = result.fail_msg
2513 self.LogWarning("Node failed to demote itself from master"
2514 " candidate status: %s" % msg)
2516 _RedistributeAncillaryFiles(self, additional_nodes=[node],
2517 additional_vm=self.op.vm_capable)
2518 self.context.AddNode(new_node, self.proc.GetECId())
2521 class LUNodeSetParams(LogicalUnit):
2522 """Modifies the parameters of a node.
2524 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
2525 to the node role (as _ROLE_*)
2526 @cvar _R2F: a dictionary from node role to tuples of flags
2527 @cvar _FLAGS: a list of attribute names corresponding to the flags
2530 HPATH = "node-modify"
2531 HTYPE = constants.HTYPE_NODE
2533 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
2535 (True, False, False): _ROLE_CANDIDATE,
2536 (False, True, False): _ROLE_DRAINED,
2537 (False, False, True): _ROLE_OFFLINE,
2538 (False, False, False): _ROLE_REGULAR,
2540 _R2F = dict((v, k) for k, v in _F2R.items())
2541 _FLAGS = ["master_candidate", "drained", "offline"]
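  # Illustrative note: with the tuple order given by _FLAGS above, a node that
  # is a master candidate and neither drained nor offline maps to
  # _F2R[(True, False, False)] == _ROLE_CANDIDATE, and _R2F simply inverts the
  # mapping, so _R2F[_ROLE_CANDIDATE] == (True, False, False).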
2543 def CheckArguments(self):
2544 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2545 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
2546 self.op.master_capable, self.op.vm_capable,
2547 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
2549 if all_mods.count(None) == len(all_mods):
2550 raise errors.OpPrereqError("Please pass at least one modification",
2552 if all_mods.count(True) > 1:
2553 raise errors.OpPrereqError("Can't set the node into more than one"
2554 " state at the same time",
2557 # Boolean value that tells us whether we might be demoting from MC
2558 self.might_demote = (self.op.master_candidate is False or
2559 self.op.offline is True or
2560 self.op.drained is True or
2561 self.op.master_capable is False)
2563 if self.op.secondary_ip:
2564 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
2565 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
2566 " address" % self.op.secondary_ip,
2569 self.lock_all = self.op.auto_promote and self.might_demote
2570 self.lock_instances = self.op.secondary_ip is not None
2572 def _InstanceFilter(self, instance):
2573 """Filter for getting affected instances.
2576 return (instance.disk_template in constants.DTS_INT_MIRROR and
2577 self.op.node_name in instance.all_nodes)
2579 def ExpandNames(self):
2581 self.needed_locks = {
2582 locking.LEVEL_NODE: locking.ALL_SET,
2584 # Block allocations when all nodes are locked
2585 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2588 self.needed_locks = {
2589 locking.LEVEL_NODE: self.op.node_name,
2592 # Since modifying a node can have severe effects on currently running
2593 # operations, the resource lock is at least acquired in shared mode
2594 self.needed_locks[locking.LEVEL_NODE_RES] = \
2595 self.needed_locks[locking.LEVEL_NODE]
2597 # Acquire all locks except the node locks in shared mode; the shared locks
2598 # are only needed for read-only access
2599 self.share_locks = _ShareAll()
2600 self.share_locks[locking.LEVEL_NODE] = 0
2601 self.share_locks[locking.LEVEL_NODE_RES] = 0
2602 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
2604 if self.lock_instances:
2605 self.needed_locks[locking.LEVEL_INSTANCE] = \
2606 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
2608 def BuildHooksEnv(self):
2611 This runs on the master node.
2615 "OP_TARGET": self.op.node_name,
2616 "MASTER_CANDIDATE": str(self.op.master_candidate),
2617 "OFFLINE": str(self.op.offline),
2618 "DRAINED": str(self.op.drained),
2619 "MASTER_CAPABLE": str(self.op.master_capable),
2620 "VM_CAPABLE": str(self.op.vm_capable),
2623 def BuildHooksNodes(self):
2624 """Build hooks nodes.
2627 nl = [self.cfg.GetMasterNode(), self.op.node_name]
2630 def CheckPrereq(self):
2631 """Check prerequisites.
2633 This only checks the instance list against the existing names.
2636 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2638 if self.lock_instances:
2639 affected_instances = \
2640 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
2642 # Verify instance locks
2643 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
2644 wanted_instances = frozenset(affected_instances.keys())
2645 if wanted_instances - owned_instances:
2646 raise errors.OpPrereqError("Instances affected by changing node %s's"
2647 " secondary IP address have changed since"
2648 " locks were acquired, wanted '%s', have"
2649 " '%s'; retry the operation" %
2651 utils.CommaJoin(wanted_instances),
2652 utils.CommaJoin(owned_instances)),
2655 affected_instances = None
2657 if (self.op.master_candidate is not None or
2658 self.op.drained is not None or
2659 self.op.offline is not None):
2660 # we can't change the master's node flags
2661 if self.op.node_name == self.cfg.GetMasterNode():
2662 raise errors.OpPrereqError("The master role can be changed"
2663 " only via master-failover",
2666 if self.op.master_candidate and not node.master_capable:
2667 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
2668 " it a master candidate" % node.name,
2671 if self.op.vm_capable is False:
2672 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
2674 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
2675 " the vm_capable flag" % node.name,
2678 if node.master_candidate and self.might_demote and not self.lock_all:
2679 assert not self.op.auto_promote, "auto_promote set but lock_all not"
2680 # check if, after removing the current node, we're missing master candidates
2682 (mc_remaining, mc_should, _) = \
2683 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
2684 if mc_remaining < mc_should:
2685 raise errors.OpPrereqError("Not enough master candidates, please"
2686 " pass auto promote option to allow"
2687 " promotion (--auto-promote or RAPI"
2688 " auto_promote=True)", errors.ECODE_STATE)
2690 self.old_flags = old_flags = (node.master_candidate,
2691 node.drained, node.offline)
2692 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
2693 self.old_role = old_role = self._F2R[old_flags]
2695 # Check for ineffective changes
2696 for attr in self._FLAGS:
2697 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
2698 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
2699 setattr(self.op, attr, None)
2701 # Past this point, any flag change to False means a transition
2702 # away from the respective state, as only real changes are kept
2704 # TODO: We might query the real power state if it supports OOB
2705 if _SupportsOob(self.cfg, node):
2706 if self.op.offline is False and not (node.powered or
2707 self.op.powered is True):
2708 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
2709 " offline status can be reset") %
2710 self.op.node_name, errors.ECODE_STATE)
2711 elif self.op.powered is not None:
2712 raise errors.OpPrereqError(("Unable to change powered state for node %s"
2713 " as it does not support out-of-band"
2714 " handling") % self.op.node_name,
2717 # If we're being de-offlined/drained, we'll promote ourselves to MC if needed
2718 if (self.op.drained is False or self.op.offline is False or
2719 (self.op.master_capable and not node.master_capable)):
2720 if _DecideSelfPromotion(self):
2721 self.op.master_candidate = True
2722 self.LogInfo("Auto-promoting node to master candidate")
2724 # If we're no longer master capable, we'll demote ourselves from MC
2725 if self.op.master_capable is False and node.master_candidate:
2726 self.LogInfo("Demoting from master candidate")
2727 self.op.master_candidate = False
2730 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
2731 if self.op.master_candidate:
2732 new_role = self._ROLE_CANDIDATE
2733 elif self.op.drained:
2734 new_role = self._ROLE_DRAINED
2735 elif self.op.offline:
2736 new_role = self._ROLE_OFFLINE
2737 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
2738 # False is still in new flags, which means we're un-setting (the current) flags
2740 new_role = self._ROLE_REGULAR
2741 else: # no new flags, nothing, keep old role
2744 self.new_role = new_role
2746 if old_role == self._ROLE_OFFLINE and new_role != old_role:
2747 # Trying to transition out of offline status
2748 result = self.rpc.call_version([node.name])[node.name]
2750 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
2751 " to report its version: %s" %
2752 (node.name, result.fail_msg),
2755 self.LogWarning("Transitioning node from offline to online state"
2756 " without using re-add. Please make sure the node is healthy!")
2759 # When changing the secondary ip, verify if this is a single-homed to
2760 # multi-homed transition or vice versa, and apply the relevant warnings and checks
2762 if self.op.secondary_ip:
2763 # Ok even without locking, because this can't be changed by any LU
2764 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
2765 master_singlehomed = master.secondary_ip == master.primary_ip
2766 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
2767 if self.op.force and node.name == master.name:
2768 self.LogWarning("Transitioning from single-homed to multi-homed"
2769 " cluster; all nodes will require a secondary IP address")
2772 raise errors.OpPrereqError("Changing the secondary ip on a"
2773 " single-homed cluster requires the"
2774 " --force option to be passed, and the"
2775 " target node to be the master",
2777 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
2778 if self.op.force and node.name == master.name:
2779 self.LogWarning("Transitioning from multi-homed to single-homed"
2780 " cluster; secondary IP addresses will have to be removed")
2783 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
2784 " same as the primary IP on a multi-homed"
2785 " cluster, unless the --force option is"
2786 " passed, and the target node is the"
2787 " master", errors.ECODE_INVAL)
2789 assert not (frozenset(affected_instances) -
2790 self.owned_locks(locking.LEVEL_INSTANCE))
2793 if affected_instances:
2794 msg = ("Cannot change secondary IP address: offline node has"
2795 " instances (%s) configured to use it" %
2796 utils.CommaJoin(affected_instances.keys()))
2797 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
2799 # On online nodes, check that no instances are running, and that
2800 # the node has the new ip and we can reach it.
2801 for instance in affected_instances.values():
2802 _CheckInstanceState(self, instance, INSTANCE_DOWN,
2803 msg="cannot change secondary ip")
2805 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
2806 if master.name != node.name:
2807 # check reachability from master secondary ip to new secondary ip
2808 if not netutils.TcpPing(self.op.secondary_ip,
2809 constants.DEFAULT_NODED_PORT,
2810 source=master.secondary_ip):
2811 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2812 " based ping to node daemon port",
2813 errors.ECODE_ENVIRON)
2815 if self.op.ndparams:
2816 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
2817 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
2818 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
2819 "node", "cluster or group")
2820 self.new_ndparams = new_ndparams
2822 if self.op.hv_state:
2823 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
2824 self.node.hv_state_static)
2826 if self.op.disk_state:
2827 self.new_disk_state = \
2828 _MergeAndVerifyDiskState(self.op.disk_state,
2829 self.node.disk_state_static)
2831 def Exec(self, feedback_fn):
2836 old_role = self.old_role
2837 new_role = self.new_role
2841 if self.op.ndparams:
2842 node.ndparams = self.new_ndparams
2844 if self.op.powered is not None:
2845 node.powered = self.op.powered
2847 if self.op.hv_state:
2848 node.hv_state_static = self.new_hv_state
2850 if self.op.disk_state:
2851 node.disk_state_static = self.new_disk_state
2853 for attr in ["master_capable", "vm_capable"]:
2854 val = getattr(self.op, attr)
2856 setattr(node, attr, val)
2857 result.append((attr, str(val)))
2859 if new_role != old_role:
2860 # Tell the node to demote itself, if no longer MC and not offline
2861 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
2862 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
2864 self.LogWarning("Node failed to demote itself: %s", msg)
2866 new_flags = self._R2F[new_role]
2867 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
2869 result.append((desc, str(nf)))
2870 (node.master_candidate, node.drained, node.offline) = new_flags
2872 # we locked all nodes, so we adjust the candidate pool before updating this node
2874 _AdjustCandidatePool(self, [node.name])
2876 if self.op.secondary_ip:
2877 node.secondary_ip = self.op.secondary_ip
2878 result.append(("secondary_ip", self.op.secondary_ip))
2880 # this will trigger configuration file update, if needed
2881 self.cfg.Update(node, feedback_fn)
2883 # this will trigger job queue propagation or cleanup if the mc flag changed
2885 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
2886 self.context.ReaddNode(node)
2891 class LUNodePowercycle(NoHooksLU):
2892 """Powercycles a node.
2897 def CheckArguments(self):
2898 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2899 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
2900 raise errors.OpPrereqError("The node is the master and the force"
2901 " parameter was not set",
2904 def ExpandNames(self):
2905 """Locking for PowercycleNode.
2907 This is a last-resort option and shouldn't block on other
2908 jobs. Therefore, we grab no locks.
2911 self.needed_locks = {}
2913 def Exec(self, feedback_fn):
2917 result = self.rpc.call_node_powercycle(self.op.node_name,
2918 self.cfg.GetHypervisorType())
2919 result.Raise("Failed to schedule the reboot")
2920 return result.payload
2923 class LUInstanceActivateDisks(NoHooksLU):
2924 """Bring up an instance's disks.
2929 def ExpandNames(self):
2930 self._ExpandAndLockInstance()
2931 self.needed_locks[locking.LEVEL_NODE] = []
2932 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2934 def DeclareLocks(self, level):
2935 if level == locking.LEVEL_NODE:
2936 self._LockInstancesNodes()
2938 def CheckPrereq(self):
2939 """Check prerequisites.
2941 This checks that the instance is in the cluster.
2944 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2945 assert self.instance is not None, \
2946 "Cannot retrieve locked instance %s" % self.op.instance_name
2947 _CheckNodeOnline(self, self.instance.primary_node)
2949 def Exec(self, feedback_fn):
2950 """Activate the disks.
2953 disks_ok, disks_info = \
2954 _AssembleInstanceDisks(self, self.instance,
2955 ignore_size=self.op.ignore_size)
2957 raise errors.OpExecError("Cannot activate block devices")
2959 if self.op.wait_for_sync:
2960 if not _WaitForSync(self, self.instance):
2961 raise errors.OpExecError("Some disks of the instance are degraded!")
2966 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
2968 """Prepare the block devices for an instance.
2970 This sets up the block devices on all nodes.
2972 @type lu: L{LogicalUnit}
2973 @param lu: the logical unit on whose behalf we execute
2974 @type instance: L{objects.Instance}
2975 @param instance: the instance for whose disks we assemble
2976 @type disks: list of L{objects.Disk} or None
2977 @param disks: which disks to assemble (or all, if None)
2978 @type ignore_secondaries: boolean
2979 @param ignore_secondaries: if true, errors on secondary nodes
2980 won't result in an error return from the function
2981 @type ignore_size: boolean
2982 @param ignore_size: if true, the current known size of the disk
2983 will not be used during the disk activation, useful for cases
2984 when the size is wrong
2985 @return: False if the operation failed, otherwise a list of
2986 (host, instance_visible_name, node_visible_name)
2987 with the mapping from node devices to instance devices
2992 iname = instance.name
2993 disks = _ExpandCheckDisks(instance, disks)
2995 # With the two-pass mechanism we try to reduce the window of
2996 # opportunity for the race condition of switching DRBD to primary
2997 # before handshaking occurred, but we do not eliminate it
2999 # The proper fix would be to wait (with some limits) until the
3000 # connection has been made and drbd transitions from WFConnection
3001 # into any other network-connected state (Connected, SyncTarget,
3004 # 1st pass, assemble on all nodes in secondary mode
3005 for idx, inst_disk in enumerate(disks):
3006 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3008 node_disk = node_disk.Copy()
3009 node_disk.UnsetSize()
3010 lu.cfg.SetDiskID(node_disk, node)
3011 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
3013 msg = result.fail_msg
3015 is_offline_secondary = (node in instance.secondary_nodes and
3017 lu.LogWarning("Could not prepare block device %s on node %s"
3018 " (is_primary=False, pass=1): %s",
3019 inst_disk.iv_name, node, msg)
3020 if not (ignore_secondaries or is_offline_secondary):
3023 # FIXME: race condition on drbd migration to primary
3025 # 2nd pass, do only the primary node
3026 for idx, inst_disk in enumerate(disks):
3029 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3030 if node != instance.primary_node:
3033 node_disk = node_disk.Copy()
3034 node_disk.UnsetSize()
3035 lu.cfg.SetDiskID(node_disk, node)
3036 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
3038 msg = result.fail_msg
3040 lu.LogWarning("Could not prepare block device %s on node %s"
3041 " (is_primary=True, pass=2): %s",
3042 inst_disk.iv_name, node, msg)
3045 dev_path = result.payload
3047 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3049 # leave the disks configured for the primary node
3050 # this is a workaround that would be fixed better by
3051 # improving the logical/physical id handling
3053 lu.cfg.SetDiskID(disk, instance.primary_node)
3055 return disks_ok, device_info
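# Hedged usage sketch (hypothetical helper, not called by any LU): one way a
# logical unit could assemble every disk of an instance and log where each
# device became visible. Assumes `lu` is a LogicalUnit and `instance` an
# objects.Instance already fetched from the configuration.
def _ExampleAssembleAllDisks(lu, instance):
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance,
                                                 ignore_size=True)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    # each entry maps a node device to the instance-visible device
    lu.LogInfo("Disk %s of instance %s visible on node %s as %s",
               iv_name, instance.name, node, dev_path)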
3058 def _StartInstanceDisks(lu, instance, force):
3059 """Start the disks of an instance.
3062 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3063 ignore_secondaries=force)
3065 _ShutdownInstanceDisks(lu, instance)
3066 if force is not None and not force:
3068 hint=("If the message above refers to a secondary node,"
3069 " you can retry the operation using '--force'"))
3070 raise errors.OpExecError("Disk consistency error")
3073 class LUInstanceDeactivateDisks(NoHooksLU):
3074 """Shutdown an instance's disks.
3079 def ExpandNames(self):
3080 self._ExpandAndLockInstance()
3081 self.needed_locks[locking.LEVEL_NODE] = []
3082 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3084 def DeclareLocks(self, level):
3085 if level == locking.LEVEL_NODE:
3086 self._LockInstancesNodes()
3088 def CheckPrereq(self):
3089 """Check prerequisites.
3091 This checks that the instance is in the cluster.
3094 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3095 assert self.instance is not None, \
3096 "Cannot retrieve locked instance %s" % self.op.instance_name
3098 def Exec(self, feedback_fn):
3099 """Deactivate the disks
3102 instance = self.instance
3104 _ShutdownInstanceDisks(self, instance)
3106 _SafeShutdownInstanceDisks(self, instance)
3109 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
3110 """Shutdown block devices of an instance.
3112 This function checks if an instance is running, before calling
3113 _ShutdownInstanceDisks.
3116 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
3117 _ShutdownInstanceDisks(lu, instance, disks=disks)
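# Hedged usage sketch (hypothetical, not called anywhere): shut down only the
# first disk of an instance; the state check in the function above refuses to
# act, raising OpPrereqError, while the instance is not administratively down.
def _ExampleShutdownFirstDisk(lu, instance):
  _SafeShutdownInstanceDisks(lu, instance, disks=[instance.disks[0]])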
3120 def _ExpandCheckDisks(instance, disks):
3121 """Return the instance disks selected by the disks list
3123 @type disks: list of L{objects.Disk} or None
3124 @param disks: selected disks
3125 @rtype: list of L{objects.Disk}
3126 @return: selected instance disks to act on
3130 return instance.disks
3132 if not set(disks).issubset(instance.disks):
3133 raise errors.ProgrammerError("Can only act on disks belonging to the"
3138 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
3139 """Shutdown block devices of an instance.
3141 This does the shutdown on all nodes of the instance.
3143 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed.
3148 disks = _ExpandCheckDisks(instance, disks)
3151 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3152 lu.cfg.SetDiskID(top_disk, node)
3153 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
3154 msg = result.fail_msg
3156 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3157 disk.iv_name, node, msg)
3158 if ((node == instance.primary_node and not ignore_primary) or
3159 (node != instance.primary_node and not result.offline)):
3164 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3165 """Checks if a node has enough free memory.
3167 This function checks if a given node has the needed amount of free
3168 memory. In case the node has less memory or we cannot get the
3169 information from the node, this function raises an OpPrereqError exception.
3172 @type lu: C{LogicalUnit}
3173 @param lu: a logical unit from which we get configuration data
3175 @param node: the node to check
3176 @type reason: C{str}
3177 @param reason: string to use in the error message
3178 @type requested: C{int}
3179 @param requested: the amount of memory in MiB to check for
3180 @type hypervisor_name: C{str}
3181 @param hypervisor_name: the hypervisor to ask for memory stats
3183 @return: node current free memory
3184 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3185 we cannot check the node
3188 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
3189 nodeinfo[node].Raise("Can't get data from node %s" % node,
3190 prereq=True, ecode=errors.ECODE_ENVIRON)
3191 (_, _, (hv_info, )) = nodeinfo[node].payload
3193 free_mem = hv_info.get("memory_free", None)
3194 if not isinstance(free_mem, int):
3195 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3196 " was '%s'" % (node, free_mem),
3197 errors.ECODE_ENVIRON)
3198 if requested > free_mem:
3199 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3200 " needed %s MiB, available %s MiB" %
3201 (node, reason, requested, free_mem),
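# Hedged usage sketch (made-up node name; the hypervisor constant is only for
# illustration): require 2 GiB of free memory on a node before starting an
# instance, keeping the reported value for logging.
def _ExampleRequireFreeMemory(lu):
  free_mem = _CheckNodeFreeMemory(lu, "node1.example.com",
                                  "starting instance inst1.example.com",
                                  2048, constants.HT_XEN_PVM)
  lu.LogInfo("Node reports %s MiB of free memory", free_mem)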
3206 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
3207 """Checks if nodes have enough free disk space in all the VGs.
3209 This function checks if all given nodes have the needed amount of
3210 free disk. In case any node has less disk or we cannot get the
3211 information from the node, this function raises an OpPrereqError exception.
3214 @type lu: C{LogicalUnit}
3215 @param lu: a logical unit from which we get configuration data
3216 @type nodenames: C{list}
3217 @param nodenames: the list of node names to check
3218 @type req_sizes: C{dict}
3219 @param req_sizes: the hash of vg and corresponding amount of disk in
3221 @raise errors.OpPrereqError: if the node doesn't have enough disk,
3222 or we cannot check the node
3225 for vg, req_size in req_sizes.items():
3226 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
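# Hedged usage sketch (made-up node and volume group names): req_sizes maps
# each volume group to the amount of disk space, in MiB, that every listed
# node must have available in that group.
def _ExampleRequireFreeDisk(lu):
  _CheckNodesFreeDiskPerVG(lu, ["node1.example.com", "node2.example.com"],
                           {"xenvg": 10240, "datavg": 2048})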
3229 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
3230 """Checks if nodes have enough free disk space in the specified VG.
3232 This function checks if all given nodes have the needed amount of
3233 free disk. In case any node has less disk or we cannot get the
3234 information from the node, this function raises an OpPrereqError exception.
3237 @type lu: C{LogicalUnit}
3238 @param lu: a logical unit from which we get configuration data
3239 @type nodenames: C{list}
3240 @param nodenames: the list of node names to check
3242 @param vg: the volume group to check
3243 @type requested: C{int}
3244 @param requested: the amount of disk in MiB to check for
3245 @raise errors.OpPrereqError: if the node doesn't have enough disk,
3246 or we cannot check the node
3249 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
3250 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
3251 for node in nodenames:
3252 info = nodeinfo[node]
3253 info.Raise("Cannot get current information from node %s" % node,
3254 prereq=True, ecode=errors.ECODE_ENVIRON)
3255 (_, (vg_info, ), _) = info.payload
3256 vg_free = vg_info.get("vg_free", None)
3257 if not isinstance(vg_free, int):
3258 raise errors.OpPrereqError("Can't compute free disk space on node"
3259 " %s for vg %s, result was '%s'" %
3260 (node, vg, vg_free), errors.ECODE_ENVIRON)
3261 if requested > vg_free:
3262 raise errors.OpPrereqError("Not enough disk space on target node %s"
3263 " vg %s: required %d MiB, available %d MiB" %
3264 (node, vg, requested, vg_free),
3268 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
3269 """Checks if nodes have enough physical CPUs
3271 This function checks if all given nodes have the needed number of
3272 physical CPUs. In case any node has less CPUs or we cannot get the
3273 information from the node, this function raises an OpPrereqError exception.
3276 @type lu: C{LogicalUnit}
3277 @param lu: a logical unit from which we get configuration data
3278 @type nodenames: C{list}
3279 @param nodenames: the list of node names to check
3280 @type requested: C{int}
3281 @param requested: the minimum acceptable number of physical CPUs
3282 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
3283 or we cannot check the node
3286 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
3287 for node in nodenames:
3288 info = nodeinfo[node]
3289 info.Raise("Cannot get current information from node %s" % node,
3290 prereq=True, ecode=errors.ECODE_ENVIRON)
3291 (_, _, (hv_info, )) = info.payload
3292 num_cpus = hv_info.get("cpu_total", None)
3293 if not isinstance(num_cpus, int):
3294 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
3295 " on node %s, result was '%s'" %
3296 (node, num_cpus), errors.ECODE_ENVIRON)
3297 if requested > num_cpus:
3298 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
3299 "required" % (node, num_cpus, requested),
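# Hedged usage sketch (hypothetical values; the hypervisor constant is only
# for illustration): require at least four physical CPUs on every listed node.
def _ExampleRequirePhysicalCpus(lu):
  _CheckNodesPhysicalCPUs(lu, ["node1.example.com", "node2.example.com"],
                          4, constants.HT_KVM)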
3303 class LUInstanceStartup(LogicalUnit):
3304 """Starts an instance.
3307 HPATH = "instance-start"
3308 HTYPE = constants.HTYPE_INSTANCE
3311 def CheckArguments(self):
3313 if self.op.beparams:
3314 # fill the beparams dict
3315 objects.UpgradeBeParams(self.op.beparams)
3316 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3318 def ExpandNames(self):
3319 self._ExpandAndLockInstance()
3320 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3322 def DeclareLocks(self, level):
3323 if level == locking.LEVEL_NODE_RES:
3324 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
3326 def BuildHooksEnv(self):
3329 This runs on master, primary and secondary nodes of the instance.
3333 "FORCE": self.op.force,
3336 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3340 def BuildHooksNodes(self):
3341 """Build hooks nodes.
3344 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3347 def CheckPrereq(self):
3348 """Check prerequisites.
3350 This checks that the instance is in the cluster.
3353 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3354 assert self.instance is not None, \
3355 "Cannot retrieve locked instance %s" % self.op.instance_name
3358 if self.op.hvparams:
3359 # check hypervisor parameter syntax (locally)
3360 cluster = self.cfg.GetClusterInfo()
3361 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
3362 filled_hvp = cluster.FillHV(instance)
3363 filled_hvp.update(self.op.hvparams)
3364 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
3365 hv_type.CheckParameterSyntax(filled_hvp)
3366 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3368 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
3370 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
3372 if self.primary_offline and self.op.ignore_offline_nodes:
3373 self.LogWarning("Ignoring offline primary node")
3375 if self.op.hvparams or self.op.beparams:
3376 self.LogWarning("Overridden parameters are ignored")
3378 _CheckNodeOnline(self, instance.primary_node)
3380 bep = self.cfg.GetClusterInfo().FillBE(instance)
3381 bep.update(self.op.beparams)
3383 # check that the bridges exist
3384 _CheckInstanceBridgesExist(self, instance)
3386 remote_info = self.rpc.call_instance_info(instance.primary_node,
3388 instance.hypervisor)
3389 remote_info.Raise("Error checking node %s" % instance.primary_node,
3390 prereq=True, ecode=errors.ECODE_ENVIRON)
3391 if not remote_info.payload: # not running already
3392 _CheckNodeFreeMemory(self, instance.primary_node,
3393 "starting instance %s" % instance.name,
3394 bep[constants.BE_MINMEM], instance.hypervisor)
3396 def Exec(self, feedback_fn):
3397 """Start the instance.
3400 instance = self.instance
3401 force = self.op.force
3402 reason = self.op.reason
3404 if not self.op.no_remember:
3405 self.cfg.MarkInstanceUp(instance.name)
3407 if self.primary_offline:
3408 assert self.op.ignore_offline_nodes
3409 self.LogInfo("Primary node offline, marked instance as started")
3411 node_current = instance.primary_node
3413 _StartInstanceDisks(self, instance, force)
3416 self.rpc.call_instance_start(node_current,
3417 (instance, self.op.hvparams,
3419 self.op.startup_paused, reason)
3420 msg = result.fail_msg
3422 _ShutdownInstanceDisks(self, instance)
3423 raise errors.OpExecError("Could not start instance: %s" % msg)
3426 class LUInstanceReboot(LogicalUnit):
3427 """Reboot an instance.
3430 HPATH = "instance-reboot"
3431 HTYPE = constants.HTYPE_INSTANCE
3434 def ExpandNames(self):
3435 self._ExpandAndLockInstance()
3437 def BuildHooksEnv(self):
3440 This runs on master, primary and secondary nodes of the instance.
3444 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3445 "REBOOT_TYPE": self.op.reboot_type,
3446 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
3449 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3453 def BuildHooksNodes(self):
3454 """Build hooks nodes.
3457 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3460 def CheckPrereq(self):
3461 """Check prerequisites.
3463 This checks that the instance is in the cluster.
3466 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3467 assert self.instance is not None, \
3468 "Cannot retrieve locked instance %s" % self.op.instance_name
3469 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
3470 _CheckNodeOnline(self, instance.primary_node)
3472 # check that the bridges exist
3473 _CheckInstanceBridgesExist(self, instance)
3475 def Exec(self, feedback_fn):
3476 """Reboot the instance.
3479 instance = self.instance
3480 ignore_secondaries = self.op.ignore_secondaries
3481 reboot_type = self.op.reboot_type
3482 reason = self.op.reason
3484 remote_info = self.rpc.call_instance_info(instance.primary_node,
3486 instance.hypervisor)
3487 remote_info.Raise("Error checking node %s" % instance.primary_node)
3488 instance_running = bool(remote_info.payload)
3490 node_current = instance.primary_node
3492 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3493 constants.INSTANCE_REBOOT_HARD]:
3494 for disk in instance.disks:
3495 self.cfg.SetDiskID(disk, node_current)
3496 result = self.rpc.call_instance_reboot(node_current, instance,
3498 self.op.shutdown_timeout, reason)
3499 result.Raise("Could not reboot instance")
3501 if instance_running:
3502 result = self.rpc.call_instance_shutdown(node_current, instance,
3503 self.op.shutdown_timeout,
3505 result.Raise("Could not shutdown instance for full reboot")
3506 _ShutdownInstanceDisks(self, instance)
3508 self.LogInfo("Instance %s was already stopped, starting now",
3510 _StartInstanceDisks(self, instance, ignore_secondaries)
3511 result = self.rpc.call_instance_start(node_current,
3512 (instance, None, None), False,
3514 msg = result.fail_msg
3516 _ShutdownInstanceDisks(self, instance)
3517 raise errors.OpExecError("Could not start instance for"
3518 " full reboot: %s" % msg)
3520 self.cfg.MarkInstanceUp(instance.name)
3523 class LUInstanceShutdown(LogicalUnit):
3524 """Shutdown an instance.
3527 HPATH = "instance-stop"
3528 HTYPE = constants.HTYPE_INSTANCE
3531 def ExpandNames(self):
3532 self._ExpandAndLockInstance()
3534 def BuildHooksEnv(self):
3537 This runs on master, primary and secondary nodes of the instance.
3540 env = _BuildInstanceHookEnvByObject(self, self.instance)
3541 env["TIMEOUT"] = self.op.timeout
3544 def BuildHooksNodes(self):
3545 """Build hooks nodes.
3548 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3551 def CheckPrereq(self):
3552 """Check prerequisites.
3554 This checks that the instance is in the cluster.
3557 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3558 assert self.instance is not None, \
3559 "Cannot retrieve locked instance %s" % self.op.instance_name
3561 if not self.op.force:
3562 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
3564 self.LogWarning("Ignoring offline instance check")
3566 self.primary_offline = \
3567 self.cfg.GetNodeInfo(self.instance.primary_node).offline
3569 if self.primary_offline and self.op.ignore_offline_nodes:
3570 self.LogWarning("Ignoring offline primary node")
3572 _CheckNodeOnline(self, self.instance.primary_node)
3574 def Exec(self, feedback_fn):
3575 """Shutdown the instance.
3578 instance = self.instance
3579 node_current = instance.primary_node
3580 timeout = self.op.timeout
3581 reason = self.op.reason
3583 # If the instance is offline we shouldn't mark it as down, as that
3584 # resets the offline flag.
3585 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
3586 self.cfg.MarkInstanceDown(instance.name)
3588 if self.primary_offline:
3589 assert self.op.ignore_offline_nodes
3590 self.LogInfo("Primary node offline, marked instance as stopped")
3592 result = self.rpc.call_instance_shutdown(node_current, instance, timeout,
3594 msg = result.fail_msg
3596 self.LogWarning("Could not shutdown instance: %s", msg)
3598 _ShutdownInstanceDisks(self, instance)
3601 class LUInstanceReinstall(LogicalUnit):
3602 """Reinstall an instance.
3605 HPATH = "instance-reinstall"
3606 HTYPE = constants.HTYPE_INSTANCE
3609 def ExpandNames(self):
3610 self._ExpandAndLockInstance()
3612 def BuildHooksEnv(self):
3615 This runs on master, primary and secondary nodes of the instance.
3618 return _BuildInstanceHookEnvByObject(self, self.instance)
3620 def BuildHooksNodes(self):
3621 """Build hooks nodes.
3624 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3627 def CheckPrereq(self):
3628 """Check prerequisites.
3630 This checks that the instance is in the cluster and is not running.
3633 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3634 assert instance is not None, \
3635 "Cannot retrieve locked instance %s" % self.op.instance_name
3636 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
3637 " offline, cannot reinstall")
3639 if instance.disk_template == constants.DT_DISKLESS:
3640 raise errors.OpPrereqError("Instance '%s' has no disks" %
3641 self.op.instance_name,
3643 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
3645 if self.op.os_type is not None:
3647 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
3648 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
3649 instance_os = self.op.os_type
3651 instance_os = instance.os
3653 nodelist = list(instance.all_nodes)
3655 if self.op.osparams:
3656 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
3657 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
3658 self.os_inst = i_osdict # the new dict (without defaults)
3662 self.instance = instance
3664 def Exec(self, feedback_fn):
3665 """Reinstall the instance.
3668 inst = self.instance
3670 if self.op.os_type is not None:
3671 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3672 inst.os = self.op.os_type
3673 # Write to configuration
3674 self.cfg.Update(inst, feedback_fn)
3676 _StartInstanceDisks(self, inst, None)
3678 feedback_fn("Running the instance OS create scripts...")
3679 # FIXME: pass debug option from opcode to backend
3680 result = self.rpc.call_instance_os_add(inst.primary_node,
3681 (inst, self.os_inst), True,
3682 self.op.debug_level)
3683 result.Raise("Could not install OS for instance %s on node %s" %
3684 (inst.name, inst.primary_node))
3686 _ShutdownInstanceDisks(self, inst)
3689 class LUInstanceRecreateDisks(LogicalUnit):
3690 """Recreate an instance's missing disks.
3693 HPATH = "instance-recreate-disks"
3694 HTYPE = constants.HTYPE_INSTANCE
3697 _MODIFYABLE = compat.UniqueFrozenset([
3698 constants.IDISK_SIZE,
3699 constants.IDISK_MODE,
3702 # New or changed disk parameters may have different semantics
3703 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
3704 constants.IDISK_ADOPT,
3706 # TODO: Implement support for changing the VG while recreating
3708 constants.IDISK_METAVG,
3709 constants.IDISK_PROVIDER,
3710 constants.IDISK_NAME,
3713 def _RunAllocator(self):
3714 """Run the allocator based on input opcode.
3717 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
3720 # The allocator should actually run in "relocate" mode, but current
3721 # allocators don't support relocating all the nodes of an instance at
3722 # the same time. As a workaround we use "allocate" mode, but this is
3723 # suboptimal for two reasons:
3724 # - The instance name passed to the allocator is present in the list of
3725 # existing instances, so there could be a conflict within the
3726 # internal structures of the allocator. This doesn't happen with the
3727 # current allocators, but it's a liability.
3728 # - The allocator counts the resources used by the instance twice: once
3729 # because the instance exists already, and once because it tries to
3730 # allocate a new instance.
3731 # The allocator could choose some of the nodes on which the instance is
3732 # running, but that's not a problem. If the instance nodes are broken,
3733 # they should already be marked as drained or offline, and hence
3734 # skipped by the allocator. If instance disks have been lost for other
3735 # reasons, then recreating the disks on the same nodes should be fine.
3736 disk_template = self.instance.disk_template
3737 spindle_use = be_full[constants.BE_SPINDLE_USE]
3738 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
3739 disk_template=disk_template,
3740 tags=list(self.instance.GetTags()),
3741 os=self.instance.os,
3743 vcpus=be_full[constants.BE_VCPUS],
3744 memory=be_full[constants.BE_MAXMEM],
3745 spindle_use=spindle_use,
3746 disks=[{constants.IDISK_SIZE: d.size,
3747 constants.IDISK_MODE: d.mode}
3748 for d in self.instance.disks],
3749 hypervisor=self.instance.hypervisor,
3750 node_whitelist=None)
3751 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
3753 ial.Run(self.op.iallocator)
3755 assert req.RequiredNodes() == len(self.instance.all_nodes)
3758 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
3759 " %s" % (self.op.iallocator, ial.info),
3762 self.op.nodes = ial.result
3763 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
3764 self.op.instance_name, self.op.iallocator,
3765 utils.CommaJoin(ial.result))
3767 def CheckArguments(self):
3768 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
3769 # Normalize and convert deprecated list of disk indices
3770 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
3772 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
3774 raise errors.OpPrereqError("Some disks have been specified more than"
3775 " once: %s" % utils.CommaJoin(duplicates),
3778 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
3779 # when neither iallocator nor nodes are specified
3780 if self.op.iallocator or self.op.nodes:
3781 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
3783 for (idx, params) in self.op.disks:
3784 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
3785 unsupported = frozenset(params.keys()) - self._MODIFYABLE
3787 raise errors.OpPrereqError("Parameters for disk %s try to change"
3788 " unmodifiable parameter(s): %s" %
3789 (idx, utils.CommaJoin(unsupported)),
3792 def ExpandNames(self):
3793 self._ExpandAndLockInstance()
3794 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3797 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
3798 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
3800 self.needed_locks[locking.LEVEL_NODE] = []
3801 if self.op.iallocator:
3802 # iallocator will select a new node in the same group
3803 self.needed_locks[locking.LEVEL_NODEGROUP] = []
3804 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
3806 self.needed_locks[locking.LEVEL_NODE_RES] = []
3808 def DeclareLocks(self, level):
3809 if level == locking.LEVEL_NODEGROUP:
3810 assert self.op.iallocator is not None
3811 assert not self.op.nodes
3812 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3813 self.share_locks[locking.LEVEL_NODEGROUP] = 1
3814 # Lock the primary group used by the instance optimistically; this
3815 # requires going via the node before it's locked, requiring
3816 # verification later on
3817 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3818 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
3820 elif level == locking.LEVEL_NODE:
3821 # If an allocator is used, then we lock all the nodes in the current
3822 # instance group, as we don't know yet which ones will be selected;
3823 # if we replace the nodes without using an allocator, locks are
3824 # already declared in ExpandNames; otherwise, we need to lock all the
3825 # instance nodes for disk re-creation
3826 if self.op.iallocator:
3827 assert not self.op.nodes
3828 assert not self.needed_locks[locking.LEVEL_NODE]
3829 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
3831 # Lock member nodes of the group of the primary node
3832 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
3833 self.needed_locks[locking.LEVEL_NODE].extend(
3834 self.cfg.GetNodeGroup(group_uuid).members)
3836 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
3837 elif not self.op.nodes:
3838 self._LockInstancesNodes(primary_only=False)
3839 elif level == locking.LEVEL_NODE_RES:
3841 self.needed_locks[locking.LEVEL_NODE_RES] = \
3842 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
3844 def BuildHooksEnv(self):
3847 This runs on master, primary and secondary nodes of the instance.
3850 return _BuildInstanceHookEnvByObject(self, self.instance)
3852 def BuildHooksNodes(self):
3853 """Build hooks nodes.
3856 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3859 def CheckPrereq(self):
3860 """Check prerequisites.
3862 This checks that the instance is in the cluster and is not running.
3865 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3866 assert instance is not None, \
3867 "Cannot retrieve locked instance %s" % self.op.instance_name
3869 if len(self.op.nodes) != len(instance.all_nodes):
3870 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
3871 " %d replacement nodes were specified" %
3872 (instance.name, len(instance.all_nodes),
3873 len(self.op.nodes)),
3875 assert instance.disk_template != constants.DT_DRBD8 or \
3876 len(self.op.nodes) == 2
3877 assert instance.disk_template != constants.DT_PLAIN or \
3878 len(self.op.nodes) == 1
3879 primary_node = self.op.nodes[0]
3881 primary_node = instance.primary_node
3882 if not self.op.iallocator:
3883 _CheckNodeOnline(self, primary_node)
3885 if instance.disk_template == constants.DT_DISKLESS:
3886 raise errors.OpPrereqError("Instance '%s' has no disks" %
3887 self.op.instance_name, errors.ECODE_INVAL)
3889 # Verify if node group locks are still correct
3890 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
3892 # Node group locks are acquired only for the primary node (and only
3893 # when the allocator is used)
3894 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
3897 # if we replace nodes *and* the old primary is offline, we don't
3898 # check the instance state
3899 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
3900 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
3901 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
3902 msg="cannot recreate disks")
3905 self.disks = dict(self.op.disks)
3907 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
3909 maxidx = max(self.disks.keys())
3910 if maxidx >= len(instance.disks):
3911 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
3914 if ((self.op.nodes or self.op.iallocator) and
3915 sorted(self.disks.keys()) != range(len(instance.disks))):
3916 raise errors.OpPrereqError("Can't recreate disks partially and"
3917 " change the nodes at the same time",
3920 self.instance = instance
3922 if self.op.iallocator:
3923 self._RunAllocator()
3924 # Release unneeded node and node resource locks
3925 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
3926 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
3927 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
3929 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
3931 def Exec(self, feedback_fn):
3932 """Recreate the disks.
3935 instance = self.instance
3937 assert (self.owned_locks(locking.LEVEL_NODE) ==
3938 self.owned_locks(locking.LEVEL_NODE_RES))
3941 mods = [] # keeps track of needed changes
3943 for idx, disk in enumerate(instance.disks):
3945 changes = self.disks[idx]
3947 # Disk should not be recreated
3951 # update secondaries for disks, if needed
3952 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
3953 # need to update the nodes and minors
3954 assert len(self.op.nodes) == 2
3955 assert len(disk.logical_id) == 6 # otherwise disk internals
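# For reference, a DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, shared_secret); below, only the
# node names and the minors change, the port and the secret are reused.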
3957 (_, _, old_port, _, _, old_secret) = disk.logical_id
3958 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
3959 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
3960 new_minors[0], new_minors[1], old_secret)
3961 assert len(disk.logical_id) == len(new_id)
3965 mods.append((idx, new_id, changes))
3967 # now that we have passed all asserts above, we can apply the mods
3968 # in a single run (to avoid partial changes)
3969 for idx, new_id, changes in mods:
3970 disk = instance.disks[idx]
3971 if new_id is not None:
3972 assert disk.dev_type == constants.LD_DRBD8
3973 disk.logical_id = new_id
3975 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
3976 mode=changes.get(constants.IDISK_MODE, None))
3978 # change primary node, if needed
3980 instance.primary_node = self.op.nodes[0]
3981 self.LogWarning("Changing the instance's nodes, you will have to"
3982 " remove any disks left on the older nodes manually")
3985 self.cfg.Update(instance, feedback_fn)
3987 # All touched nodes must be locked
3988 mylocks = self.owned_locks(locking.LEVEL_NODE)
3989 assert mylocks.issuperset(frozenset(instance.all_nodes))
3990 _CreateDisks(self, instance, to_skip=to_skip)
3993 class LUInstanceRename(LogicalUnit):
3994 """Rename an instance.
3997 HPATH = "instance-rename"
3998 HTYPE = constants.HTYPE_INSTANCE
4000 def CheckArguments(self):
4004 if self.op.ip_check and not self.op.name_check:
4005 # TODO: make the ip check more flexible and not depend on the name check
4006 raise errors.OpPrereqError("IP address check requires a name check",
4009 def BuildHooksEnv(self):
4012 This runs on master, primary and secondary nodes of the instance.
4015 env = _BuildInstanceHookEnvByObject(self, self.instance)
4016 env["INSTANCE_NEW_NAME"] = self.op.new_name
4019 def BuildHooksNodes(self):
4020 """Build hooks nodes.
4023 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4026 def CheckPrereq(self):
4027 """Check prerequisites.
4029 This checks that the instance is in the cluster and is not running.
4032 self.op.instance_name = _ExpandInstanceName(self.cfg,
4033 self.op.instance_name)
4034 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4035 assert instance is not None
4036 _CheckNodeOnline(self, instance.primary_node)
4037 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
4038 msg="cannot rename")
4039 self.instance = instance
4041 new_name = self.op.new_name
4042 if self.op.name_check:
4043 hostname = _CheckHostnameSane(self, new_name)
4044 new_name = self.op.new_name = hostname.name
4045 if (self.op.ip_check and
4046 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4047 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4048 (hostname.ip, new_name),
4049 errors.ECODE_NOTUNIQUE)
4051 instance_list = self.cfg.GetInstanceList()
4052 if new_name in instance_list and new_name != instance.name:
4053 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4054 new_name, errors.ECODE_EXISTS)
4056 def Exec(self, feedback_fn):
4057 """Rename the instance.
4060 inst = self.instance
4061 old_name = inst.name
4063 rename_file_storage = False
4064 if (inst.disk_template in constants.DTS_FILEBASED and
4065 self.op.new_name != inst.name):
4066 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4067 rename_file_storage = True
4069 self.cfg.RenameInstance(inst.name, self.op.new_name)
4070 # Change the instance lock. This is definitely safe while we hold the BGL.
4071 # Otherwise the new lock would have to be added in acquired mode.
4073 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
4074 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
4075 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4077 # re-read the instance from the configuration after rename
4078 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4080 if rename_file_storage:
4081 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4082 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4083 old_file_storage_dir,
4084 new_file_storage_dir)
4085 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4086 " (but the instance has been renamed in Ganeti)" %
4087 (inst.primary_node, old_file_storage_dir,
4088 new_file_storage_dir))
4090 _StartInstanceDisks(self, inst, None)
4091 # update info on disks
4092 info = _GetInstanceInfoText(inst)
4093 for (idx, disk) in enumerate(inst.disks):
4094 for node in inst.all_nodes:
4095 self.cfg.SetDiskID(disk, node)
4096 result = self.rpc.call_blockdev_setinfo(node, disk, info)
4098 self.LogWarning("Error setting info on node %s for disk %s: %s",
4099 node, idx, result.fail_msg)
4101 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4102 old_name, self.op.debug_level)
4103 msg = result.fail_msg
4105 msg = ("Could not run OS rename script for instance %s on node %s"
4106 " (but the instance has been renamed in Ganeti): %s" %
4107 (inst.name, inst.primary_node, msg))
4108 self.LogWarning(msg)
4110 _ShutdownInstanceDisks(self, inst)
4115 class LUInstanceRemove(LogicalUnit):
4116 """Remove an instance.
4119 HPATH = "instance-remove"
4120 HTYPE = constants.HTYPE_INSTANCE
4123 def ExpandNames(self):
4124 self._ExpandAndLockInstance()
4125 self.needed_locks[locking.LEVEL_NODE] = []
4126 self.needed_locks[locking.LEVEL_NODE_RES] = []
4127 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4129 def DeclareLocks(self, level):
4130 if level == locking.LEVEL_NODE:
4131 self._LockInstancesNodes()
4132 elif level == locking.LEVEL_NODE_RES:
4134 self.needed_locks[locking.LEVEL_NODE_RES] = \
4135 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
4137 def BuildHooksEnv(self):
4140 This runs on master, primary and secondary nodes of the instance.
4143 env = _BuildInstanceHookEnvByObject(self, self.instance)
4144 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4147 def BuildHooksNodes(self):
4148 """Build hooks nodes.
4151 nl = [self.cfg.GetMasterNode()]
4152 nl_post = list(self.instance.all_nodes) + nl
4153 return (nl, nl_post)
4155 def CheckPrereq(self):
4156 """Check prerequisites.
4158 This checks that the instance is in the cluster.
4161 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4162 assert self.instance is not None, \
4163 "Cannot retrieve locked instance %s" % self.op.instance_name
4165 def Exec(self, feedback_fn):
4166 """Remove the instance.
4169 instance = self.instance
4170 logging.info("Shutting down instance %s on node %s",
4171 instance.name, instance.primary_node)
4173 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4174 self.op.shutdown_timeout,
4176 msg = result.fail_msg
4178 if self.op.ignore_failures:
4179 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4181 raise errors.OpExecError("Could not shutdown instance %s on"
4183 (instance.name, instance.primary_node, msg))
4185 assert (self.owned_locks(locking.LEVEL_NODE) ==
4186 self.owned_locks(locking.LEVEL_NODE_RES))
4187 assert not (set(instance.all_nodes) -
4188 self.owned_locks(locking.LEVEL_NODE)), \
4189 "Not owning correct locks"
4191 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4194 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4195 """Utility function to remove an instance.
4198 logging.info("Removing block devices for instance %s", instance.name)
4200 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
4201 if not ignore_failures:
4202 raise errors.OpExecError("Can't remove instance's disks")
4203 feedback_fn("Warning: can't remove instance's disks")
4205 logging.info("Removing instance %s out of cluster config", instance.name)
4207 lu.cfg.RemoveInstance(instance.name)
4209 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4210 "Instance lock removal conflict"
4212 # Remove lock for the instance
4213 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4216 class LUInstanceQuery(NoHooksLU):
4217 """Logical unit for querying instances.
4220 # pylint: disable=W0142
4223 def CheckArguments(self):
4224 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
4225 self.op.output_fields, self.op.use_locking)
4227 def ExpandNames(self):
4228 self.iq.ExpandNames(self)
4230 def DeclareLocks(self, level):
4231 self.iq.DeclareLocks(self, level)
4233 def Exec(self, feedback_fn):
4234 return self.iq.OldStyleQuery(self)
4237 def _ExpandNamesForMigration(lu):
4238 """Expands names for use with L{TLMigrateInstance}.
4240 @type lu: L{LogicalUnit}
4243 if lu.op.target_node is not None:
4244 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
4246 lu.needed_locks[locking.LEVEL_NODE] = []
4247 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4249 lu.needed_locks[locking.LEVEL_NODE_RES] = []
4250 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
4252 # The node allocation lock is actually only needed for externally replicated
4253 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
4254 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
4257 def _DeclareLocksForMigration(lu, level):
4258 """Declares locks for L{TLMigrateInstance}.
4260 @type lu: L{LogicalUnit}
4261 @param level: Lock level
4264 if level == locking.LEVEL_NODE_ALLOC:
4265 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4267 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
4269 # Node locks are already declared here rather than at LEVEL_NODE as we need
4270 # the instance object anyway to declare the node allocation lock.
4271 if instance.disk_template in constants.DTS_EXT_MIRROR:
4272 if lu.op.target_node is None:
4273 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4274 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4276 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
4278 del lu.recalculate_locks[locking.LEVEL_NODE]
4280 lu._LockInstancesNodes() # pylint: disable=W0212
4282 elif level == locking.LEVEL_NODE:
4283 # Node locks are declared together with the node allocation lock
4284 assert (lu.needed_locks[locking.LEVEL_NODE] or
4285 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
4287 elif level == locking.LEVEL_NODE_RES:
4289 lu.needed_locks[locking.LEVEL_NODE_RES] = \
4290 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
4293 class LUInstanceFailover(LogicalUnit):
4294 """Failover an instance.
4297 HPATH = "instance-failover"
4298 HTYPE = constants.HTYPE_INSTANCE
4301 def CheckArguments(self):
4302 """Check the arguments.
4305 self.iallocator = getattr(self.op, "iallocator", None)
4306 self.target_node = getattr(self.op, "target_node", None)
4308 def ExpandNames(self):
4309 self._ExpandAndLockInstance()
4310 _ExpandNamesForMigration(self)
4313 TLMigrateInstance(self, self.op.instance_name, False, True, False,
4314 self.op.ignore_consistency, True,
4315 self.op.shutdown_timeout, self.op.ignore_ipolicy)
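# For readability, the positional arguments above correspond to
# TLMigrateInstance(lu, instance_name, cleanup=False, failover=True,
# fallback=False, ignore_consistency=self.op.ignore_consistency,
# allow_runtime_changes=True, shutdown_timeout=self.op.shutdown_timeout,
# ignore_ipolicy=self.op.ignore_ipolicy).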
4317 self.tasklets = [self._migrater]
4319 def DeclareLocks(self, level):
4320 _DeclareLocksForMigration(self, level)
4322 def BuildHooksEnv(self):
4325 This runs on master, primary and secondary nodes of the instance.
4328 instance = self._migrater.instance
4329 source_node = instance.primary_node
4330 target_node = self.op.target_node
4332 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4333 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4334 "OLD_PRIMARY": source_node,
4335 "NEW_PRIMARY": target_node,
4338 if instance.disk_template in constants.DTS_INT_MIRROR:
4339 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
4340 env["NEW_SECONDARY"] = source_node
4342 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
4344 env.update(_BuildInstanceHookEnvByObject(self, instance))
4348 def BuildHooksNodes(self):
4349 """Build hooks nodes.
4352 instance = self._migrater.instance
4353 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4354 return (nl, nl + [instance.primary_node])
4357 class LUInstanceMigrate(LogicalUnit):
4358 """Migrate an instance.
4360 This is migration without shutting down, compared to the failover,
4361 which is done with shutdown.
4364 HPATH = "instance-migrate"
4365 HTYPE = constants.HTYPE_INSTANCE
4368 def ExpandNames(self):
4369 self._ExpandAndLockInstance()
4370 _ExpandNamesForMigration(self)
4373 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
4374 False, self.op.allow_failover, False,
4375 self.op.allow_runtime_changes,
4376 constants.DEFAULT_SHUTDOWN_TIMEOUT,
4377 self.op.ignore_ipolicy)
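# For readability, the positional arguments above correspond to
# TLMigrateInstance(lu, instance_name, cleanup=self.op.cleanup,
# failover=False, fallback=self.op.allow_failover, ignore_consistency=False,
# allow_runtime_changes=self.op.allow_runtime_changes,
# shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
# ignore_ipolicy=self.op.ignore_ipolicy).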
4379 self.tasklets = [self._migrater]
4381 def DeclareLocks(self, level):
4382 _DeclareLocksForMigration(self, level)
4384 def BuildHooksEnv(self):
4387 This runs on master, primary and secondary nodes of the instance.
4390 instance = self._migrater.instance
4391 source_node = instance.primary_node
4392 target_node = self.op.target_node
4393 env = _BuildInstanceHookEnvByObject(self, instance)
4395 "MIGRATE_LIVE": self._migrater.live,
4396 "MIGRATE_CLEANUP": self.op.cleanup,
4397 "OLD_PRIMARY": source_node,
4398 "NEW_PRIMARY": target_node,
4399 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
4402 if instance.disk_template in constants.DTS_INT_MIRROR:
4403 env["OLD_SECONDARY"] = target_node
4404 env["NEW_SECONDARY"] = source_node
4406 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
4410 def BuildHooksNodes(self):
4411 """Build hooks nodes.
4414 instance = self._migrater.instance
4415 snodes = list(instance.secondary_nodes)
4416 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
4420 class LUInstanceMove(LogicalUnit):
4421 """Move an instance by data-copying.
4424 HPATH = "instance-move"
4425 HTYPE = constants.HTYPE_INSTANCE
4428 def ExpandNames(self):
4429 self._ExpandAndLockInstance()
4430 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4431 self.op.target_node = target_node
4432 self.needed_locks[locking.LEVEL_NODE] = [target_node]
4433 self.needed_locks[locking.LEVEL_NODE_RES] = []
4434 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4436 def DeclareLocks(self, level):
4437 if level == locking.LEVEL_NODE:
4438 self._LockInstancesNodes(primary_only=True)
4439 elif level == locking.LEVEL_NODE_RES:
4441 self.needed_locks[locking.LEVEL_NODE_RES] = \
4442 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
4444 def BuildHooksEnv(self):
4447 This runs on master, primary and secondary nodes of the instance.
4451 "TARGET_NODE": self.op.target_node,
4452 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4454 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4457 def BuildHooksNodes(self):
4458 """Build hooks nodes.
4462 self.cfg.GetMasterNode(),
4463 self.instance.primary_node,
4464 self.op.target_node,
4468 def CheckPrereq(self):
4469 """Check prerequisites.
4471 This checks that the instance is in the cluster.
4474 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4475 assert self.instance is not None, \
4476 "Cannot retrieve locked instance %s" % self.op.instance_name
4478 if instance.disk_template not in constants.DTS_COPYABLE:
4479 raise errors.OpPrereqError("Disk template %s not suitable for copying" %
4480 instance.disk_template, errors.ECODE_STATE)
4482 node = self.cfg.GetNodeInfo(self.op.target_node)
4483 assert node is not None, \
4484 "Cannot retrieve locked node %s" % self.op.target_node
4486 self.target_node = target_node = node.name
4488 if target_node == instance.primary_node:
4489 raise errors.OpPrereqError("Instance %s is already on the node %s" %
4490 (instance.name, target_node),
4493 bep = self.cfg.GetClusterInfo().FillBE(instance)
4495 for idx, dsk in enumerate(instance.disks):
4496 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4497 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4498 " cannot copy" % idx, errors.ECODE_STATE)
4500 _CheckNodeOnline(self, target_node)
4501 _CheckNodeNotDrained(self, target_node)
4502 _CheckNodeVmCapable(self, target_node)
4503 cluster = self.cfg.GetClusterInfo()
4504 group_info = self.cfg.GetNodeGroup(node.group)
4505 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
4506 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
4507 ignore=self.op.ignore_ipolicy)
4509 if instance.admin_state == constants.ADMINST_UP:
4510 # check memory requirements on the secondary node
4511 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4512 instance.name, bep[constants.BE_MAXMEM],
4513 instance.hypervisor)
4515 self.LogInfo("Not checking memory on the secondary node as"
4516 " instance will not be started")
4518 # check bridge existence
4519 _CheckInstanceBridgesExist(self, instance, node=target_node)
4521 def Exec(self, feedback_fn):
4522 """Move an instance.
4524 The move is done by shutting it down on its present node, copying
4525 the data over (slow) and starting it on the new node.
4528 instance = self.instance
4530 source_node = instance.primary_node
4531 target_node = self.target_node
4533 self.LogInfo("Shutting down instance %s on source node %s",
4534 instance.name, source_node)
4536 assert (self.owned_locks(locking.LEVEL_NODE) ==
4537 self.owned_locks(locking.LEVEL_NODE_RES))
4539 result = self.rpc.call_instance_shutdown(source_node, instance,
4540 self.op.shutdown_timeout,
4542 msg = result.fail_msg
4544 if self.op.ignore_consistency:
4545 self.LogWarning("Could not shutdown instance %s on node %s."
4546 " Proceeding anyway. Please make sure node"
4547 " %s is down. Error details: %s",
4548 instance.name, source_node, source_node, msg)
4550 raise errors.OpExecError("Could not shutdown instance %s on"
4552 (instance.name, source_node, msg))
4554 # create the target disks
4556 _CreateDisks(self, instance, target_node=target_node)
4557 except errors.OpExecError:
4558 self.LogWarning("Device creation failed")
4559 self.cfg.ReleaseDRBDMinors(instance.name)
4562 cluster_name = self.cfg.GetClusterInfo().cluster_name
4565 # activate, get path, copy the data over
4566 for idx, disk in enumerate(instance.disks):
4567 self.LogInfo("Copying data for disk %d", idx)
4568 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
4569 instance.name, True, idx)
4571 self.LogWarning("Can't assemble newly created disk %d: %s",
4572 idx, result.fail_msg)
4573 errs.append(result.fail_msg)
4575 dev_path = result.payload
4576 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
4577 target_node, dev_path,
4580 self.LogWarning("Can't copy data over for disk %d: %s",
4581 idx, result.fail_msg)
4582 errs.append(result.fail_msg)
4586 self.LogWarning("Some disks failed to copy, aborting")
4588 _RemoveDisks(self, instance, target_node=target_node)
4590 self.cfg.ReleaseDRBDMinors(instance.name)
4591 raise errors.OpExecError("Errors during disk copy: %s" %
4594 instance.primary_node = target_node
4595 self.cfg.Update(instance, feedback_fn)
4597 self.LogInfo("Removing the disks on the original node")
4598 _RemoveDisks(self, instance, target_node=source_node)
4600 # Only start the instance if it's marked as up
4601 if instance.admin_state == constants.ADMINST_UP:
4602 self.LogInfo("Starting instance %s on node %s",
4603 instance.name, target_node)
4605 disks_ok, _ = _AssembleInstanceDisks(self, instance,
4606 ignore_secondaries=True)
4608 _ShutdownInstanceDisks(self, instance)
4609 raise errors.OpExecError("Can't activate the instance's disks")
4611 result = self.rpc.call_instance_start(target_node,
4612 (instance, None, None), False,
4614 msg = result.fail_msg
4616 _ShutdownInstanceDisks(self, instance)
4617 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4618 (instance.name, target_node, msg))
4621 class LUNodeMigrate(LogicalUnit):
4622 """Migrate all instances from a node.
4625 HPATH = "node-migrate"
4626 HTYPE = constants.HTYPE_NODE
4629 def CheckArguments(self):
4632 def ExpandNames(self):
4633 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4635 self.share_locks = _ShareAll()
4636 self.needed_locks = {
4637 locking.LEVEL_NODE: [self.op.node_name],
4640 def BuildHooksEnv(self):
4643 This runs on the master, the primary and all the secondaries.
4647 "NODE_NAME": self.op.node_name,
4648 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
4651 def BuildHooksNodes(self):
4652 """Build hooks nodes.
4655 nl = [self.cfg.GetMasterNode()]
4658 def CheckPrereq(self):
4661 def Exec(self, feedback_fn):
4662 # Prepare jobs for migrating instances
4663 allow_runtime_changes = self.op.allow_runtime_changes
4665 [opcodes.OpInstanceMigrate(instance_name=inst.name,
4668 iallocator=self.op.iallocator,
4669 target_node=self.op.target_node,
4670 allow_runtime_changes=allow_runtime_changes,
4671 ignore_ipolicy=self.op.ignore_ipolicy)]
4672 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
4674 # TODO: Run iallocator in this opcode and pass correct placement options to
4675 # OpInstanceMigrate. Since other jobs can modify the cluster between
4676 # running the iallocator and the actual migration, a good consistency model
4677 # will have to be found.
4679 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
4680 frozenset([self.op.node_name]))
4682 return ResultWithJobs(jobs)
4685 class TLMigrateInstance(Tasklet):
4686 """Tasklet class for instance migration.
4689 @ivar live: whether the migration will be done live or non-live;
4690 this variable is initialized only after CheckPrereq has run
4691 @type cleanup: boolean
4692 @ivar cleanup: Whether we clean up from a failed migration
4693 @type iallocator: string
4694 @ivar iallocator: The iallocator used to determine target_node
4695 @type target_node: string
4696 @ivar target_node: If given, the target_node to reallocate the instance to
4697 @type failover: boolean
4698 @ivar failover: Whether operation results in failover or migration
4699 @type fallback: boolean
4700 @ivar fallback: Whether fallback to failover is allowed if migration not
4702 @type ignore_consistency: boolean
4703 @ivar ignore_consistency: Whether we should ignore consistency between source
4705 @type shutdown_timeout: int
4706 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
4707 @type ignore_ipolicy: bool
4708 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
4713 _MIGRATION_POLL_INTERVAL = 1 # seconds
4714 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
4716 def __init__(self, lu, instance_name, cleanup, failover, fallback,
4717 ignore_consistency, allow_runtime_changes, shutdown_timeout,
4719 """Initializes this class.
4722 Tasklet.__init__(self, lu)
4725 self.instance_name = instance_name
4726 self.cleanup = cleanup
4727 self.live = False # will be overridden later
4728 self.failover = failover
4729 self.fallback = fallback
4730 self.ignore_consistency = ignore_consistency
4731 self.shutdown_timeout = shutdown_timeout
4732 self.ignore_ipolicy = ignore_ipolicy
4733 self.allow_runtime_changes = allow_runtime_changes
4735 def CheckPrereq(self):
4736 """Check prerequisites.
4738 This checks that the instance is in the cluster.
4741 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
4742 instance = self.cfg.GetInstanceInfo(instance_name)
4743 assert instance is not None
4744 self.instance = instance
4745 cluster = self.cfg.GetClusterInfo()
4747 if (not self.cleanup and
4748 not instance.admin_state == constants.ADMINST_UP and
4749 not self.failover and self.fallback):
4750 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
4751 " switching to failover")
4752 self.failover = True
4754 if instance.disk_template not in constants.DTS_MIRRORED:
4759 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
4760 " %s" % (instance.disk_template, text),
4763 if instance.disk_template in constants.DTS_EXT_MIRROR:
4764 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
4766 if self.lu.op.iallocator:
4767 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
4768 self._RunAllocator()
4770 # We set self.target_node as it is required by
4772 self.target_node = self.lu.op.target_node
4774 # Check that the target node is correct in terms of instance policy
4775 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
4776 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
4777 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
4779 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
4780 ignore=self.ignore_ipolicy)
4782 # self.target_node is already populated, either directly or by the
4784 target_node = self.target_node
4785 if self.target_node == instance.primary_node:
4786 raise errors.OpPrereqError("Cannot migrate instance %s"
4787 " to its primary (%s)" %
4788 (instance.name, instance.primary_node),
4791 if len(self.lu.tasklets) == 1:
4792 # It is safe to release locks only when we're the only tasklet
4794 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
4795 keep=[instance.primary_node, self.target_node])
4796 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
4799 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
4801 secondary_nodes = instance.secondary_nodes
4802 if not secondary_nodes:
4803 raise errors.ConfigurationError("No secondary node but using"
4804 " %s disk template" %
4805 instance.disk_template)
4806 target_node = secondary_nodes[0]
4807 if self.lu.op.iallocator or (self.lu.op.target_node and
4808 self.lu.op.target_node != target_node):
4810 text = "failed over"
4813 raise errors.OpPrereqError("Instances with disk template %s cannot"
4814 " be %s to arbitrary nodes"
4815 " (neither an iallocator nor a target"
4816 " node can be passed)" %
4817 (instance.disk_template, text),
4819 nodeinfo = self.cfg.GetNodeInfo(target_node)
4820 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
4821 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
4823 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
4824 ignore=self.ignore_ipolicy)
4826 i_be = cluster.FillBE(instance)
4828 # check memory requirements on the secondary node
4829 if (not self.cleanup and
4830 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
4831 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
4832 "migrating instance %s" %
4834 i_be[constants.BE_MINMEM],
4835 instance.hypervisor)
4837 self.lu.LogInfo("Not checking memory on the secondary node as"
4838 " instance will not be started")
4840 # check if failover must be forced instead of migration
4841 if (not self.cleanup and not self.failover and
4842 i_be[constants.BE_ALWAYS_FAILOVER]):
4843 self.lu.LogInfo("Instance configured to always failover; fallback"
4845 self.failover = True
4847 # check bridge existence
4848 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
4850 if not self.cleanup:
4851 _CheckNodeNotDrained(self.lu, target_node)
4852 if not self.failover:
4853 result = self.rpc.call_instance_migratable(instance.primary_node,
4855 if result.fail_msg and self.fallback:
4856 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
4858 self.failover = True
4860 result.Raise("Can't migrate, please use failover",
4861 prereq=True, ecode=errors.ECODE_STATE)
4863 assert not (self.failover and self.cleanup)
4865 if not self.failover:
4866 if self.lu.op.live is not None and self.lu.op.mode is not None:
4867 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
4868 " parameters are accepted",
4870 if self.lu.op.live is not None:
4872 self.lu.op.mode = constants.HT_MIGRATION_LIVE
4874 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
4875 # reset the 'live' parameter to None so that repeated
4876 # invocations of CheckPrereq do not raise an exception
4877 self.lu.op.live = None
4878 elif self.lu.op.mode is None:
4879 # read the default value from the hypervisor
4880 i_hv = cluster.FillHV(self.instance, skip_globals=False)
4881 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
4883 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
4885 # Failover is never live
4888 if not (self.failover or self.cleanup):
4889 remote_info = self.rpc.call_instance_info(instance.primary_node,
4891 instance.hypervisor)
4892 remote_info.Raise("Error checking instance on node %s" %
4893 instance.primary_node)
4894 instance_running = bool(remote_info.payload)
4895 if instance_running:
4896 self.current_mem = int(remote_info.payload["memory"])
4898 def _RunAllocator(self):
4899 """Run the allocator based on input opcode.
4902 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
4904 # FIXME: add a self.ignore_ipolicy option
4905 req = iallocator.IAReqRelocate(name=self.instance_name,
4906 relocate_from=[self.instance.primary_node])
4907 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
4909 ial.Run(self.lu.op.iallocator)
4912 raise errors.OpPrereqError("Can't compute nodes using"
4913 " iallocator '%s': %s" %
4914 (self.lu.op.iallocator, ial.info),
4916 self.target_node = ial.result[0]
4917 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4918 self.instance_name, self.lu.op.iallocator,
4919 utils.CommaJoin(ial.result))
4921 def _WaitUntilSync(self):
4922 """Poll with custom rpc for disk sync.
4924 This uses our own step-based rpc call.
4927 self.feedback_fn("* wait until resync is done")
4931 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4933 (self.instance.disks,
4936 for node, nres in result.items():
4937 nres.Raise("Cannot resync disks on node %s" % node)
4938 node_done, node_percent = nres.payload
4939 all_done = all_done and node_done
4940 if node_percent is not None:
4941 min_percent = min(min_percent, node_percent)
4943 if min_percent < 100:
4944 self.feedback_fn(" - progress: %.1f%%" % min_percent)
4947 def _EnsureSecondary(self, node):
4948 """Demote a node to secondary.
4951 self.feedback_fn("* switching node %s to secondary mode" % node)
4953 for dev in self.instance.disks:
4954 self.cfg.SetDiskID(dev, node)
4956 result = self.rpc.call_blockdev_close(node, self.instance.name,
4957 self.instance.disks)
4958 result.Raise("Cannot change disk to secondary on node %s" % node)
4960 def _GoStandalone(self):
4961 """Disconnect from the network.
4964 self.feedback_fn("* changing into standalone mode")
4965 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4966 self.instance.disks)
4967 for node, nres in result.items():
4968 nres.Raise("Cannot disconnect disks node %s" % node)
4970 def _GoReconnect(self, multimaster):
4971 """Reconnect to the network.
4977 msg = "single-master"
4978 self.feedback_fn("* changing disks into %s mode" % msg)
4979 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4980 (self.instance.disks, self.instance),
4981 self.instance.name, multimaster)
4982 for node, nres in result.items():
4983 nres.Raise("Cannot change disks config on node %s" % node)
4985 def _ExecCleanup(self):
4986 """Try to cleanup after a failed migration.
4988 The cleanup is done by:
4989 - check that the instance is running only on one node
4990 (and update the config if needed)
4991 - change disks on its secondary node to secondary
4992 - wait until disks are fully synchronized
4993 - disconnect from the network
4994 - change disks into single-master mode
4995 - wait again until disks are fully synchronized
4998 instance = self.instance
4999 target_node = self.target_node
5000 source_node = self.source_node
5002 # check running on only one node
5003 self.feedback_fn("* checking where the instance actually runs"
5004 " (if this hangs, the hypervisor might be in"
5006 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5007 for node, result in ins_l.items():
5008 result.Raise("Can't contact node %s" % node)
5010 runningon_source = instance.name in ins_l[source_node].payload
5011 runningon_target = instance.name in ins_l[target_node].payload
5013 if runningon_source and runningon_target:
5014 raise errors.OpExecError("Instance seems to be running on two nodes,"
5015 " or the hypervisor is confused; you will have"
5016 " to ensure manually that it runs only on one"
5017 " and restart this operation")
5019 if not (runningon_source or runningon_target):
5020 raise errors.OpExecError("Instance does not seem to be running at all;"
5021 " in this case it's safer to repair by"
5022 " running 'gnt-instance stop' to ensure disk"
5023 " shutdown, and then restarting it")
5025 if runningon_target:
5026 # the migration has actually succeeded, we need to update the config
5027 self.feedback_fn("* instance running on secondary node (%s),"
5028 " updating config" % target_node)
5029 instance.primary_node = target_node
5030 self.cfg.Update(instance, self.feedback_fn)
5031 demoted_node = source_node
5033 self.feedback_fn("* instance confirmed to be running on its"
5034 " primary node (%s)" % source_node)
5035 demoted_node = target_node
5037 if instance.disk_template in constants.DTS_INT_MIRROR:
5038 self._EnsureSecondary(demoted_node)
5040 self._WaitUntilSync()
5041 except errors.OpExecError:
5042 # we ignore errors here, since if the device is standalone, it
5043 # won't be able to sync
5045 self._GoStandalone()
5046 self._GoReconnect(False)
5047 self._WaitUntilSync()
5049 self.feedback_fn("* done")
5051 def _RevertDiskStatus(self):
5052 """Try to revert the disk status after a failed migration.
5055 target_node = self.target_node
5056 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
5060 self._EnsureSecondary(target_node)
5061 self._GoStandalone()
5062 self._GoReconnect(False)
5063 self._WaitUntilSync()
5064 except errors.OpExecError, err:
5065 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
5066 " please try to recover the instance manually;"
5067 " error '%s'" % str(err))
5069 def _AbortMigration(self):
5070 """Call the hypervisor code to abort a started migration.
5073 instance = self.instance
5074 target_node = self.target_node
5075 source_node = self.source_node
5076 migration_info = self.migration_info
5078 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
5082 abort_msg = abort_result.fail_msg
5084 logging.error("Aborting migration failed on target node %s: %s",
5085 target_node, abort_msg)
5086 # Don't raise an exception here, as we still have to try to revert the
5087 # disk status, even if this step failed.
5089 abort_result = self.rpc.call_instance_finalize_migration_src(
5090 source_node, instance, False, self.live)
5091 abort_msg = abort_result.fail_msg
5093 logging.error("Aborting migration failed on source node %s: %s",
5094 source_node, abort_msg)
5096 def _ExecMigration(self):
5097 """Migrate an instance.
5099 The migrate is done by:
5100 - change the disks into dual-master mode
5101 - wait until disks are fully synchronized again
5102 - migrate the instance
5103 - change disks on the new secondary node (the old primary) to secondary
5104 - wait until disks are fully synchronized
5105 - change disks into single-master mode
5108 instance = self.instance
5109 target_node = self.target_node
5110 source_node = self.source_node
5112 # Check for hypervisor version mismatch and warn the user.
5113 nodeinfo = self.rpc.call_node_info([source_node, target_node],
5114 None, [self.instance.hypervisor], False)
5115 for ninfo in nodeinfo.values():
5116 ninfo.Raise("Unable to retrieve node information from node '%s'" %
5118 (_, _, (src_info, )) = nodeinfo[source_node].payload
5119 (_, _, (dst_info, )) = nodeinfo[target_node].payload
5121 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
5122 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
5123 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
5124 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
5125 if src_version != dst_version:
5126 self.feedback_fn("* warning: hypervisor version mismatch between"
5127 " source (%s) and target (%s) node" %
5128 (src_version, dst_version))
5130 self.feedback_fn("* checking disk consistency between source and target")
5131 for (idx, dev) in enumerate(instance.disks):
5132 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
5133 raise errors.OpExecError("Disk %s is degraded or not fully"
5134 " synchronized on target node,"
5135 " aborting migration" % idx)
5137 if self.current_mem > self.tgt_free_mem:
5138 if not self.allow_runtime_changes:
5139 raise errors.OpExecError("Memory ballooning not allowed and not enough"
5140 " free memory to fit instance %s on target"
5141 " node %s (have %dMB, need %dMB)" %
5142 (instance.name, target_node,
5143 self.tgt_free_mem, self.current_mem))
5144 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
5145 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
5148 rpcres.Raise("Cannot modify instance runtime memory")
5150 # First get the migration information from the remote node
5151 result = self.rpc.call_migration_info(source_node, instance)
5152 msg = result.fail_msg
5154 log_err = ("Failed fetching source migration information from %s: %s" %
5156 logging.error(log_err)
5157 raise errors.OpExecError(log_err)
5159 self.migration_info = migration_info = result.payload
5161 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
5162 # Then switch the disks to master/master mode
5163 self._EnsureSecondary(target_node)
5164 self._GoStandalone()
5165 self._GoReconnect(True)
5166 self._WaitUntilSync()
5168 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5169 result = self.rpc.call_accept_instance(target_node,
5172 self.nodes_ip[target_node])
5174 msg = result.fail_msg
5176 logging.error("Instance pre-migration failed, trying to revert"
5177 " disk status: %s", msg)
5178 self.feedback_fn("Pre-migration failed, aborting")
5179 self._AbortMigration()
5180 self._RevertDiskStatus()
5181 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5182 (instance.name, msg))
5184 self.feedback_fn("* migrating instance to %s" % target_node)
5185 result = self.rpc.call_instance_migrate(source_node, instance,
5186 self.nodes_ip[target_node],
5188 msg = result.fail_msg
5190 logging.error("Instance migration failed, trying to revert"
5191 " disk status: %s", msg)
5192 self.feedback_fn("Migration failed, aborting")
5193 self._AbortMigration()
5194 self._RevertDiskStatus()
5195 raise errors.OpExecError("Could not migrate instance %s: %s" %
5196 (instance.name, msg))
5198 self.feedback_fn("* starting memory transfer")
5199 last_feedback = time.time()
5201 result = self.rpc.call_instance_get_migration_status(source_node,
5203 msg = result.fail_msg
5204 ms = result.payload # MigrationStatus instance
5205 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
5206 logging.error("Instance migration failed, trying to revert"
5207 " disk status: %s", msg)
5208 self.feedback_fn("Migration failed, aborting")
5209 self._AbortMigration()
5210 self._RevertDiskStatus()
5212 msg = "hypervisor returned failure"
5213 raise errors.OpExecError("Could not migrate instance %s: %s" %
5214 (instance.name, msg))
5216 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
5217 self.feedback_fn("* memory transfer complete")
5220 if (utils.TimeoutExpired(last_feedback,
5221 self._MIGRATION_FEEDBACK_INTERVAL) and
5222 ms.transferred_ram is not None):
5223 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
5224 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
5225 last_feedback = time.time()
5227 time.sleep(self._MIGRATION_POLL_INTERVAL)
5229 result = self.rpc.call_instance_finalize_migration_src(source_node,
5233 msg = result.fail_msg
5235 logging.error("Instance migration succeeded, but finalization failed"
5236 " on the source node: %s", msg)
5237 raise errors.OpExecError("Could not finalize instance migration: %s" %
5240 instance.primary_node = target_node
5242 # distribute new instance config to the other nodes
5243 self.cfg.Update(instance, self.feedback_fn)
5245 result = self.rpc.call_instance_finalize_migration_dst(target_node,
5249 msg = result.fail_msg
5251 logging.error("Instance migration succeeded, but finalization failed"
5252 " on the target node: %s", msg)
5253 raise errors.OpExecError("Could not finalize instance migration: %s" %
5256 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
5257 self._EnsureSecondary(source_node)
5258 self._WaitUntilSync()
5259 self._GoStandalone()
5260 self._GoReconnect(False)
5261 self._WaitUntilSync()
5263 # If the instance's disk template is `rbd' or `ext' and there was a
5264 # successful migration, unmap the device from the source node.
5265 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
5266 disks = _ExpandCheckDisks(instance, instance.disks)
5267 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
5269 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
5270 msg = result.fail_msg
5272 logging.error("Migration was successful, but couldn't unmap the"
5273 " block device %s on source node %s: %s",
5274 disk.iv_name, source_node, msg)
5275 logging.error("You need to unmap the device %s manually on %s",
5276 disk.iv_name, source_node)
5278 self.feedback_fn("* done")
5280 def _ExecFailover(self):
5281 """Failover an instance.
5283 The failover is done by shutting it down on its present node and
5284 starting it on the secondary.
5287 instance = self.instance
5288 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5290 source_node = instance.primary_node
5291 target_node = self.target_node
5293 if instance.admin_state == constants.ADMINST_UP:
5294 self.feedback_fn("* checking disk consistency between source and target")
5295 for (idx, dev) in enumerate(instance.disks):
5296 # for drbd, these are drbd over lvm
5297 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
5299 if primary_node.offline:
5300 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
5302 (primary_node.name, idx, target_node))
5303 elif not self.ignore_consistency:
5304 raise errors.OpExecError("Disk %s is degraded on target node,"
5305 " aborting failover" % idx)
5307 self.feedback_fn("* not checking disk consistency as instance is not"
5310 self.feedback_fn("* shutting down instance on source node")
5311 logging.info("Shutting down instance %s on node %s",
5312 instance.name, source_node)
5314 result = self.rpc.call_instance_shutdown(source_node, instance,
5315 self.shutdown_timeout,
5317 msg = result.fail_msg
5319 if self.ignore_consistency or primary_node.offline:
5320 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
5321 " proceeding anyway; please make sure node"
5322 " %s is down; error details: %s",
5323 instance.name, source_node, source_node, msg)
5325 raise errors.OpExecError("Could not shutdown instance %s on"
5327 (instance.name, source_node, msg))
5329 self.feedback_fn("* deactivating the instance's disks on source node")
5330 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
5331 raise errors.OpExecError("Can't shut down the instance's disks")
5333 instance.primary_node = target_node
5334 # distribute new instance config to the other nodes
5335 self.cfg.Update(instance, self.feedback_fn)
5337 # Only start the instance if it's marked as up
5338 if instance.admin_state == constants.ADMINST_UP:
5339 self.feedback_fn("* activating the instance's disks on target node %s" %
5341 logging.info("Starting instance %s on node %s",
5342 instance.name, target_node)
5344 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
5345 ignore_secondaries=True)
5347 _ShutdownInstanceDisks(self.lu, instance)
5348 raise errors.OpExecError("Can't activate the instance's disks")
5350 self.feedback_fn("* starting the instance on the target node %s" %
5352 result = self.rpc.call_instance_start(target_node, (instance, None, None),
5353 False, self.lu.op.reason)
5354 msg = result.fail_msg
5356 _ShutdownInstanceDisks(self.lu, instance)
5357 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5358 (instance.name, target_node, msg))
5360 def Exec(self, feedback_fn):
5361 """Perform the migration.
5364 self.feedback_fn = feedback_fn
5365 self.source_node = self.instance.primary_node
5367 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
5368 if self.instance.disk_template in constants.DTS_INT_MIRROR:
5369 self.target_node = self.instance.secondary_nodes[0]
5370 # Otherwise self.target_node has been populated either
5371 # directly, or through an iallocator.
5373 self.all_nodes = [self.source_node, self.target_node]
5374 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
5375 in self.cfg.GetMultiNodeInfo(self.all_nodes))
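# nodes_ip maps each node name to its secondary (replication) IP and is
# what the migration/DRBD RPCs below use, e.g. (example values only)
# {"node1.example.com": "192.0.2.10", "node2.example.com": "192.0.2.11"}.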
5378 feedback_fn("Failover instance %s" % self.instance.name)
5379 self._ExecFailover()
5381 feedback_fn("Migrating instance %s" % self.instance.name)
5384 return self._ExecCleanup()
5386 return self._ExecMigration()
5389 def _CreateBlockDev(lu, node, instance, device, force_create, info,
5391 """Wrapper around L{_CreateBlockDevInner}.
5393 This method annotates the root device first.
5396 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
5397 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
5398 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
5399 force_open, excl_stor)
5402 def _CreateBlockDevInner(lu, node, instance, device, force_create,
5403 info, force_open, excl_stor):
5404 """Create a tree of block devices on a given node.
5406 If this device type has to be created on secondaries, create it and
5409 If not, just recurse to children keeping the same 'force' value.
5411 @attention: The device has to be annotated already.
5413 @param lu: the lu on whose behalf we execute
5414 @param node: the node on which to create the device
5415 @type instance: L{objects.Instance}
5416 @param instance: the instance which owns the device
5417 @type device: L{objects.Disk}
5418 @param device: the device to create
5419 @type force_create: boolean
5420 @param force_create: whether to force creation of this device; this
5421 will be changed to True whenever we find a device which has
5422 CreateOnSecondary() attribute
5423 @param info: the extra 'metadata' we should attach to the device
5424 (this will be represented as a LVM tag)
5425 @type force_open: boolean
5426 @param force_open: this parameter will be passed to the
5427 L{backend.BlockdevCreate} function where it specifies
5428 whether we run on primary or not, and it affects both
5429 the child assembly and the device's own Open() execution
5430 @type excl_stor: boolean
5431 @param excl_stor: Whether exclusive_storage is active for the node
5433 @return: list of created devices
5435 created_devices = []
5437 if device.CreateOnSecondary():
5441 for child in device.children:
5442 devs = _CreateBlockDevInner(lu, node, instance, child, force_create,
5443 info, force_open, excl_stor)
5444 created_devices.extend(devs)
5446 if not force_create:
5447 return created_devices
5449 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
5451 # The device has been completely created, so there is no point in keeping
5452 # its subdevices in the list. We just add the device itself instead.
5453 created_devices = [(node, device)]
5454 return created_devices
5456 except errors.DeviceCreationError, e:
5457 e.created_devices.extend(created_devices)
5459 except errors.OpExecError, e:
5460 raise errors.DeviceCreationError(str(e), created_devices)
5463 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
5465 """Create a single block device on a given node.
5467 This will not recurse over children of the device, so they must be
5470 @param lu: the lu on whose behalf we execute
5471 @param node: the node on which to create the device
5472 @type instance: L{objects.Instance}
5473 @param instance: the instance which owns the device
5474 @type device: L{objects.Disk}
5475 @param device: the device to create
5476 @param info: the extra 'metadata' we should attach to the device
5477 (this will be represented as a LVM tag)
5478 @type force_open: boolean
5479 @param force_open: this parameter will be passed to the
5480 L{backend.BlockdevCreate} function where it specifies
5481 whether we run on primary or not, and it affects both
5482 the child assembly and the device's own Open() execution
5483 @type excl_stor: boolean
5484 @param excl_stor: Whether exclusive_storage is active for the node
5487 lu.cfg.SetDiskID(device, node)
5488 result = lu.rpc.call_blockdev_create(node, device, device.size,
5489 instance.name, force_open, info,
5491 result.Raise("Can't create block device %s on"
5492 " node %s for instance %s" % (device, node, instance.name))
5493 if device.physical_id is None:
5494 device.physical_id = result.payload
5497 def _GenerateUniqueNames(lu, exts):
5498 """Generate a suitable LV name.
5500 This will generate a logical volume name for the given instance.
5505 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5506 results.append("%s%s" % (new_id, val))
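# Illustrative example (UUIDs are made up): _GenerateUniqueNames(lu,
# [".disk0", ".disk1"]) returns something like
#   ["d2b48a5c-....disk0", "7f10c3e1-....disk1"]
# i.e. one freshly generated unique ID per requested extension.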
5510 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
5511 iv_name, p_minor, s_minor):
5512 """Generate a drbd8 device complete with its children.
5515 assert len(vgnames) == len(names) == 2
5516 port = lu.cfg.AllocatePort()
5517 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5519 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5520 logical_id=(vgnames[0], names[0]),
5522 dev_data.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5523 dev_meta = objects.Disk(dev_type=constants.LD_LV,
5524 size=constants.DRBD_META_SIZE,
5525 logical_id=(vgnames[1], names[1]),
5527 dev_meta.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5528 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5529 logical_id=(primary, secondary, port,
5532 children=[dev_data, dev_meta],
5533 iv_name=iv_name, params={})
5534 drbd_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
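# The resulting disk tree for a single DRBD8 disk looks roughly like
# (sketch, field values depend on the caller):
#   DRBD8, logical_id=(primary, secondary, port, p_minor, s_minor, secret)
#     +- LV data child, size=size, logical_id=(vgnames[0], names[0])
#     +- LV meta child, size=DRBD_META_SIZE, logical_id=(vgnames[1], names[1])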
5538 _DISK_TEMPLATE_NAME_PREFIX = {
5539 constants.DT_PLAIN: "",
5540 constants.DT_RBD: ".rbd",
5541 constants.DT_EXT: ".ext",
5545 _DISK_TEMPLATE_DEVICE_TYPE = {
5546 constants.DT_PLAIN: constants.LD_LV,
5547 constants.DT_FILE: constants.LD_FILE,
5548 constants.DT_SHARED_FILE: constants.LD_FILE,
5549 constants.DT_BLOCK: constants.LD_BLOCKDEV,
5550 constants.DT_RBD: constants.LD_RBD,
5551 constants.DT_EXT: constants.LD_EXT,
5555 def _GenerateDiskTemplate(
5556 lu, template_name, instance_name, primary_node, secondary_nodes,
5557 disk_info, file_storage_dir, file_driver, base_index,
5558 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
5559 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
5560 """Generate the entire disk layout for a given template type.
5563 vgname = lu.cfg.GetVGName()
5564 disk_count = len(disk_info)
5567 if template_name == constants.DT_DISKLESS:
5569 elif template_name == constants.DT_DRBD8:
5570 if len(secondary_nodes) != 1:
5571 raise errors.ProgrammerError("Wrong template configuration")
5572 remote_node = secondary_nodes[0]
5573 minors = lu.cfg.AllocateDRBDMinor(
5574 [primary_node, remote_node] * len(disk_info), instance_name)
5576 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
5578 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
5581 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5582 for i in range(disk_count)]):
5583 names.append(lv_prefix + "_data")
5584 names.append(lv_prefix + "_meta")
5585 for idx, disk in enumerate(disk_info):
5586 disk_index = idx + base_index
5587 data_vg = disk.get(constants.IDISK_VG, vgname)
5588 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
5589 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5590 disk[constants.IDISK_SIZE],
5592 names[idx * 2:idx * 2 + 2],
5593 "disk/%d" % disk_index,
5594 minors[idx * 2], minors[idx * 2 + 1])
5595 disk_dev.mode = disk[constants.IDISK_MODE]
5596 disk_dev.name = disk.get(constants.IDISK_NAME, None)
5597 disks.append(disk_dev)
5600 raise errors.ProgrammerError("Wrong template configuration")
5602 if template_name == constants.DT_FILE:
5604 elif template_name == constants.DT_SHARED_FILE:
5605 _req_shr_file_storage()
5607 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
5608 if name_prefix is None:
5611 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
5612 (name_prefix, base_index + i)
5613 for i in range(disk_count)])
5615 if template_name == constants.DT_PLAIN:
5617 def logical_id_fn(idx, _, disk):
5618 vg = disk.get(constants.IDISK_VG, vgname)
5619 return (vg, names[idx])
5621 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
5623 lambda _, disk_index, disk: (file_driver,
5624 "%s/disk%d" % (file_storage_dir,
5626 elif template_name == constants.DT_BLOCK:
5628 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
5629 disk[constants.IDISK_ADOPT])
5630 elif template_name == constants.DT_RBD:
5631 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
5632 elif template_name == constants.DT_EXT:
5633 def logical_id_fn(idx, _, disk):
5634 provider = disk.get(constants.IDISK_PROVIDER, None)
5635 if provider is None:
5636 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
5637 " not found", constants.DT_EXT,
5638 constants.IDISK_PROVIDER)
5639 return (provider, names[idx])
5641 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
5643 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
5645 for idx, disk in enumerate(disk_info):
# Only for the Ext template, add disk_info to params
5648 if template_name == constants.DT_EXT:
5649 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
5651 if key not in constants.IDISK_PARAMS:
5652 params[key] = disk[key]
5653 disk_index = idx + base_index
5654 size = disk[constants.IDISK_SIZE]
5655 feedback_fn("* disk %s, size %s" %
5656 (disk_index, utils.FormatUnit(size, "h")))
5657 disk_dev = objects.Disk(dev_type=dev_type, size=size,
5658 logical_id=logical_id_fn(idx, disk_index, disk),
5659 iv_name="disk/%d" % disk_index,
5660 mode=disk[constants.IDISK_MODE],
5662 disk_dev.name = disk.get(constants.IDISK_NAME, None)
5663 disk_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5664 disks.append(disk_dev)
5669 def _GetInstanceInfoText(instance):
5670 """Compute that text that should be added to the disk's metadata.
5673 return "originstname+%s" % instance.name
5676 def _CalcEta(time_taken, written, total_size):
5677 """Calculates the ETA based on size written and total size.
5679 @param time_taken: The time taken so far
5680 @param written: amount written so far
5681 @param total_size: The total size of data to be written
5682 @return: The remaining time in seconds
5685 avg_time = time_taken / float(written)
5686 return (total_size - written) * avg_time
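# Illustrative example for _CalcEta (hypothetical numbers): after 30 seconds
# with 512 MiB written out of 2048 MiB, avg_time is 30 / 512.0 seconds per
# MiB and the returned ETA is (2048 - 512) * avg_time = 90 seconds.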
5689 def _WipeDisks(lu, instance, disks=None):
5690 """Wipes instance disks.
5692 @type lu: L{LogicalUnit}
5693 @param lu: the logical unit on whose behalf we execute
5694 @type instance: L{objects.Instance}
@param instance: the instance whose disks we should wipe
5696 @type disks: None or list of tuple of (number, L{objects.Disk}, number)
@param disks: Disk details; tuple contains disk index, disk object and the
start offset
5701 node = instance.primary_node
5704 disks = [(idx, disk, 0)
5705 for (idx, disk) in enumerate(instance.disks)]
5707 for (_, device, _) in disks:
5708 lu.cfg.SetDiskID(device, node)
5710 logging.info("Pausing synchronization of disks of instance '%s'",
5712 result = lu.rpc.call_blockdev_pause_resume_sync(node,
5713 (map(compat.snd, disks),
5716 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
5718 for idx, success in enumerate(result.payload):
5720 logging.warn("Pausing synchronization of disk %s of instance '%s'"
5721 " failed", idx, instance.name)
5724 for (idx, device, offset) in disks:
5725 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
5726 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
5728 int(min(constants.MAX_WIPE_CHUNK,
5729 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
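# Worked example with assumed constant values (not necessarily the actual
# ones): if MIN_WIPE_CHUNK_PERCENT were 10 and MAX_WIPE_CHUNK were 1024 MiB,
# a 100 GiB (102400 MiB) disk would get min(1024, 102400 * 0.10) = 1024 MiB
# chunks, while a 5 GiB disk would get 512 MiB chunks.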
5733 start_time = time.time()
5738 info_text = (" (from %s to %s)" %
5739 (utils.FormatUnit(offset, "h"),
5740 utils.FormatUnit(size, "h")))
5742 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
5744 logging.info("Wiping disk %d for instance %s on node %s using"
5745 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
5747 while offset < size:
5748 wipe_size = min(wipe_chunk_size, size - offset)
5750 logging.debug("Wiping disk %d, offset %s, chunk %s",
5751 idx, offset, wipe_size)
5753 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
5755 result.Raise("Could not wipe disk %d at offset %d for size %d" %
5756 (idx, offset, wipe_size))
5760 if now - last_output >= 60:
5761 eta = _CalcEta(now - start_time, offset, size)
5762 lu.LogInfo(" - done: %.1f%% ETA: %s",
5763 offset / float(size) * 100, utils.FormatSeconds(eta))
5766 logging.info("Resuming synchronization of disks for instance '%s'",
5769 result = lu.rpc.call_blockdev_pause_resume_sync(node,
5770 (map(compat.snd, disks),
5775 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
5776 node, result.fail_msg)
5778 for idx, success in enumerate(result.payload):
5780 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
5781 " failed", idx, instance.name)
5784 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5785 """Create all disks for an instance.
5787 This abstracts away some work from AddInstance.
5789 @type lu: L{LogicalUnit}
5790 @param lu: the logical unit on whose behalf we execute
5791 @type instance: L{objects.Instance}
5792 @param instance: the instance whose disks we should create
5794 @param to_skip: list of indices to skip
5795 @type target_node: string
5796 @param target_node: if passed, overrides the target node for creation
5798 @return: the success of the creation
5801 info = _GetInstanceInfoText(instance)
5802 if target_node is None:
5803 pnode = instance.primary_node
5804 all_nodes = instance.all_nodes
5809 if instance.disk_template in constants.DTS_FILEBASED:
5810 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5811 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5813 result.Raise("Failed to create directory '%s' on"
5814 " node %s" % (file_storage_dir, pnode))
5817 # Note: this needs to be kept in sync with adding of disks in
5818 # LUInstanceSetParams
5819 for idx, device in enumerate(instance.disks):
5820 if to_skip and idx in to_skip:
5822 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
5824 for node in all_nodes:
5825 f_create = node == pnode
5827 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5828 disks_created.append((node, device))
5829 except errors.OpExecError:
5830 logging.warning("Creating disk %s for instance '%s' failed",
5832 except errors.DeviceCreationError, e:
5833 logging.warning("Creating disk %s for instance '%s' failed",
5835 disks_created.extend(e.created_devices)
5836 for (node, disk) in disks_created:
5837 lu.cfg.SetDiskID(disk, node)
5838 result = lu.rpc.call_blockdev_remove(node, disk)
5840 logging.warning("Failed to remove newly-created disk %s on node %s:"
5841 " %s", device, node, result.fail_msg)
5842 raise errors.OpExecError(e.message)
5845 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
5846 """Remove all disks for an instance.
5848 This abstracts away some work from `AddInstance()` and
5849 `RemoveInstance()`. Note that in case some of the devices couldn't
5850 be removed, the removal will continue with the other ones.
5852 @type lu: L{LogicalUnit}
5853 @param lu: the logical unit on whose behalf we execute
5854 @type instance: L{objects.Instance}
5855 @param instance: the instance whose disks we should remove
5856 @type target_node: string
5857 @param target_node: used to override the node on which to remove the disks
5859 @return: the success of the removal
5862 logging.info("Removing block devices for instance %s", instance.name)
5865 ports_to_release = set()
5866 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
5867 for (idx, device) in enumerate(anno_disks):
5869 edata = [(target_node, device)]
5871 edata = device.ComputeNodeTree(instance.primary_node)
5872 for node, disk in edata:
5873 lu.cfg.SetDiskID(disk, node)
5874 result = lu.rpc.call_blockdev_remove(node, disk)
5876 lu.LogWarning("Could not remove disk %s on node %s,"
5877 " continuing anyway: %s", idx, node, result.fail_msg)
5878 if not (result.offline and node != instance.primary_node):
5881 # if this is a DRBD disk, return its port to the pool
5882 if device.dev_type in constants.LDS_DRBD:
5883 ports_to_release.add(device.logical_id[2])
5885 if all_result or ignore_failures:
5886 for port in ports_to_release:
5887 lu.cfg.AddTcpUdpPort(port)
5889 if instance.disk_template in constants.DTS_FILEBASED:
5890 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5894 tgt = instance.primary_node
5895 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5897 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5898 file_storage_dir, instance.primary_node, result.fail_msg)
5904 def _ComputeDiskSizePerVG(disk_template, disks):
5905 """Compute disk size requirements in the volume group
5908 def _compute(disks, payload):
5909 """Universal algorithm.
5914 vgs[disk[constants.IDISK_VG]] = \
vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
5919 # Required free disk space as a function of disk and swap space
5921 constants.DT_DISKLESS: {},
5922 constants.DT_PLAIN: _compute(disks, 0),
5923 # 128 MB are added for drbd metadata for each disk
5924 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
5925 constants.DT_FILE: {},
5926 constants.DT_SHARED_FILE: {},
5929 if disk_template not in req_size_dict:
5930 raise errors.ProgrammerError("Disk template '%s' size requirement"
5931 " is unknown" % disk_template)
5933 return req_size_dict[disk_template]
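# Illustrative example for _ComputeDiskSizePerVG (hypothetical values): two
# DT_PLAIN disks of 10240 MiB and 20480 MiB, both in volume group "xenvg",
# yield {"xenvg": 30720}; with DT_DRBD8 each disk additionally contributes
# constants.DRBD_META_SIZE (128 MB per disk, as noted above) to its VG.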
5936 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
5937 """Wrapper around IAReqInstanceAlloc.
5939 @param op: The instance opcode
5940 @param disks: The computed disks
5941 @param nics: The computed nics
@param beparams: The fully filled beparams
5943 @param node_whitelist: List of nodes which should appear as online to the
5944 allocator (unless the node is already marked offline)
5946 @returns: A filled L{iallocator.IAReqInstanceAlloc}
5949 spindle_use = beparams[constants.BE_SPINDLE_USE]
5950 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
5951 disk_template=op.disk_template,
5954 vcpus=beparams[constants.BE_VCPUS],
5955 memory=beparams[constants.BE_MAXMEM],
5956 spindle_use=spindle_use,
5958 nics=[n.ToDict() for n in nics],
5959 hypervisor=op.hypervisor,
5960 node_whitelist=node_whitelist)
5963 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
5964 """Computes the nics.
5966 @param op: The instance opcode
5967 @param cluster: Cluster configuration object
5968 @param default_ip: The default ip to assign
5969 @param cfg: An instance of the configuration object
5970 @param ec_id: Execution context ID
@returns: The built-up NICs
5977 nic_mode_req = nic.get(constants.INIC_MODE, None)
5978 nic_mode = nic_mode_req
5979 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
5980 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5982 net = nic.get(constants.INIC_NETWORK, None)
5983 link = nic.get(constants.NIC_LINK, None)
5984 ip = nic.get(constants.INIC_IP, None)
5986 if net is None or net.lower() == constants.VALUE_NONE:
5989 if nic_mode_req is not None or link is not None:
5990 raise errors.OpPrereqError("If network is given, no mode or link"
5991 " is allowed to be passed",
5994 # ip validity checks
5995 if ip is None or ip.lower() == constants.VALUE_NONE:
5997 elif ip.lower() == constants.VALUE_AUTO:
5998 if not op.name_check:
5999 raise errors.OpPrereqError("IP address set to auto but name checks"
6000 " have been skipped",
6004 # We defer pool operations until later, so that the iallocator has
# filled in the instance's node(s)
6006 if ip.lower() == constants.NIC_IP_POOL:
6008 raise errors.OpPrereqError("if ip=pool, parameter network"
6009 " must be passed too",
6012 elif not netutils.IPAddress.IsValid(ip):
6013 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
6018 # TODO: check the ip address for uniqueness
6019 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6020 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6023 # MAC address verification
6024 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
6025 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6026 mac = utils.NormalizeAndValidateMac(mac)
6029 # TODO: We need to factor this out
6030 cfg.ReserveMAC(mac, ec_id)
6031 except errors.ReservationError:
6032 raise errors.OpPrereqError("MAC address %s already in use"
6033 " in cluster" % mac,
6034 errors.ECODE_NOTUNIQUE)
6036 # Build nic parameters
6039 nicparams[constants.NIC_MODE] = nic_mode
6041 nicparams[constants.NIC_LINK] = link
6043 check_params = cluster.SimpleFillNIC(nicparams)
6044 objects.NIC.CheckParameterSyntax(check_params)
6045 net_uuid = cfg.LookupNetwork(net)
6046 name = nic.get(constants.INIC_NAME, None)
6047 if name is not None and name.lower() == constants.VALUE_NONE:
6049 nic_obj = objects.NIC(mac=mac, ip=nic_ip, name=name,
6050 network=net_uuid, nicparams=nicparams)
6051 nic_obj.uuid = cfg.GenerateUniqueID(ec_id)
6052 nics.append(nic_obj)
6057 def _ComputeDisks(op, default_vg):
6058 """Computes the instance disks.
6060 @param op: The instance opcode
6061 @param default_vg: The default_vg to assume
6063 @return: The computed disks
6067 for disk in op.disks:
6068 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
6069 if mode not in constants.DISK_ACCESS_SET:
6070 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6071 mode, errors.ECODE_INVAL)
6072 size = disk.get(constants.IDISK_SIZE, None)
6074 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6077 except (TypeError, ValueError):
6078 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6081 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
6082 if ext_provider and op.disk_template != constants.DT_EXT:
6083 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
6084 " disk template, not %s" %
6085 (constants.IDISK_PROVIDER, constants.DT_EXT,
6086 op.disk_template), errors.ECODE_INVAL)
6088 data_vg = disk.get(constants.IDISK_VG, default_vg)
6089 name = disk.get(constants.IDISK_NAME, None)
6090 if name is not None and name.lower() == constants.VALUE_NONE:
6093 constants.IDISK_SIZE: size,
6094 constants.IDISK_MODE: mode,
6095 constants.IDISK_VG: data_vg,
6096 constants.IDISK_NAME: name,
6099 if constants.IDISK_METAVG in disk:
6100 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
6101 if constants.IDISK_ADOPT in disk:
6102 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
6104 # For extstorage, demand the `provider' option and add any
6105 # additional parameters (ext-params) to the dict
6106 if op.disk_template == constants.DT_EXT:
6108 new_disk[constants.IDISK_PROVIDER] = ext_provider
6110 if key not in constants.IDISK_PARAMS:
6111 new_disk[key] = disk[key]
6113 raise errors.OpPrereqError("Missing provider for template '%s'" %
6114 constants.DT_EXT, errors.ECODE_INVAL)
6116 disks.append(new_disk)
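# Illustrative example for _ComputeDisks (hypothetical request): an opcode
# disk specification of {"size": 10240, "mode": "rw"} for a DT_PLAIN instance
# is normalised to {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
# constants.IDISK_VG: default_vg, constants.IDISK_NAME: None}.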
6121 def _ComputeFullBeParams(op, cluster):
6122 """Computes the full beparams.
6124 @param op: The instance opcode
6125 @param cluster: The cluster config object
6127 @return: The fully filled beparams
6130 default_beparams = cluster.beparams[constants.PP_DEFAULT]
6131 for param, value in op.beparams.iteritems():
6132 if value == constants.VALUE_AUTO:
6133 op.beparams[param] = default_beparams[param]
6134 objects.UpgradeBeParams(op.beparams)
6135 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
6136 return cluster.SimpleFillBE(op.beparams)
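# Illustrative example for _ComputeFullBeParams (hypothetical values): if the
# opcode specifies {constants.BE_VCPUS: constants.VALUE_AUTO} and the cluster
# default is 4, the "auto" value is first replaced by 4; the dict is then
# upgraded, type-checked and merged with the remaining cluster defaults by
# SimpleFillBE.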
6139 def _CheckOpportunisticLocking(op):
6140 """Generate error if opportunistic locking is not possible.
6143 if op.opportunistic_locking and not op.iallocator:
6144 raise errors.OpPrereqError("Opportunistic locking is only available in"
6145 " combination with an instance allocator",
6149 class LUInstanceCreate(LogicalUnit):
6150 """Create an instance.
6153 HPATH = "instance-add"
6154 HTYPE = constants.HTYPE_INSTANCE
6157 def CheckArguments(self):
6161 # do not require name_check to ease forward/backward compatibility
6163 if self.op.no_install and self.op.start:
6164 self.LogInfo("No-installation mode selected, disabling startup")
6165 self.op.start = False
6166 # validate/normalize the instance name
6167 self.op.instance_name = \
6168 netutils.Hostname.GetNormalizedName(self.op.instance_name)
6170 if self.op.ip_check and not self.op.name_check:
6171 # TODO: make the ip check more flexible and not depend on the name check
6172 raise errors.OpPrereqError("Cannot do IP address check without a name"
6173 " check", errors.ECODE_INVAL)
6175 # check nics' parameter names
6176 for nic in self.op.nics:
6177 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6178 # check that NIC's parameters names are unique and valid
6179 utils.ValidateDeviceNames("NIC", self.op.nics)
6181 # check that disk's names are unique and valid
6182 utils.ValidateDeviceNames("disk", self.op.disks)
6184 cluster = self.cfg.GetClusterInfo()
6185 if not self.op.disk_template in cluster.enabled_disk_templates:
6186 raise errors.OpPrereqError("Cannot create an instance with disk template"
6187 " '%s', because it is not enabled in the"
6188 " cluster. Enabled disk templates are: %s." %
6189 (self.op.disk_template,
6190 ",".join(cluster.enabled_disk_templates)))
6192 # check disks. parameter names and consistent adopt/no-adopt strategy
6193 has_adopt = has_no_adopt = False
6194 for disk in self.op.disks:
6195 if self.op.disk_template != constants.DT_EXT:
6196 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6197 if constants.IDISK_ADOPT in disk:
6201 if has_adopt and has_no_adopt:
6202 raise errors.OpPrereqError("Either all disks are adopted or none is",
6205 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6206 raise errors.OpPrereqError("Disk adoption is not supported for the"
6207 " '%s' disk template" %
6208 self.op.disk_template,
6210 if self.op.iallocator is not None:
6211 raise errors.OpPrereqError("Disk adoption not allowed with an"
6212 " iallocator script", errors.ECODE_INVAL)
6213 if self.op.mode == constants.INSTANCE_IMPORT:
6214 raise errors.OpPrereqError("Disk adoption not allowed for"
6215 " instance import", errors.ECODE_INVAL)
6217 if self.op.disk_template in constants.DTS_MUST_ADOPT:
6218 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
6219 " but no 'adopt' parameter given" %
6220 self.op.disk_template,
6223 self.adopt_disks = has_adopt
6225 # instance name verification
6226 if self.op.name_check:
6227 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
6228 self.op.instance_name = self.hostname1.name
6229 # used in CheckPrereq for ip ping check
6230 self.check_ip = self.hostname1.ip
6232 self.check_ip = None
6234 # file storage checks
6235 if (self.op.file_driver and
6236 not self.op.file_driver in constants.FILE_DRIVER):
6237 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6238 self.op.file_driver, errors.ECODE_INVAL)
6240 if self.op.disk_template == constants.DT_FILE:
6241 opcodes.RequireFileStorage()
6242 elif self.op.disk_template == constants.DT_SHARED_FILE:
6243 opcodes.RequireSharedFileStorage()
6245 ### Node/iallocator related checks
6246 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6248 if self.op.pnode is not None:
6249 if self.op.disk_template in constants.DTS_INT_MIRROR:
6250 if self.op.snode is None:
6251 raise errors.OpPrereqError("The networked disk templates need"
6252 " a mirror node", errors.ECODE_INVAL)
6254 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6256 self.op.snode = None
6258 _CheckOpportunisticLocking(self.op)
6260 self._cds = _GetClusterDomainSecret()
6262 if self.op.mode == constants.INSTANCE_IMPORT:
6263 # On import force_variant must be True, because if we forced it at
6264 # initial install, our only chance when importing it back is that it
6266 self.op.force_variant = True
6268 if self.op.no_install:
6269 self.LogInfo("No-installation mode has no effect during import")
6271 elif self.op.mode == constants.INSTANCE_CREATE:
6272 if self.op.os_type is None:
6273 raise errors.OpPrereqError("No guest OS specified",
6275 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6276 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6277 " installation" % self.op.os_type,
6279 if self.op.disk_template is None:
6280 raise errors.OpPrereqError("No disk template specified",
6283 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6284 # Check handshake to ensure both clusters have the same domain secret
6285 src_handshake = self.op.source_handshake
6286 if not src_handshake:
6287 raise errors.OpPrereqError("Missing source handshake",
6290 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6293 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6296 # Load and check source CA
6297 self.source_x509_ca_pem = self.op.source_x509_ca
6298 if not self.source_x509_ca_pem:
6299 raise errors.OpPrereqError("Missing source X509 CA",
6303 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6305 except OpenSSL.crypto.Error, err:
6306 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6307 (err, ), errors.ECODE_INVAL)
6309 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6310 if errcode is not None:
6311 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6314 self.source_x509_ca = cert
6316 src_instance_name = self.op.source_instance_name
6317 if not src_instance_name:
6318 raise errors.OpPrereqError("Missing source instance name",
6321 self.source_instance_name = \
6322 netutils.GetHostname(name=src_instance_name).name
6325 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6326 self.op.mode, errors.ECODE_INVAL)
6328 def ExpandNames(self):
6329 """ExpandNames for CreateInstance.
6331 Figure out the right locks for instance creation.
6334 self.needed_locks = {}
6336 instance_name = self.op.instance_name
6337 # this is just a preventive check, but someone might still add this
6338 # instance in the meantime, and creation will fail at lock-add time
6339 if instance_name in self.cfg.GetInstanceList():
6340 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6341 instance_name, errors.ECODE_EXISTS)
6343 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6345 if self.op.iallocator:
6346 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
6347 # specifying a group on instance creation and then selecting nodes from
6349 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6350 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
6352 if self.op.opportunistic_locking:
6353 self.opportunistic_locks[locking.LEVEL_NODE] = True
6354 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
6356 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6357 nodelist = [self.op.pnode]
6358 if self.op.snode is not None:
6359 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6360 nodelist.append(self.op.snode)
6361 self.needed_locks[locking.LEVEL_NODE] = nodelist
6363 # in case of import lock the source node too
6364 if self.op.mode == constants.INSTANCE_IMPORT:
6365 src_node = self.op.src_node
6366 src_path = self.op.src_path
6368 if src_path is None:
6369 self.op.src_path = src_path = self.op.instance_name
6371 if src_node is None:
6372 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6373 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
6374 self.op.src_node = None
6375 if os.path.isabs(src_path):
6376 raise errors.OpPrereqError("Importing an instance from a path"
6377 " requires a source node option",
6380 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6381 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6382 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6383 if not os.path.isabs(src_path):
6384 self.op.src_path = src_path = \
6385 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
6387 self.needed_locks[locking.LEVEL_NODE_RES] = \
6388 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
6390 def _RunAllocator(self):
6391 """Run the allocator based on input opcode.
6394 if self.op.opportunistic_locking:
6395 # Only consider nodes for which a lock is held
6396 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
6398 node_whitelist = None
6400 #TODO Export network to iallocator so that it chooses a pnode
6401 # in a nodegroup that has the desired network connected to
6402 req = _CreateInstanceAllocRequest(self.op, self.disks,
6403 self.nics, self.be_full,
6405 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
6407 ial.Run(self.op.iallocator)
6410 # When opportunistic locks are used only a temporary failure is generated
6411 if self.op.opportunistic_locking:
6412 ecode = errors.ECODE_TEMP_NORES
6414 ecode = errors.ECODE_NORES
6416 raise errors.OpPrereqError("Can't compute nodes using"
6417 " iallocator '%s': %s" %
6418 (self.op.iallocator, ial.info),
6421 self.op.pnode = ial.result[0]
6422 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6423 self.op.instance_name, self.op.iallocator,
6424 utils.CommaJoin(ial.result))
6426 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
6428 if req.RequiredNodes() == 2:
6429 self.op.snode = ial.result[1]
6431 def BuildHooksEnv(self):
6434 This runs on master, primary and secondary nodes of the instance.
6438 "ADD_MODE": self.op.mode,
6440 if self.op.mode == constants.INSTANCE_IMPORT:
6441 env["SRC_NODE"] = self.op.src_node
6442 env["SRC_PATH"] = self.op.src_path
6443 env["SRC_IMAGES"] = self.src_images
6445 env.update(_BuildInstanceHookEnv(
6446 name=self.op.instance_name,
6447 primary_node=self.op.pnode,
6448 secondary_nodes=self.secondaries,
6449 status=self.op.start,
6450 os_type=self.op.os_type,
6451 minmem=self.be_full[constants.BE_MINMEM],
6452 maxmem=self.be_full[constants.BE_MAXMEM],
6453 vcpus=self.be_full[constants.BE_VCPUS],
6454 nics=_NICListToTuple(self, self.nics),
6455 disk_template=self.op.disk_template,
6456 disks=[(d[constants.IDISK_NAME], d[constants.IDISK_SIZE],
6457 d[constants.IDISK_MODE]) for d in self.disks],
6460 hypervisor_name=self.op.hypervisor,
6466 def BuildHooksNodes(self):
6467 """Build hooks nodes.
6470 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
6473 def _ReadExportInfo(self):
6474 """Reads the export information from disk.
6476 It will override the opcode source node and path with the actual
6477 information, if these two were not specified before.
6479 @return: the export information
6482 assert self.op.mode == constants.INSTANCE_IMPORT
6484 src_node = self.op.src_node
6485 src_path = self.op.src_path
6487 if src_node is None:
6488 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
6489 exp_list = self.rpc.call_export_list(locked_nodes)
6491 for node in exp_list:
6492 if exp_list[node].fail_msg:
6494 if src_path in exp_list[node].payload:
6496 self.op.src_node = src_node = node
6497 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
6501 raise errors.OpPrereqError("No export found for relative path %s" %
6502 src_path, errors.ECODE_INVAL)
6504 _CheckNodeOnline(self, src_node)
6505 result = self.rpc.call_export_info(src_node, src_path)
6506 result.Raise("No export or invalid export found in dir %s" % src_path)
6508 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6509 if not export_info.has_section(constants.INISECT_EXP):
6510 raise errors.ProgrammerError("Corrupted export config",
6511 errors.ECODE_ENVIRON)
6513 ei_version = export_info.get(constants.INISECT_EXP, "version")
6514 if (int(ei_version) != constants.EXPORT_VERSION):
6515 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6516 (ei_version, constants.EXPORT_VERSION),
6517 errors.ECODE_ENVIRON)
6520 def _ReadExportParams(self, einfo):
6521 """Use export parameters as defaults.
6523 In case the opcode doesn't specify (as in override) some instance
6524 parameters, then try to use them from the export information, if
6528 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6530 if self.op.disk_template is None:
6531 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6532 self.op.disk_template = einfo.get(constants.INISECT_INS,
6534 if self.op.disk_template not in constants.DISK_TEMPLATES:
6535 raise errors.OpPrereqError("Disk template specified in configuration"
6536 " file is not one of the allowed values:"
6538 " ".join(constants.DISK_TEMPLATES),
6541 raise errors.OpPrereqError("No disk template specified and the export"
6542 " is missing the disk_template information",
6545 if not self.op.disks:
6547 # TODO: import the disk iv_name too
6548 for idx in range(constants.MAX_DISKS):
6549 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
6550 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6551 disks.append({constants.IDISK_SIZE: disk_sz})
6552 self.op.disks = disks
6553 if not disks and self.op.disk_template != constants.DT_DISKLESS:
6554 raise errors.OpPrereqError("No disk info specified and the export"
6555 " is missing the disk information",
6558 if not self.op.nics:
6560 for idx in range(constants.MAX_NICS):
6561 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
6563 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6564 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6571 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
6572 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
6574 if (self.op.hypervisor is None and
6575 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6576 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6578 if einfo.has_section(constants.INISECT_HYP):
6579 # use the export parameters but do not override the ones
6580 # specified by the user
6581 for name, value in einfo.items(constants.INISECT_HYP):
6582 if name not in self.op.hvparams:
6583 self.op.hvparams[name] = value
6585 if einfo.has_section(constants.INISECT_BEP):
6586 # use the parameters, without overriding
6587 for name, value in einfo.items(constants.INISECT_BEP):
6588 if name not in self.op.beparams:
6589 self.op.beparams[name] = value
6590 # Compatibility for the old "memory" be param
6591 if name == constants.BE_MEMORY:
6592 if constants.BE_MAXMEM not in self.op.beparams:
6593 self.op.beparams[constants.BE_MAXMEM] = value
6594 if constants.BE_MINMEM not in self.op.beparams:
6595 self.op.beparams[constants.BE_MINMEM] = value
6597 # try to read the parameters old style, from the main section
6598 for name in constants.BES_PARAMETERS:
6599 if (name not in self.op.beparams and
6600 einfo.has_option(constants.INISECT_INS, name)):
6601 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6603 if einfo.has_section(constants.INISECT_OSP):
6604 # use the parameters, without overriding
6605 for name, value in einfo.items(constants.INISECT_OSP):
6606 if name not in self.op.osparams:
6607 self.op.osparams[name] = value
6609 def _RevertToDefaults(self, cluster):
6610 """Revert the instance parameters to the default values.
6614 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6615 for name in self.op.hvparams.keys():
6616 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6617 del self.op.hvparams[name]
6619 be_defs = cluster.SimpleFillBE({})
6620 for name in self.op.beparams.keys():
6621 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6622 del self.op.beparams[name]
6624 nic_defs = cluster.SimpleFillNIC({})
6625 for nic in self.op.nics:
6626 for name in constants.NICS_PARAMETERS:
6627 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6630 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6631 for name in self.op.osparams.keys():
6632 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6633 del self.op.osparams[name]
6635 def _CalculateFileStorageDir(self):
6636 """Calculate final instance file storage dir.
6639 # file storage dir calculation/check
6640 self.instance_file_storage_dir = None
6641 if self.op.disk_template in constants.DTS_FILEBASED:
6642 # build the full file storage dir path
6645 if self.op.disk_template == constants.DT_SHARED_FILE:
6646 get_fsd_fn = self.cfg.GetSharedFileStorageDir
6648 get_fsd_fn = self.cfg.GetFileStorageDir
6650 cfg_storagedir = get_fsd_fn()
6651 if not cfg_storagedir:
6652 raise errors.OpPrereqError("Cluster file storage dir not defined",
6654 joinargs.append(cfg_storagedir)
6656 if self.op.file_storage_dir is not None:
6657 joinargs.append(self.op.file_storage_dir)
6659 joinargs.append(self.op.instance_name)
6661 # pylint: disable=W0142
6662 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
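# Illustrative example (hypothetical paths): with a cluster storage dir of
# "/srv/ganeti/file-storage", an opcode file_storage_dir of "web" and an
# instance named "inst1.example.com", the resulting directory is
# "/srv/ganeti/file-storage/web/inst1.example.com".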
6664 def CheckPrereq(self): # pylint: disable=R0914
6665 """Check prerequisites.
6668 self._CalculateFileStorageDir()
6670 if self.op.mode == constants.INSTANCE_IMPORT:
6671 export_info = self._ReadExportInfo()
6672 self._ReadExportParams(export_info)
6673 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
6675 self._old_instance_name = None
6677 if (not self.cfg.GetVGName() and
6678 self.op.disk_template not in constants.DTS_NOT_LVM):
6679 raise errors.OpPrereqError("Cluster does not support lvm-based"
6680 " instances", errors.ECODE_STATE)
6682 if (self.op.hypervisor is None or
6683 self.op.hypervisor == constants.VALUE_AUTO):
6684 self.op.hypervisor = self.cfg.GetHypervisorType()
6686 cluster = self.cfg.GetClusterInfo()
6687 enabled_hvs = cluster.enabled_hypervisors
6688 if self.op.hypervisor not in enabled_hvs:
6689 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6691 (self.op.hypervisor, ",".join(enabled_hvs)),
6694 # Check tag validity
6695 for tag in self.op.tags:
6696 objects.TaggableObject.ValidateTag(tag)
6698 # check hypervisor parameter syntax (locally)
6699 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6700 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6702 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
6703 hv_type.CheckParameterSyntax(filled_hvp)
6704 self.hv_full = filled_hvp
6705 # check that we don't specify global parameters on an instance
6706 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
6707 "instance", "cluster")
6709 # fill and remember the beparams dict
6710 self.be_full = _ComputeFullBeParams(self.op, cluster)
6712 # build os parameters
6713 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6715 # now that hvp/bep are in final format, let's reset to defaults,
6717 if self.op.identify_defaults:
6718 self._RevertToDefaults(cluster)
6721 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
6722 self.proc.GetECId())
6724 # disk checks/pre-build
6725 default_vg = self.cfg.GetVGName()
6726 self.disks = _ComputeDisks(self.op, default_vg)
6728 if self.op.mode == constants.INSTANCE_IMPORT:
6730 for idx in range(len(self.disks)):
6731 option = "disk%d_dump" % idx
6732 if export_info.has_option(constants.INISECT_INS, option):
6733 # FIXME: are the old os-es, disk sizes, etc. useful?
6734 export_name = export_info.get(constants.INISECT_INS, option)
6735 image = utils.PathJoin(self.op.src_path, export_name)
6736 disk_images.append(image)
6738 disk_images.append(False)
6740 self.src_images = disk_images
6742 if self.op.instance_name == self._old_instance_name:
6743 for idx, nic in enumerate(self.nics):
6744 if nic.mac == constants.VALUE_AUTO:
6745 nic_mac_ini = "nic%d_mac" % idx
6746 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6748 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6750 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6751 if self.op.ip_check:
6752 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6753 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6754 (self.check_ip, self.op.instance_name),
6755 errors.ECODE_NOTUNIQUE)
6757 #### mac address generation
# By generating the MAC address here, both the allocator and the hooks get
6759 # the real final mac address rather than the 'auto' or 'generate' value.
6760 # There is a race condition between the generation and the instance object
6761 # creation, which means that we know the mac is valid now, but we're not
6762 # sure it will be when we actually add the instance. If things go bad
6763 # adding the instance will abort because of a duplicate mac, and the
6764 # creation job will fail.
6765 for nic in self.nics:
6766 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6767 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
6771 if self.op.iallocator is not None:
6772 self._RunAllocator()
6774 # Release all unneeded node locks
6775 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
6776 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
6777 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
6778 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
6780 assert (self.owned_locks(locking.LEVEL_NODE) ==
6781 self.owned_locks(locking.LEVEL_NODE_RES)), \
6782 "Node locks differ from node resource locks"
6784 #### node related checks
6786 # check primary node
6787 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6788 assert self.pnode is not None, \
6789 "Cannot retrieve locked node %s" % self.op.pnode
6791 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6792 pnode.name, errors.ECODE_STATE)
6794 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6795 pnode.name, errors.ECODE_STATE)
6796 if not pnode.vm_capable:
6797 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
6798 " '%s'" % pnode.name, errors.ECODE_STATE)
6800 self.secondaries = []
6802 # Fill in any IPs from IP pools. This must happen here, because we need to
6803 # know the nic's primary node, as specified by the iallocator
6804 for idx, nic in enumerate(self.nics):
6805 net_uuid = nic.network
6806 if net_uuid is not None:
6807 nobj = self.cfg.GetNetwork(net_uuid)
6808 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
6809 if netparams is None:
6810 raise errors.OpPrereqError("No netparams found for network"
6811 " %s. Propably not connected to"
6812 " node's %s nodegroup" %
6813 (nobj.name, self.pnode.name),
6815 self.LogInfo("NIC/%d inherits netparams %s" %
6816 (idx, netparams.values()))
6817 nic.nicparams = dict(netparams)
6818 if nic.ip is not None:
6819 if nic.ip.lower() == constants.NIC_IP_POOL:
6821 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
6822 except errors.ReservationError:
6823 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
6824 " from the address pool" % idx,
6826 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
6829 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
6830 except errors.ReservationError:
6831 raise errors.OpPrereqError("IP address %s already in use"
6832 " or does not belong to network %s" %
6833 (nic.ip, nobj.name),
6834 errors.ECODE_NOTUNIQUE)
6836 # net is None, ip None or given
6837 elif self.op.conflicts_check:
6838 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
6840 # mirror node verification
6841 if self.op.disk_template in constants.DTS_INT_MIRROR:
6842 if self.op.snode == pnode.name:
6843 raise errors.OpPrereqError("The secondary node cannot be the"
6844 " primary node", errors.ECODE_INVAL)
6845 _CheckNodeOnline(self, self.op.snode)
6846 _CheckNodeNotDrained(self, self.op.snode)
6847 _CheckNodeVmCapable(self, self.op.snode)
6848 self.secondaries.append(self.op.snode)
6850 snode = self.cfg.GetNodeInfo(self.op.snode)
6851 if pnode.group != snode.group:
6852 self.LogWarning("The primary and secondary nodes are in two"
6853 " different node groups; the disk parameters"
6854 " from the first disk's node group will be"
6857 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
6859 if self.op.disk_template in constants.DTS_INT_MIRROR:
6861 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
6862 if compat.any(map(has_es, nodes)):
6863 raise errors.OpPrereqError("Disk template %s not supported with"
6864 " exclusive storage" % self.op.disk_template,
6867 nodenames = [pnode.name] + self.secondaries
6869 if not self.adopt_disks:
6870 if self.op.disk_template == constants.DT_RBD:
6871 # _CheckRADOSFreeSpace() is just a placeholder.
6872 # Any function that checks prerequisites can be placed here.
6873 # Check if there is enough space on the RADOS cluster.
6874 _CheckRADOSFreeSpace()
6875 elif self.op.disk_template == constants.DT_EXT:
6876 # FIXME: Function that checks prereqs if needed
6879 # Check lv size requirements, if not adopting
6880 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
6881 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
6883 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
6884 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
6885 disk[constants.IDISK_ADOPT])
6886 for disk in self.disks])
6887 if len(all_lvs) != len(self.disks):
6888 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6890 for lv_name in all_lvs:
6892 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
6893 # to ReserveLV uses the same syntax
6894 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6895 except errors.ReservationError:
6896 raise errors.OpPrereqError("LV named %s used by another instance" %
6897 lv_name, errors.ECODE_NOTUNIQUE)
6899 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
6900 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
6902 node_lvs = self.rpc.call_lv_list([pnode.name],
6903 vg_names.payload.keys())[pnode.name]
6904 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6905 node_lvs = node_lvs.payload
6907 delta = all_lvs.difference(node_lvs.keys())
6909 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6910 utils.CommaJoin(delta),
6912 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6914 raise errors.OpPrereqError("Online logical volumes found, cannot"
6915 " adopt: %s" % utils.CommaJoin(online_lvs),
6917 # update the size of disk based on what is found
6918 for dsk in self.disks:
6919 dsk[constants.IDISK_SIZE] = \
6920 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
6921 dsk[constants.IDISK_ADOPT])][0]))
6923 elif self.op.disk_template == constants.DT_BLOCK:
6924 # Normalize and de-duplicate device paths
6925 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
6926 for disk in self.disks])
6927 if len(all_disks) != len(self.disks):
6928 raise errors.OpPrereqError("Duplicate disk names given for adoption",
6930 baddisks = [d for d in all_disks
6931 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
6933 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
6934 " cannot be adopted" %
6935 (utils.CommaJoin(baddisks),
6936 constants.ADOPTABLE_BLOCKDEV_ROOT),
6939 node_disks = self.rpc.call_bdev_sizes([pnode.name],
6940 list(all_disks))[pnode.name]
6941 node_disks.Raise("Cannot get block device information from node %s" %
6943 node_disks = node_disks.payload
6944 delta = all_disks.difference(node_disks.keys())
6946 raise errors.OpPrereqError("Missing block device(s): %s" %
6947 utils.CommaJoin(delta),
6949 for dsk in self.disks:
6950 dsk[constants.IDISK_SIZE] = \
6951 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
6953 # Verify instance specs
6954 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
6956 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
6957 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
6958 constants.ISPEC_DISK_COUNT: len(self.disks),
6959 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
6960 for disk in self.disks],
6961 constants.ISPEC_NIC_COUNT: len(self.nics),
6962 constants.ISPEC_SPINDLE_USE: spindle_use,
6965 group_info = self.cfg.GetNodeGroup(pnode.group)
6966 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
6967 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
6968 self.op.disk_template)
6969 if not self.op.ignore_ipolicy and res:
6970 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
6971 (pnode.group, group_info.name, utils.CommaJoin(res)))
6972 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
6974 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6976 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6977 # check OS parameters (remotely)
6978 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
6980 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6982 #TODO: _CheckExtParams (remotely)
6983 # Check parameters for extstorage
6985 # memory check on primary node
6986 #TODO(dynmem): use MINMEM for checking
6988 _CheckNodeFreeMemory(self, self.pnode.name,
6989 "creating instance %s" % self.op.instance_name,
6990 self.be_full[constants.BE_MAXMEM],
6993 self.dry_run_result = list(nodenames)
6995 def Exec(self, feedback_fn):
6996 """Create and add the instance to the cluster.
6999 instance = self.op.instance_name
7000 pnode_name = self.pnode.name
7002 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
7003 self.owned_locks(locking.LEVEL_NODE)), \
7004 "Node locks differ from node resource locks"
7005 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7007 ht_kind = self.op.hypervisor
7008 if ht_kind in constants.HTS_REQ_PORT:
7009 network_port = self.cfg.AllocatePort()
# This is ugly, but we have a chicken-and-egg problem here
7014 # We can only take the group disk parameters, as the instance
7015 # has no disks yet (we are generating them right here).
7016 node = self.cfg.GetNodeInfo(pnode_name)
7017 nodegroup = self.cfg.GetNodeGroup(node.group)
7018 disks = _GenerateDiskTemplate(self,
7019 self.op.disk_template,
7020 instance, pnode_name,
7023 self.instance_file_storage_dir,
7024 self.op.file_driver,
7027 self.cfg.GetGroupDiskParams(nodegroup))
7029 iobj = objects.Instance(name=instance, os=self.op.os_type,
7030 primary_node=pnode_name,
7031 nics=self.nics, disks=disks,
7032 disk_template=self.op.disk_template,
7033 admin_state=constants.ADMINST_DOWN,
7034 network_port=network_port,
7035 beparams=self.op.beparams,
7036 hvparams=self.op.hvparams,
7037 hypervisor=self.op.hypervisor,
7038 osparams=self.op.osparams,
7042 for tag in self.op.tags:
7045 if self.adopt_disks:
7046 if self.op.disk_template == constants.DT_PLAIN:
7047 # rename LVs to the newly-generated names; we need to construct
7048 # 'fake' LV disks with the old data, plus the new unique_id
7049 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7051 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7052 rename_to.append(t_dsk.logical_id)
7053 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
7054 self.cfg.SetDiskID(t_dsk, pnode_name)
7055 result = self.rpc.call_blockdev_rename(pnode_name,
7056 zip(tmp_disks, rename_to))
7057 result.Raise("Failed to rename adoped LVs")
7059 feedback_fn("* creating instance disks...")
7061 _CreateDisks(self, iobj)
7062 except errors.OpExecError:
7063 self.LogWarning("Device creation failed")
7064 self.cfg.ReleaseDRBDMinors(instance)
7067 feedback_fn("adding instance %s to cluster config" % instance)
7069 self.cfg.AddInstance(iobj, self.proc.GetECId())
7071 # Declare that we don't want to remove the instance lock anymore, as we've
7072 # added the instance to the config
7073 del self.remove_locks[locking.LEVEL_INSTANCE]
7075 if self.op.mode == constants.INSTANCE_IMPORT:
7076 # Release unused nodes
7077 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
7080 _ReleaseLocks(self, locking.LEVEL_NODE)
7083 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
7084 feedback_fn("* wiping instance disks...")
7086 _WipeDisks(self, iobj)
7087 except errors.OpExecError, err:
7088 logging.exception("Wiping disks failed")
7089 self.LogWarning("Wiping instance disks failed (%s)", err)
7093 # Something is already wrong with the disks, don't do anything else
7095 elif self.op.wait_for_sync:
7096 disk_abort = not _WaitForSync(self, iobj)
7097 elif iobj.disk_template in constants.DTS_INT_MIRROR:
7098 # make sure the disks are not degraded (still sync-ing is ok)
7099 feedback_fn("* checking mirrors status")
7100 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7105 _RemoveDisks(self, iobj)
7106 self.cfg.RemoveInstance(iobj.name)
7107 # Make sure the instance lock gets removed
7108 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7109 raise errors.OpExecError("There are some degraded disks for"
7112 # Release all node resource locks
7113 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
7115 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7116 # we need to set the disks ID to the primary node, since the
# preceding code might or might not have done it, depending on
7118 # disk template and other options
7119 for disk in iobj.disks:
7120 self.cfg.SetDiskID(disk, pnode_name)
7121 if self.op.mode == constants.INSTANCE_CREATE:
7122 if not self.op.no_install:
7123 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
7124 not self.op.wait_for_sync)
7126 feedback_fn("* pausing disk sync to install instance OS")
7127 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
7130 for idx, success in enumerate(result.payload):
7132 logging.warn("pause-sync of instance %s for disk %d failed",
7135 feedback_fn("* running the instance OS create scripts...")
7136 # FIXME: pass debug option from opcode to backend
7138 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
7139 self.op.debug_level)
7141 feedback_fn("* resuming disk sync")
7142 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
7145 for idx, success in enumerate(result.payload):
7147 logging.warn("resume-sync of instance %s for disk %d failed",
7150 os_add_result.Raise("Could not add os for instance %s"
7151 " on node %s" % (instance, pnode_name))
7154 if self.op.mode == constants.INSTANCE_IMPORT:
7155 feedback_fn("* running the instance OS import scripts...")
7159 for idx, image in enumerate(self.src_images):
7163 # FIXME: pass debug option from opcode to backend
7164 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7165 constants.IEIO_FILE, (image, ),
7166 constants.IEIO_SCRIPT,
7167 (iobj.disks[idx], idx),
7169 transfers.append(dt)
7172 masterd.instance.TransferInstanceData(self, feedback_fn,
7173 self.op.src_node, pnode_name,
7174 self.pnode.secondary_ip,
7176 if not compat.all(import_result):
7177 self.LogWarning("Some disks for instance %s on node %s were not"
7178 " imported successfully" % (instance, pnode_name))
7180 rename_from = self._old_instance_name
7182 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7183 feedback_fn("* preparing remote import...")
7184 # The source cluster will stop the instance before attempting to make
7185 # a connection. In some cases stopping an instance can take a long
7186 # time, hence the shutdown timeout is added to the connection
7188 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7189 self.op.source_shutdown_timeout)
7190 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7192 assert iobj.primary_node == self.pnode.name
7194 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7195 self.source_x509_ca,
7196 self._cds, timeouts)
7197 if not compat.all(disk_results):
7198 # TODO: Should the instance still be started, even if some disks
7199 # failed to import (valid for local imports, too)?
7200 self.LogWarning("Some disks for instance %s on node %s were not"
7201 " imported successfully" % (instance, pnode_name))
7203 rename_from = self.source_instance_name
7206 # also checked in the prereq part
7207 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7210 # Run rename script on newly imported instance
7211 assert iobj.name == instance
7212 feedback_fn("Running rename script for %s" % instance)
7213 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7215 self.op.debug_level)
7217 self.LogWarning("Failed to run rename script for %s on node"
7218 " %s: %s" % (instance, pnode_name, result.fail_msg))
7220 assert not self.owned_locks(locking.LEVEL_NODE_RES)
7223 iobj.admin_state = constants.ADMINST_UP
7224 self.cfg.Update(iobj, feedback_fn)
7225 logging.info("Starting instance %s on node %s", instance, pnode_name)
7226 feedback_fn("* starting instance...")
7227 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
7228 False, self.op.reason)
7229 result.Raise("Could not start instance")
7231 return list(iobj.all_nodes)
7234 class LUInstanceMultiAlloc(NoHooksLU):
7235 """Allocates multiple instances at the same time.
7240 def CheckArguments(self):
7245 for inst in self.op.instances:
7246 if inst.iallocator is not None:
7247 raise errors.OpPrereqError("iallocator are not allowed to be set on"
7248 " instance objects", errors.ECODE_INVAL)
7249 nodes.append(bool(inst.pnode))
7250 if inst.disk_template in constants.DTS_INT_MIRROR:
7251 nodes.append(bool(inst.snode))
7253 has_nodes = compat.any(nodes)
7254 if compat.all(nodes) ^ has_nodes:
7255 raise errors.OpPrereqError("There are instance objects providing"
7256 " pnode/snode while others do not",
7259 if self.op.iallocator is None:
7260 default_iallocator = self.cfg.GetDefaultIAllocator()
7261 if default_iallocator and has_nodes:
7262 self.op.iallocator = default_iallocator
7264 raise errors.OpPrereqError("No iallocator or nodes on the instances"
7265 " given and no cluster-wide default"
7266 " iallocator found; please specify either"
7267 " an iallocator or nodes on the instances"
7268 " or set a cluster-wide default iallocator",
7271 _CheckOpportunisticLocking(self.op)
7273 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
7275 raise errors.OpPrereqError("There are duplicate instance names: %s" %
7276 utils.CommaJoin(dups), errors.ECODE_INVAL)
7278 def ExpandNames(self):
7279 """Calculate the locks.
7282 self.share_locks = _ShareAll()
7283 self.needed_locks = {
7284 # iallocator will select nodes and even if no iallocator is used,
7285 # collisions with LUInstanceCreate should be avoided
7286 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
7289 if self.op.iallocator:
7290 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7291 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
7293 if self.op.opportunistic_locking:
7294 self.opportunistic_locks[locking.LEVEL_NODE] = True
7295 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
7298 for inst in self.op.instances:
7299 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
7300 nodeslist.append(inst.pnode)
7301 if inst.snode is not None:
7302 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
7303 nodeslist.append(inst.snode)
7305 self.needed_locks[locking.LEVEL_NODE] = nodeslist
7306 # Lock resources of instance's primary and secondary nodes (copy to
# prevent accidental modification)
7308 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
7310 def CheckPrereq(self):
7311 """Check prerequisite.
7314 cluster = self.cfg.GetClusterInfo()
7315 default_vg = self.cfg.GetVGName()
7316 ec_id = self.proc.GetECId()
7318 if self.op.opportunistic_locking:
7319 # Only consider nodes for which a lock is held
7320 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
7322 node_whitelist = None
7324 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
7325 _ComputeNics(op, cluster, None,
7327 _ComputeFullBeParams(op, cluster),
7329 for op in self.op.instances]
7331 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
7332 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7334 ial.Run(self.op.iallocator)
7337 raise errors.OpPrereqError("Can't compute nodes using"
7338 " iallocator '%s': %s" %
7339 (self.op.iallocator, ial.info),
7342 self.ia_result = ial.result
7345 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
7346 constants.JOB_IDS_KEY: [],
7349 def _ConstructPartialResult(self):
7350 """Contructs the partial result.
7353 (allocatable, failed) = self.ia_result
7355 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
7356 map(compat.fst, allocatable),
7357 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
7360 def Exec(self, feedback_fn):
7361 """Executes the opcode.
7364 op2inst = dict((op.instance_name, op) for op in self.op.instances)
7365 (allocatable, failed) = self.ia_result
7368 for (name, nodes) in allocatable:
7369 op = op2inst.pop(name)
7372 (op.pnode, op.snode) = nodes
7378 missing = set(op2inst.keys()) - set(failed)
7379 assert not missing, \
7380 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
7382 return ResultWithJobs(jobs, **self._ConstructPartialResult())
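# The returned value bundles one creation job per instance the iallocator
# could place together with the partial result built above, so callers see
# both the submitted jobs and which requests could not be allocated.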
7385 def _CheckRADOSFreeSpace():
7386 """Compute disk size requirements inside the RADOS cluster.
7389 # For the RADOS cluster we assume there is always enough space.
7393 class LUInstanceConsole(NoHooksLU):
7394 """Connect to an instance's console.
7396 This is somewhat special in that it returns the command line that
7397 you need to run on the master node in order to connect to the
7403 def ExpandNames(self):
7404 self.share_locks = _ShareAll()
7405 self._ExpandAndLockInstance()
7407 def CheckPrereq(self):
7408 """Check prerequisites.
7410 This checks that the instance is in the cluster.
7413 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7414 assert self.instance is not None, \
7415 "Cannot retrieve locked instance %s" % self.op.instance_name
7416 _CheckNodeOnline(self, self.instance.primary_node)
7418 def Exec(self, feedback_fn):
7419 """Connect to the console of an instance
7422 instance = self.instance
7423 node = instance.primary_node
7425 node_insts = self.rpc.call_instance_list([node],
7426 [instance.hypervisor])[node]
7427 node_insts.Raise("Can't get node information from %s" % node)
7429 if instance.name not in node_insts.payload:
7430 if instance.admin_state == constants.ADMINST_UP:
7431 state = constants.INSTST_ERRORDOWN
7432 elif instance.admin_state == constants.ADMINST_DOWN:
7433 state = constants.INSTST_ADMINDOWN
7435 state = constants.INSTST_ADMINOFFLINE
7436 raise errors.OpExecError("Instance %s is not running (state %s)" %
7437 (instance.name, state))
7439 logging.debug("Connecting to console of %s on %s", instance.name, node)
7441 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
7444 def _GetInstanceConsole(cluster, instance):
7445 """Returns console information for an instance.
7447 @type cluster: L{objects.Cluster}
7448 @type instance: L{objects.Instance}
7452 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
7453 # beparams and hvparams are passed separately, to avoid editing the
7454 # instance and then saving the defaults in the instance itself.
7455 hvparams = cluster.FillHV(instance)
7456 beparams = cluster.FillBE(instance)
7457 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7459 assert console.instance == instance.name
7460 assert console.Validate()
7462 return console.ToDict()
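# Per the LU docstring above, clients use this dictionary to build the
# command line that has to be run on the master node to reach the
# instance's console; which fields are meaningful depends on the
# hypervisor's GetInstanceConsole implementation (descriptive note only).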
7465 class LUInstanceReplaceDisks(LogicalUnit):
7466 """Replace the disks of an instance.
7469 HPATH = "mirrors-replace"
7470 HTYPE = constants.HTYPE_INSTANCE
7473 def CheckArguments(self):
7477 remote_node = self.op.remote_node
7478 ialloc = self.op.iallocator
7479 if self.op.mode == constants.REPLACE_DISK_CHG:
7480 if remote_node is None and ialloc is None:
7481 raise errors.OpPrereqError("When changing the secondary either an"
7482 " iallocator script must be used or the"
7483 " new node given", errors.ECODE_INVAL)
7485 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
7487 elif remote_node is not None or ialloc is not None:
7488 # Not replacing the secondary
7489 raise errors.OpPrereqError("The iallocator and new node options can"
7490 " only be used when changing the"
7491 " secondary node", errors.ECODE_INVAL)
7493 def ExpandNames(self):
7494 self._ExpandAndLockInstance()
7496 assert locking.LEVEL_NODE not in self.needed_locks
7497 assert locking.LEVEL_NODE_RES not in self.needed_locks
7498 assert locking.LEVEL_NODEGROUP not in self.needed_locks
7500 assert self.op.iallocator is None or self.op.remote_node is None, \
7501 "Conflicting options"
7503 if self.op.remote_node is not None:
7504 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7506 # Warning: do not remove the locking of the new secondary here
7507 # unless DRBD8.AddChildren is changed to work in parallel;
7508 # currently it doesn't since parallel invocations of
7509 # FindUnusedMinor will conflict
7510 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7511 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7513 self.needed_locks[locking.LEVEL_NODE] = []
7514 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7516 if self.op.iallocator is not None:
7517 # iallocator will select a new node in the same group
7518 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7519 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7521 self.needed_locks[locking.LEVEL_NODE_RES] = []
7523 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7524 self.op.iallocator, self.op.remote_node,
7525 self.op.disks, self.op.early_release,
7526 self.op.ignore_ipolicy)
7528 self.tasklets = [self.replacer]
7530 def DeclareLocks(self, level):
7531 if level == locking.LEVEL_NODEGROUP:
7532 assert self.op.remote_node is None
7533 assert self.op.iallocator is not None
7534 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7536 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7537 # Lock all groups used by instance optimistically; this requires going
7538 # via the node before it's locked, requiring verification later on
7539 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7540 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
7542 elif level == locking.LEVEL_NODE:
7543 if self.op.iallocator is not None:
7544 assert self.op.remote_node is None
7545 assert not self.needed_locks[locking.LEVEL_NODE]
7546 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7548 # Lock member nodes of all locked groups
7549 self.needed_locks[locking.LEVEL_NODE] = \
7551 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
7552 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
7554 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7556 self._LockInstancesNodes()
7558 elif level == locking.LEVEL_NODE_RES:
7560 self.needed_locks[locking.LEVEL_NODE_RES] = \
7561 self.needed_locks[locking.LEVEL_NODE]
7563 def BuildHooksEnv(self):
7566 This runs on the master, the primary and all the secondaries.
7569 instance = self.replacer.instance
7571 "MODE": self.op.mode,
7572 "NEW_SECONDARY": self.op.remote_node,
7573 "OLD_SECONDARY": instance.secondary_nodes[0],
7575 env.update(_BuildInstanceHookEnvByObject(self, instance))
7578 def BuildHooksNodes(self):
7579 """Build hooks nodes.
7582 instance = self.replacer.instance
7584 self.cfg.GetMasterNode(),
7585 instance.primary_node,
7587 if self.op.remote_node is not None:
7588 nl.append(self.op.remote_node)
7591 def CheckPrereq(self):
7592 """Check prerequisites.
7595 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
7596 self.op.iallocator is None)
7598 # Verify if node group locks are still correct
7599 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7601 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
7603 return LogicalUnit.CheckPrereq(self)
7606 class TLReplaceDisks(Tasklet):
7607 """Replaces disks for an instance.
7609 Note: Locking is not within the scope of this class.
7612 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7613 disks, early_release, ignore_ipolicy):
7614 """Initializes this class.
7617 Tasklet.__init__(self, lu)
7620 self.instance_name = instance_name
7622 self.iallocator_name = iallocator_name
7623 self.remote_node = remote_node
7625 self.early_release = early_release
7626 self.ignore_ipolicy = ignore_ipolicy
7629 self.instance = None
7630 self.new_node = None
7631 self.target_node = None
7632 self.other_node = None
7633 self.remote_node_info = None
7634 self.node_secondary_ip = None
7637 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7638 """Compute a new secondary node using an IAllocator.
7641 req = iallocator.IAReqRelocate(name=instance_name,
7642 relocate_from=list(relocate_from))
7643 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
7645 ial.Run(iallocator_name)
7648 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7649 " %s" % (iallocator_name, ial.info),
7652 remote_node_name = ial.result[0]
7654 lu.LogInfo("Selected new secondary for instance '%s': %s",
7655 instance_name, remote_node_name)
7657 return remote_node_name
7659 def _FindFaultyDisks(self, node_name):
7660 """Wrapper for L{_FindFaultyInstanceDisks}.
7663 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7666 def _CheckDisksActivated(self, instance):
7667 """Checks if the instance disks are activated.
7669 @param instance: The instance to check disks
7670 @return: True if they are activated, False otherwise
7673 nodes = instance.all_nodes
7675 for idx, dev in enumerate(instance.disks):
7677 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
7678 self.cfg.SetDiskID(dev, node)
7680 result = _BlockdevFind(self, node, dev, instance)
7684 elif result.fail_msg or not result.payload:
7689 def CheckPrereq(self):
7690 """Check prerequisites.
7692 This checks that the instance is in the cluster.
7695 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7696 assert instance is not None, \
7697 "Cannot retrieve locked instance %s" % self.instance_name
7699 if instance.disk_template != constants.DT_DRBD8:
7700 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7701 " instances", errors.ECODE_INVAL)
7703 if len(instance.secondary_nodes) != 1:
7704 raise errors.OpPrereqError("The instance has a strange layout,"
7705 " expected one secondary but found %d" %
7706 len(instance.secondary_nodes),
7709 instance = self.instance
7710 secondary_node = instance.secondary_nodes[0]
7712 if self.iallocator_name is None:
7713 remote_node = self.remote_node
7715 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7716 instance.name, instance.secondary_nodes)
7718 if remote_node is None:
7719 self.remote_node_info = None
7721 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
7722 "Remote node '%s' is not locked" % remote_node
7724 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7725 assert self.remote_node_info is not None, \
7726 "Cannot retrieve locked node %s" % remote_node
7728 if remote_node == self.instance.primary_node:
7729 raise errors.OpPrereqError("The specified node is the primary node of"
7730 " the instance", errors.ECODE_INVAL)
7732 if remote_node == secondary_node:
7733 raise errors.OpPrereqError("The specified node is already the"
7734 " secondary node of the instance",
7737 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7738 constants.REPLACE_DISK_CHG):
7739 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7742 if self.mode == constants.REPLACE_DISK_AUTO:
7743 if not self._CheckDisksActivated(instance):
7744 raise errors.OpPrereqError("Please run activate-disks on instance %s"
7745 " first" % self.instance_name,
7747 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7748 faulty_secondary = self._FindFaultyDisks(secondary_node)
7750 if faulty_primary and faulty_secondary:
7751 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7752 " one node and can not be repaired"
7753 " automatically" % self.instance_name,
7757 self.disks = faulty_primary
7758 self.target_node = instance.primary_node
7759 self.other_node = secondary_node
7760 check_nodes = [self.target_node, self.other_node]
7761 elif faulty_secondary:
7762 self.disks = faulty_secondary
7763 self.target_node = secondary_node
7764 self.other_node = instance.primary_node
7765 check_nodes = [self.target_node, self.other_node]
7771 # Non-automatic modes
7772 if self.mode == constants.REPLACE_DISK_PRI:
7773 self.target_node = instance.primary_node
7774 self.other_node = secondary_node
7775 check_nodes = [self.target_node, self.other_node]
7777 elif self.mode == constants.REPLACE_DISK_SEC:
7778 self.target_node = secondary_node
7779 self.other_node = instance.primary_node
7780 check_nodes = [self.target_node, self.other_node]
7782 elif self.mode == constants.REPLACE_DISK_CHG:
7783 self.new_node = remote_node
7784 self.other_node = instance.primary_node
7785 self.target_node = secondary_node
7786 check_nodes = [self.new_node, self.other_node]
7788 _CheckNodeNotDrained(self.lu, remote_node)
7789 _CheckNodeVmCapable(self.lu, remote_node)
7791 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7792 assert old_node_info is not None
7793 if old_node_info.offline and not self.early_release:
7794 # doesn't make sense to delay the release
7795 self.early_release = True
7796 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7797 " early-release mode", secondary_node)
7800 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7803 # If not specified, all disks should be replaced
7805 self.disks = range(len(self.instance.disks))
7807 # TODO: This is ugly, but right now we can't distinguish between internal
7808 # submitted opcode and external one. We should fix that.
7809 if self.remote_node_info:
7810 # We change the node, lets verify it still meets instance policy
7811 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
7812 cluster = self.cfg.GetClusterInfo()
7813 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
7815 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
7816 self.cfg, ignore=self.ignore_ipolicy)
7818 for node in check_nodes:
7819 _CheckNodeOnline(self.lu, node)
7821 touched_nodes = frozenset(node_name for node_name in [self.new_node,
7824 if node_name is not None)
7826 # Release unneeded node and node resource locks
7827 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
7828 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
7829 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
7831 # Release any owned node group
7832 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
7834 # Check whether disks are valid
7835 for disk_idx in self.disks:
7836 instance.FindDisk(disk_idx)
7838 # Get secondary node IP addresses
7839 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
7840 in self.cfg.GetMultiNodeInfo(touched_nodes))
7842 def Exec(self, feedback_fn):
7843 """Execute disk replacement.
7845 This dispatches the disk replacement to the appropriate handler.
7849 # Verify owned locks before starting operation
7850 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
7851 assert set(owned_nodes) == set(self.node_secondary_ip), \
7852 ("Incorrect node locks, owning %s, expected %s" %
7853 (owned_nodes, self.node_secondary_ip.keys()))
7854 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
7855 self.lu.owned_locks(locking.LEVEL_NODE_RES))
7856 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7858 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
7859 assert list(owned_instances) == [self.instance_name], \
7860 "Instance '%s' not locked" % self.instance_name
7862 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
7863 "Should not own any node group lock at this point"
7866 feedback_fn("No disks need replacement for instance '%s'" %
7870 feedback_fn("Replacing disk(s) %s for instance '%s'" %
7871 (utils.CommaJoin(self.disks), self.instance.name))
7872 feedback_fn("Current primary node: %s" % self.instance.primary_node)
7873 feedback_fn("Current seconary node: %s" %
7874 utils.CommaJoin(self.instance.secondary_nodes))
7876 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
7878 # Activate the instance disks if we're replacing them on a down instance
7880 _StartInstanceDisks(self.lu, self.instance, True)
7883 # Should we replace the secondary node?
7884 if self.new_node is not None:
7885 fn = self._ExecDrbd8Secondary
7887 fn = self._ExecDrbd8DiskOnly
7889 result = fn(feedback_fn)
7891 # Deactivate the instance disks if we're replacing them on a
7894 _SafeShutdownInstanceDisks(self.lu, self.instance)
7896 assert not self.lu.owned_locks(locking.LEVEL_NODE)
7899 # Verify owned locks
7900 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
7901 nodes = frozenset(self.node_secondary_ip)
7902 assert ((self.early_release and not owned_nodes) or
7903 (not self.early_release and not (set(owned_nodes) - nodes))), \
7904 ("Not owning the correct locks, early_release=%s, owned=%r,"
7905 " nodes=%r" % (self.early_release, owned_nodes, nodes))
7909 def _CheckVolumeGroup(self, nodes):
7910 self.lu.LogInfo("Checking volume groups")
7912 vgname = self.cfg.GetVGName()
7914 # Make sure volume group exists on all involved nodes
7915 results = self.rpc.call_vg_list(nodes)
7917 raise errors.OpExecError("Can't list volume groups on the nodes")
7921 res.Raise("Error checking node %s" % node)
7922 if vgname not in res.payload:
7923 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7926 def _CheckDisksExistence(self, nodes):
7927 # Check disk existence
7928 for idx, dev in enumerate(self.instance.disks):
7929 if idx not in self.disks:
7933 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
7934 self.cfg.SetDiskID(dev, node)
7936 result = _BlockdevFind(self, node, dev, self.instance)
7938 msg = result.fail_msg
7939 if msg or not result.payload:
7941 msg = "disk not found"
7942 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7945 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7946 for idx, dev in enumerate(self.instance.disks):
7947 if idx not in self.disks:
7950 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7953 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
7954 on_primary, ldisk=ldisk):
7955 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7956 " replace disks for instance %s" %
7957 (node_name, self.instance.name))
7959 def _CreateNewStorage(self, node_name):
7960 """Create new storage on the primary or secondary node.
7962 This is only used for same-node replaces, not for changing the
7963 secondary node, hence we don't want to modify the existing disk.
7968 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
7969 for idx, dev in enumerate(disks):
7970 if idx not in self.disks:
7973 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
7975 self.cfg.SetDiskID(dev, node_name)
7977 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7978 names = _GenerateUniqueNames(self.lu, lv_names)
7980 (data_disk, meta_disk) = dev.children
7981 vg_data = data_disk.logical_id[0]
7982 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7983 logical_id=(vg_data, names[0]),
7984 params=data_disk.params)
7985 vg_meta = meta_disk.logical_id[0]
7986 lv_meta = objects.Disk(dev_type=constants.LD_LV,
7987 size=constants.DRBD_META_SIZE,
7988 logical_id=(vg_meta, names[1]),
7989 params=meta_disk.params)
7991 new_lvs = [lv_data, lv_meta]
7992 old_lvs = [child.Copy() for child in dev.children]
7993 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7994 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
7996 # we pass force_create=True to force the LVM creation
7997 for new_lv in new_lvs:
7998 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
7999 _GetInstanceInfoText(self.instance), False,
8004 def _CheckDevices(self, node_name, iv_names):
8005 for name, (dev, _, _) in iv_names.iteritems():
8006 self.cfg.SetDiskID(dev, node_name)
8008 result = _BlockdevFind(self, node_name, dev, self.instance)
8010 msg = result.fail_msg
8011 if msg or not result.payload:
8013 msg = "disk not found"
8014 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8017 if result.payload.is_degraded:
8018 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8020 def _RemoveOldStorage(self, node_name, iv_names):
8021 for name, (_, old_lvs, _) in iv_names.iteritems():
8022 self.lu.LogInfo("Remove logical volumes for %s", name)
8025 self.cfg.SetDiskID(lv, node_name)
8027 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8029 self.lu.LogWarning("Can't remove old LV: %s", msg,
8030 hint="remove unused LVs manually")
8032 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
8033 """Replace a disk on the primary or secondary for DRBD 8.
8035 The algorithm for replace is quite complicated:
8037 1. for each disk to be replaced:
8039 1. create new LVs on the target node with unique names
8040 1. detach old LVs from the drbd device
8041 1. rename old LVs to name_replaced.<time_t>
8042 1. rename new LVs to old LVs
8043 1. attach the new LVs (with the old names now) to the drbd device
8045 1. wait for sync across all devices
8047 1. for each modified disk:
8049 1. remove old LVs (which have the name name_replaces.<time_t>)
8051 Failures are not very well handled.
8056 # Step: check device activation
8057 self.lu.LogStep(1, steps_total, "Check device existence")
8058 self._CheckDisksExistence([self.other_node, self.target_node])
8059 self._CheckVolumeGroup([self.target_node, self.other_node])
8061 # Step: check other node consistency
8062 self.lu.LogStep(2, steps_total, "Check peer consistency")
8063 self._CheckDisksConsistency(self.other_node,
8064 self.other_node == self.instance.primary_node,
8067 # Step: create new storage
8068 self.lu.LogStep(3, steps_total, "Allocate new storage")
8069 iv_names = self._CreateNewStorage(self.target_node)
8071 # Step: for each lv, detach+rename*2+attach
8072 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8073 for dev, old_lvs, new_lvs in iv_names.itervalues():
8074 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
8076 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8078 result.Raise("Can't detach drbd from local storage on node"
8079 " %s for device %s" % (self.target_node, dev.iv_name))
8081 #cfg.Update(instance)
8083 # ok, we created the new LVs, so now we know we have the needed
8084 # storage; as such, we proceed on the target node to rename
8085 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8086 # using the assumption that logical_id == physical_id (which in
8087 # turn is the unique_id on that node)
8089 # FIXME(iustin): use a better name for the replaced LVs
8090 temp_suffix = int(time.time())
8091 ren_fn = lambda d, suff: (d.physical_id[0],
8092 d.physical_id[1] + "_replaced-%s" % suff)
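# As a concrete illustration only: an old LV whose physical_id names it
# "<vg>/<uuid>.disk0_data" would be renamed to
# "<vg>/<uuid>.disk0_data_replaced-<time_t>", freeing the original name for
# the freshly created LV (the real names come from _GenerateUniqueNames, so
# the exact form shown here is hypothetical).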
8094 # Build the rename list based on what LVs exist on the node
8095 rename_old_to_new = []
8096 for to_ren in old_lvs:
8097 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8098 if not result.fail_msg and result.payload:
8100 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8102 self.lu.LogInfo("Renaming the old LVs on the target node")
8103 result = self.rpc.call_blockdev_rename(self.target_node,
8105 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8107 # Now we rename the new LVs to the old LVs
8108 self.lu.LogInfo("Renaming the new LVs on the target node")
8109 rename_new_to_old = [(new, old.physical_id)
8110 for old, new in zip(old_lvs, new_lvs)]
8111 result = self.rpc.call_blockdev_rename(self.target_node,
8113 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8115 # Intermediate steps of in memory modifications
8116 for old, new in zip(old_lvs, new_lvs):
8117 new.logical_id = old.logical_id
8118 self.cfg.SetDiskID(new, self.target_node)
8120 # We need to modify old_lvs so that removal later removes the
8121 # right LVs, not the newly added ones; note that old_lvs is a
8123 for disk in old_lvs:
8124 disk.logical_id = ren_fn(disk, temp_suffix)
8125 self.cfg.SetDiskID(disk, self.target_node)
8127 # Now that the new lvs have the old name, we can add them to the device
8128 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
8129 result = self.rpc.call_blockdev_addchildren(self.target_node,
8130 (dev, self.instance), new_lvs)
8131 msg = result.fail_msg
8133 for new_lv in new_lvs:
8134 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8137 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8138 hint=("cleanup manually the unused logical"
8140 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8142 cstep = itertools.count(5)
8144 if self.early_release:
8145 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8146 self._RemoveOldStorage(self.target_node, iv_names)
8147 # TODO: Check if releasing locks early still makes sense
8148 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
8150 # Release all resource locks except those used by the instance
8151 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
8152 keep=self.node_secondary_ip.keys())
8154 # Release all node locks while waiting for sync
8155 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
8157 # TODO: Can the instance lock be downgraded here? Take the optional disk
8158 # shutdown in the caller into consideration.
8161 # This can fail as the old devices are degraded and _WaitForSync
8162 # does a combined result over all disks, so we don't check its return value
8163 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
8164 _WaitForSync(self.lu, self.instance)
8166 # Check all devices manually
8167 self._CheckDevices(self.instance.primary_node, iv_names)
8169 # Step: remove old storage
8170 if not self.early_release:
8171 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8172 self._RemoveOldStorage(self.target_node, iv_names)
8174 def _ExecDrbd8Secondary(self, feedback_fn):
8175 """Replace the secondary node for DRBD 8.
8177 The algorithm for replace is quite complicated:
8178 - for all disks of the instance:
8179 - create new LVs on the new node with same names
8180 - shutdown the drbd device on the old secondary
8181 - disconnect the drbd network on the primary
8182 - create the drbd device on the new secondary
8183 - network attach the drbd on the primary, using an artifice:
8184 the drbd code for Attach() will connect to the network if it
8185 finds a device which is connected to the good local disks but
8187 - wait for sync across all devices
8188 - remove all disks from the old secondary
8190 Failures are not very well handled.
8195 pnode = self.instance.primary_node
8197 # Step: check device activation
8198 self.lu.LogStep(1, steps_total, "Check device existence")
8199 self._CheckDisksExistence([self.instance.primary_node])
8200 self._CheckVolumeGroup([self.instance.primary_node])
8202 # Step: check other node consistency
8203 self.lu.LogStep(2, steps_total, "Check peer consistency")
8204 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8206 # Step: create new storage
8207 self.lu.LogStep(3, steps_total, "Allocate new storage")
8208 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
8209 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
8210 for idx, dev in enumerate(disks):
8211 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8212 (self.new_node, idx))
8213 # we pass force_create=True to force LVM creation
8214 for new_lv in dev.children:
8215 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
8216 True, _GetInstanceInfoText(self.instance), False,
8219 # Step 4: drbd minors and drbd setup changes
8220 # after this, we must manually remove the drbd minors on both the
8221 # error and the success paths
8222 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8223 minors = self.cfg.AllocateDRBDMinor([self.new_node
8224 for dev in self.instance.disks],
8226 logging.debug("Allocated minors %r", minors)
8229 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8230 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8231 (self.new_node, idx))
8232 # create new devices on new_node; note that we create two IDs:
8233 # one without port, so the drbd will be activated without
8234 # networking information on the new node at this stage, and one
8235 # with network, for the latter activation in step 4
8236 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8237 if self.instance.primary_node == o_node1:
8240 assert self.instance.primary_node == o_node2, "Three-node instance?"
8243 new_alone_id = (self.instance.primary_node, self.new_node, None,
8244 p_minor, new_minor, o_secret)
8245 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8246 p_minor, new_minor, o_secret)
8248 iv_names[idx] = (dev, dev.children, new_net_id)
8249 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8251 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8252 logical_id=new_alone_id,
8253 children=dev.children,
8256 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
8259 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
8261 _GetInstanceInfoText(self.instance), False,
8263 except errors.GenericError:
8264 self.cfg.ReleaseDRBDMinors(self.instance.name)
8267 # We have new devices, shutdown the drbd on the old secondary
8268 for idx, dev in enumerate(self.instance.disks):
8269 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
8270 self.cfg.SetDiskID(dev, self.target_node)
8271 msg = self.rpc.call_blockdev_shutdown(self.target_node,
8272 (dev, self.instance)).fail_msg
8274 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8275 "node: %s" % (idx, msg),
8276 hint=("Please cleanup this device manually as"
8277 " soon as possible"))
8279 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8280 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
8281 self.instance.disks)[pnode]
8283 msg = result.fail_msg
8285 # detaches didn't succeed (unlikely)
8286 self.cfg.ReleaseDRBDMinors(self.instance.name)
8287 raise errors.OpExecError("Can't detach the disks from the network on"
8288 " old node: %s" % (msg,))
8290 # if we managed to detach at least one, we update all the disks of
8291 # the instance to point to the new secondary
8292 self.lu.LogInfo("Updating instance configuration")
8293 for dev, _, new_logical_id in iv_names.itervalues():
8294 dev.logical_id = new_logical_id
8295 self.cfg.SetDiskID(dev, self.instance.primary_node)
8297 self.cfg.Update(self.instance, feedback_fn)
8299 # Release all node locks (the configuration has been updated)
8300 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
8302 # and now perform the drbd attach
8303 self.lu.LogInfo("Attaching primary drbds to new secondary"
8304 " (standalone => connected)")
8305 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8307 self.node_secondary_ip,
8308 (self.instance.disks, self.instance),
8311 for to_node, to_result in result.items():
8312 msg = to_result.fail_msg
8314 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8316 hint=("please do a gnt-instance info to see the"
8317 " status of disks"))
8319 cstep = itertools.count(5)
8321 if self.early_release:
8322 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8323 self._RemoveOldStorage(self.target_node, iv_names)
8324 # TODO: Check if releasing locks early still makes sense
8325 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
8327 # Release all resource locks except those used by the instance
8328 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
8329 keep=self.node_secondary_ip.keys())
8331 # TODO: Can the instance lock be downgraded here? Take the optional disk
8332 # shutdown in the caller into consideration.
8335 # This can fail as the old devices are degraded and _WaitForSync
8336 # does a combined result over all disks, so we don't check its return value
8337 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
8338 _WaitForSync(self.lu, self.instance)
8340 # Check all devices manually
8341 self._CheckDevices(self.instance.primary_node, iv_names)
8343 # Step: remove old storage
8344 if not self.early_release:
8345 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8346 self._RemoveOldStorage(self.target_node, iv_names)
8349 class LURepairNodeStorage(NoHooksLU):
8350 """Repairs the volume group on a node.
8355 def CheckArguments(self):
8356 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8358 storage_type = self.op.storage_type
8360 if (constants.SO_FIX_CONSISTENCY not in
8361 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8362 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8363 " repaired" % storage_type,
8366 def ExpandNames(self):
8367 self.needed_locks = {
8368 locking.LEVEL_NODE: [self.op.node_name],
8371 def _CheckFaultyDisks(self, instance, node_name):
8372 """Ensure faulty disks abort the opcode or at least warn."""
8374 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8376 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8377 " node '%s'" % (instance.name, node_name),
8379 except errors.OpPrereqError, err:
8380 if self.op.ignore_consistency:
8381 self.LogWarning(str(err.args[0]))
8385 def CheckPrereq(self):
8386 """Check prerequisites.
8389 # Check whether any instance on this node has faulty disks
8390 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8391 if inst.admin_state != constants.ADMINST_UP:
8393 check_nodes = set(inst.all_nodes)
8394 check_nodes.discard(self.op.node_name)
8395 for inst_node_name in check_nodes:
8396 self._CheckFaultyDisks(inst, inst_node_name)
8398 def Exec(self, feedback_fn):
8399 feedback_fn("Repairing storage unit '%s' on %s ..." %
8400 (self.op.name, self.op.node_name))
8402 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8403 result = self.rpc.call_storage_execute(self.op.node_name,
8404 self.op.storage_type, st_args,
8406 constants.SO_FIX_CONSISTENCY)
8407 result.Raise("Failed to repair storage unit '%s' on %s" %
8408 (self.op.name, self.op.node_name))
8411 class LUNodeEvacuate(NoHooksLU):
8412 """Evacuates instances off a list of nodes.
8417 _MODE2IALLOCATOR = {
8418 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
8419 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
8420 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
8422 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
8423 assert (frozenset(_MODE2IALLOCATOR.values()) ==
8424 constants.IALLOCATOR_NEVAC_MODES)
8426 def CheckArguments(self):
8427 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8429 def ExpandNames(self):
8430 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8432 if self.op.remote_node is not None:
8433 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8434 assert self.op.remote_node
8436 if self.op.remote_node == self.op.node_name:
8437 raise errors.OpPrereqError("Can not use evacuated node as a new"
8438 " secondary node", errors.ECODE_INVAL)
8440 if self.op.mode != constants.NODE_EVAC_SEC:
8441 raise errors.OpPrereqError("Without the use of an iallocator only"
8442 " secondary instances can be evacuated",
8446 self.share_locks = _ShareAll()
8447 self.needed_locks = {
8448 locking.LEVEL_INSTANCE: [],
8449 locking.LEVEL_NODEGROUP: [],
8450 locking.LEVEL_NODE: [],
8453 # Determine nodes (via group) optimistically, needs verification once locks
8454 # have been acquired
8455 self.lock_nodes = self._DetermineNodes()
8457 def _DetermineNodes(self):
8458 """Gets the list of nodes to operate on.
8461 if self.op.remote_node is None:
8462 # Iallocator will choose any node(s) in the same group
8463 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
8465 group_nodes = frozenset([self.op.remote_node])
8467 # Determine nodes to be locked
8468 return set([self.op.node_name]) | group_nodes
8470 def _DetermineInstances(self):
8471 """Builds list of instances to operate on.
8474 assert self.op.mode in constants.NODE_EVAC_MODES
8476 if self.op.mode == constants.NODE_EVAC_PRI:
8477 # Primary instances only
8478 inst_fn = _GetNodePrimaryInstances
8479 assert self.op.remote_node is None, \
8480 "Evacuating primary instances requires iallocator"
8481 elif self.op.mode == constants.NODE_EVAC_SEC:
8482 # Secondary instances only
8483 inst_fn = _GetNodeSecondaryInstances
8486 assert self.op.mode == constants.NODE_EVAC_ALL
8487 inst_fn = _GetNodeInstances
8488 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
8490 raise errors.OpPrereqError("Due to an issue with the iallocator"
8491 " interface it is not possible to evacuate"
8492 " all instances at once; specify explicitly"
8493 " whether to evacuate primary or secondary"
8497 return inst_fn(self.cfg, self.op.node_name)
8499 def DeclareLocks(self, level):
8500 if level == locking.LEVEL_INSTANCE:
8501 # Lock instances optimistically, needs verification once node and group
8502 # locks have been acquired
8503 self.needed_locks[locking.LEVEL_INSTANCE] = \
8504 set(i.name for i in self._DetermineInstances())
8506 elif level == locking.LEVEL_NODEGROUP:
8507 # Lock node groups for all potential target nodes optimistically, needs
8508 # verification once nodes have been acquired
8509 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8510 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
8512 elif level == locking.LEVEL_NODE:
8513 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
8515 def CheckPrereq(self):
8517 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
8518 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
8519 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
8521 need_nodes = self._DetermineNodes()
8523 if not owned_nodes.issuperset(need_nodes):
8524 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
8525 " locks were acquired, current nodes are"
8526 " are '%s', used to be '%s'; retry the"
8529 utils.CommaJoin(need_nodes),
8530 utils.CommaJoin(owned_nodes)),
8533 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
8534 if owned_groups != wanted_groups:
8535 raise errors.OpExecError("Node groups changed since locks were acquired,"
8536 " current groups are '%s', used to be '%s';"
8537 " retry the operation" %
8538 (utils.CommaJoin(wanted_groups),
8539 utils.CommaJoin(owned_groups)))
8541 # Determine affected instances
8542 self.instances = self._DetermineInstances()
8543 self.instance_names = [i.name for i in self.instances]
8545 if set(self.instance_names) != owned_instances:
8546 raise errors.OpExecError("Instances on node '%s' changed since locks"
8547 " were acquired, current instances are '%s',"
8548 " used to be '%s'; retry the operation" %
8550 utils.CommaJoin(self.instance_names),
8551 utils.CommaJoin(owned_instances)))
8553 if self.instance_names:
8554 self.LogInfo("Evacuating instances from node '%s': %s",
8556 utils.CommaJoin(utils.NiceSort(self.instance_names)))
8558 self.LogInfo("No instances to evacuate from node '%s'",
8561 if self.op.remote_node is not None:
8562 for i in self.instances:
8563 if i.primary_node == self.op.remote_node:
8564 raise errors.OpPrereqError("Node %s is the primary node of"
8565 " instance %s, cannot use it as"
8567 (self.op.remote_node, i.name),
8570 def Exec(self, feedback_fn):
8571 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
8573 if not self.instance_names:
8574 # No instances to evacuate
8577 elif self.op.iallocator is not None:
8578 # TODO: Implement relocation to other group
8579 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
8580 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
8581 instances=list(self.instance_names))
8582 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8584 ial.Run(self.op.iallocator)
8587 raise errors.OpPrereqError("Can't compute node evacuation using"
8588 " iallocator '%s': %s" %
8589 (self.op.iallocator, ial.info),
8592 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
8594 elif self.op.remote_node is not None:
8595 assert self.op.mode == constants.NODE_EVAC_SEC
8597 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
8598 remote_node=self.op.remote_node,
8600 mode=constants.REPLACE_DISK_CHG,
8601 early_release=self.op.early_release)]
8602 for instance_name in self.instance_names]
8605 raise errors.ProgrammerError("No iallocator or remote node")
8607 return ResultWithJobs(jobs)
8610 def _SetOpEarlyRelease(early_release, op):
8611 """Sets C{early_release} flag on opcodes if available.
8615 op.early_release = early_release
8616 except AttributeError:
8617 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
8622 def _NodeEvacDest(use_nodes, group, nodes):
8623 """Returns group or nodes depending on caller's choice.
8627 return utils.CommaJoin(nodes)
8632 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
8633 """Unpacks the result of change-group and node-evacuate iallocator requests.
8635 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
8636 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
8638 @type lu: L{LogicalUnit}
8639 @param lu: Logical unit instance
8640 @type alloc_result: tuple/list
8641 @param alloc_result: Result from iallocator
8642 @type early_release: bool
8643 @param early_release: Whether to release locks early if possible
8644 @type use_nodes: bool
8645 @param use_nodes: Whether to display node names instead of groups
8648 (moved, failed, jobs) = alloc_result
8651 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
8652 for (name, reason) in failed)
8653 lu.LogWarning("Unable to evacuate instances %s", failreason)
8654 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
8657 lu.LogInfo("Instances to be moved: %s",
8658 utils.CommaJoin("%s (to %s)" %
8659 (name, _NodeEvacDest(use_nodes, group, nodes))
8660 for (name, group, nodes) in moved))
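# "jobs" as returned by the iallocator is a list of job definitions, each a
# list of serialized opcodes; the return below rebuilds the opcode objects
# and propagates the early_release flag to those that support it.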
8662 return [map(compat.partial(_SetOpEarlyRelease, early_release),
8663 map(opcodes.OpCode.LoadOpCode, ops))
8667 def _DiskSizeInBytesToMebibytes(lu, size):
8668 """Converts a disk size in bytes to mebibytes.
8670 Warns and rounds up if the size isn't an even multiple of 1 MiB.
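For example, a size of 1073741825 bytes (1 GiB plus one byte) gives
divmod(size, 1024 * 1024) == (1024, 1); the function then warns that
1048575 bytes will not be wiped and rounds the result up to 1025 MiB.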
8673 (mib, remainder) = divmod(size, 1024 * 1024)
8676 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
8677 " to not overwrite existing data (%s bytes will not be"
8678 " wiped)", (1024 * 1024) - remainder)
8684 class LUInstanceGrowDisk(LogicalUnit):
8685 """Grow a disk of an instance.
8689 HTYPE = constants.HTYPE_INSTANCE
8692 def ExpandNames(self):
8693 self._ExpandAndLockInstance()
8694 self.needed_locks[locking.LEVEL_NODE] = []
8695 self.needed_locks[locking.LEVEL_NODE_RES] = []
8696 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8697 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8699 def DeclareLocks(self, level):
8700 if level == locking.LEVEL_NODE:
8701 self._LockInstancesNodes()
8702 elif level == locking.LEVEL_NODE_RES:
8704 self.needed_locks[locking.LEVEL_NODE_RES] = \
8705 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8707 def BuildHooksEnv(self):
8710 This runs on the master, the primary and all the secondaries.
8714 "DISK": self.op.disk,
8715 "AMOUNT": self.op.amount,
8716 "ABSOLUTE": self.op.absolute,
8718 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8721 def BuildHooksNodes(self):
8722 """Build hooks nodes.
8725 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8728 def CheckPrereq(self):
8729 """Check prerequisites.
8731 This checks that the instance is in the cluster.
8734 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8735 assert instance is not None, \
8736 "Cannot retrieve locked instance %s" % self.op.instance_name
8737 nodenames = list(instance.all_nodes)
8738 for node in nodenames:
8739 _CheckNodeOnline(self, node)
8741 self.instance = instance
8743 if instance.disk_template not in constants.DTS_GROWABLE:
8744 raise errors.OpPrereqError("Instance's disk layout does not support"
8745 " growing", errors.ECODE_INVAL)
8747 self.disk = instance.FindDisk(self.op.disk)
8749 if self.op.absolute:
8750 self.target = self.op.amount
8751 self.delta = self.target - self.disk.size
8753 raise errors.OpPrereqError("Requested size (%s) is smaller than "
8754 "current disk size (%s)" %
8755 (utils.FormatUnit(self.target, "h"),
8756 utils.FormatUnit(self.disk.size, "h")),
8759 self.delta = self.op.amount
8760 self.target = self.disk.size + self.delta
8762 raise errors.OpPrereqError("Requested increment (%s) is negative" %
8763 utils.FormatUnit(self.delta, "h"),
8766 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
8768 def _CheckDiskSpace(self, nodenames, req_vgspace):
8769 template = self.instance.disk_template
8770 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
8771 # TODO: check the free disk space for file, when that feature will be
8773 nodes = map(self.cfg.GetNodeInfo, nodenames)
8774 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
8777 # With exclusive storage we need to do something smarter than just looking
8778 # at free space; for now, let's simply abort the operation.
8779 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
8780 " is enabled", errors.ECODE_STATE)
8781 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
8783 def Exec(self, feedback_fn):
8784 """Execute disk grow.
8787 instance = self.instance
8790 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
8791 assert (self.owned_locks(locking.LEVEL_NODE) ==
8792 self.owned_locks(locking.LEVEL_NODE_RES))
8794 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
8796 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8798 raise errors.OpExecError("Cannot activate block device to grow")
8800 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
8801 (self.op.disk, instance.name,
8802 utils.FormatUnit(self.delta, "h"),
8803 utils.FormatUnit(self.target, "h")))
8805 # First run all grow ops in dry-run mode
8806 for node in instance.all_nodes:
8807 self.cfg.SetDiskID(disk, node)
8808 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8810 result.Raise("Dry-run grow request failed to node %s" % node)
8813 # Get disk size from primary node for wiping
8814 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
8815 result.Raise("Failed to retrieve disk size from node '%s'" %
8816 instance.primary_node)
8818 (disk_size_in_bytes, ) = result.payload
8820 if disk_size_in_bytes is None:
8821 raise errors.OpExecError("Failed to retrieve disk size from primary"
8822 " node '%s'" % instance.primary_node)
8824 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
8826 assert old_disk_size >= disk.size, \
8827 ("Retrieved disk size too small (got %s, should be at least %s)" %
8828 (old_disk_size, disk.size))
8830 old_disk_size = None
8832 # We know that (as far as we can test) operations across different
8833 # nodes will succeed, time to run it for real on the backing storage
8834 for node in instance.all_nodes:
8835 self.cfg.SetDiskID(disk, node)
8836 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8838 result.Raise("Grow request failed to node %s" % node)
8840 # And now execute it for logical storage, on the primary node
8841 node = instance.primary_node
8842 self.cfg.SetDiskID(disk, node)
8843 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8845 result.Raise("Grow request failed to node %s" % node)
8847 disk.RecordGrow(self.delta)
8848 self.cfg.Update(instance, feedback_fn)
8850 # Changes have been recorded, release node lock
8851 _ReleaseLocks(self, locking.LEVEL_NODE)
8853 # Downgrade lock while waiting for sync
8854 self.glm.downgrade(locking.LEVEL_INSTANCE)
8856 assert wipe_disks ^ (old_disk_size is None)
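# Invariant: the original disk size was recorded earlier exactly when disk
# wiping is enabled, so the wipe below always knows where the pre-existing
# data ends.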
8859 assert instance.disks[self.op.disk] == disk
8861 # Wipe newly added disk space
8862 _WipeDisks(self, instance,
8863 disks=[(self.op.disk, disk, old_disk_size)])
8865 if self.op.wait_for_sync:
8866 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8868 self.LogWarning("Disk syncing has not returned a good status; check"
8870 if instance.admin_state != constants.ADMINST_UP:
8871 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8872 elif instance.admin_state != constants.ADMINST_UP:
8873 self.LogWarning("Not shutting down the disk even if the instance is"
8874 " not supposed to be running because no wait for"
8875 " sync mode was requested")
8877 assert self.owned_locks(locking.LEVEL_NODE_RES)
8878 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
8881 class LUInstanceQueryData(NoHooksLU):
8882 """Query runtime instance data.
8887 def ExpandNames(self):
8888 self.needed_locks = {}
8890 # Use locking if requested or when non-static information is wanted
8891 if not (self.op.static or self.op.use_locking):
8892 self.LogWarning("Non-static data requested, locks need to be acquired")
8893 self.op.use_locking = True
8895 if self.op.instances or not self.op.use_locking:
8896 # Expand instance names right here
8897 self.wanted_names = _GetWantedInstances(self, self.op.instances)
8899 # Will use acquired locks
8900 self.wanted_names = None
8902 if self.op.use_locking:
8903 self.share_locks = _ShareAll()
8905 if self.wanted_names is None:
8906 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8908 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8910 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8911 self.needed_locks[locking.LEVEL_NODE] = []
8912 self.needed_locks[locking.LEVEL_NETWORK] = []
8913 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8915 def DeclareLocks(self, level):
8916 if self.op.use_locking:
8917 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
8918 if level == locking.LEVEL_NODEGROUP:
8920 # Lock all groups used by instances optimistically; this requires going
8921 # via the node before it's locked, requiring verification later on
8922 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8923 frozenset(group_uuid
8924 for instance_name in owned_instances
8926 self.cfg.GetInstanceNodeGroups(instance_name))
8928 elif level == locking.LEVEL_NODE:
8929 self._LockInstancesNodes()
8931 elif level == locking.LEVEL_NETWORK:
8932 self.needed_locks[locking.LEVEL_NETWORK] = \
8934 for instance_name in owned_instances
8936 self.cfg.GetInstanceNetworks(instance_name))
8938 def CheckPrereq(self):
8939 """Check prerequisites.
8941 This only checks the optional instance list against the existing names.
8944 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
8945 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
8946 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
8947 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
8949 if self.wanted_names is None:
8950 assert self.op.use_locking, "Locking was not used"
8951 self.wanted_names = owned_instances
8953 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
8955 if self.op.use_locking:
8956 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
8959 assert not (owned_instances or owned_groups or
8960 owned_nodes or owned_networks)
8962 self.wanted_instances = instances.values()
8964 def _ComputeBlockdevStatus(self, node, instance, dev):
8965 """Returns the status of a block device
8968 if self.op.static or not node:
8971 self.cfg.SetDiskID(dev, node)
8973 result = self.rpc.call_blockdev_find(node, dev)
8977 result.Raise("Can't compute disk status for %s" % instance.name)
8979 status = result.payload
8983 return (status.dev_path, status.major, status.minor,
8984 status.sync_percent, status.estimated_time,
8985 status.is_degraded, status.ldisk_status)
8987 def _ComputeDiskStatus(self, instance, snode, dev):
8988 """Compute block device status.
8991 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
8993 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
8995 def _ComputeDiskStatusInner(self, instance, snode, dev):
8996 """Compute block device status.
8998 @attention: The device has to be annotated already.
9001 if dev.dev_type in constants.LDS_DRBD:
9002 # we change the snode then (otherwise we use the one passed in)
9003 if dev.logical_id[0] == instance.primary_node:
9004 snode = dev.logical_id[1]
9006 snode = dev.logical_id[0]
9008 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9010 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
9013 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
9020 "iv_name": dev.iv_name,
9021 "dev_type": dev.dev_type,
9022 "logical_id": dev.logical_id,
9023 "physical_id": dev.physical_id,
9024 "pstatus": dev_pstatus,
9025 "sstatus": dev_sstatus,
9026 "children": dev_children,
9033 def Exec(self, feedback_fn):
9034 """Gather and return data"""
9037 cluster = self.cfg.GetClusterInfo()
9039 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
9040 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
9042 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
9043 for node in nodes.values()))
9045 group2name_fn = lambda uuid: groups[uuid].name
9046 for instance in self.wanted_instances:
9047 pnode = nodes[instance.primary_node]
9049 if self.op.static or pnode.offline:
9052 self.LogWarning("Primary node %s is marked offline, returning static"
9053 " information only for instance %s" %
9054 (pnode.name, instance.name))
9056 remote_info = self.rpc.call_instance_info(instance.primary_node,
9058 instance.hypervisor)
9059 remote_info.Raise("Error checking node %s" % instance.primary_node)
9060 remote_info = remote_info.payload
9061 if remote_info and "state" in remote_info:
9064 if instance.admin_state == constants.ADMINST_UP:
9065 remote_state = "down"
9067 remote_state = instance.admin_state
9069 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
9072 snodes_group_uuids = [nodes[snode_name].group
9073 for snode_name in instance.secondary_nodes]
9075 result[instance.name] = {
9076 "name": instance.name,
9077 "config_state": instance.admin_state,
9078 "run_state": remote_state,
9079 "pnode": instance.primary_node,
9080 "pnode_group_uuid": pnode.group,
9081 "pnode_group_name": group2name_fn(pnode.group),
9082 "snodes": instance.secondary_nodes,
9083 "snodes_group_uuids": snodes_group_uuids,
9084 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
9086 # this happens to be the same format used for hooks
9087 "nics": _NICListToTuple(self, instance.nics),
9088 "disk_template": instance.disk_template,
9090 "hypervisor": instance.hypervisor,
9091 "network_port": instance.network_port,
9092 "hv_instance": instance.hvparams,
9093 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9094 "be_instance": instance.beparams,
9095 "be_actual": cluster.FillBE(instance),
9096 "os_instance": instance.osparams,
9097 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9098 "serial_no": instance.serial_no,
9099 "mtime": instance.mtime,
9100 "ctime": instance.ctime,
9101 "uuid": instance.uuid,
9107 def PrepareContainerMods(mods, private_fn):
9108 """Prepares a list of container modifications by adding a private data field.
9110 @type mods: list of tuples; (operation, index, parameters)
9111 @param mods: List of modifications
9112 @type private_fn: callable or None
9113 @param private_fn: Callable for constructing a private data field for a
9118 if private_fn is None:
9123 return [(op, idx, params, fn()) for (op, idx, params) in mods]
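# Illustrative only: with mods=[(constants.DDM_ADD, -1, params)] and a
# private_fn such as _InstNicModPrivate, the result is
# [(constants.DDM_ADD, -1, params, <private data object>)], ready to be
# consumed by ApplyContainerMods below.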
9126 def GetItemFromContainer(identifier, kind, container):
9127 """Return the item refered by the identifier.
9129 @type identifier: string
9130 @param identifier: Item index or name or UUID
9132 @param kind: One-word item description
9133 @type container: list
9134 @param container: Container to get the item from
9139 idx = int(identifier)
9142 absidx = len(container) - 1
9144 raise IndexError("Not accepting negative indices other than -1")
9145 elif idx > len(container):
9146 raise IndexError("Got %s index %s, but there are only %s" %
9147 (kind, idx, len(container)))
9150 return (absidx, container[idx])
9154 for idx, item in enumerate(container):
9155 if item.uuid == identifier or item.name == identifier:
9158 raise errors.OpPrereqError("Cannot find %s with identifier %s" %
9159 (kind, identifier), errors.ECODE_NOENT)
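# An illustrative sketch of identifier resolution (container and names
# hypothetical): a numeric string is treated as an index, -1 meaning the
# last item, and anything else is matched against the items' name and uuid
# attributes.
#
#   GetItemFromContainer("0", "NIC", nics)        # -> (0, nics[0])
#   GetItemFromContainer("-1", "NIC", nics)       # -> (len(nics) - 1, nics[-1])
#   GetItemFromContainer("eth-front", "NIC", nics)
#   # -> (idx, nic) whose name or uuid is "eth-front", else OpPrereqError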
9162 #: Type description for changes as returned by L{ApplyContainerMods}'s
9164 _TApplyContModsCbChanges = \
9165 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
9171 def ApplyContainerMods(kind, container, chgdesc, mods,
9172 create_fn, modify_fn, remove_fn):
9173 """Applies descriptions in C{mods} to C{container}.
9176 @param kind: One-word item description
9177 @type container: list
9178 @param container: Container to modify
9179 @type chgdesc: None or list
9180 @param chgdesc: List of applied changes
9182 @param mods: Modifications as returned by L{PrepareContainerMods}
9183 @type create_fn: callable
9184 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
9185 receives absolute item index, parameters and private data object as added
9186 by L{PrepareContainerMods}, returns tuple containing new item and changes
9188 @type modify_fn: callable
9189 @param modify_fn: Callback for modifying an existing item
9190 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
9191 and private data object as added by L{PrepareContainerMods}, returns
9193 @type remove_fn: callable
9194 @param remove_fn: Callback on removing item; receives absolute item index,
9195 item and private data object as added by L{PrepareContainerMods}
9198 for (op, identifier, params, private) in mods:
9201 if op == constants.DDM_ADD:
9202 # Calculate where item will be added
9203 # When adding an item, identifier can only be an index
9205 idx = int(identifier)
9207 raise errors.OpPrereqError("Only possitive integer or -1 is accepted as"
9208 " identifier for %s" % constants.DDM_ADD,
9211 addidx = len(container)
9214 raise IndexError("Not accepting negative indices other than -1")
9215 elif idx > len(container):
9216 raise IndexError("Got %s index %s, but there are only %s" %
9217 (kind, idx, len(container)))
9220 if create_fn is None:
9223 (item, changes) = create_fn(addidx, params, private)
9226 container.append(item)
9229 assert idx <= len(container)
9230 # list.insert does so before the specified index
9231 container.insert(idx, item)
9233 # Retrieve existing item
9234 (absidx, item) = GetItemFromContainer(identifier, kind, container)
9236 if op == constants.DDM_REMOVE:
9239 if remove_fn is not None:
9240 remove_fn(absidx, item, private)
9242 changes = [("%s/%s" % (kind, absidx), "remove")]
9244 assert container[absidx] == item
9245 del container[absidx]
9246 elif op == constants.DDM_MODIFY:
9247 if modify_fn is not None:
9248 changes = modify_fn(absidx, item, params, private)
9250 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
9252 assert _TApplyContModsCbChanges(changes)
9254 if not (chgdesc is None or changes is None):
9255 chgdesc.extend(changes)
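# A small sketch of how the callbacks are wired (callback and values
# hypothetical): create_fn/modify_fn return change descriptions that are
# collected in chgdesc, while the container itself is modified in place.
#
#   def _create(addidx, params, private):
#     disk = objects.Disk(size=params[constants.IDISK_SIZE])
#     return (disk, [("disk/%d" % addidx, "add")])
#
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1,
#                                 {constants.IDISK_SIZE: 1024})], None)
#   ApplyContainerMods("disk", disks, chgdesc, mods, _create, None, None)
#   # disks gains one entry; chgdesc == [("disk/<new index>", "add")]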
9258 def _UpdateIvNames(base_index, disks):
9259 """Updates the C{iv_name} attribute of disks.
9261 @type disks: list of L{objects.Disk}
9264 for (idx, disk) in enumerate(disks):
9265 disk.iv_name = "disk/%s" % (base_index + idx, )
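# For example (numbers illustrative): with base_index=2 and three disks the
# disks end up named "disk/2", "disk/3" and "disk/4"; LUInstanceSetParams
# calls this with base_index=0 to renumber the whole list after changes.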
9268 class _InstNicModPrivate:
9269 """Data structure for network interface modifications.
9271 Used by L{LUInstanceSetParams}.
9279 class LUInstanceSetParams(LogicalUnit):
9280 """Modifies an instances's parameters.
9283 HPATH = "instance-modify"
9284 HTYPE = constants.HTYPE_INSTANCE
9288 def _UpgradeDiskNicMods(kind, mods, verify_fn):
9289 assert ht.TList(mods)
9290 assert not mods or len(mods[0]) in (2, 3)
9292 if mods and len(mods[0]) == 2:
9296 for op, params in mods:
9297 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
9298 result.append((op, -1, params))
9302 raise errors.OpPrereqError("Only one %s add or remove operation is"
9303 " supported at a time" % kind,
9306 result.append((constants.DDM_MODIFY, op, params))
9308 assert verify_fn(result)
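# A sketch of the upgrade performed above (mods hypothetical), from the old
# two-element format to the (op, identifier, params) format used internally:
#
#   [("add", {"size": 1024})]   -> [(constants.DDM_ADD, -1, {"size": 1024})]
#   [("remove", {})]            -> [(constants.DDM_REMOVE, -1, {})]
#   [(0, {"mode": "ro"})]       -> [(constants.DDM_MODIFY, 0, {"mode": "ro"})]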
9315 def _CheckMods(kind, mods, key_types, item_fn):
9316 """Ensures requested disk/NIC modifications are valid.
9319 for (op, _, params) in mods:
9320 assert ht.TDict(params)
9322 # If 'key_types' is an empty dict, we assume we have an
9323 # 'ext' template and thus do not ForceDictType
9325 utils.ForceDictType(params, key_types)
9327 if op == constants.DDM_REMOVE:
9329 raise errors.OpPrereqError("No settings should be passed when"
9330 " removing a %s" % kind,
9332 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
9335 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
9338 def _VerifyDiskModification(op, params):
9339 """Verifies a disk modification.
9342 if op == constants.DDM_ADD:
9343 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9344 if mode not in constants.DISK_ACCESS_SET:
9345 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9348 size = params.get(constants.IDISK_SIZE, None)
9350 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
9351 constants.IDISK_SIZE, errors.ECODE_INVAL)
9355 except (TypeError, ValueError), err:
9356 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
9359 params[constants.IDISK_SIZE] = size
9360 name = params.get(constants.IDISK_NAME, None)
9361 if name is not None and name.lower() == constants.VALUE_NONE:
9362 params[constants.IDISK_NAME] = None
9364 elif op == constants.DDM_MODIFY:
9365 if constants.IDISK_SIZE in params:
9366 raise errors.OpPrereqError("Disk size change not possible, use"
9367 " grow-disk", errors.ECODE_INVAL)
9369 raise errors.OpPrereqError("Disk modification doesn't support"
9370 " additional arbitrary parameters",
9372 name = params.get(constants.IDISK_NAME, None)
9373 if name is not None and name.lower() == constants.VALUE_NONE:
9374 params[constants.IDISK_NAME] = None
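# Example parameter sets (values illustrative) that pass this check: an add
# must carry a size and may carry an access mode, while a modify may only
# touch the mode and/or name (size changes go through grow-disk).
#
#   (constants.DDM_ADD,    {constants.IDISK_SIZE: 1024,
#                           constants.IDISK_MODE: constants.DISK_RDWR})
#   (constants.DDM_MODIFY, {constants.IDISK_MODE: constants.DISK_RDONLY})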
9377 def _VerifyNicModification(op, params):
9378 """Verifies a network interface modification.
9381 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
9382 ip = params.get(constants.INIC_IP, None)
9383 name = params.get(constants.INIC_NAME, None)
9384 req_net = params.get(constants.INIC_NETWORK, None)
9385 link = params.get(constants.NIC_LINK, None)
9386 mode = params.get(constants.NIC_MODE, None)
9387 if name is not None and name.lower() == constants.VALUE_NONE:
9388 params[constants.INIC_NAME] = None
9389 if req_net is not None:
9390 if req_net.lower() == constants.VALUE_NONE:
9391 params[constants.INIC_NETWORK] = None
9393 elif link is not None or mode is not None:
9394 raise errors.OpPrereqError("If network is given"
9395 " mode or link should not",
9398 if op == constants.DDM_ADD:
9399 macaddr = params.get(constants.INIC_MAC, None)
9401 params[constants.INIC_MAC] = constants.VALUE_AUTO
9404 if ip.lower() == constants.VALUE_NONE:
9405 params[constants.INIC_IP] = None
9407 if ip.lower() == constants.NIC_IP_POOL:
9408 if op == constants.DDM_ADD and req_net is None:
9409 raise errors.OpPrereqError("If ip=pool, parameter network"
9413 if not netutils.IPAddress.IsValid(ip):
9414 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9417 if constants.INIC_MAC in params:
9418 macaddr = params[constants.INIC_MAC]
9419 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9420 macaddr = utils.NormalizeAndValidateMac(macaddr)
9422 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
9423 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9424 " modifying an existing NIC",
9427 def CheckArguments(self):
9428 if not (self.op.nics or self.op.disks or self.op.disk_template or
9429 self.op.hvparams or self.op.beparams or self.op.os_name or
9430 self.op.offline is not None or self.op.runtime_mem or
9432 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9434 if self.op.hvparams:
9435 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
9436 "hypervisor", "instance", "cluster")
9438 self.op.disks = self._UpgradeDiskNicMods(
9439 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
9440 self.op.nics = self._UpgradeDiskNicMods(
9441 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
9443 if self.op.disks and self.op.disk_template is not None:
9444 raise errors.OpPrereqError("Disk template conversion and other disk"
9445 " changes not supported at the same time",
9448 if (self.op.disk_template and
9449 self.op.disk_template in constants.DTS_INT_MIRROR and
9450 self.op.remote_node is None):
9451 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9452 " one requires specifying a secondary node",
9455 # Check NIC modifications
9456 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
9457 self._VerifyNicModification)
9460 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9462 def ExpandNames(self):
9463 self._ExpandAndLockInstance()
9464 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9465 # Can't even acquire node locks in shared mode as upcoming changes in
9466 # Ganeti 2.6 will start to modify the node object on disk conversion
9467 self.needed_locks[locking.LEVEL_NODE] = []
9468 self.needed_locks[locking.LEVEL_NODE_RES] = []
9469 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9470 # Lock node group to look up the ipolicy
9471 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9473 def DeclareLocks(self, level):
9474 if level == locking.LEVEL_NODEGROUP:
9475 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9476 # Acquire locks for the instance's nodegroups optimistically. Needs
9477 # to be verified in CheckPrereq
9478 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9479 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9480 elif level == locking.LEVEL_NODE:
9481 self._LockInstancesNodes()
9482 if self.op.disk_template and self.op.remote_node:
9483 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9484 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9485 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
9487 self.needed_locks[locking.LEVEL_NODE_RES] = \
9488 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9490 def BuildHooksEnv(self):
9493 This runs on the master, primary and secondaries.
9497 if constants.BE_MINMEM in self.be_new:
9498 args["minmem"] = self.be_new[constants.BE_MINMEM]
9499 if constants.BE_MAXMEM in self.be_new:
9500 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
9501 if constants.BE_VCPUS in self.be_new:
9502 args["vcpus"] = self.be_new[constants.BE_VCPUS]
9503 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9504 # information at all.
9506 if self._new_nics is not None:
9509 for nic in self._new_nics:
9510 n = copy.deepcopy(nic)
9511 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
9512 n.nicparams = nicparams
9513 nics.append(_NICToTuple(self, n))
9517 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9518 if self.op.disk_template:
9519 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9520 if self.op.runtime_mem:
9521 env["RUNTIME_MEMORY"] = self.op.runtime_mem
9525 def BuildHooksNodes(self):
9526 """Build hooks nodes.
9529 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9532 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
9533 old_params, cluster, pnode):
9535 update_params_dict = dict([(key, params[key])
9536 for key in constants.NICS_PARAMETERS
9539 req_link = update_params_dict.get(constants.NIC_LINK, None)
9540 req_mode = update_params_dict.get(constants.NIC_MODE, None)
9543 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
9544 if new_net_uuid_or_name:
9545 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
9546 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
9549 old_net_obj = self.cfg.GetNetwork(old_net_uuid)
9552 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
9554 raise errors.OpPrereqError("No netparams found for the network"
9555 " %s, probably not connected" %
9556 new_net_obj.name, errors.ECODE_INVAL)
9557 new_params = dict(netparams)
9559 new_params = _GetUpdatedParams(old_params, update_params_dict)
9561 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
9563 new_filled_params = cluster.SimpleFillNIC(new_params)
9564 objects.NIC.CheckParameterSyntax(new_filled_params)
9566 new_mode = new_filled_params[constants.NIC_MODE]
9567 if new_mode == constants.NIC_MODE_BRIDGED:
9568 bridge = new_filled_params[constants.NIC_LINK]
9569 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
9571 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
9573 self.warn.append(msg)
9575 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9577 elif new_mode == constants.NIC_MODE_ROUTED:
9578 ip = params.get(constants.INIC_IP, old_ip)
9580 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
9581 " on a routed NIC", errors.ECODE_INVAL)
9583 elif new_mode == constants.NIC_MODE_OVS:
9584 # TODO: check OVS link
9585 self.LogInfo("OVS links are currently not checked for correctness")
9587 if constants.INIC_MAC in params:
9588 mac = params[constants.INIC_MAC]
9590 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
9592 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9593 # otherwise generate the MAC address
9594 params[constants.INIC_MAC] = \
9595 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
9597 # or validate/reserve the current one
9599 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9600 except errors.ReservationError:
9601 raise errors.OpPrereqError("MAC address '%s' already in use"
9602 " in cluster" % mac,
9603 errors.ECODE_NOTUNIQUE)
9604 elif new_net_uuid != old_net_uuid:
9606 def get_net_prefix(net_uuid):
9609 nobj = self.cfg.GetNetwork(net_uuid)
9610 mac_prefix = nobj.mac_prefix
9614 new_prefix = get_net_prefix(new_net_uuid)
9615 old_prefix = get_net_prefix(old_net_uuid)
9616 if old_prefix != new_prefix:
9617 params[constants.INIC_MAC] = \
9618 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
9620 # if there is a change in (ip, network) tuple
9621 new_ip = params.get(constants.INIC_IP, old_ip)
9622 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
9624 # if IP is pool then require a network and generate one IP
9625 if new_ip.lower() == constants.NIC_IP_POOL:
9628 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
9629 except errors.ReservationError:
9630 raise errors.OpPrereqError("Unable to get a free IP"
9631 " from the address pool",
9633 self.LogInfo("Chose IP %s from network %s",
9636 params[constants.INIC_IP] = new_ip
9638 raise errors.OpPrereqError("ip=pool, but no network found",
9640 # Reserve the new IP in the new network, if any
9643 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
9644 self.LogInfo("Reserving IP %s in network %s",
9645 new_ip, new_net_obj.name)
9646 except errors.ReservationError:
9647 raise errors.OpPrereqError("IP %s not available in network %s" %
9648 (new_ip, new_net_obj.name),
9649 errors.ECODE_NOTUNIQUE)
9650 # new network is None so check if new IP is a conflicting IP
9651 elif self.op.conflicts_check:
9652 _CheckForConflictingIp(self, new_ip, pnode)
9654 # release old IP if old network is not None
9655 if old_ip and old_net_uuid:
9657 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
9658 except errors.AddressPoolError:
9659 logging.warning("Release IP %s not contained in network %s",
9660 old_ip, old_net_obj.name)
9662 # there are no changes in (ip, network) tuple and old network is not None
9663 elif (old_net_uuid is not None and
9664 (req_link is not None or req_mode is not None)):
9665 raise errors.OpPrereqError("Not allowed to change link or mode of"
9666 " a NIC that is connected to a network",
9669 private.params = new_params
9670 private.filled = new_filled_params
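# In short, the MAC handling above works as follows: an explicitly given MAC
# is validated and reserved, "auto"/"generate" request a freshly generated
# one, and when only the network changes a new MAC is generated if the MAC
# prefixes of the old and new networks differ.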
9672 def _PreCheckDiskTemplate(self, pnode_info):
9673 """CheckPrereq checks related to a new disk template."""
9674 # Arguments are passed to avoid configuration lookups
9675 instance = self.instance
9676 pnode = instance.primary_node
9677 cluster = self.cluster
9678 if instance.disk_template == self.op.disk_template:
9679 raise errors.OpPrereqError("Instance already has disk template %s" %
9680 instance.disk_template, errors.ECODE_INVAL)
9682 if (instance.disk_template,
9683 self.op.disk_template) not in self._DISK_CONVERSIONS:
9684 raise errors.OpPrereqError("Unsupported disk template conversion from"
9685 " %s to %s" % (instance.disk_template,
9686 self.op.disk_template),
9688 _CheckInstanceState(self, instance, INSTANCE_DOWN,
9689 msg="cannot change disk template")
9690 if self.op.disk_template in constants.DTS_INT_MIRROR:
9691 if self.op.remote_node == pnode:
9692 raise errors.OpPrereqError("Given new secondary node %s is the same"
9693 " as the primary node of the instance" %
9694 self.op.remote_node, errors.ECODE_STATE)
9695 _CheckNodeOnline(self, self.op.remote_node)
9696 _CheckNodeNotDrained(self, self.op.remote_node)
9697 # FIXME: here we assume that the old instance type is DT_PLAIN
9698 assert instance.disk_template == constants.DT_PLAIN
9699 disks = [{constants.IDISK_SIZE: d.size,
9700 constants.IDISK_VG: d.logical_id[0]}
9701 for d in instance.disks]
9702 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9703 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9705 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
9706 snode_group = self.cfg.GetNodeGroup(snode_info.group)
9707 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
9709 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
9710 ignore=self.op.ignore_ipolicy)
9711 if pnode_info.group != snode_info.group:
9712 self.LogWarning("The primary and secondary nodes are in two"
9713 " different node groups; the disk parameters"
9714 " from the first disk's node group will be"
9717 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
9718 # Make sure none of the nodes require exclusive storage
9719 nodes = [pnode_info]
9720 if self.op.disk_template in constants.DTS_INT_MIRROR:
9722 nodes.append(snode_info)
9723 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
9724 if compat.any(map(has_es, nodes)):
9725 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
9726 " storage is enabled" % (instance.disk_template,
9727 self.op.disk_template))
9728 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
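# Only the conversions listed in _DISK_CONVERSIONS at the end of this class
# are accepted here, i.e. plain <-> drbd; converting plain to drbd
# additionally requires op.remote_node, which becomes the DRBD secondary.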
9730 def CheckPrereq(self):
9731 """Check prerequisites.
9733 This only checks the instance list against the existing names.
9736 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
9737 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9739 cluster = self.cluster = self.cfg.GetClusterInfo()
9740 assert self.instance is not None, \
9741 "Cannot retrieve locked instance %s" % self.op.instance_name
9743 pnode = instance.primary_node
9747 if (self.op.pnode is not None and self.op.pnode != pnode and
9749 # verify that the instance is not up
9750 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9751 instance.hypervisor)
9752 if instance_info.fail_msg:
9753 self.warn.append("Can't get instance runtime information: %s" %
9754 instance_info.fail_msg)
9755 elif instance_info.payload:
9756 raise errors.OpPrereqError("Instance is still running on %s" % pnode,
9759 assert pnode in self.owned_locks(locking.LEVEL_NODE)
9760 nodelist = list(instance.all_nodes)
9761 pnode_info = self.cfg.GetNodeInfo(pnode)
9762 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
9764 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9765 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
9766 group_info = self.cfg.GetNodeGroup(pnode_info.group)
9768 # dictionary with instance information after the modification
9771 # Check disk modifications. This is done here and not in CheckArguments
9772 # (as with NICs), because we need to know the instance's disk template
9773 if instance.disk_template == constants.DT_EXT:
9774 self._CheckMods("disk", self.op.disks, {},
9775 self._VerifyDiskModification)
9777 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
9778 self._VerifyDiskModification)
9780 # Prepare disk/NIC modifications
9781 self.diskmod = PrepareContainerMods(self.op.disks, None)
9782 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
9784 # Check the validity of the `provider' parameter
9785 if instance.disk_template == constants.DT_EXT:
9786 for mod in self.diskmod:
9787 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
9788 if mod[0] == constants.DDM_ADD:
9789 if ext_provider is None:
9790 raise errors.OpPrereqError("Instance template is '%s' and parameter"
9791 " '%s' missing, during disk add" %
9793 constants.IDISK_PROVIDER),
9795 elif mod[0] == constants.DDM_MODIFY:
9797 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
9799 constants.IDISK_PROVIDER,
9802 for mod in self.diskmod:
9803 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
9804 if ext_provider is not None:
9805 raise errors.OpPrereqError("Parameter '%s' is only valid for"
9806 " instances of type '%s'" %
9807 (constants.IDISK_PROVIDER,
9812 if self.op.os_name and not self.op.force:
9813 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9814 self.op.force_variant)
9815 instance_os = self.op.os_name
9817 instance_os = instance.os
9819 assert not (self.op.disk_template and self.op.disks), \
9820 "Can't modify disk template and apply disk changes at the same time"
9822 if self.op.disk_template:
9823 self._PreCheckDiskTemplate(pnode_info)
9825 # hvparams processing
9826 if self.op.hvparams:
9827 hv_type = instance.hypervisor
9828 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9829 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9830 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9833 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
9834 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9835 self.hv_proposed = self.hv_new = hv_new # the new actual values
9836 self.hv_inst = i_hvdict # the new dict (without defaults)
9838 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
9840 self.hv_new = self.hv_inst = {}
9842 # beparams processing
9843 if self.op.beparams:
9844 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9846 objects.UpgradeBeParams(i_bedict)
9847 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9848 be_new = cluster.SimpleFillBE(i_bedict)
9849 self.be_proposed = self.be_new = be_new # the new actual values
9850 self.be_inst = i_bedict # the new dict (without defaults)
9852 self.be_new = self.be_inst = {}
9853 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
9854 be_old = cluster.FillBE(instance)
9856 # CPU param validation -- checking every time a parameter is
9857 # changed to cover all cases where either CPU mask or vcpus have
9859 if (constants.BE_VCPUS in self.be_proposed and
9860 constants.HV_CPU_MASK in self.hv_proposed):
9862 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
9863 # Verify mask is consistent with number of vCPUs. Can skip this
9864 # test if only 1 entry in the CPU mask, which means same mask
9865 # is applied to all vCPUs.
9866 if (len(cpu_list) > 1 and
9867 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
9868 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
9870 (self.be_proposed[constants.BE_VCPUS],
9871 self.hv_proposed[constants.HV_CPU_MASK]),
9874 # Only perform this test if a new CPU mask is given
9875 if constants.HV_CPU_MASK in self.hv_new:
9876 # Calculate the largest CPU number requested
9877 max_requested_cpu = max(map(max, cpu_list))
9878 # Check that all of the instance's nodes have enough physical CPUs to
9879 # satisfy the requested CPU mask
9880 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
9881 max_requested_cpu + 1, instance.hypervisor)
9883 # osparams processing
9884 if self.op.osparams:
9885 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9886 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9887 self.os_inst = i_osdict # the new dict (without defaults)
9891 #TODO(dynmem): do the appropriate check involving MINMEM
9892 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
9893 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
9894 mem_check_list = [pnode]
9895 if be_new[constants.BE_AUTO_BALANCE]:
9896 # either we changed auto_balance to yes or it was from before
9897 mem_check_list.extend(instance.secondary_nodes)
9898 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9899 instance.hypervisor)
9900 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9901 [instance.hypervisor], False)
9902 pninfo = nodeinfo[pnode]
9903 msg = pninfo.fail_msg
9905 # Assume the primary node is unreachable and go ahead
9906 self.warn.append("Can't get info from primary node %s: %s" %
9909 (_, _, (pnhvinfo, )) = pninfo.payload
9910 if not isinstance(pnhvinfo.get("memory_free", None), int):
9911 self.warn.append("Node data from primary node %s doesn't contain"
9912 " free memory information" % pnode)
9913 elif instance_info.fail_msg:
9914 self.warn.append("Can't get instance runtime information: %s" %
9915 instance_info.fail_msg)
9917 if instance_info.payload:
9918 current_mem = int(instance_info.payload["memory"])
9920 # Assume instance not running
9921 # (there is a slight race condition here, but it's not very
9922 # probable, and we have no other way to check)
9923 # TODO: Describe race condition
9925 #TODO(dynmem): do the appropriate check involving MINMEM
9926 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
9927 pnhvinfo["memory_free"])
9929 raise errors.OpPrereqError("This change will prevent the instance"
9930 " from starting, due to %d MB of memory"
9931 " missing on its primary node" %
9932 miss_mem, errors.ECODE_NORES)
9934 if be_new[constants.BE_AUTO_BALANCE]:
9935 for node, nres in nodeinfo.items():
9936 if node not in instance.secondary_nodes:
9938 nres.Raise("Can't get info from secondary node %s" % node,
9939 prereq=True, ecode=errors.ECODE_STATE)
9940 (_, _, (nhvinfo, )) = nres.payload
9941 if not isinstance(nhvinfo.get("memory_free", None), int):
9942 raise errors.OpPrereqError("Secondary node %s didn't return free"
9943 " memory information" % node,
9945 #TODO(dynmem): do the appropriate check involving MINMEM
9946 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
9947 raise errors.OpPrereqError("This change will prevent the instance"
9948 " from failover to its secondary node"
9949 " %s, due to not enough memory" % node,
9952 if self.op.runtime_mem:
9953 remote_info = self.rpc.call_instance_info(instance.primary_node,
9955 instance.hypervisor)
9956 remote_info.Raise("Error checking node %s" % instance.primary_node)
9957 if not remote_info.payload: # not running already
9958 raise errors.OpPrereqError("Instance %s is not running" %
9959 instance.name, errors.ECODE_STATE)
9961 current_memory = remote_info.payload["memory"]
9962 if (not self.op.force and
9963 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
9964 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
9965 raise errors.OpPrereqError("Instance %s must have memory between %d"
9966 " and %d MB of memory unless --force is"
9969 self.be_proposed[constants.BE_MINMEM],
9970 self.be_proposed[constants.BE_MAXMEM]),
9973 delta = self.op.runtime_mem - current_memory
9975 _CheckNodeFreeMemory(self, instance.primary_node,
9976 "ballooning memory for instance %s" %
9977 instance.name, delta, instance.hypervisor)
9979 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9980 raise errors.OpPrereqError("Disk operations not supported for"
9981 " diskless instances", errors.ECODE_INVAL)
9983 def _PrepareNicCreate(_, params, private):
9984 self._PrepareNicModification(params, private, None, None,
9988 def _PrepareNicMod(_, nic, params, private):
9989 self._PrepareNicModification(params, private, nic.ip, nic.network,
9990 nic.nicparams, cluster, pnode)
9993 def _PrepareNicRemove(_, params, __):
9995 net = params.network
9996 if net is not None and ip is not None:
9997 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
9999 # Verify NIC changes (operating on copy)
10000 nics = instance.nics[:]
10001 ApplyContainerMods("NIC", nics, None, self.nicmod,
10002 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
10003 if len(nics) > constants.MAX_NICS:
10004 raise errors.OpPrereqError("Instance has too many network interfaces"
10005 " (%d), cannot add more" % constants.MAX_NICS,
10006 errors.ECODE_STATE)
10008 def _PrepareDiskMod(_, disk, params, __):
10009 disk.name = params.get(constants.IDISK_NAME, None)
10011 # Verify disk changes (operating on a copy)
10012 disks = copy.deepcopy(instance.disks)
10013 ApplyContainerMods("disk", disks, None, self.diskmod, None, _PrepareDiskMod,
10015 utils.ValidateDeviceNames("disk", disks)
10016 if len(disks) > constants.MAX_DISKS:
10017 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
10018 " more" % constants.MAX_DISKS,
10019 errors.ECODE_STATE)
10020 disk_sizes = [disk.size for disk in instance.disks]
10021 disk_sizes.extend(params["size"] for (op, idx, params, private) in
10022 self.diskmod if op == constants.DDM_ADD)
10023 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
10024 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
10026 if self.op.offline is not None and self.op.offline:
10027 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
10028 msg="can't change to offline")
10030 # Pre-compute NIC changes (necessary to use result in hooks)
10031 self._nic_chgdesc = []
10033 # Operate on copies as this is still in prereq
10034 nics = [nic.Copy() for nic in instance.nics]
10035 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
10036 self._CreateNewNic, self._ApplyNicMods, None)
10037 # Verify that NIC names are unique and valid
10038 utils.ValidateDeviceNames("NIC", nics)
10039 self._new_nics = nics
10040 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
10042 self._new_nics = None
10043 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
10045 if not self.op.ignore_ipolicy:
10046 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
10049 # Fill ispec with backend parameters
10050 ispec[constants.ISPEC_SPINDLE_USE] = \
10051 self.be_new.get(constants.BE_SPINDLE_USE, None)
10052 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
10055 # Copy ispec to verify parameters with min/max values separately
10056 if self.op.disk_template:
10057 new_disk_template = self.op.disk_template
10059 new_disk_template = instance.disk_template
10060 ispec_max = ispec.copy()
10061 ispec_max[constants.ISPEC_MEM_SIZE] = \
10062 self.be_new.get(constants.BE_MAXMEM, None)
10063 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
10065 ispec_min = ispec.copy()
10066 ispec_min[constants.ISPEC_MEM_SIZE] = \
10067 self.be_new.get(constants.BE_MINMEM, None)
10068 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
10071 if (res_max or res_min):
10072 # FIXME: Improve error message by including information about whether
10073 # the upper or lower limit of the parameter fails the ipolicy.
10074 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10075 (group_info, group_info.name,
10076 utils.CommaJoin(set(res_max + res_min))))
10077 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
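# A sketch of the ispec built above (numbers hypothetical): the proposed
# instance is checked against the group ipolicy twice, once with BE_MAXMEM
# and once with BE_MINMEM as the memory size, so both ends of the range must
# fit the policy.
#
#   {constants.ISPEC_DISK_COUNT: 1,
#    constants.ISPEC_DISK_SIZE: [1024],
#    constants.ISPEC_NIC_COUNT: 1,
#    constants.ISPEC_CPU_COUNT: 2,
#    constants.ISPEC_SPINDLE_USE: 1,
#    constants.ISPEC_MEM_SIZE: 2048}   # ispec_max; ispec_min uses BE_MINMEM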
10079 def _ConvertPlainToDrbd(self, feedback_fn):
10080 """Converts an instance from plain to drbd.
10083 feedback_fn("Converting template to drbd")
10084 instance = self.instance
10085 pnode = instance.primary_node
10086 snode = self.op.remote_node
10088 assert instance.disk_template == constants.DT_PLAIN
10090 # create a fake disk info for _GenerateDiskTemplate
10091 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10092 constants.IDISK_VG: d.logical_id[0],
10093 constants.IDISK_NAME: d.name}
10094 for d in instance.disks]
10095 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10096 instance.name, pnode, [snode],
10097 disk_info, None, None, 0, feedback_fn,
10099 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
10101 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
10102 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
10103 info = _GetInstanceInfoText(instance)
10104 feedback_fn("Creating additional volumes...")
10105 # first, create the missing data and meta devices
10106 for disk in anno_disks:
10107 # unfortunately this is... not too nice
10108 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10109 info, True, p_excl_stor)
10110 for child in disk.children:
10111 _CreateSingleBlockDev(self, snode, instance, child, info, True,
10113 # at this stage, all new LVs have been created, we can rename the
10115 feedback_fn("Renaming original volumes...")
10116 rename_list = [(o, n.children[0].logical_id)
10117 for (o, n) in zip(instance.disks, new_disks)]
10118 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10119 result.Raise("Failed to rename original LVs")
10121 feedback_fn("Initializing DRBD devices...")
10122 # all child devices are in place, we can now create the DRBD devices
10124 for disk in anno_disks:
10125 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
10126 f_create = node == pnode
10127 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
10129 except errors.GenericError, e:
10130 feedback_fn("Initializing of DRBD devices failed;"
10131 " renaming back original volumes...")
10132 for disk in new_disks:
10133 self.cfg.SetDiskID(disk, pnode)
10134 rename_back_list = [(n.children[0], o.logical_id)
10135 for (n, o) in zip(new_disks, instance.disks)]
10136 result = self.rpc.call_blockdev_rename(pnode, rename_back_list)
10137 result.Raise("Failed to rename LVs back after error %s" % str(e))
10140 # at this point, the instance has been modified
10141 instance.disk_template = constants.DT_DRBD8
10142 instance.disks = new_disks
10143 self.cfg.Update(instance, feedback_fn)
10145 # Release node locks while waiting for sync
10146 _ReleaseLocks(self, locking.LEVEL_NODE)
10148 # disks are created, waiting for sync
10149 disk_abort = not _WaitForSync(self, instance,
10150 oneshot=not self.op.wait_for_sync)
10152 raise errors.OpExecError("There are some degraded disks for"
10153 " this instance, please cleanup manually")
10155 # Node resource locks will be released by caller
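# Layout of the disks created above: each new top-level disk is a DRBD8
# device whose children are [data LV, meta LV]; the original plain LV is
# renamed into the data child, so only the meta devices (and the secondary
# node's copies) have to be created from scratch.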
10157 def _ConvertDrbdToPlain(self, feedback_fn):
10158 """Converts an instance from drbd to plain.
10161 instance = self.instance
10163 assert len(instance.secondary_nodes) == 1
10164 assert instance.disk_template == constants.DT_DRBD8
10166 pnode = instance.primary_node
10167 snode = instance.secondary_nodes[0]
10168 feedback_fn("Converting template to plain")
10170 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
10171 new_disks = [d.children[0] for d in instance.disks]
10173 # copy over size, mode and name
10174 for parent, child in zip(old_disks, new_disks):
10175 child.size = parent.size
10176 child.mode = parent.mode
10177 child.name = parent.name
10179 # this is a DRBD disk, return its port to the pool
10180 # NOTE: this must be done right before the call to cfg.Update!
10181 for disk in old_disks:
10182 tcp_port = disk.logical_id[2]
10183 self.cfg.AddTcpUdpPort(tcp_port)
10185 # update instance structure
10186 instance.disks = new_disks
10187 instance.disk_template = constants.DT_PLAIN
10188 _UpdateIvNames(0, instance.disks)
10189 self.cfg.Update(instance, feedback_fn)
10191 # Release locks in case removing disks takes a while
10192 _ReleaseLocks(self, locking.LEVEL_NODE)
10194 feedback_fn("Removing volumes on the secondary node...")
10195 for disk in old_disks:
10196 self.cfg.SetDiskID(disk, snode)
10197 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10199 self.LogWarning("Could not remove block device %s on node %s,"
10200 " continuing anyway: %s", disk.iv_name, snode, msg)
10202 feedback_fn("Removing unneeded volumes on the primary node...")
10203 for idx, disk in enumerate(old_disks):
10204 meta = disk.children[1]
10205 self.cfg.SetDiskID(meta, pnode)
10206 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10208 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10209 " continuing anyway: %s", idx, pnode, msg)
10211 def _CreateNewDisk(self, idx, params, _):
10212 """Creates a new disk.
10215 instance = self.instance
10218 if instance.disk_template in constants.DTS_FILEBASED:
10219 (file_driver, file_path) = instance.disks[0].logical_id
10220 file_path = os.path.dirname(file_path)
10222 file_driver = file_path = None
10225 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
10226 instance.primary_node, instance.secondary_nodes,
10227 [params], file_path, file_driver, idx,
10228 self.Log, self.diskparams)[0]
10230 info = _GetInstanceInfoText(instance)
10232 logging.info("Creating volume %s for instance %s",
10233 disk.iv_name, instance.name)
10234 # Note: this needs to be kept in sync with _CreateDisks
10236 for node in instance.all_nodes:
10237 f_create = (node == instance.primary_node)
10239 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
10240 except errors.OpExecError, err:
10241 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
10242 disk.iv_name, disk, node, err)
10244 if self.cluster.prealloc_wipe_disks:
10246 _WipeDisks(self, instance,
10247 disks=[(idx, disk, 0)])
10250 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
10254 def _ModifyDisk(idx, disk, params, _):
10255 """Modifies a disk.
10259 mode = params.get(constants.IDISK_MODE, None)
10262 changes.append(("disk.mode/%d" % idx, disk.mode))
10264 name = params.get(constants.IDISK_NAME, None)
10266 changes.append(("disk.name/%d" % idx, disk.name))
10270 def _RemoveDisk(self, idx, root, _):
10274 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
10275 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
10276 self.cfg.SetDiskID(disk, node)
10277 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10279 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
10280 " continuing anyway", idx, node, msg)
10282 # if this is a DRBD disk, return its port to the pool
10283 if root.dev_type in constants.LDS_DRBD:
10284 self.cfg.AddTcpUdpPort(root.logical_id[2])
10286 def _CreateNewNic(self, idx, params, private):
10287 """Creates data structure for a new network interface.
10290 mac = params[constants.INIC_MAC]
10291 ip = params.get(constants.INIC_IP, None)
10292 net = params.get(constants.INIC_NETWORK, None)
10293 name = params.get(constants.INIC_NAME, None)
10294 net_uuid = self.cfg.LookupNetwork(net)
10295 #TODO: not private.filled?? can a nic have no nicparams??
10296 nicparams = private.filled
10297 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, name=name,
10298 nicparams=nicparams)
10299 nobj.uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10303 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
10304 (mac, ip, private.filled[constants.NIC_MODE],
10305 private.filled[constants.NIC_LINK],
10309 def _ApplyNicMods(self, idx, nic, params, private):
10310 """Modifies a network interface.
10315 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NAME]:
10317 changes.append(("nic.%s/%d" % (key, idx), params[key]))
10318 setattr(nic, key, params[key])
10320 new_net = params.get(constants.INIC_NETWORK, nic.network)
10321 new_net_uuid = self.cfg.LookupNetwork(new_net)
10322 if new_net_uuid != nic.network:
10323 changes.append(("nic.network/%d" % idx, new_net))
10324 nic.network = new_net_uuid
10327 nic.nicparams = private.filled
10329 for (key, val) in nic.nicparams.items():
10330 changes.append(("nic.%s/%d" % (key, idx), val))
10334 def Exec(self, feedback_fn):
10335 """Modifies an instance.
10337 All parameters take effect only at the next restart of the instance.
10340 # Process here the warnings from CheckPrereq, as we don't have a
10341 # feedback_fn there.
10342 # TODO: Replace with self.LogWarning
10343 for warn in self.warn:
10344 feedback_fn("WARNING: %s" % warn)
10346 assert ((self.op.disk_template is None) ^
10347 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
10348 "Not owning any node resource locks"
10351 instance = self.instance
10355 instance.primary_node = self.op.pnode
10358 if self.op.runtime_mem:
10359 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
10361 self.op.runtime_mem)
10362 rpcres.Raise("Cannot modify instance runtime memory")
10363 result.append(("runtime_memory", self.op.runtime_mem))
10365 # Apply disk changes
10366 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
10367 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
10368 _UpdateIvNames(0, instance.disks)
10370 if self.op.disk_template:
10372 check_nodes = set(instance.all_nodes)
10373 if self.op.remote_node:
10374 check_nodes.add(self.op.remote_node)
10375 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
10376 owned = self.owned_locks(level)
10377 assert not (check_nodes - owned), \
10378 ("Not owning the correct locks, owning %r, expected at least %r" %
10379 (owned, check_nodes))
10381 r_shut = _ShutdownInstanceDisks(self, instance)
10383 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10384 " proceed with disk template conversion")
10385 mode = (instance.disk_template, self.op.disk_template)
10387 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10389 self.cfg.ReleaseDRBDMinors(instance.name)
10391 result.append(("disk_template", self.op.disk_template))
10393 assert instance.disk_template == self.op.disk_template, \
10394 ("Expected disk template '%s', found '%s'" %
10395 (self.op.disk_template, instance.disk_template))
10397 # Release node and resource locks if there are any (they might already have
10398 # been released during disk conversion)
10399 _ReleaseLocks(self, locking.LEVEL_NODE)
10400 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10402 # Apply NIC changes
10403 if self._new_nics is not None:
10404 instance.nics = self._new_nics
10405 result.extend(self._nic_chgdesc)
10408 if self.op.hvparams:
10409 instance.hvparams = self.hv_inst
10410 for key, val in self.op.hvparams.iteritems():
10411 result.append(("hv/%s" % key, val))
10414 if self.op.beparams:
10415 instance.beparams = self.be_inst
10416 for key, val in self.op.beparams.iteritems():
10417 result.append(("be/%s" % key, val))
10420 if self.op.os_name:
10421 instance.os = self.op.os_name
10424 if self.op.osparams:
10425 instance.osparams = self.os_inst
10426 for key, val in self.op.osparams.iteritems():
10427 result.append(("os/%s" % key, val))
10429 if self.op.offline is None:
10432 elif self.op.offline:
10433 # Mark instance as offline
10434 self.cfg.MarkInstanceOffline(instance.name)
10435 result.append(("admin_state", constants.ADMINST_OFFLINE))
10437 # Mark instance as online, but stopped
10438 self.cfg.MarkInstanceDown(instance.name)
10439 result.append(("admin_state", constants.ADMINST_DOWN))
10441 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
10443 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
10444 self.owned_locks(locking.LEVEL_NODE)), \
10445 "All node locks should have been released by now"
10449 _DISK_CONVERSIONS = {
10450 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10451 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10455 class LUInstanceChangeGroup(LogicalUnit):
10456 HPATH = "instance-change-group"
10457 HTYPE = constants.HTYPE_INSTANCE
10460 def ExpandNames(self):
10461 self.share_locks = _ShareAll()
10463 self.needed_locks = {
10464 locking.LEVEL_NODEGROUP: [],
10465 locking.LEVEL_NODE: [],
10466 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
10469 self._ExpandAndLockInstance()
10471 if self.op.target_groups:
10472 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
10473 self.op.target_groups)
10475 self.req_target_uuids = None
10477 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
10479 def DeclareLocks(self, level):
10480 if level == locking.LEVEL_NODEGROUP:
10481 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10483 if self.req_target_uuids:
10484 lock_groups = set(self.req_target_uuids)
10486 # Lock all groups used by instance optimistically; this requires going
10487 # via the node before it's locked, requiring verification later on
10488 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10489 lock_groups.update(instance_groups)
10491 # No target groups, need to lock all of them
10492 lock_groups = locking.ALL_SET
10494 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
10496 elif level == locking.LEVEL_NODE:
10497 if self.req_target_uuids:
10498 # Lock all nodes used by instances
10499 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10500 self._LockInstancesNodes()
10502 # Lock all nodes in all potential target groups
10503 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
10504 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
10505 member_nodes = [node_name
10506 for group in lock_groups
10507 for node_name in self.cfg.GetNodeGroup(group).members]
10508 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
10510 # Lock all nodes as all groups are potential targets
10511 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10513 def CheckPrereq(self):
10514 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
10515 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
10516 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
10518 assert (self.req_target_uuids is None or
10519 owned_groups.issuperset(self.req_target_uuids))
10520 assert owned_instances == set([self.op.instance_name])
10522 # Get instance information
10523 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10525 # Check if node groups for locked instance are still correct
10526 assert owned_nodes.issuperset(self.instance.all_nodes), \
10527 ("Instance %s's nodes changed while we kept the lock" %
10528 self.op.instance_name)
10530 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
10533 if self.req_target_uuids:
10534 # User requested specific target groups
10535 self.target_uuids = frozenset(self.req_target_uuids)
10537 # All groups except those used by the instance are potential targets
10538 self.target_uuids = owned_groups - inst_groups
10540 conflicting_groups = self.target_uuids & inst_groups
10541 if conflicting_groups:
10542 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
10543 " used by the instance '%s'" %
10544 (utils.CommaJoin(conflicting_groups),
10545 self.op.instance_name),
10546 errors.ECODE_INVAL)
10548 if not self.target_uuids:
10549 raise errors.OpPrereqError("There are no possible target groups",
10550 errors.ECODE_INVAL)
10552 def BuildHooksEnv(self):
10553 """Build hooks env.
10556 assert self.target_uuids
10559 "TARGET_GROUPS": " ".join(self.target_uuids),
10562 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10566 def BuildHooksNodes(self):
10567 """Build hooks nodes.
10570 mn = self.cfg.GetMasterNode()
10571 return ([mn], [mn])
10573 def Exec(self, feedback_fn):
10574 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
10576 assert instances == [self.op.instance_name], "Instance not locked"
10578 req = iallocator.IAReqGroupChange(instances=instances,
10579 target_groups=list(self.target_uuids))
10580 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10582 ial.Run(self.op.iallocator)
10584 if not ial.success:
10585 raise errors.OpPrereqError("Can't compute solution for changing group of"
10586 " instance '%s' using iallocator '%s': %s" %
10587 (self.op.instance_name, self.op.iallocator,
10588 ial.info), errors.ECODE_NORES)
10590 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
10592 self.LogInfo("Iallocator returned %s job(s) for changing group of"
10593 " instance '%s'", len(jobs), self.op.instance_name)
10595 return ResultWithJobs(jobs)
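# The iallocator request issued above is an IAReqGroupChange for exactly the
# locked instance, restricted to self.target_uuids; the allocator's answer
# is turned into jobs by _LoadNodeEvacResult and submitted via
# ResultWithJobs.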
10598 class LUBackupQuery(NoHooksLU):
10599 """Query the exports list
10604 def CheckArguments(self):
10605 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
10606 ["node", "export"], self.op.use_locking)
10608 def ExpandNames(self):
10609 self.expq.ExpandNames(self)
10611 def DeclareLocks(self, level):
10612 self.expq.DeclareLocks(self, level)
10614 def Exec(self, feedback_fn):
10617 for (node, expname) in self.expq.OldStyleQuery(self):
10618 if expname is None:
10619 result[node] = False
10621 result.setdefault(node, []).append(expname)
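# Shape of the returned dictionary (node names hypothetical): nodes whose
# export list could not be retrieved map to False, the others map to the
# list of export names found on them, e.g.
#
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}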
10626 class _ExportQuery(_QueryBase):
10627 FIELDS = query.EXPORT_FIELDS
10629 #: The node name is not a unique key for this query
10630 SORT_FIELD = "node"
10632 def ExpandNames(self, lu):
10633 lu.needed_locks = {}
10635 # The following variables interact with _QueryBase._GetNames
10637 self.wanted = _GetWantedNodes(lu, self.names)
10639 self.wanted = locking.ALL_SET
10641 self.do_locking = self.use_locking
10643 if self.do_locking:
10644 lu.share_locks = _ShareAll()
10645 lu.needed_locks = {
10646 locking.LEVEL_NODE: self.wanted,
10650 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10652 def DeclareLocks(self, lu, level):
10655 def _GetQueryData(self, lu):
10656 """Computes the list of nodes and their attributes.
10659 # Locking is not used
10661 assert not (compat.any(lu.glm.is_owned(level)
10662 for level in locking.LEVELS
10663 if level != locking.LEVEL_CLUSTER) or
10664 self.do_locking or self.use_locking)
10666 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
10670 for (node, nres) in lu.rpc.call_export_list(nodes).items():
10672 result.append((node, None))
10674 result.extend((node, expname) for expname in nres.payload)
10679 class LUBackupPrepare(NoHooksLU):
10680 """Prepares an instance for an export and returns useful information.
10685 def ExpandNames(self):
10686 self._ExpandAndLockInstance()
10688 def CheckPrereq(self):
10689 """Check prerequisites.
10692 instance_name = self.op.instance_name
10694 self.instance = self.cfg.GetInstanceInfo(instance_name)
10695 assert self.instance is not None, \
10696 "Cannot retrieve locked instance %s" % self.op.instance_name
10697 _CheckNodeOnline(self, self.instance.primary_node)
10699 self._cds = _GetClusterDomainSecret()
10701 def Exec(self, feedback_fn):
10702 """Prepares an instance for an export.
10705 instance = self.instance
10707 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10708 salt = utils.GenerateSecret(8)
10710 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10711 result = self.rpc.call_x509_cert_create(instance.primary_node,
10712 constants.RIE_CERT_VALIDITY)
10713 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10715 (name, cert_pem) = result.payload
10717 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10721 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10722 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10724 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10730 class LUBackupExport(LogicalUnit):
10731 """Export an instance to an image in the cluster.
10734 HPATH = "instance-export"
10735 HTYPE = constants.HTYPE_INSTANCE
10738 def CheckArguments(self):
10739 """Check the arguments.
10742 self.x509_key_name = self.op.x509_key_name
10743 self.dest_x509_ca_pem = self.op.destination_x509_ca
10745 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10746 if not self.x509_key_name:
10747 raise errors.OpPrereqError("Missing X509 key name for encryption",
10748 errors.ECODE_INVAL)
10750 if not self.dest_x509_ca_pem:
10751 raise errors.OpPrereqError("Missing destination X509 CA",
10752 errors.ECODE_INVAL)
10754 def ExpandNames(self):
10755 self._ExpandAndLockInstance()
10757 # Lock all nodes for local exports
10758 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10759 # FIXME: lock only instance primary and destination node
10761 # Sad but true, for now we have to lock all nodes, as we don't know where
10762 # the previous export might be, and in this LU we search for it and
10763 # remove it from its current node. In the future we could fix this by:
10764 # - making a tasklet to search (share-lock all), then create the
10765 # new one, then one to remove, after
10766 # - removing the removal operation altogether
10767 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10769 # Allocations should be stopped while this LU runs with node locks, but
10770 # it doesn't have to be exclusive
10771 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
10772 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10774 def DeclareLocks(self, level):
10775 """Last minute lock declaration."""
10776 # All nodes are locked anyway, so nothing to do here.
10778 def BuildHooksEnv(self):
10779 """Build hooks env.
10781 This will run on the master, primary node and target node.
10785 "EXPORT_MODE": self.op.mode,
10786 "EXPORT_NODE": self.op.target_node,
10787 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10788 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10789 # TODO: Generic function for boolean env variables
10790 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10793 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10797 def BuildHooksNodes(self):
10798 """Build hooks nodes.
10801 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10803 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10804 nl.append(self.op.target_node)
10808 def CheckPrereq(self):
10809 """Check prerequisites.
10811 This checks that the instance and node names are valid.
10814 instance_name = self.op.instance_name
10816 self.instance = self.cfg.GetInstanceInfo(instance_name)
10817 assert self.instance is not None, \
10818 "Cannot retrieve locked instance %s" % self.op.instance_name
10819 _CheckNodeOnline(self, self.instance.primary_node)
10821 if (self.op.remove_instance and
10822 self.instance.admin_state == constants.ADMINST_UP and
10823 not self.op.shutdown):
10824 raise errors.OpPrereqError("Can not remove instance without shutting it"
10825 " down before", errors.ECODE_STATE)
10827 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10828 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10829 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10830 assert self.dst_node is not None
10832 _CheckNodeOnline(self, self.dst_node.name)
10833 _CheckNodeNotDrained(self, self.dst_node.name)
10836 self.dest_disk_info = None
10837 self.dest_x509_ca = None
10839 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10840 self.dst_node = None
10842 if len(self.op.target_node) != len(self.instance.disks):
10843 raise errors.OpPrereqError(("Received destination information for %s"
10844 " disks, but instance %s has %s disks") %
10845 (len(self.op.target_node), instance_name,
10846 len(self.instance.disks)),
10847 errors.ECODE_INVAL)
10849 cds = _GetClusterDomainSecret()
10851 # Check X509 key name
10853 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10854 except (TypeError, ValueError), err:
10855 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
10856 errors.ECODE_INVAL)
10858 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10859 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10860 errors.ECODE_INVAL)
10862 # Load and verify CA
10864 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10865 except OpenSSL.crypto.Error, err:
10866 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10867 (err, ), errors.ECODE_INVAL)
10869 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10870 if errcode is not None:
10871 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10872 (msg, ), errors.ECODE_INVAL)
10874 self.dest_x509_ca = cert
10876 # Verify target information
10878 for idx, disk_data in enumerate(self.op.target_node):
10880 (host, port, magic) = \
10881 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10882 except errors.GenericError, err:
10883 raise errors.OpPrereqError("Target info for disk %s: %s" %
10884 (idx, err), errors.ECODE_INVAL)
10886 disk_info.append((host, port, magic))
10888 assert len(disk_info) == len(self.op.target_node)
10889 self.dest_disk_info = disk_info
10892 raise errors.ProgrammerError("Unhandled export mode %r" %
10895 # instance disk type verification
10896 # TODO: Implement export support for file-based disks
10897 for disk in self.instance.disks:
10898 if disk.dev_type == constants.LD_FILE:
10899 raise errors.OpPrereqError("Export not supported for instances with"
10900 " file-based disks", errors.ECODE_INVAL)
10902 def _CleanupExports(self, feedback_fn):
10903 """Removes exports of current instance from all other nodes.
10905 If an instance in a cluster with nodes A..D was exported to node C, its
10906 exports will be removed from the nodes A, B and D.
10909 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10911 nodelist = self.cfg.GetNodeList()
10912 nodelist.remove(self.dst_node.name)
10914 # on one-node clusters nodelist will be empty after the removal
10915 # if we proceeded, the backup would be removed because OpBackupQuery
10916 # substitutes an empty list with the full cluster node list.
10917 iname = self.instance.name
10918 if nodelist:
10919 feedback_fn("Removing old exports for instance %s" % iname)
10920 exportlist = self.rpc.call_export_list(nodelist)
10921 for node in exportlist:
10922 if exportlist[node].fail_msg:
10923 continue
10924 if iname in exportlist[node].payload:
10925 msg = self.rpc.call_export_remove(node, iname).fail_msg
10926 if msg:
10927 self.LogWarning("Could not remove older export for instance %s"
10928 " on node %s: %s", iname, node, msg)
10930 def Exec(self, feedback_fn):
10931 """Export an instance to an image in the cluster.
10934 assert self.op.mode in constants.EXPORT_MODES
10936 instance = self.instance
10937 src_node = instance.primary_node
10939 if self.op.shutdown:
10940 # shutdown the instance, but not the disks
10941 feedback_fn("Shutting down instance %s" % instance.name)
10942 result = self.rpc.call_instance_shutdown(src_node, instance,
10943 self.op.shutdown_timeout,
10945 # TODO: Maybe ignore failures if ignore_remove_failures is set
10946 result.Raise("Could not shutdown instance %s on"
10947 " node %s" % (instance.name, src_node))
10949 # set the disks ID correctly since call_instance_start needs the
10950 # correct drbd minor to create the symlinks
10951 for disk in instance.disks:
10952 self.cfg.SetDiskID(disk, src_node)
10954 activate_disks = (instance.admin_state != constants.ADMINST_UP)
10956 if activate_disks:
10957 # Activate the instance disks if we're exporting a stopped instance
10958 feedback_fn("Activating disks for %s" % instance.name)
10959 _StartInstanceDisks(self, instance, None)
10962 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10965 helper.CreateSnapshots()
10967 if (self.op.shutdown and
10968 instance.admin_state == constants.ADMINST_UP and
10969 not self.op.remove_instance):
10970 assert not activate_disks
10971 feedback_fn("Starting instance %s" % instance.name)
10972 result = self.rpc.call_instance_start(src_node,
10973 (instance, None, None), False,
10975 msg = result.fail_msg
10976 if msg:
10977 feedback_fn("Failed to start instance: %s" % msg)
10978 _ShutdownInstanceDisks(self, instance)
10979 raise errors.OpExecError("Could not start instance: %s" % msg)
10981 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10982 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10983 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10984 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10985 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10987 (key_name, _, _) = self.x509_key_name
10989 dest_ca_pem = \
10990 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10991 self.dest_x509_ca)
10993 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10994 key_name, dest_ca_pem,
10995 timeouts)
10999 # Check for backwards compatibility
11000 assert len(dresults) == len(instance.disks)
11001 assert compat.all(isinstance(i, bool) for i in dresults), \
11002 "Not all results are boolean: %r" % dresults
11006 feedback_fn("Deactivating disks for %s" % instance.name)
11007 _ShutdownInstanceDisks(self, instance)
11009 if not (compat.all(dresults) and fin_resu):
11010 failures = []
11011 if not fin_resu:
11012 failures.append("export finalization")
11013 if not compat.all(dresults):
11014 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11015 if not dsk)
11016 failures.append("disk export: disk(s) %s" % fdsk)
11018 raise errors.OpExecError("Export failed, errors in %s" %
11019 utils.CommaJoin(failures))
11021 # At this point, the export was successful, we can cleanup/finish
11023 # Remove instance if requested
11024 if self.op.remove_instance:
11025 feedback_fn("Removing instance %s" % instance.name)
11026 _RemoveInstance(self, feedback_fn, instance,
11027 self.op.ignore_remove_failures)
11029 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11030 self._CleanupExports(feedback_fn)
11032 return fin_resu, dresults
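  # Illustrative sketch (editor's addition): the returned pair is the export
  # finalization status plus one boolean per disk, e.g. a two-disk instance
  # yields something like (True, [True, True]); any False value would already
  # have triggered the OpExecError above, so a normal return implies success.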
11035 class LUBackupRemove(NoHooksLU):
11036 """Remove exports related to the named instance.
11041 def ExpandNames(self):
11042 self.needed_locks = {
11043 # We need all nodes to be locked in order for RemoveExport to work, but
11044 # we don't need to lock the instance itself, as nothing will happen to it
11045 # (and we can remove exports also for a removed instance)
11046 locking.LEVEL_NODE: locking.ALL_SET,
11048 # Removing backups is quick, so blocking allocations is justified
11049 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11052 # Allocations should be stopped while this LU runs with node locks, but it
11053 # doesn't have to be exclusive
11054 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
11056 def Exec(self, feedback_fn):
11057 """Remove any export.
11060 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11061 # If the instance was not found we'll try with the name that was passed in.
11062 # This will only work if it was an FQDN, though.
11064 if not instance_name:
11066 instance_name = self.op.instance_name
11068 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11069 exportlist = self.rpc.call_export_list(locked_nodes)
11071 for node in exportlist:
11072 msg = exportlist[node].fail_msg
11073 if msg:
11074 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11075 continue
11076 if instance_name in exportlist[node].payload:
11078 result = self.rpc.call_export_remove(node, instance_name)
11079 msg = result.fail_msg
11080 if msg:
11081 logging.error("Could not remove export for instance %s"
11082 " on node %s: %s", instance_name, node, msg)
11084 if fqdn_warn and not found:
11085 feedback_fn("Export not found. If trying to remove an export belonging"
11086 " to a deleted instance please use its Fully Qualified"
11090 class LUGroupAdd(LogicalUnit):
11091 """Logical unit for creating node groups.
11094 HPATH = "group-add"
11095 HTYPE = constants.HTYPE_GROUP
11098 def ExpandNames(self):
11099 # We need the new group's UUID here so that we can create and acquire the
11100 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11101 # that it should not check whether the UUID exists in the configuration.
11102 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11103 self.needed_locks = {}
11104 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11106 def CheckPrereq(self):
11107 """Check prerequisites.
11109 This checks that the given group name is not an existing node group
11113 try:
11114 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11115 except errors.OpPrereqError:
11116 pass
11117 else:
11118 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11119 " node group (UUID: %s)" %
11120 (self.op.group_name, existing_uuid),
11121 errors.ECODE_EXISTS)
11123 if self.op.ndparams:
11124 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11126 if self.op.hv_state:
11127 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
11128 else:
11129 self.new_hv_state = None
11131 if self.op.disk_state:
11132 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
11133 else:
11134 self.new_disk_state = None
11136 if self.op.diskparams:
11137 for templ in constants.DISK_TEMPLATES:
11138 if templ in self.op.diskparams:
11139 utils.ForceDictType(self.op.diskparams[templ],
11140 constants.DISK_DT_TYPES)
11141 self.new_diskparams = self.op.diskparams
11142 try:
11143 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
11144 except errors.OpPrereqError, err:
11145 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
11146 errors.ECODE_INVAL)
11147 else:
11148 self.new_diskparams = {}
11150 if self.op.ipolicy:
11151 cluster = self.cfg.GetClusterInfo()
11152 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
11153 try:
11154 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
11155 except errors.ConfigurationError, err:
11156 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
11157 errors.ECODE_INVAL)
11159 def BuildHooksEnv(self):
11160 """Build hooks env.
11164 "GROUP_NAME": self.op.group_name,
11167 def BuildHooksNodes(self):
11168 """Build hooks nodes.
11171 mn = self.cfg.GetMasterNode()
11172 return ([mn], [mn])
11174 def Exec(self, feedback_fn):
11175 """Add the node group to the cluster.
11178 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11179 uuid=self.group_uuid,
11180 alloc_policy=self.op.alloc_policy,
11181 ndparams=self.op.ndparams,
11182 diskparams=self.new_diskparams,
11183 ipolicy=self.op.ipolicy,
11184 hv_state_static=self.new_hv_state,
11185 disk_state_static=self.new_disk_state)
11187 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11188 del self.remove_locks[locking.LEVEL_NODEGROUP]
11191 class LUGroupAssignNodes(NoHooksLU):
11192 """Logical unit for assigning nodes to groups.
11197 def ExpandNames(self):
11198 # These raise errors.OpPrereqError on their own:
11199 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11200 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11202 # We want to lock all the affected nodes and groups. We have readily
11203 # available the list of nodes, and the *destination* group. To gather the
11204 # list of "source" groups, we need to fetch node information later on.
11205 self.needed_locks = {
11206 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11207 locking.LEVEL_NODE: self.op.nodes,
11210 def DeclareLocks(self, level):
11211 if level == locking.LEVEL_NODEGROUP:
11212 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11214 # Try to get all affected nodes' groups without having the group or node
11215 # lock yet. Needs verification later in the code flow.
11216 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11218 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11220 def CheckPrereq(self):
11221 """Check prerequisites.
11224 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11225 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11226 frozenset(self.op.nodes))
11228 expected_locks = (set([self.group_uuid]) |
11229 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11230 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11231 if actual_locks != expected_locks:
11232 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11233 " current groups are '%s', used to be '%s'" %
11234 (utils.CommaJoin(expected_locks),
11235 utils.CommaJoin(actual_locks)))
11237 self.node_data = self.cfg.GetAllNodesInfo()
11238 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11239 instance_data = self.cfg.GetAllInstancesInfo()
11241 if self.group is None:
11242 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11243 (self.op.group_name, self.group_uuid))
11245 (new_splits, previous_splits) = \
11246 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11247 for node in self.op.nodes],
11248 self.node_data, instance_data)
11250 if new_splits:
11251 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11253 if not self.op.force:
11254 raise errors.OpExecError("The following instances get split by this"
11255 " change and --force was not given: %s" %
11256 fmt_new_splits)
11257 else:
11258 self.LogWarning("This operation will split the following instances: %s",
11259 fmt_new_splits)
11261 if previous_splits:
11262 self.LogWarning("In addition, these already-split instances continue"
11263 " to be split across groups: %s",
11264 utils.CommaJoin(utils.NiceSort(previous_splits)))
11266 def Exec(self, feedback_fn):
11267 """Assign nodes to a new group.
11270 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
11272 self.cfg.AssignGroupNodes(mods)
11274 @staticmethod
11275 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11276 """Check for split instances after a node assignment.
11278 This method considers a series of node assignments as an atomic operation,
11279 and returns information about split instances after applying the set of
11282 In particular, it returns information about newly split instances, and
11283 instances that were already split, and remain so after the change.
11285 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11286 considered.
11288 @type changes: list of (node_name, new_group_uuid) pairs.
11289 @param changes: list of node assignments to consider.
11290 @param node_data: a dict with data for all nodes
11291 @param instance_data: a dict with all instances to consider
11292 @rtype: a two-tuple
11293 @return: a list of instances that were previously okay and become split as a
11294 consequence of this change, and a list of instances that were previously
11295 split and that this change does not fix.
11298 changed_nodes = dict((node, group) for node, group in changes
11299 if node_data[node].group != group)
11301 all_split_instances = set()
11302 previously_split_instances = set()
11304 def InstanceNodes(instance):
11305 return [instance.primary_node] + list(instance.secondary_nodes)
11307 for inst in instance_data.values():
11308 if inst.disk_template not in constants.DTS_INT_MIRROR:
11309 continue
11311 instance_nodes = InstanceNodes(inst)
11313 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11314 previously_split_instances.add(inst.name)
11316 if len(set(changed_nodes.get(node, node_data[node].group)
11317 for node in instance_nodes)) > 1:
11318 all_split_instances.add(inst.name)
11320 return (list(all_split_instances - previously_split_instances),
11321 list(previously_split_instances & all_split_instances))
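  # Illustrative sketch (editor's addition): with a hypothetical DRBD instance
  # "inst1" mirrored on node1/node2, both currently in group "uuid-a", moving
  # node2 to "uuid-b" makes it newly split:
  #
  #   changes = [("node2", "uuid-b")]
  #   # changed_nodes == {"node2": "uuid-b"}; inst1's nodes now span
  #   # {"uuid-a", "uuid-b"}, so it lands in the first returned list.
  #   # An instance that already spanned two groups before the change and
  #   # still does afterwards lands in the second list instead.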
11324 class _GroupQuery(_QueryBase):
11325 FIELDS = query.GROUP_FIELDS
11327 def ExpandNames(self, lu):
11328 lu.needed_locks = {}
11330 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11331 self._cluster = lu.cfg.GetClusterInfo()
11332 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11334 if not self.names:
11335 self.wanted = [name_to_uuid[name]
11336 for name in utils.NiceSort(name_to_uuid.keys())]
11337 else:
11338 # Accept the passed names as either group names or UUIDs.
11339 missing = []
11340 self.wanted = []
11341 all_uuid = frozenset(self._all_groups.keys())
11343 for name in self.names:
11344 if name in all_uuid:
11345 self.wanted.append(name)
11346 elif name in name_to_uuid:
11347 self.wanted.append(name_to_uuid[name])
11348 else:
11349 missing.append(name)
11351 if missing:
11352 raise errors.OpPrereqError("Some groups do not exist: %s" %
11353 utils.CommaJoin(missing),
11354 errors.ECODE_NOENT)
11356 def DeclareLocks(self, lu, level):
11357 pass
11359 def _GetQueryData(self, lu):
11360 """Computes the list of node groups and their attributes.
11363 do_nodes = query.GQ_NODE in self.requested_data
11364 do_instances = query.GQ_INST in self.requested_data
11366 group_to_nodes = None
11367 group_to_instances = None
11369 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11370 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11371 # latter GetAllInstancesInfo() is not enough, for we have to go through
11372 # instance->node. Hence, we will need to process nodes even if we only need
11373 # instance information.
11374 if do_nodes or do_instances:
11375 all_nodes = lu.cfg.GetAllNodesInfo()
11376 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11377 node_to_group = {}
11379 for node in all_nodes.values():
11380 if node.group in group_to_nodes:
11381 group_to_nodes[node.group].append(node.name)
11382 node_to_group[node.name] = node.group
11384 if do_instances:
11385 all_instances = lu.cfg.GetAllInstancesInfo()
11386 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11388 for instance in all_instances.values():
11389 node = instance.primary_node
11390 if node in node_to_group:
11391 group_to_instances[node_to_group[node]].append(instance.name)
11393 if not do_nodes:
11394 # Do not pass on node information if it was not requested.
11395 group_to_nodes = None
11397 return query.GroupQueryData(self._cluster,
11398 [self._all_groups[uuid]
11399 for uuid in self.wanted],
11400 group_to_nodes, group_to_instances,
11401 query.GQ_DISKPARAMS in self.requested_data)
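# Illustrative sketch (editor's addition): the mappings built above, for a
# hypothetical two-group cluster:
#
#   group_to_nodes     = {"uuid-a": ["node1", "node2"], "uuid-b": ["node3"]}
#   node_to_group      = {"node1": "uuid-a", "node2": "uuid-a",
#                         "node3": "uuid-b"}
#   group_to_instances = {"uuid-a": ["inst1"], "uuid-b": []}
#
# Instances are attributed to the group of their primary node, which is why
# nodes must be processed even when only GQ_INST was requested.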
11404 class LUGroupQuery(NoHooksLU):
11405 """Logical unit for querying node groups.
11410 def CheckArguments(self):
11411 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11412 self.op.output_fields, False)
11414 def ExpandNames(self):
11415 self.gq.ExpandNames(self)
11417 def DeclareLocks(self, level):
11418 self.gq.DeclareLocks(self, level)
11420 def Exec(self, feedback_fn):
11421 return self.gq.OldStyleQuery(self)
11424 class LUGroupSetParams(LogicalUnit):
11425 """Modifies the parameters of a node group.
11428 HPATH = "group-modify"
11429 HTYPE = constants.HTYPE_GROUP
11432 def CheckArguments(self):
11435 self.op.diskparams,
11436 self.op.alloc_policy,
11438 self.op.disk_state,
11442 if all_changes.count(None) == len(all_changes):
11443 raise errors.OpPrereqError("Please pass at least one modification",
11444 errors.ECODE_INVAL)
11446 def ExpandNames(self):
11447 # This raises errors.OpPrereqError on its own:
11448 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11450 self.needed_locks = {
11451 locking.LEVEL_INSTANCE: [],
11452 locking.LEVEL_NODEGROUP: [self.group_uuid],
11455 self.share_locks[locking.LEVEL_INSTANCE] = 1
11457 def DeclareLocks(self, level):
11458 if level == locking.LEVEL_INSTANCE:
11459 assert not self.needed_locks[locking.LEVEL_INSTANCE]
11461 # Lock instances optimistically, needs verification once group lock has
11463 self.needed_locks[locking.LEVEL_INSTANCE] = \
11464 self.cfg.GetNodeGroupInstances(self.group_uuid)
11467 def _UpdateAndVerifyDiskParams(old, new):
11468 """Updates and verifies disk parameters.
11471 new_params = _GetUpdatedParams(old, new)
11472 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
11475 def CheckPrereq(self):
11476 """Check prerequisites.
11479 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11481 # Check if locked instances are still correct
11482 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
11484 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11485 cluster = self.cfg.GetClusterInfo()
11487 if self.group is None:
11488 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11489 (self.op.group_name, self.group_uuid))
11491 if self.op.ndparams:
11492 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11493 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
11494 self.new_ndparams = new_ndparams
11496 if self.op.diskparams:
11497 diskparams = self.group.diskparams
11498 uavdp = self._UpdateAndVerifyDiskParams
11499 # For each disk template, update and verify the values of its subdict
11500 new_diskparams = dict((dt,
11501 uavdp(diskparams.get(dt, {}),
11502 self.op.diskparams[dt]))
11503 for dt in constants.DISK_TEMPLATES
11504 if dt in self.op.diskparams)
11505 # Now that all subdicts of diskparams are ready, merge the actual
11506 # dict with all the updated subdicts
11507 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
11508 try:
11509 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
11510 except errors.OpPrereqError, err:
11511 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
11512 errors.ECODE_INVAL)
11514 if self.op.hv_state:
11515 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
11516 self.group.hv_state_static)
11518 if self.op.disk_state:
11519 self.new_disk_state = \
11520 _MergeAndVerifyDiskState(self.op.disk_state,
11521 self.group.disk_state_static)
11523 if self.op.ipolicy:
11524 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
11528 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
11529 inst_filter = lambda inst: inst.name in owned_instances
11530 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
11531 gmi = ganeti.masterd.instance
11532 violations = \
11533 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
11534 self.group),
11535 new_ipolicy, instances, self.cfg)
11537 if violations:
11538 self.LogWarning("After the ipolicy change the following instances"
11539 " violate them: %s",
11540 utils.CommaJoin(violations))
11542 def BuildHooksEnv(self):
11543 """Build hooks env.
11547 "GROUP_NAME": self.op.group_name,
11548 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11551 def BuildHooksNodes(self):
11552 """Build hooks nodes.
11555 mn = self.cfg.GetMasterNode()
11556 return ([mn], [mn])
11558 def Exec(self, feedback_fn):
11559 """Modifies the node group.
11564 if self.op.ndparams:
11565 self.group.ndparams = self.new_ndparams
11566 result.append(("ndparams", str(self.group.ndparams)))
11568 if self.op.diskparams:
11569 self.group.diskparams = self.new_diskparams
11570 result.append(("diskparams", str(self.group.diskparams)))
11572 if self.op.alloc_policy:
11573 self.group.alloc_policy = self.op.alloc_policy
11575 if self.op.hv_state:
11576 self.group.hv_state_static = self.new_hv_state
11578 if self.op.disk_state:
11579 self.group.disk_state_static = self.new_disk_state
11581 if self.op.ipolicy:
11582 self.group.ipolicy = self.new_ipolicy
11584 self.cfg.Update(self.group, feedback_fn)
11588 class LUGroupRemove(LogicalUnit):
11589 HPATH = "group-remove"
11590 HTYPE = constants.HTYPE_GROUP
11593 def ExpandNames(self):
11594 # This raises errors.OpPrereqError on its own:
11595 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11596 self.needed_locks = {
11597 locking.LEVEL_NODEGROUP: [self.group_uuid],
11600 def CheckPrereq(self):
11601 """Check prerequisites.
11603 This checks that the given group name exists as a node group, that it is
11604 empty (i.e., contains no nodes), and that it is not the last group of the
11605 cluster.
11608 # Verify that the group is empty.
11609 group_nodes = [node.name
11610 for node in self.cfg.GetAllNodesInfo().values()
11611 if node.group == self.group_uuid]
11613 if group_nodes:
11614 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11615 " nodes: %s" %
11616 (self.op.group_name,
11617 utils.CommaJoin(utils.NiceSort(group_nodes))),
11618 errors.ECODE_STATE)
11620 # Verify the cluster would not be left group-less.
11621 if len(self.cfg.GetNodeGroupList()) == 1:
11622 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
11623 " removed" % self.op.group_name,
11624 errors.ECODE_STATE)
11626 def BuildHooksEnv(self):
11627 """Build hooks env.
11631 "GROUP_NAME": self.op.group_name,
11634 def BuildHooksNodes(self):
11635 """Build hooks nodes.
11638 mn = self.cfg.GetMasterNode()
11639 return ([mn], [mn])
11641 def Exec(self, feedback_fn):
11642 """Remove the node group.
11645 try:
11646 self.cfg.RemoveNodeGroup(self.group_uuid)
11647 except errors.ConfigurationError:
11648 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11649 (self.op.group_name, self.group_uuid))
11651 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11654 class LUGroupRename(LogicalUnit):
11655 HPATH = "group-rename"
11656 HTYPE = constants.HTYPE_GROUP
11659 def ExpandNames(self):
11660 # This raises errors.OpPrereqError on its own:
11661 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11663 self.needed_locks = {
11664 locking.LEVEL_NODEGROUP: [self.group_uuid],
11667 def CheckPrereq(self):
11668 """Check prerequisites.
11670 Ensures requested new name is not yet used.
11673 try:
11674 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11675 except errors.OpPrereqError:
11676 pass
11677 else:
11678 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11679 " node group (UUID: %s)" %
11680 (self.op.new_name, new_name_uuid),
11681 errors.ECODE_EXISTS)
11683 def BuildHooksEnv(self):
11684 """Build hooks env.
11688 "OLD_NAME": self.op.group_name,
11689 "NEW_NAME": self.op.new_name,
11692 def BuildHooksNodes(self):
11693 """Build hooks nodes.
11696 mn = self.cfg.GetMasterNode()
11698 all_nodes = self.cfg.GetAllNodesInfo()
11699 all_nodes.pop(mn, None)
11701 run_nodes = [mn]
11702 run_nodes.extend(node.name for node in all_nodes.values()
11703 if node.group == self.group_uuid)
11705 return (run_nodes, run_nodes)
11707 def Exec(self, feedback_fn):
11708 """Rename the node group.
11711 group = self.cfg.GetNodeGroup(self.group_uuid)
11713 if group is None:
11714 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11715 (self.op.group_name, self.group_uuid))
11717 group.name = self.op.new_name
11718 self.cfg.Update(group, feedback_fn)
11720 return self.op.new_name
11723 class LUGroupEvacuate(LogicalUnit):
11724 HPATH = "group-evacuate"
11725 HTYPE = constants.HTYPE_GROUP
11728 def ExpandNames(self):
11729 # This raises errors.OpPrereqError on its own:
11730 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11732 if self.op.target_groups:
11733 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11734 self.op.target_groups)
11735 else:
11736 self.req_target_uuids = []
11738 if self.group_uuid in self.req_target_uuids:
11739 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
11740 " as a target group (targets are %s)" %
11741 (self.group_uuid,
11742 utils.CommaJoin(self.req_target_uuids)),
11743 errors.ECODE_INVAL)
11745 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11747 self.share_locks = _ShareAll()
11748 self.needed_locks = {
11749 locking.LEVEL_INSTANCE: [],
11750 locking.LEVEL_NODEGROUP: [],
11751 locking.LEVEL_NODE: [],
11754 def DeclareLocks(self, level):
11755 if level == locking.LEVEL_INSTANCE:
11756 assert not self.needed_locks[locking.LEVEL_INSTANCE]
11758 # Lock instances optimistically, needs verification once node and group
11759 # locks have been acquired
11760 self.needed_locks[locking.LEVEL_INSTANCE] = \
11761 self.cfg.GetNodeGroupInstances(self.group_uuid)
11763 elif level == locking.LEVEL_NODEGROUP:
11764 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11766 if self.req_target_uuids:
11767 lock_groups = set([self.group_uuid] + self.req_target_uuids)
11769 # Lock all groups used by instances optimistically; this requires going
11770 # via the node before it's locked, requiring verification later on
11771 lock_groups.update(group_uuid
11772 for instance_name in
11773 self.owned_locks(locking.LEVEL_INSTANCE)
11775 self.cfg.GetInstanceNodeGroups(instance_name))
11776 else:
11777 # No target groups, need to lock all of them
11778 lock_groups = locking.ALL_SET
11780 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11782 elif level == locking.LEVEL_NODE:
11783 # This will only lock the nodes in the group to be evacuated which
11784 # contain actual instances
11785 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11786 self._LockInstancesNodes()
11788 # Lock all nodes in group to be evacuated and target groups
11789 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11790 assert self.group_uuid in owned_groups
11791 member_nodes = [node_name
11792 for group in owned_groups
11793 for node_name in self.cfg.GetNodeGroup(group).members]
11794 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11796 def CheckPrereq(self):
11797 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11798 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11799 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11801 assert owned_groups.issuperset(self.req_target_uuids)
11802 assert self.group_uuid in owned_groups
11804 # Check if locked instances are still correct
11805 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
11807 # Get instance information
11808 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
11810 # Check if node groups for locked instances are still correct
11811 _CheckInstancesNodeGroups(self.cfg, self.instances,
11812 owned_groups, owned_nodes, self.group_uuid)
11814 if self.req_target_uuids:
11815 # User requested specific target groups
11816 self.target_uuids = self.req_target_uuids
11817 else:
11818 # All groups except the one to be evacuated are potential targets
11819 self.target_uuids = [group_uuid for group_uuid in owned_groups
11820 if group_uuid != self.group_uuid]
11822 if not self.target_uuids:
11823 raise errors.OpPrereqError("There are no possible target groups",
11824 errors.ECODE_INVAL)
11826 def BuildHooksEnv(self):
11827 """Build hooks env.
11831 "GROUP_NAME": self.op.group_name,
11832 "TARGET_GROUPS": " ".join(self.target_uuids),
11835 def BuildHooksNodes(self):
11836 """Build hooks nodes.
11839 mn = self.cfg.GetMasterNode()
11841 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11843 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
11845 return (run_nodes, run_nodes)
11847 def Exec(self, feedback_fn):
11848 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11850 assert self.group_uuid not in self.target_uuids
11852 req = iallocator.IAReqGroupChange(instances=instances,
11853 target_groups=self.target_uuids)
11854 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11856 ial.Run(self.op.iallocator)
11858 if not ial.success:
11859 raise errors.OpPrereqError("Can't compute group evacuation using"
11860 " iallocator '%s': %s" %
11861 (self.op.iallocator, ial.info),
11862 errors.ECODE_NORES)
11864 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11866 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
11867 len(jobs), self.op.group_name)
11869 return ResultWithJobs(jobs)
11872 class LURestrictedCommand(NoHooksLU):
11873 """Logical unit for executing restricted commands.
11878 def ExpandNames(self):
11880 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11882 self.needed_locks = {
11883 locking.LEVEL_NODE: self.op.nodes,
11885 self.share_locks = {
11886 locking.LEVEL_NODE: not self.op.use_locking,
11889 def CheckPrereq(self):
11890 """Check prerequisites.
11894 def Exec(self, feedback_fn):
11895 """Execute restricted command and return output.
11898 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11900 # Check if correct locks are held
11901 assert set(self.op.nodes).issubset(owned_nodes)
11903 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
11905 result = []
11907 for node_name in self.op.nodes:
11908 nres = rpcres[node_name]
11909 if nres.fail_msg:
11910 msg = ("Command '%s' on node '%s' failed: %s" %
11911 (self.op.command, node_name, nres.fail_msg))
11912 result.append((False, msg))
11913 else:
11914 result.append((True, nres.payload))
11916 return result
11919 #: Query type implementations
11920 _QUERY_IMPL = {
11921 constants.QR_CLUSTER: _ClusterQuery,
11922 constants.QR_INSTANCE: _InstanceQuery,
11923 constants.QR_NODE: _NodeQuery,
11924 constants.QR_GROUP: _GroupQuery,
11925 constants.QR_NETWORK: _NetworkQuery,
11926 constants.QR_OS: _OsQuery,
11927 constants.QR_EXTSTORAGE: _ExtStorageQuery,
11928 constants.QR_EXPORT: _ExportQuery,
11929 }
11931 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
11934 def _GetQueryImplementation(name):
11935 """Returns the implemtnation for a query type.
11937 @param name: Query type, must be one of L{constants.QR_VIA_OP}
11940 try:
11941 return _QUERY_IMPL[name]
11942 except KeyError:
11943 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11944 errors.ECODE_INVAL)
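# Illustrative sketch (editor's addition): resolving a query resource to its
# implementation and running an old-style query; "lu" stands for any logical
# unit providing cfg/rpc access, and the field names are hypothetical.
#
#   impl = _GetQueryImplementation(constants.QR_GROUP)      # -> _GroupQuery
#   gq = impl(qlang.MakeSimpleFilter("name", ["group1"]), ["name"], False)
#   gq.ExpandNames(lu)
#   data = gq.OldStyleQuery(lu)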
11947 def _CheckForConflictingIp(lu, ip, node):
11948 """In case of conflicting IP address raise error.
11951 @param ip: IP address
11953 @param node: node name
11956 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
11957 if conf_net is not None:
11958 raise errors.OpPrereqError(("The requested IP address (%s) belongs to"
11959 " network %s, but the target NIC does not." %
11960 (ip, conf_net)),
11961 errors.ECODE_STATE)
11963 return (None, None)
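# Illustrative sketch (editor's addition): _CheckForConflictingIp is meant for
# the "assign this NIC an unmanaged IP" path.  Roughly:
#
#   (conf_net, _) = lu.cfg.CheckIPInNodeGroup("192.0.2.10", "node1")
#   # conf_net is None      -> the address is not part of any connected
#   #                          network, nothing to object to
#   # conf_net is not None  -> OpPrereqError: the NIC should be attached to
#   #                          that network instead of using a raw IP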